diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-08-19 03:34:07 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-08-19 03:34:07 +0200 |
commit | 2879a927bb7a3cf91ae3906a5e59215f9c17dd75 (patch) | |
tree | 870bdd1bd530a3d5d2abd10539700446b2878188 /fs | |
parent | 7e7b43892b87b6be259479ef4de14029dcb4012f (diff) | |
parent | 20211e4d344729f4d4c93da37a590fc1c3a1fd9b (diff) |
Merge branch 'x86/oprofile' into oprofile
Diffstat (limited to 'fs')
320 files changed, 9742 insertions, 7975 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 97e3bdedb1e..d3873583360 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -1383,6 +1383,19 @@ config MINIX_FS partition (the one containing the directory /) cannot be compiled as a module. +config OMFS_FS + tristate "SonicBlue Optimized MPEG File System support" + depends on BLOCK + select CRC_ITU_T + help + This is the proprietary file system used by the Rio Karma music + player and ReplayTV DVR. Despite the name, this filesystem is not + more efficient than a standard FS for MPEG files, in fact likely + the opposite is true. Say Y if you have either of these devices + and wish to mount its disk. + + To compile this file system support as a module, choose M here: the + module will be called omfs. If unsure, say N. config HPFS_FS tristate "OS/2 HPFS file system support" diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 3263084eef9..4a551af6f3f 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -30,7 +30,7 @@ config COMPAT_BINFMT_ELF config BINFMT_ELF_FDPIC bool "Kernel support for FDPIC ELF binaries" default y - depends on (FRV || BLACKFIN) + depends on (FRV || BLACKFIN || (SUPERH32 && !MMU)) help ELF FDPIC binaries are based on ELF, but allow the individual load segments of a binary to be located in memory independently of each diff --git a/fs/Makefile b/fs/Makefile index 3b2178b4bb6..a1482a5eff1 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -111,6 +111,7 @@ obj-$(CONFIG_ADFS_FS) += adfs/ obj-$(CONFIG_FUSE_FS) += fuse/ obj-$(CONFIG_UDF_FS) += udf/ obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/ +obj-$(CONFIG_OMFS_FS) += omfs/ obj-$(CONFIG_JFS_FS) += jfs/ obj-$(CONFIG_XFS_FS) += xfs/ obj-$(CONFIG_9P_FS) += 9p/ diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 9e421eeb672..26f3b43726b 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -249,7 +249,7 @@ static void adfs_destroy_inode(struct inode *inode) kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct adfs_inode_info *ei = (struct adfs_inode_info *) foo; diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 223b1917093..e9ec915f755 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -2,6 +2,7 @@ #include <linux/fs.h> #include <linux/buffer_head.h> #include <linux/amigaffs.h> +#include <linux/mutex.h> /* AmigaOS allows file names with up to 30 characters length. * Names longer than that will be silently truncated. If you @@ -98,7 +99,7 @@ struct affs_sb_info { gid_t s_gid; /* gid to override */ umode_t s_mode; /* mode to override */ struct buffer_head *s_root_bh; /* Cached root block. */ - struct semaphore s_bmlock; /* Protects bitmap access. */ + struct mutex s_bmlock; /* Protects bitmap access. */ struct affs_bm_info *s_bitmap; /* Bitmap infos. */ u32 s_bmap_count; /* # of bitmap blocks. */ u32 s_bmap_bits; /* # of bits in one bitmap blocks */ diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index c4a5ad09ddf..dc5ef14bdc1 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -45,14 +45,14 @@ affs_count_free_blocks(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - down(&AFFS_SB(sb)->s_bmlock); + mutex_lock(&AFFS_SB(sb)->s_bmlock); bm = AFFS_SB(sb)->s_bitmap; free = 0; for (i = AFFS_SB(sb)->s_bmap_count; i > 0; bm++, i--) free += bm->bm_free; - up(&AFFS_SB(sb)->s_bmlock); + mutex_unlock(&AFFS_SB(sb)->s_bmlock); return free; } @@ -76,7 +76,7 @@ affs_free_block(struct super_block *sb, u32 block) bit = blk % sbi->s_bmap_bits; bm = &sbi->s_bitmap[bmap]; - down(&sbi->s_bmlock); + mutex_lock(&sbi->s_bmlock); bh = sbi->s_bmap_bh; if (sbi->s_last_bmap != bmap) { @@ -105,19 +105,19 @@ affs_free_block(struct super_block *sb, u32 block) sb->s_dirt = 1; bm->bm_free++; - up(&sbi->s_bmlock); + mutex_unlock(&sbi->s_bmlock); return; err_free: affs_warning(sb,"affs_free_block","Trying to free block %u which is already free", block); - up(&sbi->s_bmlock); + mutex_unlock(&sbi->s_bmlock); return; err_bh_read: affs_error(sb,"affs_free_block","Cannot read bitmap block %u", bm->bm_key); sbi->s_bmap_bh = NULL; sbi->s_last_bmap = ~0; - up(&sbi->s_bmlock); + mutex_unlock(&sbi->s_bmlock); return; err_range: @@ -168,7 +168,7 @@ affs_alloc_block(struct inode *inode, u32 goal) bmap = blk / sbi->s_bmap_bits; bm = &sbi->s_bitmap[bmap]; - down(&sbi->s_bmlock); + mutex_lock(&sbi->s_bmlock); if (bm->bm_free) goto find_bmap_bit; @@ -249,7 +249,7 @@ find_bit: mark_buffer_dirty(bh); sb->s_dirt = 1; - up(&sbi->s_bmlock); + mutex_unlock(&sbi->s_bmlock); pr_debug("%d\n", blk); return blk; @@ -259,7 +259,7 @@ err_bh_read: sbi->s_bmap_bh = NULL; sbi->s_last_bmap = ~0; err_full: - up(&sbi->s_bmlock); + mutex_unlock(&sbi->s_bmlock); pr_debug("failed\n"); return 0; } diff --git a/fs/affs/file.c b/fs/affs/file.c index 6eac7bdeec9..1377b1240b6 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -46,8 +46,6 @@ const struct inode_operations affs_file_inode_operations = { static int affs_file_open(struct inode *inode, struct file *filp) { - if (atomic_read(&filp->f_count) != 1) - return 0; pr_debug("AFFS: open(%lu,%d)\n", inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); atomic_inc(&AFFS_I(inode)->i_opencnt); @@ -57,8 +55,6 @@ affs_file_open(struct inode *inode, struct file *filp) static int affs_file_release(struct inode *inode, struct file *filp) { - if (atomic_read(&filp->f_count) != 0) - return 0; pr_debug("AFFS: release(%lu, %d)\n", inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt)); diff --git a/fs/affs/super.c b/fs/affs/super.c index d214837d5e4..3a89094f93d 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -90,7 +90,7 @@ static void affs_destroy_inode(struct inode *inode) kmem_cache_free(affs_inode_cachep, AFFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct affs_inode_info *ei = (struct affs_inode_info *) foo; @@ -290,7 +290,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; - init_MUTEX(&sbi->s_bmlock); + mutex_init(&sbi->s_bmlock); if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, &blocksize,&sbi->s_prefix, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 7102824ba84..3cb6920ff30 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -469,8 +469,6 @@ extern bool afs_cm_incoming_call(struct afs_call *); extern const struct inode_operations afs_dir_inode_operations; extern const struct file_operations afs_dir_file_operations; -extern int afs_permission(struct inode *, int, struct nameidata *); - /* * file.c */ @@ -605,7 +603,7 @@ extern void afs_clear_permits(struct afs_vnode *); extern void afs_cache_permit(struct afs_vnode *, struct key *, long); extern void afs_zap_permits(struct rcu_head *); extern struct key *afs_request_key(struct afs_cell *); -extern int afs_permission(struct inode *, int, struct nameidata *); +extern int afs_permission(struct inode *, int); /* * server.c diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 2f5503902c3..78db4953a80 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -232,7 +232,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) } mntget(newmnt); - err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); + err = do_add_mount(newmnt, &nd->path, MNT_SHRINKABLE, &afs_vfsmounts); switch (err) { case 0: path_put(&nd->path); diff --git a/fs/afs/security.c b/fs/afs/security.c index 3bcbeceba1b..3ef50437003 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -284,7 +284,7 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key, * - AFS ACLs are attached to directories only, and a file is controlled by its * parent directory's ACL */ -int afs_permission(struct inode *inode, int mask, struct nameidata *nd) +int afs_permission(struct inode *inode, int mask) { struct afs_vnode *vnode = AFS_FS_I(inode); afs_access_t uninitialized_var(access); diff --git a/fs/afs/super.c b/fs/afs/super.c index 7e3faeef681..250d8c4d66e 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -27,7 +27,7 @@ #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */ -static void afs_i_init_once(struct kmem_cache *cachep, void *foo); +static void afs_i_init_once(void *foo); static int afs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt); @@ -449,7 +449,7 @@ static void afs_put_super(struct super_block *sb) /* * initialise an inode cache slab element prior to any use */ -static void afs_i_init_once(struct kmem_cache *cachep, void *_vnode) +static void afs_i_init_once(void *_vnode) { struct afs_vnode *vnode = _vnode; diff --git a/fs/afs/write.c b/fs/afs/write.c index 9a849ad3c48..065b4e10681 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -404,7 +404,7 @@ static int afs_write_back_from_locked_page(struct afs_writeback *wb, page = pages[loop]; if (page->index > wb->last) break; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) break; if (!PageDirty(page) || page_private(page) != (unsigned long) wb) { @@ -512,8 +512,8 @@ static void aio_fput_routine(struct work_struct *data) */ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) { - dprintk(KERN_DEBUG "aio_put(%p): f_count=%d\n", - req, atomic_read(&req->ki_filp->f_count)); + dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n", + req, atomic_long_read(&req->ki_filp->f_count)); assert_spin_locked(&ctx->ctx_lock); @@ -528,7 +528,7 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) /* Must be done under the lock to serialise against cancellation. * Call this aio_fput as it duplicates fput via the fput_work. */ - if (unlikely(atomic_dec_and_test(&req->ki_filp->f_count))) { + if (unlikely(atomic_long_dec_and_test(&req->ki_filp->f_count))) { get_ioctx(ctx); spin_lock(&fput_lock); list_add(&req->ki_list, &fput_head); diff --git a/fs/attr.c b/fs/attr.c index 966b73e25f8..26c71ba1eed 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -51,7 +51,7 @@ int inode_change_ok(struct inode *inode, struct iattr *attr) } /* Check for setting the inode time. */ - if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { + if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) { if (!is_owner_or_cap(inode)) goto error; } @@ -108,6 +108,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr) struct timespec now; unsigned int ia_valid = attr->ia_valid; + if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + return -EPERM; + } + now = current_fs_time(inode->i_sb); attr->ia_ctime = now; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index f1c2ea8342f..5f1538c03b1 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -243,8 +243,7 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer, return -EIO; } -static int bad_inode_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int bad_inode_permission(struct inode *inode, int mask) { return -EIO; } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index e8717de3bab..02c6e62b72f 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -289,7 +289,7 @@ befs_destroy_inode(struct inode *inode) kmem_cache_free(befs_inode_cachep, BEFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct befs_inode_info *bi = (struct befs_inode_info *) foo; diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 70f5d3a8eed..7109e451abf 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -16,8 +16,9 @@ struct bfs_sb_info { unsigned long si_freei; unsigned long si_lf_eblk; unsigned long si_lasti; - unsigned long * si_imap; - struct buffer_head * si_sbh; /* buffer header w/superblock */ + unsigned long *si_imap; + struct buffer_head *si_sbh; /* buffer header w/superblock */ + struct mutex bfs_lock; }; /* diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 034950cb3cb..87ee5ccee34 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -32,16 +32,17 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) struct inode *dir = f->f_path.dentry->d_inode; struct buffer_head *bh; struct bfs_dirent *de; + struct bfs_sb_info *info = BFS_SB(dir->i_sb); unsigned int offset; int block; - lock_kernel(); + mutex_lock(&info->bfs_lock); if (f->f_pos & (BFS_DIRENT_SIZE - 1)) { printf("Bad f_pos=%08lx for %s:%08lx\n", (unsigned long)f->f_pos, dir->i_sb->s_id, dir->i_ino); - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return -EBADF; } @@ -61,7 +62,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) le16_to_cpu(de->ino), DT_UNKNOWN) < 0) { brelse(bh); - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return 0; } } @@ -71,7 +72,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) brelse(bh); } - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return 0; } @@ -95,10 +96,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, inode = new_inode(s); if (!inode) return -ENOSPC; - lock_kernel(); + mutex_lock(&info->bfs_lock); ino = find_first_zero_bit(info->si_imap, info->si_lasti); if (ino > info->si_lasti) { - unlock_kernel(); + mutex_unlock(&info->bfs_lock); iput(inode); return -ENOSPC; } @@ -125,10 +126,10 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, if (err) { inode_dec_link_count(inode); iput(inode); - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return err; } - unlock_kernel(); + mutex_unlock(&info->bfs_lock); d_instantiate(dentry, inode); return 0; } @@ -139,22 +140,23 @@ static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct buffer_head *bh; struct bfs_dirent *de; + struct bfs_sb_info *info = BFS_SB(dir->i_sb); if (dentry->d_name.len > BFS_NAMELEN) return ERR_PTR(-ENAMETOOLONG); - lock_kernel(); + mutex_lock(&info->bfs_lock); bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); if (bh) { unsigned long ino = (unsigned long)le16_to_cpu(de->ino); brelse(bh); inode = bfs_iget(dir->i_sb, ino); if (IS_ERR(inode)) { - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return ERR_CAST(inode); } } - unlock_kernel(); + mutex_unlock(&info->bfs_lock); d_add(dentry, inode); return NULL; } @@ -163,13 +165,14 @@ static int bfs_link(struct dentry *old, struct inode *dir, struct dentry *new) { struct inode *inode = old->d_inode; + struct bfs_sb_info *info = BFS_SB(inode->i_sb); int err; - lock_kernel(); + mutex_lock(&info->bfs_lock); err = bfs_add_entry(dir, new->d_name.name, new->d_name.len, inode->i_ino); if (err) { - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return err; } inc_nlink(inode); @@ -177,19 +180,19 @@ static int bfs_link(struct dentry *old, struct inode *dir, mark_inode_dirty(inode); atomic_inc(&inode->i_count); d_instantiate(new, inode); - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return 0; } static int bfs_unlink(struct inode *dir, struct dentry *dentry) { int error = -ENOENT; - struct inode *inode; + struct inode *inode = dentry->d_inode; struct buffer_head *bh; struct bfs_dirent *de; + struct bfs_sb_info *info = BFS_SB(inode->i_sb); - inode = dentry->d_inode; - lock_kernel(); + mutex_lock(&info->bfs_lock); bh = bfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len, &de); if (!bh || (le16_to_cpu(de->ino) != inode->i_ino)) goto out_brelse; @@ -210,7 +213,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry) out_brelse: brelse(bh); - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return error; } @@ -220,6 +223,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *old_inode, *new_inode; struct buffer_head *old_bh, *new_bh; struct bfs_dirent *old_de, *new_de; + struct bfs_sb_info *info; int error = -ENOENT; old_bh = new_bh = NULL; @@ -227,7 +231,9 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (S_ISDIR(old_inode->i_mode)) return -EINVAL; - lock_kernel(); + info = BFS_SB(old_inode->i_sb); + + mutex_lock(&info->bfs_lock); old_bh = bfs_find_entry(old_dir, old_dentry->d_name.name, old_dentry->d_name.len, &old_de); @@ -264,7 +270,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry, error = 0; end_rename: - unlock_kernel(); + mutex_unlock(&info->bfs_lock); brelse(old_bh); brelse(new_bh); return error; diff --git a/fs/bfs/file.c b/fs/bfs/file.c index b11e63e8fbc..6a021265f01 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -99,7 +99,7 @@ static int bfs_get_block(struct inode *inode, sector_t block, return -ENOSPC; /* The rest has to be protected against itself. */ - lock_kernel(); + mutex_lock(&info->bfs_lock); /* * If the last data block for this file is the last allocated @@ -151,7 +151,7 @@ static int bfs_get_block(struct inode *inode, sector_t block, mark_buffer_dirty(sbh); map_bh(bh_result, sb, phys); out: - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return err; } diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 8db623838b5..0ed57b5ee01 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -104,6 +104,7 @@ static int bfs_write_inode(struct inode *inode, int unused) struct bfs_inode *di; struct buffer_head *bh; int block, off; + struct bfs_sb_info *info = BFS_SB(inode->i_sb); dprintf("ino=%08x\n", ino); @@ -112,13 +113,13 @@ static int bfs_write_inode(struct inode *inode, int unused) return -EIO; } - lock_kernel(); + mutex_lock(&info->bfs_lock); block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; bh = sb_bread(inode->i_sb, block); if (!bh) { printf("Unable to read inode %s:%08x\n", inode->i_sb->s_id, ino); - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return -EIO; } @@ -145,7 +146,7 @@ static int bfs_write_inode(struct inode *inode, int unused) mark_buffer_dirty(bh); brelse(bh); - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return 0; } @@ -170,7 +171,7 @@ static void bfs_delete_inode(struct inode *inode) inode->i_size = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - lock_kernel(); + mutex_lock(&info->bfs_lock); mark_inode_dirty(inode); block = (ino - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; @@ -178,7 +179,7 @@ static void bfs_delete_inode(struct inode *inode) if (!bh) { printf("Unable to read inode %s:%08lx\n", inode->i_sb->s_id, ino); - unlock_kernel(); + mutex_unlock(&info->bfs_lock); return; } off = (ino - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; @@ -204,14 +205,16 @@ static void bfs_delete_inode(struct inode *inode) info->si_lf_eblk = bi->i_sblock - 1; mark_buffer_dirty(info->si_sbh); } - unlock_kernel(); + mutex_unlock(&info->bfs_lock); clear_inode(inode); } static void bfs_put_super(struct super_block *s) { struct bfs_sb_info *info = BFS_SB(s); + brelse(info->si_sbh); + mutex_destroy(&info->bfs_lock); kfree(info->si_imap); kfree(info); s->s_fs_info = NULL; @@ -236,11 +239,13 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) static void bfs_write_super(struct super_block *s) { - lock_kernel(); + struct bfs_sb_info *info = BFS_SB(s); + + mutex_lock(&info->bfs_lock); if (!(s->s_flags & MS_RDONLY)) - mark_buffer_dirty(BFS_SB(s)->si_sbh); + mark_buffer_dirty(info->si_sbh); s->s_dirt = 0; - unlock_kernel(); + mutex_unlock(&info->bfs_lock); } static struct kmem_cache *bfs_inode_cachep; @@ -259,7 +264,7 @@ static void bfs_destroy_inode(struct inode *inode) kmem_cache_free(bfs_inode_cachep, BFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct bfs_inode_info *bi = foo; @@ -380,7 +385,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct bfs_inode *di; int block = (i - BFS_ROOT_INO) / BFS_INODES_PER_BLOCK + 1; int off = (i - BFS_ROOT_INO) % BFS_INODES_PER_BLOCK; - unsigned long sblock, eblock; + unsigned long eblock; if (!off) { brelse(bh); @@ -399,7 +404,6 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) set_bit(i, info->si_imap); info->si_freeb -= BFS_FILEBLOCKS(di); - sblock = le32_to_cpu(di->i_sblock); eblock = le32_to_cpu(di->i_eblock); if (eblock > info->si_lf_eblk) info->si_lf_eblk = eblock; @@ -410,6 +414,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_dirt = 1; } dump_imap("read_super", s); + mutex_init(&info->bfs_lock); return 0; out: diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index ba4cddb92f1..204cfd1d767 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -444,12 +444,6 @@ beyond_if: regs->gp = ex.a_gpvalue; #endif start_thread(regs, ex.a_entry, current->mm->start_stack); - if (unlikely(current->ptrace & PT_PTRACED)) { - if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); - else - send_sig(SIGTRAP, current, 0); - } return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 3b6ff854d98..655ed8d30a8 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1003,12 +1003,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) #endif start_thread(regs, elf_entry, bprm->p); - if (unlikely(current->ptrace & PT_PTRACED)) { - if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify ((PTRACE_EVENT_EXEC << 8) | SIGTRAP); - else - send_sig(SIGTRAP, current, 0); - } retval = 0; out: kfree(loc); diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 1b59b1edf26..80c1f952ef7 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -433,13 +433,6 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, entryaddr = interp_params.entry_addr ?: exec_params.entry_addr; start_thread(regs, entryaddr, current->mm->start_stack); - if (unlikely(current->ptrace & PT_PTRACED)) { - if (current->ptrace & PT_TRACE_EXEC) - ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); - else - send_sig(SIGTRAP, current, 0); - } - retval = 0; error: @@ -477,6 +470,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, char __user *u_platform, *p; long hwcap; int loop; + int nr; /* reset for each csp adjustment */ /* we're going to shovel a whole load of stuff onto the stack */ #ifdef CONFIG_MMU @@ -549,10 +543,7 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, /* force 16 byte _final_ alignment here for generality */ #define DLINFO_ITEMS 13 - nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0); -#ifdef DLINFO_ARCH_ITEMS - nitems += DLINFO_ARCH_ITEMS; -#endif + nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH; csp = sp; sp -= nitems * 2 * sizeof(unsigned long); @@ -564,39 +555,46 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, sp -= sp & 15UL; /* put the ELF interpreter info on the stack */ -#define NEW_AUX_ENT(nr, id, val) \ +#define NEW_AUX_ENT(id, val) \ do { \ struct { unsigned long _id, _val; } __user *ent; \ \ ent = (void __user *) csp; \ __put_user((id), &ent[nr]._id); \ __put_user((val), &ent[nr]._val); \ + nr++; \ } while (0) + nr = 0; csp -= 2 * sizeof(unsigned long); - NEW_AUX_ENT(0, AT_NULL, 0); + NEW_AUX_ENT(AT_NULL, 0); if (k_platform) { + nr = 0; csp -= 2 * sizeof(unsigned long); - NEW_AUX_ENT(0, AT_PLATFORM, + NEW_AUX_ENT(AT_PLATFORM, (elf_addr_t) (unsigned long) u_platform); } + nr = 0; csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); - NEW_AUX_ENT( 0, AT_HWCAP, hwcap); - NEW_AUX_ENT( 1, AT_PAGESZ, PAGE_SIZE); - NEW_AUX_ENT( 2, AT_CLKTCK, CLOCKS_PER_SEC); - NEW_AUX_ENT( 3, AT_PHDR, exec_params->ph_addr); - NEW_AUX_ENT( 4, AT_PHENT, sizeof(struct elf_phdr)); - NEW_AUX_ENT( 5, AT_PHNUM, exec_params->hdr.e_phnum); - NEW_AUX_ENT( 6, AT_BASE, interp_params->elfhdr_addr); - NEW_AUX_ENT( 7, AT_FLAGS, 0); - NEW_AUX_ENT( 8, AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT( 9, AT_UID, (elf_addr_t) current->uid); - NEW_AUX_ENT(10, AT_EUID, (elf_addr_t) current->euid); - NEW_AUX_ENT(11, AT_GID, (elf_addr_t) current->gid); - NEW_AUX_ENT(12, AT_EGID, (elf_addr_t) current->egid); + NEW_AUX_ENT(AT_HWCAP, hwcap); + NEW_AUX_ENT(AT_PAGESZ, PAGE_SIZE); + NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC); + NEW_AUX_ENT(AT_PHDR, exec_params->ph_addr); + NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr)); + NEW_AUX_ENT(AT_PHNUM, exec_params->hdr.e_phnum); + NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); + NEW_AUX_ENT(AT_FLAGS, 0); + NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); + NEW_AUX_ENT(AT_UID, (elf_addr_t) current->uid); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); + NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); #ifdef ARCH_DLINFO + nr = 0; + csp -= AT_VECTOR_SIZE_ARCH * 2 * sizeof(unsigned long); + /* ARCH_DLINFO must come last so platform specific code can enforce * special alignment requirements on the AUXV if necessary (eg. PPC). */ diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 2cb1acda3a8..56372ecf169 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -920,9 +920,6 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) start_thread(regs, start_addr, current->mm->start_stack); - if (current->ptrace & PT_PTRACED) - send_sig(SIGTRAP, current, 0); - return 0; } diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index fdc36bfd6a7..68be580ba28 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -274,8 +274,6 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs) map_hpux_gateway_page(current,current->mm); start_thread_som(regs, som_entry, bprm->p); - if (current->ptrace & PT_PTRACED) - send_sig(SIGTRAP, current, 0); return 0; /* error cleanup */ diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 63e2ee63058..c3e174b35fe 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -705,7 +705,6 @@ void __init bio_integrity_init_slab(void) bio_integrity_slab = KMEM_CACHE(bio_integrity_payload, SLAB_HWCACHE_ALIGN|SLAB_PANIC); } -EXPORT_SYMBOL(bio_integrity_init_slab); static int __init integrity_init(void) { @@ -77,11 +77,8 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct */ bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); - if (bvl) { - struct biovec_slab *bp = bvec_slabs + *idx; - - memset(bvl, 0, bp->nr_vecs * sizeof(struct bio_vec)); - } + if (bvl) + memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); return bvl; } @@ -149,7 +146,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) goto out; } bio->bi_flags |= idx << BIO_POOL_OFFSET; - bio->bi_max_vecs = bvec_slabs[idx].nr_vecs; + bio->bi_max_vecs = bvec_nr_vecs(idx); } bio->bi_io_vec = bvl; } @@ -721,12 +718,8 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, const int local_nr_pages = end - start; const int page_limit = cur_page + local_nr_pages; - down_read(¤t->mm->mmap_sem); - ret = get_user_pages(current, current->mm, uaddr, - local_nr_pages, - write_to_vm, 0, &pages[cur_page], NULL); - up_read(¤t->mm->mmap_sem); - + ret = get_user_pages_fast(uaddr, local_nr_pages, + write_to_vm, &pages[cur_page]); if (ret < local_nr_pages) { ret = -EFAULT; goto out_unmap; diff --git a/fs/block_dev.c b/fs/block_dev.c index 10d8a0aa871..aff54219e04 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -271,7 +271,7 @@ static void bdev_destroy_inode(struct inode *inode) kmem_cache_free(bdev_cachep, bdi); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct bdev_inode *ei = (struct bdev_inode *) foo; struct block_device *bdev = &ei->bdev; @@ -941,8 +941,10 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) * hooks: /n/, see "layering violations". */ ret = devcgroup_inode_permission(bdev->bd_inode, perm); - if (ret != 0) + if (ret != 0) { + bdput(bdev); return ret; + } ret = -ENXIO; file->f_mapping = bdev->bd_inode->i_mapping; @@ -1234,6 +1236,7 @@ fail: bdev = ERR_PTR(error); goto out; } +EXPORT_SYMBOL(lookup_bdev); /** * open_bdev_excl - open a block device by name and set it up for use diff --git a/fs/buffer.c b/fs/buffer.c index d48caee12e2..38653e36e22 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -580,7 +580,7 @@ EXPORT_SYMBOL(mark_buffer_async_write); /* * The buffer's backing address_space's private_lock must be held */ -static inline void __remove_assoc_queue(struct buffer_head *bh) +static void __remove_assoc_queue(struct buffer_head *bh) { list_del_init(&bh->b_assoc_buffers); WARN_ON(!bh->b_assoc_map); @@ -706,7 +706,7 @@ static int __set_page_dirty(struct page *page, if (TestSetPageDirty(page)) return 0; - write_lock_irq(&mapping->tree_lock); + spin_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); @@ -719,7 +719,7 @@ static int __set_page_dirty(struct page *page, radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - write_unlock_irq(&mapping->tree_lock); + spin_unlock_irq(&mapping->tree_lock); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return 1; @@ -1214,8 +1214,7 @@ void __brelse(struct buffer_head * buf) put_bh(buf); return; } - printk(KERN_ERR "VFS: brelse: Trying to free free buffer\n"); - WARN_ON(1); + WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); } /* @@ -1721,7 +1720,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, */ if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); - } else if (test_set_buffer_locked(bh)) { + } else if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } @@ -2097,6 +2096,52 @@ int generic_write_end(struct file *file, struct address_space *mapping, EXPORT_SYMBOL(generic_write_end); /* + * block_is_partially_uptodate checks whether buffers within a page are + * uptodate or not. + * + * Returns true if all buffers which correspond to a file portion + * we want to read are uptodate. + */ +int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, + unsigned long from) +{ + struct inode *inode = page->mapping->host; + unsigned block_start, block_end, blocksize; + unsigned to; + struct buffer_head *bh, *head; + int ret = 1; + + if (!page_has_buffers(page)) + return 0; + + blocksize = 1 << inode->i_blkbits; + to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count); + to = from + to; + if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize) + return 0; + + head = page_buffers(page); + bh = head; + block_start = 0; + do { + block_end = block_start + blocksize; + if (block_end > from && block_start < to) { + if (!buffer_uptodate(bh)) { + ret = 0; + break; + } + if (block_end >= to) + break; + } + block_start = block_end; + bh = bh->b_this_page; + } while (bh != head); + + return ret; +} +EXPORT_SYMBOL(block_is_partially_uptodate); + +/* * Generic "read page" function for block devices that have the normal * get_block functionality. This is most of the block device filesystems. * Reads the page asynchronously --- the unlock_buffer() and @@ -2955,7 +3000,7 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) if (rw == SWRITE || rw == SWRITE_SYNC) lock_buffer(bh); - else if (test_set_buffer_locked(bh)) + else if (!trylock_buffer(bh)) continue; if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC) { @@ -3272,7 +3317,7 @@ int bh_submit_read(struct buffer_head *bh) EXPORT_SYMBOL(bh_submit_read); static void -init_buffer_head(struct kmem_cache *cachep, void *data) +init_buffer_head(void *data) { struct buffer_head *bh = data; diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 1f3465201fd..f5d0083e09f 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,11 @@ +Version 1.54 +------------ +Fix premature write failure on congested networks (we would give up +on EAGAIN from the socket too quickly on large writes). +Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. +Fix endian problems in acl (mode from/to cifs acl) on bigendian +architectures. + Version 1.53 ------------ DFS support added (Microsoft Distributed File System client support needed diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index f58e41d3ba4..5fabd2caf93 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c @@ -400,7 +400,7 @@ asn1_oid_decode(struct asn1_ctx *ctx, size = eoc - ctx->pointer + 1; /* first subid actually encodes first two subids */ - if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + if (size < 2 || size > UINT_MAX/sizeof(unsigned long)) return 0; *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); @@ -483,6 +483,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, asn1_open(&ctx, security_blob, length); + /* GSSAPI header */ if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { cFYI(1, ("Error decoding negTokenInit header")); return 0; @@ -490,153 +491,142 @@ decode_negTokenInit(unsigned char *security_blob, int length, || (tag != ASN1_EOC)) { cFYI(1, ("cls = %d con = %d tag = %d", cls, con, tag)); return 0; - } else { - /* remember to free obj->oid */ - rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); - if (rc) { - if ((tag == ASN1_OJI) && (cls == ASN1_PRI)) { - rc = asn1_oid_decode(&ctx, end, &oid, &oidlen); - if (rc) { - rc = compare_oid(oid, oidlen, - SPNEGO_OID, - SPNEGO_OID_LEN); - kfree(oid); - } - } else - rc = 0; - } + } - if (!rc) { - cFYI(1, ("Error decoding negTokenInit header")); - return 0; - } + /* Check for SPNEGO OID -- remember to free obj->oid */ + rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); + if (rc) { + if ((tag == ASN1_OJI) && (con == ASN1_PRI) && + (cls == ASN1_UNI)) { + rc = asn1_oid_decode(&ctx, end, &oid, &oidlen); + if (rc) { + rc = compare_oid(oid, oidlen, SPNEGO_OID, + SPNEGO_OID_LEN); + kfree(oid); + } + } else + rc = 0; + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, ("Error decoding negTokenInit")); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON) - || (tag != ASN1_EOC)) { - cFYI(1, - ("cls = %d con = %d tag = %d end = %p (%d) exit 0", - cls, con, tag, end, *end)); - return 0; - } + /* SPNEGO OID not present or garbled -- bail out */ + if (!rc) { + cFYI(1, ("Error decoding negTokenInit header")); + return 0; + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, ("Error decoding negTokenInit")); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, - ("cls = %d con = %d tag = %d end = %p (%d) exit 1", - cls, con, tag, end, *end)); - return 0; - } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding negTokenInit")); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON) + || (tag != ASN1_EOC)) { + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 0", + cls, con, tag, end, *end)); + return 0; + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, ("Error decoding 2nd part of negTokenInit")); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON) - || (tag != ASN1_EOC)) { - cFYI(1, - ("cls = %d con = %d tag = %d end = %p (%d) exit 0", - cls, con, tag, end, *end)); - return 0; - } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding negTokenInit")); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_CON) + || (tag != ASN1_SEQ)) { + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 1", + cls, con, tag, end, *end)); + return 0; + } - if (asn1_header_decode - (&ctx, &sequence_end, &cls, &con, &tag) == 0) { - cFYI(1, ("Error decoding 2nd part of negTokenInit")); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, - ("cls = %d con = %d tag = %d end = %p (%d) exit 1", - cls, con, tag, end, *end)); - return 0; - } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding 2nd part of negTokenInit")); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON) + || (tag != ASN1_EOC)) { + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 0", + cls, con, tag, end, *end)); + return 0; + } - while (!asn1_eoc_decode(&ctx, sequence_end)) { - rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); - if (!rc) { - cFYI(1, - ("Error decoding negTokenInit hdr exit2")); - return 0; - } - if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { - if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { - - cFYI(1, - ("OID len = %d oid = 0x%lx 0x%lx " - "0x%lx 0x%lx", - oidlen, *oid, *(oid + 1), - *(oid + 2), *(oid + 3))); - - if (compare_oid(oid, oidlen, - MSKRB5_OID, - MSKRB5_OID_LEN)) - use_kerberos = true; - else if (compare_oid(oid, oidlen, - KRB5_OID, - KRB5_OID_LEN)) - use_kerberos = true; - else if (compare_oid(oid, oidlen, - NTLMSSP_OID, - NTLMSSP_OID_LEN)) - use_ntlmssp = true; - - kfree(oid); - } - } else { - cFYI(1, ("Should be an oid what is going on?")); - } - } + if (asn1_header_decode + (&ctx, &sequence_end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding 2nd part of negTokenInit")); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_CON) + || (tag != ASN1_SEQ)) { + cFYI(1, + ("cls = %d con = %d tag = %d end = %p (%d) exit 1", + cls, con, tag, end, *end)); + return 0; + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, - ("Error decoding last part negTokenInit exit3")); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - /* tag = 3 indicating mechListMIC */ + while (!asn1_eoc_decode(&ctx, sequence_end)) { + rc = asn1_header_decode(&ctx, &end, &cls, &con, &tag); + if (!rc) { cFYI(1, - ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end)); + ("Error decoding negTokenInit hdr exit2")); return 0; } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, - ("Error decoding last part negTokenInit exit5")); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_CON) - || (tag != ASN1_SEQ)) { - cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end)); + if ((tag == ASN1_OJI) && (con == ASN1_PRI)) { + if (asn1_oid_decode(&ctx, end, &oid, &oidlen)) { + + cFYI(1, ("OID len = %d oid = 0x%lx 0x%lx " + "0x%lx 0x%lx", oidlen, *oid, + *(oid + 1), *(oid + 2), *(oid + 3))); + + if (compare_oid(oid, oidlen, MSKRB5_OID, + MSKRB5_OID_LEN)) + use_kerberos = true; + else if (compare_oid(oid, oidlen, KRB5_OID, + KRB5_OID_LEN)) + use_kerberos = true; + else if (compare_oid(oid, oidlen, NTLMSSP_OID, + NTLMSSP_OID_LEN)) + use_ntlmssp = true; + + kfree(oid); + } + } else { + cFYI(1, ("Should be an oid what is going on?")); } + } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, - ("Error decoding last part negTokenInit exit 7")); - return 0; - } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { - cFYI(1, - ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end)); - return 0; - } - if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { - cFYI(1, - ("Error decoding last part negTokenInit exit9")); - return 0; - } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) - || (tag != ASN1_GENSTR)) { - cFYI(1, - ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", - cls, con, tag, end, *end)); - return 0; - } - cFYI(1, ("Need to call asn1_octets_decode() function for %s", - ctx.pointer)); /* is this UTF-8 or ASCII? */ + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding last part negTokenInit exit3")); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { + /* tag = 3 indicating mechListMIC */ + cFYI(1, ("Exit 4 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end)); + return 0; + } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding last part negTokenInit exit5")); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_CON) + || (tag != ASN1_SEQ)) { + cFYI(1, ("cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end)); + } + + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding last part negTokenInit exit 7")); + return 0; + } else if ((cls != ASN1_CTX) || (con != ASN1_CON)) { + cFYI(1, ("Exit 8 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end)); + return 0; + } + if (asn1_header_decode(&ctx, &end, &cls, &con, &tag) == 0) { + cFYI(1, ("Error decoding last part negTokenInit exit9")); + return 0; + } else if ((cls != ASN1_UNI) || (con != ASN1_PRI) + || (tag != ASN1_GENSTR)) { + cFYI(1, ("Exit10 cls = %d con = %d tag = %d end = %p (%d)", + cls, con, tag, end, *end)); + return 0; } + cFYI(1, ("Need to call asn1_octets_decode() function for %s", + ctx.pointer)); /* is this UTF-8 or ASCII? */ if (use_kerberos) *secType = Kerberos; diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index cc950f69e51..69a12aae91d 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -79,27 +79,25 @@ void cifs_dump_mids(struct TCP_Server_Info *server) spin_lock(&GlobalMid_Lock); list_for_each(tmp, &server->pending_mid_q) { mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - if (mid_entry) { - cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", - mid_entry->midState, - (int)mid_entry->command, - mid_entry->pid, - mid_entry->tsk, - mid_entry->mid)); + cERROR(1, ("State: %d Cmd: %d Pid: %d Tsk: %p Mid %d", + mid_entry->midState, + (int)mid_entry->command, + mid_entry->pid, + mid_entry->tsk, + mid_entry->mid)); #ifdef CONFIG_CIFS_STATS2 - cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", - mid_entry->largeBuf, - mid_entry->resp_buf, - mid_entry->when_received, - jiffies)); + cERROR(1, ("IsLarge: %d buf: %p time rcv: %ld now: %ld", + mid_entry->largeBuf, + mid_entry->resp_buf, + mid_entry->when_received, + jiffies)); #endif /* STATS2 */ - cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, - mid_entry->multiEnd)); - if (mid_entry->resp_buf) { - cifs_dump_detail(mid_entry->resp_buf); - cifs_dump_mem("existing buf: ", - mid_entry->resp_buf, 62); - } + cERROR(1, ("IsMult: %d IsEnd: %d", mid_entry->multiRsp, + mid_entry->multiEnd)); + if (mid_entry->resp_buf) { + cifs_dump_detail(mid_entry->resp_buf); + cifs_dump_mem("existing buf: ", + mid_entry->resp_buf, 62); } } spin_unlock(&GlobalMid_Lock); @@ -107,9 +105,7 @@ void cifs_dump_mids(struct TCP_Server_Info *server) #endif /* CONFIG_CIFS_DEBUG2 */ #ifdef CONFIG_PROC_FS -static int -cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, - int count, int *eof, void *data) +static int cifs_debug_data_proc_show(struct seq_file *m, void *v) { struct list_head *tmp; struct list_head *tmp1; @@ -117,23 +113,13 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, struct cifsSesInfo *ses; struct cifsTconInfo *tcon; int i; - int length = 0; - char *original_buf = buf; - - *beginBuffer = buf + offset; - length = - sprintf(buf, + seq_puts(m, "Display Internal CIFS Data Structures for Debugging\n" "---------------------------------------------------\n"); - buf += length; - length = sprintf(buf, "CIFS Version %s\n", CIFS_VERSION); - buf += length; - length = sprintf(buf, - "Active VFS Requests: %d\n", GlobalTotalActiveXid); - buf += length; - length = sprintf(buf, "Servers:"); - buf += length; + seq_printf(m, "CIFS Version %s\n", CIFS_VERSION); + seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid); + seq_printf(m, "Servers:"); i = 0; read_lock(&GlobalSMBSeslock); @@ -142,11 +128,10 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); if ((ses->serverDomain == NULL) || (ses->serverOS == NULL) || (ses->serverNOS == NULL)) { - buf += sprintf(buf, "\nentry for %s not fully " + seq_printf(m, "\nentry for %s not fully " "displayed\n\t", ses->serverName); } else { - length = - sprintf(buf, + seq_printf(m, "\n%d) Name: %s Domain: %s Mounts: %d OS:" " %s \n\tNOS: %s\tCapability: 0x%x\n\tSMB" " session status: %d\t", @@ -154,10 +139,9 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, atomic_read(&ses->inUse), ses->serverOS, ses->serverNOS, ses->capabilities, ses->status); - buf += length; } if (ses->server) { - buf += sprintf(buf, "TCP status: %d\n\tLocal Users To " + seq_printf(m, "TCP status: %d\n\tLocal Users To " "Server: %d SecMode: 0x%x Req On Wire: %d", ses->server->tcpStatus, atomic_read(&ses->server->socketUseCount), @@ -165,41 +149,34 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, atomic_read(&ses->server->inFlight)); #ifdef CONFIG_CIFS_STATS2 - buf += sprintf(buf, " In Send: %d In MaxReq Wait: %d", + seq_printf(m, " In Send: %d In MaxReq Wait: %d", atomic_read(&ses->server->inSend), atomic_read(&ses->server->num_waiters)); #endif - length = sprintf(buf, "\nMIDs:\n"); - buf += length; + seq_puts(m, "\nMIDs:\n"); spin_lock(&GlobalMid_Lock); list_for_each(tmp1, &ses->server->pending_mid_q) { mid_entry = list_entry(tmp1, struct mid_q_entry, qhead); - if (mid_entry) { - length = sprintf(buf, - "State: %d com: %d pid:" - " %d tsk: %p mid %d\n", - mid_entry->midState, - (int)mid_entry->command, - mid_entry->pid, - mid_entry->tsk, - mid_entry->mid); - buf += length; - } + seq_printf(m, "State: %d com: %d pid:" + " %d tsk: %p mid %d\n", + mid_entry->midState, + (int)mid_entry->command, + mid_entry->pid, + mid_entry->tsk, + mid_entry->mid); } spin_unlock(&GlobalMid_Lock); } } read_unlock(&GlobalSMBSeslock); - sprintf(buf, "\n"); - buf++; + seq_putc(m, '\n'); - length = sprintf(buf, "Shares:"); - buf += length; + seq_puts(m, "Shares:"); i = 0; read_lock(&GlobalSMBSeslock); @@ -208,62 +185,52 @@ cifs_debug_data_read(char *buf, char **beginBuffer, off_t offset, i++; tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); dev_type = le32_to_cpu(tcon->fsDevInfo.DeviceType); - length = sprintf(buf, "\n%d) %s Uses: %d ", i, + seq_printf(m, "\n%d) %s Uses: %d ", i, tcon->treeName, atomic_read(&tcon->useCount)); - buf += length; if (tcon->nativeFileSystem) { - length = sprintf(buf, "Type: %s ", + seq_printf(m, "Type: %s ", tcon->nativeFileSystem); - buf += length; } - length = sprintf(buf, "DevInfo: 0x%x Attributes: 0x%x" + seq_printf(m, "DevInfo: 0x%x Attributes: 0x%x" "\nPathComponentMax: %d Status: %d", le32_to_cpu(tcon->fsDevInfo.DeviceCharacteristics), le32_to_cpu(tcon->fsAttrInfo.Attributes), le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength), tcon->tidStatus); - buf += length; if (dev_type == FILE_DEVICE_DISK) - length = sprintf(buf, " type: DISK "); + seq_puts(m, " type: DISK "); else if (dev_type == FILE_DEVICE_CD_ROM) - length = sprintf(buf, " type: CDROM "); + seq_puts(m, " type: CDROM "); else - length = - sprintf(buf, " type: %d ", dev_type); - buf += length; - if (tcon->tidStatus == CifsNeedReconnect) { - buf += sprintf(buf, "\tDISCONNECTED "); - length += 14; - } + seq_printf(m, " type: %d ", dev_type); + + if (tcon->tidStatus == CifsNeedReconnect) + seq_puts(m, "\tDISCONNECTED "); } read_unlock(&GlobalSMBSeslock); - length = sprintf(buf, "\n"); - buf += length; + seq_putc(m, '\n'); /* BB add code to dump additional info such as TCP session info now */ - /* Now calculate total size of returned data */ - length = buf - original_buf; - - if (offset + count >= length) - *eof = 1; - if (length < offset) { - *eof = 1; - return 0; - } else { - length = length - offset; - } - if (length > count) - length = count; + return 0; +} - return length; +static int cifs_debug_data_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cifs_debug_data_proc_show, NULL); } -#ifdef CONFIG_CIFS_STATS +static const struct file_operations cifs_debug_data_proc_fops = { + .owner = THIS_MODULE, + .open = cifs_debug_data_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; -static int -cifs_stats_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) +#ifdef CONFIG_CIFS_STATS +static ssize_t cifs_stats_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { char c; int rc; @@ -307,236 +274,132 @@ cifs_stats_write(struct file *file, const char __user *buffer, return count; } -static int -cifs_stats_read(char *buf, char **beginBuffer, off_t offset, - int count, int *eof, void *data) +static int cifs_stats_proc_show(struct seq_file *m, void *v) { - int item_length, i, length; + int i; struct list_head *tmp; struct cifsTconInfo *tcon; - *beginBuffer = buf + offset; - - length = sprintf(buf, + seq_printf(m, "Resources in use\nCIFS Session: %d\n", sesInfoAllocCount.counter); - buf += length; - item_length = - sprintf(buf, "Share (unique mount targets): %d\n", + seq_printf(m, "Share (unique mount targets): %d\n", tconInfoAllocCount.counter); - length += item_length; - buf += item_length; - item_length = - sprintf(buf, "SMB Request/Response Buffer: %d Pool size: %d\n", + seq_printf(m, "SMB Request/Response Buffer: %d Pool size: %d\n", bufAllocCount.counter, cifs_min_rcv + tcpSesAllocCount.counter); - length += item_length; - buf += item_length; - item_length = - sprintf(buf, "SMB Small Req/Resp Buffer: %d Pool size: %d\n", + seq_printf(m, "SMB Small Req/Resp Buffer: %d Pool size: %d\n", smBufAllocCount.counter, cifs_min_small); - length += item_length; - buf += item_length; #ifdef CONFIG_CIFS_STATS2 - item_length = sprintf(buf, "Total Large %d Small %d Allocations\n", + seq_printf(m, "Total Large %d Small %d Allocations\n", atomic_read(&totBufAllocCount), atomic_read(&totSmBufAllocCount)); - length += item_length; - buf += item_length; #endif /* CONFIG_CIFS_STATS2 */ - item_length = - sprintf(buf, "Operations (MIDs): %d\n", - midCount.counter); - length += item_length; - buf += item_length; - item_length = sprintf(buf, + seq_printf(m, "Operations (MIDs): %d\n", midCount.counter); + seq_printf(m, "\n%d session %d share reconnects\n", tcpSesReconnectCount.counter, tconInfoReconnectCount.counter); - length += item_length; - buf += item_length; - item_length = sprintf(buf, + seq_printf(m, "Total vfs operations: %d maximum at one time: %d\n", GlobalCurrentXid, GlobalMaxActiveXid); - length += item_length; - buf += item_length; i = 0; read_lock(&GlobalSMBSeslock); list_for_each(tmp, &GlobalTreeConnectionList) { i++; tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - item_length = sprintf(buf, "\n%d) %s", i, tcon->treeName); - buf += item_length; - length += item_length; - if (tcon->tidStatus == CifsNeedReconnect) { - buf += sprintf(buf, "\tDISCONNECTED "); - length += 14; - } - item_length = sprintf(buf, "\nSMBs: %d Oplock Breaks: %d", + seq_printf(m, "\n%d) %s", i, tcon->treeName); + if (tcon->tidStatus == CifsNeedReconnect) + seq_puts(m, "\tDISCONNECTED "); + seq_printf(m, "\nSMBs: %d Oplock Breaks: %d", atomic_read(&tcon->num_smbs_sent), atomic_read(&tcon->num_oplock_brks)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, "\nReads: %d Bytes: %lld", + seq_printf(m, "\nReads: %d Bytes: %lld", atomic_read(&tcon->num_reads), (long long)(tcon->bytes_read)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, "\nWrites: %d Bytes: %lld", + seq_printf(m, "\nWrites: %d Bytes: %lld", atomic_read(&tcon->num_writes), (long long)(tcon->bytes_written)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, + seq_printf(m, "\nLocks: %d HardLinks: %d Symlinks: %d", atomic_read(&tcon->num_locks), atomic_read(&tcon->num_hardlinks), atomic_read(&tcon->num_symlinks)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, "\nOpens: %d Closes: %d Deletes: %d", + seq_printf(m, "\nOpens: %d Closes: %d Deletes: %d", atomic_read(&tcon->num_opens), atomic_read(&tcon->num_closes), atomic_read(&tcon->num_deletes)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, "\nMkdirs: %d Rmdirs: %d", + seq_printf(m, "\nMkdirs: %d Rmdirs: %d", atomic_read(&tcon->num_mkdirs), atomic_read(&tcon->num_rmdirs)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, "\nRenames: %d T2 Renames %d", + seq_printf(m, "\nRenames: %d T2 Renames %d", atomic_read(&tcon->num_renames), atomic_read(&tcon->num_t2renames)); - buf += item_length; - length += item_length; - item_length = sprintf(buf, "\nFindFirst: %d FNext %d FClose %d", + seq_printf(m, "\nFindFirst: %d FNext %d FClose %d", atomic_read(&tcon->num_ffirst), atomic_read(&tcon->num_fnext), atomic_read(&tcon->num_fclose)); - buf += item_length; - length += item_length; } read_unlock(&GlobalSMBSeslock); - buf += sprintf(buf, "\n"); - length++; - - if (offset + count >= length) - *eof = 1; - if (length < offset) { - *eof = 1; - return 0; - } else { - length = length - offset; - } - if (length > count) - length = count; + seq_putc(m, '\n'); + return 0; +} - return length; +static int cifs_stats_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cifs_stats_proc_show, NULL); } + +static const struct file_operations cifs_stats_proc_fops = { + .owner = THIS_MODULE, + .open = cifs_stats_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = cifs_stats_proc_write, +}; #endif /* STATS */ static struct proc_dir_entry *proc_fs_cifs; -read_proc_t cifs_txanchor_read; -static read_proc_t cifsFYI_read; -static write_proc_t cifsFYI_write; -static read_proc_t oplockEnabled_read; -static write_proc_t oplockEnabled_write; -static read_proc_t lookupFlag_read; -static write_proc_t lookupFlag_write; -static read_proc_t traceSMB_read; -static write_proc_t traceSMB_write; -static read_proc_t multiuser_mount_read; -static write_proc_t multiuser_mount_write; -static read_proc_t security_flags_read; -static write_proc_t security_flags_write; -/* static read_proc_t ntlmv2_enabled_read; -static write_proc_t ntlmv2_enabled_write; -static read_proc_t packet_signing_enabled_read; -static write_proc_t packet_signing_enabled_write;*/ -static read_proc_t experimEnabled_read; -static write_proc_t experimEnabled_write; -static read_proc_t linuxExtensionsEnabled_read; -static write_proc_t linuxExtensionsEnabled_write; +static const struct file_operations cifsFYI_proc_fops; +static const struct file_operations cifs_oplock_proc_fops; +static const struct file_operations cifs_lookup_cache_proc_fops; +static const struct file_operations traceSMB_proc_fops; +static const struct file_operations cifs_multiuser_mount_proc_fops; +static const struct file_operations cifs_security_flags_proc_fops; +static const struct file_operations cifs_experimental_proc_fops; +static const struct file_operations cifs_linux_ext_proc_fops; void cifs_proc_init(void) { - struct proc_dir_entry *pde; - proc_fs_cifs = proc_mkdir("fs/cifs", NULL); if (proc_fs_cifs == NULL) return; proc_fs_cifs->owner = THIS_MODULE; - create_proc_read_entry("DebugData", 0, proc_fs_cifs, - cifs_debug_data_read, NULL); + proc_create("DebugData", 0, proc_fs_cifs, &cifs_debug_data_proc_fops); #ifdef CONFIG_CIFS_STATS - pde = create_proc_read_entry("Stats", 0, proc_fs_cifs, - cifs_stats_read, NULL); - if (pde) - pde->write_proc = cifs_stats_write; + proc_create("Stats", 0, proc_fs_cifs, &cifs_stats_proc_fops); #endif /* STATS */ - pde = create_proc_read_entry("cifsFYI", 0, proc_fs_cifs, - cifsFYI_read, NULL); - if (pde) - pde->write_proc = cifsFYI_write; - - pde = - create_proc_read_entry("traceSMB", 0, proc_fs_cifs, - traceSMB_read, NULL); - if (pde) - pde->write_proc = traceSMB_write; - - pde = create_proc_read_entry("OplockEnabled", 0, proc_fs_cifs, - oplockEnabled_read, NULL); - if (pde) - pde->write_proc = oplockEnabled_write; - - pde = create_proc_read_entry("Experimental", 0, proc_fs_cifs, - experimEnabled_read, NULL); - if (pde) - pde->write_proc = experimEnabled_write; - - pde = create_proc_read_entry("LinuxExtensionsEnabled", 0, proc_fs_cifs, - linuxExtensionsEnabled_read, NULL); - if (pde) - pde->write_proc = linuxExtensionsEnabled_write; - - pde = - create_proc_read_entry("MultiuserMount", 0, proc_fs_cifs, - multiuser_mount_read, NULL); - if (pde) - pde->write_proc = multiuser_mount_write; - - pde = - create_proc_read_entry("SecurityFlags", 0, proc_fs_cifs, - security_flags_read, NULL); - if (pde) - pde->write_proc = security_flags_write; - - pde = - create_proc_read_entry("LookupCacheEnabled", 0, proc_fs_cifs, - lookupFlag_read, NULL); - if (pde) - pde->write_proc = lookupFlag_write; - -/* pde = - create_proc_read_entry("NTLMV2Enabled", 0, proc_fs_cifs, - ntlmv2_enabled_read, NULL); - if (pde) - pde->write_proc = ntlmv2_enabled_write; - - pde = - create_proc_read_entry("PacketSigningEnabled", 0, proc_fs_cifs, - packet_signing_enabled_read, NULL); - if (pde) - pde->write_proc = packet_signing_enabled_write;*/ + proc_create("cifsFYI", 0, proc_fs_cifs, &cifsFYI_proc_fops); + proc_create("traceSMB", 0, proc_fs_cifs, &traceSMB_proc_fops); + proc_create("OplockEnabled", 0, proc_fs_cifs, &cifs_oplock_proc_fops); + proc_create("Experimental", 0, proc_fs_cifs, + &cifs_experimental_proc_fops); + proc_create("LinuxExtensionsEnabled", 0, proc_fs_cifs, + &cifs_linux_ext_proc_fops); + proc_create("MultiuserMount", 0, proc_fs_cifs, + &cifs_multiuser_mount_proc_fops); + proc_create("SecurityFlags", 0, proc_fs_cifs, + &cifs_security_flags_proc_fops); + proc_create("LookupCacheEnabled", 0, proc_fs_cifs, + &cifs_lookup_cache_proc_fops); } void @@ -553,39 +416,26 @@ cifs_proc_clean(void) #endif remove_proc_entry("MultiuserMount", proc_fs_cifs); remove_proc_entry("OplockEnabled", proc_fs_cifs); -/* remove_proc_entry("NTLMV2Enabled",proc_fs_cifs); */ remove_proc_entry("SecurityFlags", proc_fs_cifs); -/* remove_proc_entry("PacketSigningEnabled", proc_fs_cifs); */ remove_proc_entry("LinuxExtensionsEnabled", proc_fs_cifs); remove_proc_entry("Experimental", proc_fs_cifs); remove_proc_entry("LookupCacheEnabled", proc_fs_cifs); remove_proc_entry("fs/cifs", NULL); } -static int -cifsFYI_read(char *page, char **start, off_t off, int count, - int *eof, void *data) +static int cifsFYI_proc_show(struct seq_file *m, void *v) { - int len; - - len = sprintf(page, "%d\n", cifsFYI); - - len -= off; - *start = page + off; - - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; + seq_printf(m, "%d\n", cifsFYI); + return 0; +} - return len; +static int cifsFYI_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cifsFYI_proc_show, NULL); } -static int -cifsFYI_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + +static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { char c; int rc; @@ -603,30 +453,28 @@ cifsFYI_write(struct file *file, const char __user *buffer, return count; } -static int -oplockEnabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = sprintf(page, "%d\n", oplockEnabled); - - len -= off; - *start = page + off; +static const struct file_operations cifsFYI_proc_fops = { + .owner = THIS_MODULE, + .open = cifsFYI_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = cifsFYI_proc_write, +}; - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; +static int cifs_oplock_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", oplockEnabled); + return 0; +} - return len; +static int cifs_oplock_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cifs_oplock_proc_show, NULL); } -static int -oplockEnabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + +static ssize_t cifs_oplock_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { char c; int rc; @@ -642,30 +490,28 @@ oplockEnabled_write(struct file *file, const char __user *buffer, return count; } -static int -experimEnabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = sprintf(page, "%d\n", experimEnabled); +static const struct file_operations cifs_oplock_proc_fops = { + .owner = THIS_MODULE, + .open = cifs_oplock_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = cifs_oplock_proc_write, +}; - len -= off; - *start = page + off; - - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; +static int cifs_experimental_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", experimEnabled); + return 0; +} - return len; +static int cifs_experimental_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cifs_experimental_proc_show, NULL); } -static int -experimEnabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + +static ssize_t cifs_experimental_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { char c; int rc; @@ -683,29 +529,28 @@ experimEnabled_write(struct file *file, const char __user *buffer, return count; } -static int -linuxExtensionsEnabled_read(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = sprintf(page, "%d\n", linuxExtEnabled); - len -= off; - *start = page + off; +static const struct file_operations cifs_experimental_proc_fops = { + .owner = THIS_MODULE, + .open = cifs_experimental_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = cifs_experimental_proc_write, +}; - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; +static int cifs_linux_ext_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", linuxExtEnabled); + return 0; +} - return len; +static int cifs_linux_ext_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cifs_linux_ext_proc_show, NULL); } -static int -linuxExtensionsEnabled_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + +static ssize_t cifs_linux_ext_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { char c; int rc; @@ -721,31 +566,28 @@ linuxExtensionsEnabled_write(struct file *file, const char __user *buffer, return count; } +static const struct file_operations cifs_linux_ext_proc_fops = { + .owner = THIS_MODULE, + .open = cifs_linux_ext_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = cifs_linux_ext_proc_write, +}; -static int -lookupFlag_read(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int cifs_lookup_cache_proc_show(struct seq_file *m, void *v) { - int len; - - len = sprintf(page, "%d\n", lookupCacheEnabled); - - len -= off; - *start = page + off; - - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; + seq_printf(m, "%d\n", lookupCacheEnabled); + return 0; +} - return len; +static int cifs_lookup_cache_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cifs_lookup_cache_proc_show, NULL); } -static int -lookupFlag_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + +static ssize_t cifs_lookup_cache_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { char c; int rc; @@ -760,30 +602,29 @@ lookupFlag_write(struct file *file, const char __user *buffer, return count; } -static int -traceSMB_read(char *page, char **start, off_t off, int count, - int *eof, void *data) -{ - int len; - - len = sprintf(page, "%d\n", traceSMB); - - len -= off; - *start = page + off; - if (len > count) - len = count; - else - *eof = 1; +static const struct file_operations cifs_lookup_cache_proc_fops = { + .owner = THIS_MODULE, + .open = cifs_lookup_cache_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = cifs_lookup_cache_proc_write, +}; - if (len < 0) - len = 0; +static int traceSMB_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", traceSMB); + return 0; +} - return len; +static int traceSMB_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, traceSMB_proc_show, NULL); } -static int -traceSMB_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + +static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) { char c; int rc; @@ -799,30 +640,28 @@ traceSMB_write(struct file *file, const char __user *buffer, return count; } -static int -multiuser_mount_read(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = sprintf(page, "%d\n", multiuser_mount); - - len -= off; - *start = page + off; +static const struct file_operations traceSMB_proc_fops = { + .owner = THIS_MODULE, + .open = traceSMB_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = traceSMB_proc_write, +}; - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; +static int cifs_multiuser_mount_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%d\n", multiuser_mount); + return 0; +} - return len; +static int cifs_multiuser_mount_proc_open(struct inode *inode, struct file *fh) +{ + return single_open(fh, cifs_multiuser_mount_proc_show, NULL); } -static int -multiuser_mount_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + +static ssize_t cifs_multiuser_mount_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { char c; int rc; @@ -838,30 +677,28 @@ multiuser_mount_write(struct file *file, const char __user *buffer, return count; } -static int -security_flags_read(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = sprintf(page, "0x%x\n", extended_security); - - len -= off; - *start = page + off; +static const struct file_operations cifs_multiuser_mount_proc_fops = { + .owner = THIS_MODULE, + .open = cifs_multiuser_mount_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = cifs_multiuser_mount_proc_write, +}; - if (len > count) - len = count; - else - *eof = 1; - - if (len < 0) - len = 0; +static int cifs_security_flags_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "0x%x\n", extended_security); + return 0; +} - return len; +static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cifs_security_flags_proc_show, NULL); } -static int -security_flags_write(struct file *file, const char __user *buffer, - unsigned long count, void *data) + +static ssize_t cifs_security_flags_proc_write(struct file *file, + const char __user *buffer, size_t count, loff_t *ppos) { unsigned int flags; char flags_string[12]; @@ -917,6 +754,15 @@ security_flags_write(struct file *file, const char __user *buffer, /* BB should we turn on MAY flags for other MUST options? */ return count; } + +static const struct file_operations cifs_security_flags_proc_fops = { + .owner = THIS_MODULE, + .open = cifs_security_flags_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = cifs_security_flags_proc_write, +}; #else inline void cifs_proc_init(void) { diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index d82374c9e32..d2c8eef84f3 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -226,7 +226,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd, int err; mntget(newmnt); - err = do_add_mount(newmnt, nd, nd->path.mnt->mnt_flags, mntlist); + err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist); switch (err) { case 0: path_put(&nd->path); diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 7013aaff6ae..2434ab0e879 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c @@ -66,8 +66,8 @@ struct key_type cifs_spnego_key_type = { .describe = user_describe, }; -#define MAX_VER_STR_LEN 9 /* length of longest version string e.g. - strlen(";ver=0xFF") */ +#define MAX_VER_STR_LEN 8 /* length of longest version string e.g. + strlen("ver=0xFF") */ #define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg in future could have strlen(";sec=ntlmsspi") */ #define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ @@ -81,11 +81,15 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) struct key *spnego_key; const char *hostname = server->hostname; - /* BB: come up with better scheme for determining length */ - /* length of fields (with semicolons): ver=0xyz ipv4= ipaddress host= - hostname sec=mechanism uid=0x uid */ - desc_len = MAX_VER_STR_LEN + 5 + MAX_IPV6_ADDR_LEN + 1 + 6 + - strlen(hostname) + MAX_MECH_STR_LEN + 8 + (sizeof(uid_t) * 2); + /* length of fields (with semicolons): ver=0xyz ip4=ipaddress + host=hostname sec=mechanism uid=0xFF user=username */ + desc_len = MAX_VER_STR_LEN + + 6 /* len of "host=" */ + strlen(hostname) + + 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN + + MAX_MECH_STR_LEN + + 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) + + 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1; + spnego_key = ERR_PTR(-ENOMEM); description = kzalloc(desc_len, GFP_KERNEL); if (description == NULL) diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 0e9fc2ba90e..57ecdc83c26 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -56,7 +56,7 @@ int match_sid(struct cifs_sid *ctsid) struct cifs_sid *cwsid; if (!ctsid) - return (-1); + return -1; for (i = 0; i < NUM_WK_SIDS; ++i) { cwsid = &(wksidarr[i].cifssid); @@ -87,11 +87,11 @@ int match_sid(struct cifs_sid *ctsid) } cFYI(1, ("matching sid: %s\n", wksidarr[i].sidname)); - return (0); /* sids compare/match */ + return 0; /* sids compare/match */ } cFYI(1, ("No matching sid")); - return (-1); + return -1; } /* if the two SIDs (roughly equivalent to a UUID for a user or group) are @@ -102,16 +102,16 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) int num_subauth, num_sat, num_saw; if ((!ctsid) || (!cwsid)) - return (0); + return 0; /* compare the revision */ if (ctsid->revision != cwsid->revision) - return (0); + return 0; /* compare all of the six auth values */ for (i = 0; i < 6; ++i) { if (ctsid->authority[i] != cwsid->authority[i]) - return (0); + return 0; } /* compare all of the subauth values if any */ @@ -121,11 +121,11 @@ int compare_sids(const struct cifs_sid *ctsid, const struct cifs_sid *cwsid) if (num_subauth) { for (i = 0; i < num_subauth; ++i) { if (ctsid->sub_auth[i] != cwsid->sub_auth[i]) - return (0); + return 0; } } - return (1); /* sids compare/match */ + return 1; /* sids compare/match */ } @@ -169,8 +169,7 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd, for (i = 0; i < 6; i++) ngroup_sid_ptr->authority[i] = group_sid_ptr->authority[i]; for (i = 0; i < 5; i++) - ngroup_sid_ptr->sub_auth[i] = - cpu_to_le32(group_sid_ptr->sub_auth[i]); + ngroup_sid_ptr->sub_auth[i] = group_sid_ptr->sub_auth[i]; return; } @@ -285,7 +284,7 @@ static __u16 fill_ace_for_sid(struct cifs_ace *pntace, size = 1 + 1 + 2 + 4 + 1 + 1 + 6 + (psid->num_subauth * 4); pntace->size = cpu_to_le16(size); - return (size); + return size; } @@ -426,7 +425,7 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid, pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl)); pndacl->num_aces = cpu_to_le32(3); - return (0); + return 0; } @@ -510,7 +509,7 @@ static int parse_sec_desc(struct cifs_ntsd *pntsd, int acl_len, sizeof(struct cifs_sid)); */ - return (0); + return 0; } @@ -527,7 +526,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */ if ((inode == NULL) || (pntsd == NULL) || (pnntsd == NULL)) - return (-EIO); + return -EIO; owner_sid_ptr = (struct cifs_sid *)((char *)pntsd + le32_to_cpu(pntsd->osidoffset)); @@ -550,7 +549,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd, /* copy security descriptor control portion and owner and group sid */ copy_sec_desc(pntsd, pnntsd, sidsoffset); - return (rc); + return rc; } @@ -629,11 +628,11 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, cFYI(DBG2, ("set ACL for %s from mode 0x%x", path, inode->i_mode)); if (!inode) - return (rc); + return rc; sb = inode->i_sb; if (sb == NULL) - return (rc); + return rc; cifs_sb = CIFS_SB(sb); xid = GetXid(); @@ -652,7 +651,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, if (rc != 0) { cERROR(1, ("Unable to open file to set ACL")); FreeXid(xid); - return (rc); + return rc; } } @@ -665,7 +664,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, FreeXid(xid); - return (rc); + return rc; } /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */ @@ -715,7 +714,7 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) if (!pnntsd) { cERROR(1, ("Unable to allocate security descriptor")); kfree(pntsd); - return (-ENOMEM); + return -ENOMEM; } rc = build_sec_desc(pntsd, pnntsd, inode, nmode); @@ -732,6 +731,6 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode) kfree(pntsd); } - return (rc); + return rc; } #endif /* CONFIG_CIFS_EXPERIMENTAL */ diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 4ff8939c6cc..83fd40dc1ef 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -310,9 +310,8 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) utf8 and other multibyte codepages each need their own strupper function since a byte at a time will ont work. */ - for (i = 0; i < CIFS_ENCPWD_SIZE; i++) { + for (i = 0; i < CIFS_ENCPWD_SIZE; i++) password_with_pad[i] = toupper(password_with_pad[i]); - } SMBencrypt(password_with_pad, ses->server->cryptKey, lnm_session_key); /* clear password before we return/free memory */ diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 22857c639df..25ecbd5b040 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -175,6 +175,8 @@ out_no_root: if (inode) iput(inode); + cifs_umount(sb, cifs_sb); + out_mount_failed: if (cifs_sb) { #ifdef CONFIG_CIFS_DFS_UPCALL @@ -267,7 +269,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int cifs_permission(struct inode *inode, int mask, struct nameidata *nd) +static int cifs_permission(struct inode *inode, int mask) { struct cifs_sb_info *cifs_sb; @@ -766,7 +768,7 @@ const struct file_operations cifs_dir_ops = { }; static void -cifs_init_once(struct kmem_cache *cachep, void *inode) +cifs_init_once(void *inode) { struct cifsInodeInfo *cifsi = inode; @@ -930,36 +932,34 @@ static int cifs_oplock_thread(void *dummyarg) schedule_timeout(39*HZ); } else { oplock_item = list_entry(GlobalOplock_Q.next, - struct oplock_q_entry, qhead); - if (oplock_item) { - cFYI(1, ("found oplock item to write out")); - pTcon = oplock_item->tcon; - inode = oplock_item->pinode; - netfid = oplock_item->netfid; - spin_unlock(&GlobalMid_Lock); - DeleteOplockQEntry(oplock_item); - /* can not grab inode sem here since it would + struct oplock_q_entry, qhead); + cFYI(1, ("found oplock item to write out")); + pTcon = oplock_item->tcon; + inode = oplock_item->pinode; + netfid = oplock_item->netfid; + spin_unlock(&GlobalMid_Lock); + DeleteOplockQEntry(oplock_item); + /* can not grab inode sem here since it would deadlock when oplock received on delete since vfs_unlink holds the i_mutex across the call */ - /* mutex_lock(&inode->i_mutex);*/ - if (S_ISREG(inode->i_mode)) { - rc = - filemap_fdatawrite(inode->i_mapping); - if (CIFS_I(inode)->clientCanCacheRead - == 0) { - waitrc = filemap_fdatawait(inode->i_mapping); - invalidate_remote_inode(inode); - } - if (rc == 0) - rc = waitrc; - } else - rc = 0; - /* mutex_unlock(&inode->i_mutex);*/ - if (rc) - CIFS_I(inode)->write_behind_rc = rc; - cFYI(1, ("Oplock flush inode %p rc %d", - inode, rc)); + /* mutex_lock(&inode->i_mutex);*/ + if (S_ISREG(inode->i_mode)) { + rc = filemap_fdatawrite(inode->i_mapping); + if (CIFS_I(inode)->clientCanCacheRead == 0) { + waitrc = filemap_fdatawait( + inode->i_mapping); + invalidate_remote_inode(inode); + } + if (rc == 0) + rc = waitrc; + } else + rc = 0; + /* mutex_unlock(&inode->i_mutex);*/ + if (rc) + CIFS_I(inode)->write_behind_rc = rc; + cFYI(1, ("Oplock flush inode %p rc %d", + inode, rc)); /* releasing stale oplock after recent reconnect of smb session using a now incorrect file @@ -967,15 +967,13 @@ static int cifs_oplock_thread(void *dummyarg) not bother sending an oplock release if session to server still is disconnected since oplock already released by the server in that case */ - if (pTcon->tidStatus != CifsNeedReconnect) { - rc = CIFSSMBLock(0, pTcon, netfid, - 0 /* len */ , 0 /* offset */, 0, - 0, LOCKING_ANDX_OPLOCK_RELEASE, - false /* wait flag */); - cFYI(1, ("Oplock release rc = %d", rc)); - } - } else - spin_unlock(&GlobalMid_Lock); + if (pTcon->tidStatus != CifsNeedReconnect) { + rc = CIFSSMBLock(0, pTcon, netfid, + 0 /* len */ , 0 /* offset */, 0, + 0, LOCKING_ANDX_OPLOCK_RELEASE, + false /* wait flag */); + cFYI(1, ("Oplock release rc = %d", rc)); + } set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); /* yield in case q were corrupt */ } @@ -1001,8 +999,7 @@ static int cifs_dnotify_thread(void *dummyarg) list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); - if (ses && ses->server && - atomic_read(&ses->server->inFlight)) + if (ses->server && atomic_read(&ses->server->inFlight)) wake_up_all(&ses->server->response_q); } read_unlock(&GlobalSMBSeslock); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 25a6cbd1552..135c965c413 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.53" +#define CIFS_VERSION "1.54" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 9cfcf326ead..7e1cf262eff 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -27,7 +27,7 @@ #define MAX_SES_INFO 2 #define MAX_TCON_INFO 4 -#define MAX_TREE_SIZE 2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1 +#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) #define MAX_SERVER_SIZE 15 #define MAX_SHARE_SIZE 64 /* used to be 20, this should still be enough */ #define MAX_USERNAME_SIZE 32 /* 32 is to allow for 15 char names + null @@ -537,8 +537,8 @@ require use of the stronger protocol */ #endif /* WEAK_PW_HASH */ #define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ -#define CIFSSEC_DEF CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 -#define CIFSSEC_MAX CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2 +#define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2) +#define CIFSSEC_MAX (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_NTLMV2) #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLM | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_LANMAN | CIFSSEC_MAY_PLNTXT | CIFSSEC_MAY_KRB5) /* ***************************************************************** diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 0f327c224da..d2a073edd1b 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -31,7 +31,7 @@ #else #define CIFS_PROT 0 #endif -#define POSIX_PROT CIFS_PROT+1 +#define POSIX_PROT (CIFS_PROT+1) #define BAD_PROT 0xFFFF /* SMB command codes */ @@ -262,7 +262,7 @@ */ #define CIFS_NO_HANDLE 0xFFFF -#define NO_CHANGE_64 cpu_to_le64(0xFFFFFFFFFFFFFFFFULL) +#define NO_CHANGE_64 0xFFFFFFFFFFFFFFFFULL #define NO_CHANGE_32 0xFFFFFFFFUL /* IPC$ in ASCII */ @@ -341,7 +341,7 @@ #define CREATE_COMPLETE_IF_OPLK 0x00000100 /* should be zero */ #define CREATE_NO_EA_KNOWLEDGE 0x00000200 #define CREATE_EIGHT_DOT_THREE 0x00000400 /* doc says this is obsolete - "open for recovery" flag - should + "open for recovery" flag should be zero in any case */ #define CREATE_OPEN_FOR_RECOVERY 0x00000400 #define CREATE_RANDOM_ACCESS 0x00000800 @@ -414,8 +414,8 @@ struct smb_hdr { __u8 WordCount; } __attribute__((packed)); /* given a pointer to an smb_hdr retrieve the value of byte count */ -#define BCC(smb_var) ( *(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount))) -#define BCC_LE(smb_var) ( *(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount))) +#define BCC(smb_var) (*(__u16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount))) +#define BCC_LE(smb_var) (*(__le16 *)((char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount))) /* given a pointer to an smb_hdr retrieve the pointer to the byte area */ #define pByteArea(smb_var) ((unsigned char *)smb_var + sizeof(struct smb_hdr) + (2 * smb_var->WordCount) + 2) diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index b9f5e935f82..a729d083e6f 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -172,12 +172,13 @@ extern int CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon); extern int CIFSSMBQFSPosixInfo(const int xid, struct cifsTconInfo *tcon, struct kstatfs *FSData); -extern int CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, +extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, const char *fileName, const FILE_BASIC_INFO *data, const struct nls_table *nls_codepage, int remap_special_chars); -extern int CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, - const FILE_BASIC_INFO *data, __u16 fid); +extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, + const FILE_BASIC_INFO *data, __u16 fid, + __u32 pid_of_opener); #if 0 extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, char *fileName, __u16 dos_attributes, @@ -191,9 +192,20 @@ extern int CIFSSMBSetEOF(const int xid, struct cifsTconInfo *tcon, extern int CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, __u16 fileHandle, __u32 opener_pid, bool AllocSizeFlag); -extern int CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *pTcon, - char *full_path, __u64 mode, __u64 uid, - __u64 gid, dev_t dev, + +struct cifs_unix_set_info_args { + __u64 ctime; + __u64 atime; + __u64 mtime; + __u64 mode; + __u64 uid; + __u64 gid; + dev_t device; +}; + +extern int CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *pTcon, + char *fileName, + const struct cifs_unix_set_info_args *args, const struct nls_table *nls_codepage, int remap_special_chars); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 4511b708f0f..994de7c9047 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -128,8 +128,7 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) write_lock(&GlobalSMBSeslock); list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { open_file = list_entry(tmp, struct cifsFileInfo, tlist); - if (open_file) - open_file->invalidHandle = true; + open_file->invalidHandle = true; } write_unlock(&GlobalSMBSeslock); /* BB Add call to invalidate_inodes(sb) for all superblocks mounted @@ -686,11 +685,10 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) SecurityBlob, count - 16, &server->secType); - if (rc == 1) { + if (rc == 1) rc = 0; - } else { + else rc = -EINVAL; - } } } else server->capabilities &= ~CAP_EXTENDED_SECURITY; @@ -3914,7 +3912,10 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, bool is_unicode; struct dfs_referral_level_3 *ref; - is_unicode = pSMBr->hdr.Flags2 & SMBFLG2_UNICODE; + if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) + is_unicode = true; + else + is_unicode = false; *num_of_nodes = le16_to_cpu(pSMBr->NumberOfReferrals); if (*num_of_nodes < 1) { @@ -4814,8 +4815,8 @@ CIFSSMBSetFileSize(const int xid, struct cifsTconInfo *tcon, __u64 size, time and resort to the original setpathinfo level which takes the ancient DOS time format with 2 second granularity */ int -CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, - const FILE_BASIC_INFO *data, __u16 fid) +CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, + const FILE_BASIC_INFO *data, __u16 fid, __u32 pid_of_opener) { struct smb_com_transaction2_sfi_req *pSMB = NULL; char *data_offset; @@ -4828,11 +4829,8 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, if (rc) return rc; - /* At this point there is no need to override the current pid - with the pid of the opener, but that could change if we someday - use an existing handle (rather than opening one on the fly) */ - /* pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); - pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16));*/ + pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); + pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); params = 6; pSMB->MaxSetupCount = 0; @@ -4880,9 +4878,9 @@ CIFSSMBSetFileTimes(const int xid, struct cifsTconInfo *tcon, int -CIFSSMBSetTimes(const int xid, struct cifsTconInfo *tcon, const char *fileName, - const FILE_BASIC_INFO *data, - const struct nls_table *nls_codepage, int remap) +CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, + const char *fileName, const FILE_BASIC_INFO *data, + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -5011,10 +5009,9 @@ SetAttrLgcyRetry: #endif /* temporarily unneeded SetAttr legacy function */ int -CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon, - char *fileName, __u64 mode, __u64 uid, __u64 gid, - dev_t device, const struct nls_table *nls_codepage, - int remap) +CIFSSMBUnixSetInfo(const int xid, struct cifsTconInfo *tcon, char *fileName, + const struct cifs_unix_set_info_args *args, + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -5023,6 +5020,7 @@ CIFSSMBUnixSetPerms(const int xid, struct cifsTconInfo *tcon, int bytes_returned = 0; FILE_UNIX_BASIC_INFO *data_offset; __u16 params, param_offset, offset, count, byte_count; + __u64 mode = args->mode; cFYI(1, ("In SetUID/GID/Mode")); setPermsRetry: @@ -5078,16 +5076,16 @@ setPermsRetry: set file size and do not want to truncate file size to zero accidently as happened on one Samba server beta by putting zero instead of -1 here */ - data_offset->EndOfFile = NO_CHANGE_64; - data_offset->NumOfBytes = NO_CHANGE_64; - data_offset->LastStatusChange = NO_CHANGE_64; - data_offset->LastAccessTime = NO_CHANGE_64; - data_offset->LastModificationTime = NO_CHANGE_64; - data_offset->Uid = cpu_to_le64(uid); - data_offset->Gid = cpu_to_le64(gid); + data_offset->EndOfFile = cpu_to_le64(NO_CHANGE_64); + data_offset->NumOfBytes = cpu_to_le64(NO_CHANGE_64); + data_offset->LastStatusChange = cpu_to_le64(args->ctime); + data_offset->LastAccessTime = cpu_to_le64(args->atime); + data_offset->LastModificationTime = cpu_to_le64(args->mtime); + data_offset->Uid = cpu_to_le64(args->uid); + data_offset->Gid = cpu_to_le64(args->gid); /* better to leave device as zero when it is */ - data_offset->DevMajor = cpu_to_le64(MAJOR(device)); - data_offset->DevMinor = cpu_to_le64(MINOR(device)); + data_offset->DevMajor = cpu_to_le64(MAJOR(args->device)); + data_offset->DevMinor = cpu_to_le64(MINOR(args->device)); data_offset->Permissions = cpu_to_le64(mode); if (S_ISREG(mode)) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e8fa46c7cff..0711db65afe 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -151,7 +151,7 @@ cifs_reconnect(struct TCP_Server_Info *server) } list_for_each(tmp, &GlobalTreeConnectionList) { tcon = list_entry(tmp, struct cifsTconInfo, cifsConnectionList); - if ((tcon) && (tcon->ses) && (tcon->ses->server == server)) + if ((tcon->ses) && (tcon->ses->server == server)) tcon->tidStatus = CifsNeedReconnect; } read_unlock(&GlobalSMBSeslock); @@ -173,14 +173,12 @@ cifs_reconnect(struct TCP_Server_Info *server) mid_entry = list_entry(tmp, struct mid_q_entry, qhead); - if (mid_entry) { - if (mid_entry->midState == MID_REQUEST_SUBMITTED) { + if (mid_entry->midState == MID_REQUEST_SUBMITTED) { /* Mark other intransit requests as needing retry so we do not immediately mark the session bad again (ie after we reconnect below) as they timeout too */ - mid_entry->midState = MID_RETRY_NEEDED; - } + mid_entry->midState = MID_RETRY_NEEDED; } } spin_unlock(&GlobalMid_Lock); @@ -351,11 +349,9 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) current->flags |= PF_MEMALLOC; cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); - write_lock(&GlobalSMBSeslock); - atomic_inc(&tcpSesAllocCount); - length = tcpSesAllocCount.counter; - write_unlock(&GlobalSMBSeslock); - if (length > 1) + + length = atomic_inc_return(&tcpSesAllocCount); + if (length > 1) mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); @@ -455,7 +451,7 @@ incomplete_rcv: /* Note that FC 1001 length is big endian on the wire, but we convert it here so it is always manipulated as host byte order */ - pdu_length = ntohl(smb_buffer->smb_buf_length); + pdu_length = be32_to_cpu((__force __be32)smb_buffer->smb_buf_length); smb_buffer->smb_buf_length = pdu_length; cFYI(1, ("rfc1002 length 0x%x", pdu_length+4)); @@ -745,14 +741,11 @@ multi_t2_fnd: coming home not much else we can do but free the memory */ } - write_lock(&GlobalSMBSeslock); - atomic_dec(&tcpSesAllocCount); - length = tcpSesAllocCount.counter; - /* last chance to mark ses pointers invalid if there are any pointing to this (e.g if a crazy root user tried to kill cifsd kernel thread explicitly this might happen) */ + write_lock(&GlobalSMBSeslock); list_for_each(tmp, &GlobalSMBSessionList) { ses = list_entry(tmp, struct cifsSesInfo, cifsSessionList); @@ -763,6 +756,8 @@ multi_t2_fnd: kfree(server->hostname); kfree(server); + + length = atomic_dec_return(&tcpSesAllocCount); if (length > 0) mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); @@ -1461,6 +1456,39 @@ get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, return rc; } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static struct lock_class_key cifs_key[2]; +static struct lock_class_key cifs_slock_key[2]; + +static inline void +cifs_reclassify_socket4(struct socket *sock) +{ + struct sock *sk = sock->sk; + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_INET-CIFS", + &cifs_slock_key[0], "sk_lock-AF_INET-CIFS", &cifs_key[0]); +} + +static inline void +cifs_reclassify_socket6(struct socket *sock) +{ + struct sock *sk = sock->sk; + BUG_ON(sock_owned_by_user(sk)); + sock_lock_init_class_and_name(sk, "slock-AF_INET6-CIFS", + &cifs_slock_key[1], "sk_lock-AF_INET6-CIFS", &cifs_key[1]); +} +#else +static inline void +cifs_reclassify_socket4(struct socket *sock) +{ +} + +static inline void +cifs_reclassify_socket6(struct socket *sock) +{ +} +#endif + /* See RFC1001 section 14 on representation of Netbios names */ static void rfc1002mangle(char *target, char *source, unsigned int length) { @@ -1495,6 +1523,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, /* BB other socket options to set KEEPALIVE, NODELAY? */ cFYI(1, ("Socket created")); (*csocket)->sk->sk_allocation = GFP_NOFS; + cifs_reclassify_socket4(*csocket); } } @@ -1627,6 +1656,7 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) /* BB other socket options to set KEEPALIVE, NODELAY? */ cFYI(1, ("ipv6 Socket created")); (*csocket)->sk->sk_allocation = GFP_NOFS; + cifs_reclassify_socket6(*csocket); } } @@ -3588,97 +3618,91 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, } first_time = 1; } - if (!rc) { - pSesInfo->flags = 0; - pSesInfo->capabilities = pSesInfo->server->capabilities; - if (linuxExtEnabled == 0) - pSesInfo->capabilities &= (~CAP_UNIX); + + if (rc) + goto ss_err_exit; + + pSesInfo->flags = 0; + pSesInfo->capabilities = pSesInfo->server->capabilities; + if (linuxExtEnabled == 0) + pSesInfo->capabilities &= (~CAP_UNIX); /* pSesInfo->sequence_number = 0;*/ - cFYI(1, - ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", - pSesInfo->server->secMode, - pSesInfo->server->capabilities, - pSesInfo->server->timeAdj)); - if (experimEnabled < 2) - rc = CIFS_SessSetup(xid, pSesInfo, - first_time, nls_info); - else if (extended_security - && (pSesInfo->capabilities - & CAP_EXTENDED_SECURITY) - && (pSesInfo->server->secType == NTLMSSP)) { - rc = -EOPNOTSUPP; - } else if (extended_security - && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) - && (pSesInfo->server->secType == RawNTLMSSP)) { - cFYI(1, ("NTLMSSP sesssetup")); - rc = CIFSNTLMSSPNegotiateSessSetup(xid, - pSesInfo, - &ntlmv2_flag, - nls_info); - if (!rc) { - if (ntlmv2_flag) { - char *v2_response; - cFYI(1, ("more secure NTLM ver2 hash")); - if (CalcNTLMv2_partial_mac_key(pSesInfo, - nls_info)) { - rc = -ENOMEM; - goto ss_err_exit; - } else - v2_response = kmalloc(16 + 64 /* blob */, GFP_KERNEL); - if (v2_response) { - CalcNTLMv2_response(pSesInfo, - v2_response); - /* if (first_time) - cifs_calculate_ntlmv2_mac_key( - pSesInfo->server->mac_signing_key, - response, ntlm_session_key,*/ - kfree(v2_response); + cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", + pSesInfo->server->secMode, + pSesInfo->server->capabilities, + pSesInfo->server->timeAdj)); + if (experimEnabled < 2) + rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); + else if (extended_security + && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) + && (pSesInfo->server->secType == NTLMSSP)) { + rc = -EOPNOTSUPP; + } else if (extended_security + && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) + && (pSesInfo->server->secType == RawNTLMSSP)) { + cFYI(1, ("NTLMSSP sesssetup")); + rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, + nls_info); + if (!rc) { + if (ntlmv2_flag) { + char *v2_response; + cFYI(1, ("more secure NTLM ver2 hash")); + if (CalcNTLMv2_partial_mac_key(pSesInfo, + nls_info)) { + rc = -ENOMEM; + goto ss_err_exit; + } else + v2_response = kmalloc(16 + 64 /* blob*/, + GFP_KERNEL); + if (v2_response) { + CalcNTLMv2_response(pSesInfo, + v2_response); + /* if (first_time) + cifs_calculate_ntlmv2_mac_key */ + kfree(v2_response); /* BB Put dummy sig in SessSetup PDU? */ - } else { - rc = -ENOMEM; - goto ss_err_exit; - } - } else { - SMBNTencrypt(pSesInfo->password, - pSesInfo->server->cryptKey, - ntlm_session_key); - - if (first_time) - cifs_calculate_mac_key( - &pSesInfo->server->mac_signing_key, - ntlm_session_key, - pSesInfo->password); + rc = -ENOMEM; + goto ss_err_exit; } + + } else { + SMBNTencrypt(pSesInfo->password, + pSesInfo->server->cryptKey, + ntlm_session_key); + + if (first_time) + cifs_calculate_mac_key( + &pSesInfo->server->mac_signing_key, + ntlm_session_key, + pSesInfo->password); + } /* for better security the weaker lanman hash not sent in AuthSessSetup so we no longer calculate it */ - rc = CIFSNTLMSSPAuthSessSetup(xid, - pSesInfo, - ntlm_session_key, - ntlmv2_flag, - nls_info); - } - } else { /* old style NTLM 0.12 session setup */ - SMBNTencrypt(pSesInfo->password, - pSesInfo->server->cryptKey, - ntlm_session_key); + rc = CIFSNTLMSSPAuthSessSetup(xid, pSesInfo, + ntlm_session_key, + ntlmv2_flag, + nls_info); + } + } else { /* old style NTLM 0.12 session setup */ + SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey, + ntlm_session_key); - if (first_time) - cifs_calculate_mac_key( + if (first_time) + cifs_calculate_mac_key( &pSesInfo->server->mac_signing_key, ntlm_session_key, pSesInfo->password); - rc = CIFSSessSetup(xid, pSesInfo, - ntlm_session_key, nls_info); - } - if (rc) { - cERROR(1, ("Send error in SessSetup = %d", rc)); - } else { - cFYI(1, ("CIFS Session Established successfully")); + rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); + } + if (rc) { + cERROR(1, ("Send error in SessSetup = %d", rc)); + } else { + cFYI(1, ("CIFS Session Established successfully")); pSesInfo->status = CifsGood; - } } + ss_err_exit: return rc; } diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index fb69c1fa85c..e962e75e6f7 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -226,23 +226,28 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, /* If Open reported that we actually created a file then we now have to set the mode if possible */ if ((pTcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { + struct cifs_unix_set_info_args args = { + .mode = mode, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, - (__u64)current->fsuid, - (__u64)current->fsgid, - 0 /* dev */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = (__u64) current->fsuid; + if (inode->i_mode & S_ISGID) + args.gid = (__u64) inode->i_gid; + else + args.gid = (__u64) current->fsgid; } else { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, - (__u64)-1, - (__u64)-1, - 0 /* dev */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; } + CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { /* BB implement mode setting via Windows security descriptors e.g. */ @@ -267,7 +272,12 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { newinode->i_uid = current->fsuid; - newinode->i_gid = current->fsgid; + if (inode->i_mode & S_ISGID) + newinode->i_gid = + inode->i_gid; + else + newinode->i_gid = + current->fsgid; } } } @@ -357,21 +367,24 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, if (full_path == NULL) rc = -ENOMEM; else if (pTcon->unix_ext) { - mode &= ~current->fs->umask; + struct cifs_unix_set_info_args args = { + .mode = mode & ~current->fs->umask, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = device_number, + }; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, - mode, (__u64)current->fsuid, - (__u64)current->fsgid, - device_number, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = (__u64) current->fsuid; + args.gid = (__u64) current->fsgid; } else { - rc = CIFSSMBUnixSetPerms(xid, pTcon, - full_path, mode, (__u64)-1, (__u64)-1, - device_number, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; } + rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, + &args, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); if (!rc) { rc = cifs_get_inode_info_unix(&newinode, full_path, diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 0aac824371a..ff14d14903a 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -310,18 +310,19 @@ int cifs_open(struct inode *inode, struct file *file) /* time to set mode which we can not set earlier due to problems creating new read-only files */ if (pTcon->unix_ext) { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, - inode->i_mode, - (__u64)-1, (__u64)-1, 0 /* dev */, + struct cifs_unix_set_info_args args = { + .mode = inode->i_mode, + .uid = NO_CHANGE_64, + .gid = NO_CHANGE_64, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; + CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); - } else { - /* BB implement via Windows security descriptors eg - CIFSSMBWinSetPerms(xid, pTcon, full_path, mode, - -1, -1, local_nls); - in the meantime could set r/o dos attribute when - perms are eg: mode & 0222 == 0 */ } } @@ -1280,7 +1281,7 @@ retry: if (first < 0) lock_page(page); - else if (TestSetPageLocked(page)) + else if (!trylock_page(page)) break; if (unlikely(page->mapping != mapping)) { diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2e904bd111c..848286861c3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -649,6 +649,7 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino) inode->i_fop = &simple_dir_operations; inode->i_uid = cifs_sb->mnt_uid; inode->i_gid = cifs_sb->mnt_gid; + } else if (rc) { _FreeXid(xid); iget_failed(inode); return ERR_PTR(rc); @@ -737,7 +738,7 @@ psx_del_no_retry: /* ATTRS set to normal clears r/o bit */ pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); if (!(pTcon->ses->flags & CIFS_SES_NT4)) - rc = CIFSSMBSetTimes(xid, pTcon, full_path, + rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, pinfo_buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & @@ -767,9 +768,10 @@ psx_del_no_retry: cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc == 0) { - rc = CIFSSMBSetFileTimes(xid, pTcon, - pinfo_buf, - netfid); + rc = CIFSSMBSetFileInfo(xid, pTcon, + pinfo_buf, + netfid, + current->tgid); CIFSSMBClose(xid, pTcon, netfid); } } @@ -984,32 +986,41 @@ mkdir_get_info: * failed to get it from the server or was set bogus */ if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2)) direntry->d_inode->i_nlink = 2; + mode &= ~current->fs->umask; + /* must turn on setgid bit if parent dir has it */ + if (inode->i_mode & S_ISGID) + mode |= S_ISGID; + if (pTcon->unix_ext) { + struct cifs_unix_set_info_args args = { + .mode = mode, + .ctime = NO_CHANGE_64, + .atime = NO_CHANGE_64, + .mtime = NO_CHANGE_64, + .device = 0, + }; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, - mode, - (__u64)current->fsuid, - (__u64)current->fsgid, - 0 /* dev_t */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = (__u64)current->fsuid; + if (inode->i_mode & S_ISGID) + args.gid = (__u64)inode->i_gid; + else + args.gid = (__u64)current->fsgid; } else { - CIFSSMBUnixSetPerms(xid, pTcon, full_path, - mode, (__u64)-1, - (__u64)-1, 0 /* dev_t */, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + args.uid = NO_CHANGE_64; + args.gid = NO_CHANGE_64; } + CIFSSMBUnixSetInfo(xid, pTcon, full_path, &args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else { if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && (mode & S_IWUGO) == 0) { FILE_BASIC_INFO pInfo; memset(&pInfo, 0, sizeof(pInfo)); pInfo.Attributes = cpu_to_le32(ATTR_READONLY); - CIFSSMBSetTimes(xid, pTcon, full_path, + CIFSSMBSetPathInfo(xid, pTcon, full_path, &pInfo, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); @@ -1024,8 +1035,12 @@ mkdir_get_info: CIFS_MOUNT_SET_UID) { direntry->d_inode->i_uid = current->fsuid; - direntry->d_inode->i_gid = - current->fsgid; + if (inode->i_mode & S_ISGID) + direntry->d_inode->i_gid = + inode->i_gid; + else + direntry->d_inode->i_gid = + current->fsgid; } } } @@ -1310,10 +1325,11 @@ int cifs_revalidate(struct dentry *direntry) /* if (S_ISDIR(direntry->d_inode->i_mode)) shrink_dcache_parent(direntry); */ if (S_ISREG(direntry->d_inode->i_mode)) { - if (direntry->d_inode->i_mapping) + if (direntry->d_inode->i_mapping) { wbrc = filemap_fdatawait(direntry->d_inode->i_mapping); if (wbrc) CIFS_I(direntry->d_inode)->write_behind_rc = wbrc; + } /* may eventually have to do this for open files too */ if (list_empty(&(cifsInode->openFileList))) { /* changed on server - flush read ahead pages */ @@ -1413,31 +1429,304 @@ out_busy: return -ETXTBSY; } -int cifs_setattr(struct dentry *direntry, struct iattr *attrs) +static int +cifs_set_file_size(struct inode *inode, struct iattr *attrs, + int xid, char *full_path) { + int rc; + struct cifsFileInfo *open_file; + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *pTcon = cifs_sb->tcon; + + /* + * To avoid spurious oplock breaks from server, in the case of + * inodes that we already have open, avoid doing path based + * setting of file size if we can do it by handle. + * This keeps our caching token (oplock) and avoids timeouts + * when the local oplock break takes longer to flush + * writebehind data than the SMB timeout for the SetPathInfo + * request would allow + */ + open_file = find_writable_file(cifsInode); + if (open_file) { + __u16 nfid = open_file->netfid; + __u32 npid = open_file->pid; + rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, + npid, false); + atomic_dec(&open_file->wrtPending); + cFYI(1, ("SetFSize for attrs rc = %d", rc)); + if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { + unsigned int bytes_written; + rc = CIFSSMBWrite(xid, pTcon, nfid, 0, attrs->ia_size, + &bytes_written, NULL, NULL, 1); + cFYI(1, ("Wrt seteof rc %d", rc)); + } + } else + rc = -EINVAL; + + if (rc != 0) { + /* Set file size by pathname rather than by handle + either because no valid, writeable file handle for + it was found or because there was an error setting + it by handle */ + rc = CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, + false, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); + if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { + __u16 netfid; + int oplock = 0; + + rc = SMBLegacyOpen(xid, pTcon, full_path, + FILE_OPEN, GENERIC_WRITE, + CREATE_NOT_DIR, &netfid, &oplock, NULL, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == 0) { + unsigned int bytes_written; + rc = CIFSSMBWrite(xid, pTcon, netfid, 0, + attrs->ia_size, + &bytes_written, NULL, + NULL, 1); + cFYI(1, ("wrt seteof rc %d", rc)); + CIFSSMBClose(xid, pTcon, netfid); + } + } + } + + if (rc == 0) { + rc = cifs_vmtruncate(inode, attrs->ia_size); + cifs_truncate_page(inode->i_mapping, inode->i_size); + } + + return rc; +} + +static int +cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, + char *full_path, __u32 dosattr) +{ + int rc; + int oplock = 0; + __u16 netfid; + __u32 netpid; + bool set_time = false; + struct cifsFileInfo *open_file; + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *pTcon = cifs_sb->tcon; + FILE_BASIC_INFO info_buf; + + if (attrs->ia_valid & ATTR_ATIME) { + set_time = true; + info_buf.LastAccessTime = + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); + } else + info_buf.LastAccessTime = 0; + + if (attrs->ia_valid & ATTR_MTIME) { + set_time = true; + info_buf.LastWriteTime = + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); + } else + info_buf.LastWriteTime = 0; + + /* + * Samba throws this field away, but windows may actually use it. + * Do not set ctime unless other time stamps are changed explicitly + * (i.e. by utimes()) since we would then have a mix of client and + * server times. + */ + if (set_time && (attrs->ia_valid & ATTR_CTIME)) { + cFYI(1, ("CIFS - CTIME changed")); + info_buf.ChangeTime = + cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); + } else + info_buf.ChangeTime = 0; + + info_buf.CreationTime = 0; /* don't change */ + info_buf.Attributes = cpu_to_le32(dosattr); + + /* + * If the file is already open for write, just use that fileid + */ + open_file = find_writable_file(cifsInode); + if (open_file) { + netfid = open_file->netfid; + netpid = open_file->pid; + goto set_via_filehandle; + } + + /* + * NT4 apparently returns success on this call, but it doesn't + * really work. + */ + if (!(pTcon->ses->flags & CIFS_SES_NT4)) { + rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, + &info_buf, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc != -EOPNOTSUPP && rc != -EINVAL) + goto out; + } + + cFYI(1, ("calling SetFileInfo since SetPathInfo for " + "times not supported by this server")); + rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, + SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, + CREATE_NOT_DIR, &netfid, &oplock, + NULL, cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (rc != 0) { + if (rc == -EIO) + rc = -EINVAL; + goto out; + } + + netpid = current->tgid; + +set_via_filehandle: + rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); + if (open_file == NULL) + CIFSSMBClose(xid, pTcon, netfid); + else + atomic_dec(&open_file->wrtPending); +out: + return rc; +} + +static int +cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) +{ + int rc; int xid; - struct cifs_sb_info *cifs_sb; - struct cifsTconInfo *pTcon; char *full_path = NULL; - int rc = -EACCES; - struct cifsFileInfo *open_file = NULL; - FILE_BASIC_INFO time_buf; - bool set_time = false; - bool set_dosattr = false; - __u64 mode = 0xFFFFFFFFFFFFFFFFULL; - __u64 uid = 0xFFFFFFFFFFFFFFFFULL; - __u64 gid = 0xFFFFFFFFFFFFFFFFULL; - struct cifsInodeInfo *cifsInode; struct inode *inode = direntry->d_inode; + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *pTcon = cifs_sb->tcon; + struct cifs_unix_set_info_args *args = NULL; + + cFYI(1, ("setattr_unix on file %s attrs->ia_valid=0x%x", + direntry->d_name.name, attrs->ia_valid)); + + xid = GetXid(); + + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { + /* check if we have permission to change attrs */ + rc = inode_change_ok(inode, attrs); + if (rc < 0) + goto out; + else + rc = 0; + } + + full_path = build_path_from_dentry(direntry); + if (full_path == NULL) { + rc = -ENOMEM; + goto out; + } + + if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { + /* + Flush data before changing file size or changing the last + write time of the file on the server. If the + flush returns error, store it to report later and continue. + BB: This should be smarter. Why bother flushing pages that + will be truncated anyway? Also, should we error out here if + the flush returns error? + */ + rc = filemap_write_and_wait(inode->i_mapping); + if (rc != 0) { + cifsInode->write_behind_rc = rc; + rc = 0; + } + } + + if (attrs->ia_valid & ATTR_SIZE) { + rc = cifs_set_file_size(inode, attrs, xid, full_path); + if (rc != 0) + goto out; + } + + /* skip mode change if it's just for clearing setuid/setgid */ + if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) + attrs->ia_valid &= ~ATTR_MODE; + + args = kmalloc(sizeof(*args), GFP_KERNEL); + if (args == NULL) { + rc = -ENOMEM; + goto out; + } + + /* set up the struct */ + if (attrs->ia_valid & ATTR_MODE) + args->mode = attrs->ia_mode; + else + args->mode = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_UID) + args->uid = attrs->ia_uid; + else + args->uid = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_GID) + args->gid = attrs->ia_gid; + else + args->gid = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_ATIME) + args->atime = cifs_UnixTimeToNT(attrs->ia_atime); + else + args->atime = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_MTIME) + args->mtime = cifs_UnixTimeToNT(attrs->ia_mtime); + else + args->mtime = NO_CHANGE_64; + + if (attrs->ia_valid & ATTR_CTIME) + args->ctime = cifs_UnixTimeToNT(attrs->ia_ctime); + else + args->ctime = NO_CHANGE_64; + + args->device = 0; + rc = CIFSSMBUnixSetInfo(xid, pTcon, full_path, args, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + + if (!rc) + rc = inode_setattr(inode, attrs); +out: + kfree(args); + kfree(full_path); + FreeXid(xid); + return rc; +} + +static int +cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) +{ + int xid; + struct inode *inode = direntry->d_inode; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsInodeInfo *cifsInode = CIFS_I(inode); + char *full_path = NULL; + int rc = -EACCES; + __u32 dosattr = 0; + __u64 mode = NO_CHANGE_64; xid = GetXid(); cFYI(1, ("setattr on file %s attrs->iavalid 0x%x", direntry->d_name.name, attrs->ia_valid)); - cifs_sb = CIFS_SB(inode->i_sb); - pTcon = cifs_sb->tcon; - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) == 0) { /* check if we have permission to change attrs */ rc = inode_change_ok(inode, attrs); @@ -1453,7 +1742,6 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) FreeXid(xid); return -ENOMEM; } - cifsInode = CIFS_I(inode); if ((attrs->ia_valid & ATTR_MTIME) || (attrs->ia_valid & ATTR_SIZE)) { /* @@ -1472,78 +1760,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } if (attrs->ia_valid & ATTR_SIZE) { - /* To avoid spurious oplock breaks from server, in the case of - inodes that we already have open, avoid doing path based - setting of file size if we can do it by handle. - This keeps our caching token (oplock) and avoids timeouts - when the local oplock break takes longer to flush - writebehind data than the SMB timeout for the SetPathInfo - request would allow */ - - open_file = find_writable_file(cifsInode); - if (open_file) { - __u16 nfid = open_file->netfid; - __u32 npid = open_file->pid; - rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, - nfid, npid, false); - atomic_dec(&open_file->wrtPending); - cFYI(1, ("SetFSize for attrs rc = %d", rc)); - if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { - unsigned int bytes_written; - rc = CIFSSMBWrite(xid, pTcon, - nfid, 0, attrs->ia_size, - &bytes_written, NULL, NULL, - 1 /* 45 seconds */); - cFYI(1, ("Wrt seteof rc %d", rc)); - } - } else - rc = -EINVAL; - - if (rc != 0) { - /* Set file size by pathname rather than by handle - either because no valid, writeable file handle for - it was found or because there was an error setting - it by handle */ - rc = CIFSSMBSetEOF(xid, pTcon, full_path, - attrs->ia_size, false, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - cFYI(1, ("SetEOF by path (setattrs) rc = %d", rc)); - if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) { - __u16 netfid; - int oplock = 0; - - rc = SMBLegacyOpen(xid, pTcon, full_path, - FILE_OPEN, GENERIC_WRITE, - CREATE_NOT_DIR, &netfid, &oplock, - NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - unsigned int bytes_written; - rc = CIFSSMBWrite(xid, pTcon, - netfid, 0, - attrs->ia_size, - &bytes_written, NULL, - NULL, 1 /* 45 sec */); - cFYI(1, ("wrt seteof rc %d", rc)); - CIFSSMBClose(xid, pTcon, netfid); - } - - } - } - - /* Server is ok setting allocation size implicitly - no need - to call: - CIFSSMBSetEOF(xid, pTcon, full_path, attrs->ia_size, true, - cifs_sb->local_nls); - */ - - if (rc == 0) { - rc = cifs_vmtruncate(inode, attrs->ia_size); - cifs_truncate_page(inode->i_mapping, inode->i_size); - } else + rc = cifs_set_file_size(inode, attrs, xid, full_path); + if (rc != 0) goto cifs_setattr_exit; } @@ -1554,21 +1772,8 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) * CIFSACL support + proper Windows to Unix idmapping, we may be * able to support this in the future. */ - if (!pTcon->unix_ext && - !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) attrs->ia_valid &= ~(ATTR_UID | ATTR_GID); - } else { - if (attrs->ia_valid & ATTR_UID) { - cFYI(1, ("UID changed to %d", attrs->ia_uid)); - uid = attrs->ia_uid; - } - if (attrs->ia_valid & ATTR_GID) { - cFYI(1, ("GID changed to %d", attrs->ia_gid)); - gid = attrs->ia_gid; - } - } - - time_buf.Attributes = 0; /* skip mode change if it's just for clearing setuid/setgid */ if (attrs->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID)) @@ -1579,13 +1784,7 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) mode = attrs->ia_mode; } - if ((pTcon->unix_ext) - && (attrs->ia_valid & (ATTR_MODE | ATTR_GID | ATTR_UID))) - rc = CIFSSMBUnixSetPerms(xid, pTcon, full_path, mode, uid, gid, - 0 /* dev_t */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - else if (attrs->ia_valid & ATTR_MODE) { + if (attrs->ia_valid & ATTR_MODE) { rc = 0; #ifdef CONFIG_CIFS_EXPERIMENTAL if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) @@ -1594,24 +1793,19 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) #endif if (((mode & S_IWUGO) == 0) && (cifsInode->cifsAttrs & ATTR_READONLY) == 0) { - set_dosattr = true; - time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs | - ATTR_READONLY); + + dosattr = cifsInode->cifsAttrs | ATTR_READONLY; + /* fix up mode if we're not using dynperm */ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) == 0) attrs->ia_mode = inode->i_mode & ~S_IWUGO; } else if ((mode & S_IWUGO) && (cifsInode->cifsAttrs & ATTR_READONLY)) { - /* If file is readonly on server, we would - not be able to write to it - so if any write - bit is enabled for user or group or other we - need to at least try to remove r/o dos attr */ - set_dosattr = true; - time_buf.Attributes = cpu_to_le32(cifsInode->cifsAttrs & - (~ATTR_READONLY)); - /* Windows ignores set to zero */ - if (time_buf.Attributes == 0) - time_buf.Attributes |= cpu_to_le32(ATTR_NORMAL); + + dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; + /* Attributes of 0 are ignored */ + if (dosattr == 0) + dosattr |= ATTR_NORMAL; /* reset local inode permissions to normal */ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) { @@ -1629,82 +1823,18 @@ int cifs_setattr(struct dentry *direntry, struct iattr *attrs) } } - if (attrs->ia_valid & ATTR_ATIME) { - set_time = true; - time_buf.LastAccessTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); - } else - time_buf.LastAccessTime = 0; - - if (attrs->ia_valid & ATTR_MTIME) { - set_time = true; - time_buf.LastWriteTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); - } else - time_buf.LastWriteTime = 0; - /* Do not set ctime explicitly unless other time - stamps are changed explicitly (i.e. by utime() - since we would then have a mix of client and - server times */ - - if (set_time && (attrs->ia_valid & ATTR_CTIME)) { - set_time = true; - /* Although Samba throws this field away - it may be useful to Windows - but we do - not want to set ctime unless some other - timestamp is changing */ - cFYI(1, ("CIFS - CTIME changed")); - time_buf.ChangeTime = - cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); - } else - time_buf.ChangeTime = 0; - - if (set_time || set_dosattr) { - time_buf.CreationTime = 0; /* do not change */ - /* In the future we should experiment - try setting timestamps - via Handle (SetFileInfo) instead of by path */ - if (!(pTcon->ses->flags & CIFS_SES_NT4)) - rc = CIFSSMBSetTimes(xid, pTcon, full_path, &time_buf, - cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - else - rc = -EOPNOTSUPP; + if (attrs->ia_valid & (ATTR_MTIME|ATTR_ATIME|ATTR_CTIME) || + ((attrs->ia_valid & ATTR_MODE) && dosattr)) { + rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr); + /* BB: check for rc = -EOPNOTSUPP and switch to legacy mode */ - if (rc == -EOPNOTSUPP) { - int oplock = 0; - __u16 netfid; - - cFYI(1, ("calling SetFileInfo since SetPathInfo for " - "times not supported by this server")); - /* BB we could scan to see if we already have it open - and pass in pid of opener to function */ - rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, - SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, - CREATE_NOT_DIR, &netfid, &oplock, - NULL, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0) { - rc = CIFSSMBSetFileTimes(xid, pTcon, &time_buf, - netfid); - CIFSSMBClose(xid, pTcon, netfid); - } else { - /* BB For even older servers we could convert time_buf - into old DOS style which uses two second - granularity */ - - /* rc = CIFSSMBSetTimesLegacy(xid, pTcon, full_path, - &time_buf, cifs_sb->local_nls); */ - } - } /* Even if error on time set, no sense failing the call if the server would set the time to a reasonable value anyway, and this check ensures that we are not being called from sys_utimes in which case we ought to fail the call back to the user when the server rejects the call */ if ((rc) && (attrs->ia_valid & - (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE))) + (ATTR_MODE | ATTR_GID | ATTR_UID | ATTR_SIZE))) rc = 0; } @@ -1718,6 +1848,21 @@ cifs_setattr_exit: return rc; } +int +cifs_setattr(struct dentry *direntry, struct iattr *attrs) +{ + struct inode *inode = direntry->d_inode; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsTconInfo *pTcon = cifs_sb->tcon; + + if (pTcon->unix_ext) + return cifs_setattr_unix(direntry, attrs); + + return cifs_setattr_nounix(direntry, attrs); + + /* BB: add cifs_setattr_legacy for really old servers */ +} + #if 0 void cifs_delete_inode(struct inode *inode) { diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 83f30695488..5f40ed3473f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -690,6 +690,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, else cifs_buf_release(cifsFile->srch_inf. ntwrk_buf_start); + cifsFile->srch_inf.ntwrk_buf_start = NULL; } rc = initiate_cifs_search(xid, file); if (rc) { diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 000ac509c98..e286db9f5ee 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -265,6 +265,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, cFYI(1, ("Sending smb: total_len %d", total_len)); dump_smb(smb_buffer, len); + i = 0; while (total_len) { rc = kernel_sendmsg(ssocket, &smb_msg, &iov[first_vec], n_vec - first_vec, total_len); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 3d2580e00a3..c5916228243 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -137,9 +137,11 @@ exit: } -int coda_permission(struct inode *inode, int mask, struct nameidata *nd) +int coda_permission(struct inode *inode, int mask) { int error = 0; + + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; if (!mask) return 0; diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 2f58dfc7008..830f51abb97 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -58,7 +58,7 @@ static void coda_destroy_inode(struct inode *inode) kmem_cache_free(coda_inode_cachep, ITOC(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct coda_inode_info *ei = (struct coda_inode_info *) foo; diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index c21a1f552a6..c51365422aa 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -24,8 +24,7 @@ #include <linux/coda_psdev.h> /* pioctl ops */ -static int coda_ioctl_permission(struct inode *inode, int mask, - struct nameidata *nd); +static int coda_ioctl_permission(struct inode *inode, int mask); static int coda_pioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long user_data); @@ -42,8 +41,7 @@ const struct file_operations coda_ioctl_operations = { }; /* the coda pioctl inode ops */ -static int coda_ioctl_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int coda_ioctl_permission(struct inode *inode, int mask) { return 0; } @@ -51,7 +49,7 @@ static int coda_ioctl_permission(struct inode *inode, int mask, static int coda_pioctl(struct inode * inode, struct file * filp, unsigned int cmd, unsigned long user_data) { - struct nameidata nd; + struct path path; int error; struct PioctlData data; struct inode *target_inode = NULL; @@ -66,21 +64,21 @@ static int coda_pioctl(struct inode * inode, struct file * filp, * Look up the pathname. Note that the pathname is in * user memory, and namei takes care of this */ - if ( data.follow ) { - error = user_path_walk(data.path, &nd); + if (data.follow) { + error = user_path(data.path, &path); } else { - error = user_path_walk_link(data.path, &nd); + error = user_lpath(data.path, &path); } if ( error ) { return error; } else { - target_inode = nd.path.dentry->d_inode; + target_inode = path.dentry->d_inode; } /* return if it is not a Coda inode */ if ( target_inode->i_sb != inode->i_sb ) { - path_put(&nd.path); + path_put(&path); return -EINVAL; } @@ -89,7 +87,7 @@ static int coda_pioctl(struct inode * inode, struct file * filp, error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); - path_put(&nd.path); + path_put(&path); return error; } diff --git a/fs/compat.c b/fs/compat.c index 106eba28ec5..c9d1472e65c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -234,18 +234,18 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * * The following statfs calls are copies of code from fs/open.c and * should be checked against those from time to time */ -asmlinkage long compat_sys_statfs(const char __user *path, struct compat_statfs __user *buf) +asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (!error) { struct kstatfs tmp; - error = vfs_statfs(nd.path.dentry, &tmp); + error = vfs_statfs(path.dentry, &tmp); if (!error) error = put_compat_statfs(buf, &tmp); - path_put(&nd.path); + path_put(&path); } return error; } @@ -299,21 +299,21 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat return 0; } -asmlinkage long compat_sys_statfs64(const char __user *path, compat_size_t sz, struct compat_statfs64 __user *buf) +asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) { - struct nameidata nd; + struct path path; int error; if (sz != sizeof(*buf)) return -EINVAL; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (!error) { struct kstatfs tmp; - error = vfs_statfs(nd.path.dentry, &tmp); + error = vfs_statfs(path.dentry, &tmp); if (!error) error = put_compat_statfs64(buf, &tmp); - path_put(&nd.path); + path_put(&path); } return error; } diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h index da015c12e3e..762d287123c 100644 --- a/fs/configfs/configfs_internal.h +++ b/fs/configfs/configfs_internal.h @@ -49,8 +49,10 @@ struct configfs_dirent { #define CONFIGFS_USET_DEFAULT 0x0080 #define CONFIGFS_USET_DROPPING 0x0100 #define CONFIGFS_USET_IN_MKDIR 0x0200 +#define CONFIGFS_USET_CREATING 0x0400 #define CONFIGFS_NOT_PINNED (CONFIGFS_ITEM_ATTR) +extern struct mutex configfs_symlink_mutex; extern spinlock_t configfs_dirent_lock; extern struct vfsmount * configfs_mount; @@ -66,6 +68,7 @@ extern void configfs_inode_exit(void); extern int configfs_create_file(struct config_item *, const struct configfs_attribute *); extern int configfs_make_dirent(struct configfs_dirent *, struct dentry *, void *, umode_t, int); +extern int configfs_dirent_is_ready(struct configfs_dirent *); extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int); extern void configfs_hash_and_remove(struct dentry * dir, const char * name); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 179589be063..7a8db78a91d 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -185,7 +185,7 @@ static int create_dir(struct config_item * k, struct dentry * p, error = configfs_dirent_exists(p->d_fsdata, d->d_name.name); if (!error) error = configfs_make_dirent(p->d_fsdata, d, k, mode, - CONFIGFS_DIR); + CONFIGFS_DIR | CONFIGFS_USET_CREATING); if (!error) { error = configfs_create(d, mode, init_dir); if (!error) { @@ -209,6 +209,9 @@ static int create_dir(struct config_item * k, struct dentry * p, * configfs_create_dir - create a directory for an config_item. * @item: config_itemwe're creating directory for. * @dentry: config_item's dentry. + * + * Note: user-created entries won't be allowed under this new directory + * until it is validated by configfs_dir_set_ready() */ static int configfs_create_dir(struct config_item * item, struct dentry *dentry) @@ -231,6 +234,44 @@ static int configfs_create_dir(struct config_item * item, struct dentry *dentry) return error; } +/* + * Allow userspace to create new entries under a new directory created with + * configfs_create_dir(), and under all of its chidlren directories recursively. + * @sd configfs_dirent of the new directory to validate + * + * Caller must hold configfs_dirent_lock. + */ +static void configfs_dir_set_ready(struct configfs_dirent *sd) +{ + struct configfs_dirent *child_sd; + + sd->s_type &= ~CONFIGFS_USET_CREATING; + list_for_each_entry(child_sd, &sd->s_children, s_sibling) + if (child_sd->s_type & CONFIGFS_USET_CREATING) + configfs_dir_set_ready(child_sd); +} + +/* + * Check that a directory does not belong to a directory hierarchy being + * attached and not validated yet. + * @sd configfs_dirent of the directory to check + * + * @return non-zero iff the directory was validated + * + * Note: takes configfs_dirent_lock, so the result may change from false to true + * in two consecutive calls, but never from true to false. + */ +int configfs_dirent_is_ready(struct configfs_dirent *sd) +{ + int ret; + + spin_lock(&configfs_dirent_lock); + ret = !(sd->s_type & CONFIGFS_USET_CREATING); + spin_unlock(&configfs_dirent_lock); + + return ret; +} + int configfs_create_link(struct configfs_symlink *sl, struct dentry *parent, struct dentry *dentry) @@ -283,6 +324,8 @@ static void remove_dir(struct dentry * d) * The only thing special about this is that we remove any files in * the directory before we remove the directory, and we've inlined * what used to be configfs_rmdir() below, instead of calling separately. + * + * Caller holds the mutex of the item's inode */ static void configfs_remove_dir(struct config_item * item) @@ -330,7 +373,19 @@ static struct dentry * configfs_lookup(struct inode *dir, struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; struct configfs_dirent * sd; int found = 0; - int err = 0; + int err; + + /* + * Fake invisibility if dir belongs to a group/default groups hierarchy + * being attached + * + * This forbids userspace to read/write attributes of items which may + * not complete their initialization, since the dentries of the + * attributes won't be instantiated. + */ + err = -ENOENT; + if (!configfs_dirent_is_ready(parent_sd)) + goto out; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { if (sd->s_type & CONFIGFS_NOT_PINNED) { @@ -353,6 +408,7 @@ static struct dentry * configfs_lookup(struct inode *dir, return simple_lookup(dir, dentry, nd); } +out: return ERR_PTR(err); } @@ -370,13 +426,17 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex struct configfs_dirent *sd; int ret; + /* Mark that we're trying to drop the group */ + parent_sd->s_type |= CONFIGFS_USET_DROPPING; + ret = -EBUSY; if (!list_empty(&parent_sd->s_links)) goto out; ret = 0; list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (sd->s_type & CONFIGFS_NOT_PINNED) + if (!sd->s_element || + (sd->s_type & CONFIGFS_NOT_PINNED)) continue; if (sd->s_type & CONFIGFS_USET_DEFAULT) { /* Abort if racing with mkdir() */ @@ -385,8 +445,6 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex *wait_mutex = &sd->s_dentry->d_inode->i_mutex; return -EAGAIN; } - /* Mark that we're trying to drop the group */ - sd->s_type |= CONFIGFS_USET_DROPPING; /* * Yup, recursive. If there's a problem, blame @@ -414,12 +472,11 @@ static void configfs_detach_rollback(struct dentry *dentry) struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; - list_for_each_entry(sd, &parent_sd->s_children, s_sibling) { - if (sd->s_type & CONFIGFS_USET_DEFAULT) { + parent_sd->s_type &= ~CONFIGFS_USET_DROPPING; + + list_for_each_entry(sd, &parent_sd->s_children, s_sibling) + if (sd->s_type & CONFIGFS_USET_DEFAULT) configfs_detach_rollback(sd->s_dentry); - sd->s_type &= ~CONFIGFS_USET_DROPPING; - } - } } static void detach_attrs(struct config_item * item) @@ -558,36 +615,21 @@ static int create_default_group(struct config_group *parent_group, static int populate_groups(struct config_group *group) { struct config_group *new_group; - struct dentry *dentry = group->cg_item.ci_dentry; int ret = 0; int i; if (group->default_groups) { - /* - * FYI, we're faking mkdir here - * I'm not sure we need this semaphore, as we're called - * from our parent's mkdir. That holds our parent's - * i_mutex, so afaik lookup cannot continue through our - * parent to find us, let alone mess with our tree. - * That said, taking our i_mutex is closer to mkdir - * emulation, and shouldn't hurt. - */ - mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); - for (i = 0; group->default_groups[i]; i++) { new_group = group->default_groups[i]; ret = create_default_group(group, new_group); - if (ret) + if (ret) { + detach_groups(group); break; + } } - - mutex_unlock(&dentry->d_inode->i_mutex); } - if (ret) - detach_groups(group); - return ret; } @@ -702,7 +744,15 @@ static int configfs_attach_item(struct config_item *parent_item, if (!ret) { ret = populate_attrs(item); if (ret) { + /* + * We are going to remove an inode and its dentry but + * the VFS may already have hit and used them. Thus, + * we must lock them as rmdir() would. + */ + mutex_lock(&dentry->d_inode->i_mutex); configfs_remove_dir(item); + dentry->d_inode->i_flags |= S_DEAD; + mutex_unlock(&dentry->d_inode->i_mutex); d_delete(dentry); } } @@ -710,6 +760,7 @@ static int configfs_attach_item(struct config_item *parent_item, return ret; } +/* Caller holds the mutex of the item's inode */ static void configfs_detach_item(struct config_item *item) { detach_attrs(item); @@ -728,16 +779,30 @@ static int configfs_attach_group(struct config_item *parent_item, sd = dentry->d_fsdata; sd->s_type |= CONFIGFS_USET_DIR; + /* + * FYI, we're faking mkdir in populate_groups() + * We must lock the group's inode to avoid races with the VFS + * which can already hit the inode and try to add/remove entries + * under it. + * + * We must also lock the inode to remove it safely in case of + * error, as rmdir() would. + */ + mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); ret = populate_groups(to_config_group(item)); if (ret) { configfs_detach_item(item); - d_delete(dentry); + dentry->d_inode->i_flags |= S_DEAD; } + mutex_unlock(&dentry->d_inode->i_mutex); + if (ret) + d_delete(dentry); } return ret; } +/* Caller holds the mutex of the group's inode */ static void configfs_detach_group(struct config_item *item) { detach_groups(to_config_group(item)); @@ -1035,7 +1100,7 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct configfs_subsystem *subsys; struct configfs_dirent *sd; struct config_item_type *type; - struct module *owner = NULL; + struct module *subsys_owner = NULL, *new_item_owner = NULL; char *name; if (dentry->d_parent == configfs_sb->s_root) { @@ -1044,6 +1109,16 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) } sd = dentry->d_parent->d_fsdata; + + /* + * Fake invisibility if dir belongs to a group/default groups hierarchy + * being attached + */ + if (!configfs_dirent_is_ready(sd)) { + ret = -ENOENT; + goto out; + } + if (!(sd->s_type & CONFIGFS_USET_DIR)) { ret = -EPERM; goto out; @@ -1062,10 +1137,25 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_put; } + /* + * The subsystem may belong to a different module than the item + * being created. We don't want to safely pin the new item but + * fail to pin the subsystem it sits under. + */ + if (!subsys->su_group.cg_item.ci_type) { + ret = -EINVAL; + goto out_put; + } + subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; + if (!try_module_get(subsys_owner)) { + ret = -EINVAL; + goto out_put; + } + name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL); if (!name) { ret = -ENOMEM; - goto out_put; + goto out_subsys_put; } snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name); @@ -1094,10 +1184,10 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) kfree(name); if (ret) { /* - * If item == NULL, then link_obj() was never called. + * If ret != 0, then link_obj() was never called. * There are no extra references to clean up. */ - goto out_put; + goto out_subsys_put; } /* @@ -1111,8 +1201,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) goto out_unlink; } - owner = type->ct_owner; - if (!try_module_get(owner)) { + new_item_owner = type->ct_owner; + if (!try_module_get(new_item_owner)) { ret = -EINVAL; goto out_unlink; } @@ -1142,6 +1232,8 @@ static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) spin_lock(&configfs_dirent_lock); sd->s_type &= ~CONFIGFS_USET_IN_MKDIR; + if (!ret) + configfs_dir_set_ready(dentry->d_fsdata); spin_unlock(&configfs_dirent_lock); out_unlink: @@ -1159,9 +1251,13 @@ out_unlink: mutex_unlock(&subsys->su_mutex); if (module_got) - module_put(owner); + module_put(new_item_owner); } +out_subsys_put: + if (ret) + module_put(subsys_owner); + out_put: /* * link_obj()/link_group() took a reference from child->parent, @@ -1180,7 +1276,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) struct config_item *item; struct configfs_subsystem *subsys; struct configfs_dirent *sd; - struct module *owner = NULL; + struct module *subsys_owner = NULL, *dead_item_owner = NULL; int ret; if (dentry->d_parent == configfs_sb->s_root) @@ -1207,6 +1303,15 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) return -EINVAL; } + /* configfs_mkdir() shouldn't have allowed this */ + BUG_ON(!subsys->su_group.cg_item.ci_type); + subsys_owner = subsys->su_group.cg_item.ci_type->ct_owner; + + /* + * Ensure that no racing symlink() will make detach_prep() fail while + * the new link is temporarily attached + */ + mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); do { struct mutex *wait_mutex; @@ -1215,6 +1320,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) if (ret) { configfs_detach_rollback(dentry); spin_unlock(&configfs_dirent_lock); + mutex_unlock(&configfs_symlink_mutex); if (ret != -EAGAIN) { config_item_put(parent_item); return ret; @@ -1224,10 +1330,12 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) mutex_lock(wait_mutex); mutex_unlock(wait_mutex); + mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); } } while (ret == -EAGAIN); spin_unlock(&configfs_dirent_lock); + mutex_unlock(&configfs_symlink_mutex); /* Get a working ref for the duration of this function */ item = configfs_get_config_item(dentry); @@ -1236,7 +1344,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) config_item_put(parent_item); if (item->ci_type) - owner = item->ci_type->ct_owner; + dead_item_owner = item->ci_type->ct_owner; if (sd->s_type & CONFIGFS_USET_DIR) { configfs_detach_group(item); @@ -1258,7 +1366,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) /* Drop our reference from above */ config_item_put(item); - module_put(owner); + module_put(dead_item_owner); + module_put(subsys_owner); return 0; } @@ -1314,13 +1423,24 @@ static int configfs_dir_open(struct inode *inode, struct file *file) { struct dentry * dentry = file->f_path.dentry; struct configfs_dirent * parent_sd = dentry->d_fsdata; + int err; mutex_lock(&dentry->d_inode->i_mutex); - file->private_data = configfs_new_dirent(parent_sd, NULL); + /* + * Fake invisibility if dir belongs to a group/default groups hierarchy + * being attached + */ + err = -ENOENT; + if (configfs_dirent_is_ready(parent_sd)) { + file->private_data = configfs_new_dirent(parent_sd, NULL); + if (IS_ERR(file->private_data)) + err = PTR_ERR(file->private_data); + else + err = 0; + } mutex_unlock(&dentry->d_inode->i_mutex); - return IS_ERR(file->private_data) ? PTR_ERR(file->private_data) : 0; - + return err; } static int configfs_dir_close(struct inode *inode, struct file *file) @@ -1491,6 +1611,10 @@ int configfs_register_subsystem(struct configfs_subsystem *subsys) if (err) { d_delete(dentry); dput(dentry); + } else { + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); } } @@ -1517,11 +1641,13 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys) mutex_lock_nested(&configfs_sb->s_root->d_inode->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD); + mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); if (configfs_detach_prep(dentry, NULL)) { printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n"); } spin_unlock(&configfs_dirent_lock); + mutex_unlock(&configfs_symlink_mutex); configfs_detach_group(&group->cg_item); dentry->d_inode->i_flags |= S_DEAD; mutex_unlock(&dentry->d_inode->i_mutex); diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index 0004d18c40a..bf74973b049 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -31,6 +31,9 @@ #include <linux/configfs.h> #include "configfs_internal.h" +/* Protects attachments of new symlinks */ +DEFINE_MUTEX(configfs_symlink_mutex); + static int item_depth(struct config_item * item) { struct config_item * p = item; @@ -73,11 +76,20 @@ static int create_link(struct config_item *parent_item, struct configfs_symlink *sl; int ret; + ret = -ENOENT; + if (!configfs_dirent_is_ready(target_sd)) + goto out; ret = -ENOMEM; sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL); if (sl) { sl->sl_target = config_item_get(item); spin_lock(&configfs_dirent_lock); + if (target_sd->s_type & CONFIGFS_USET_DROPPING) { + spin_unlock(&configfs_dirent_lock); + config_item_put(item); + kfree(sl); + return -ENOENT; + } list_add(&sl->sl_list, &target_sd->s_links); spin_unlock(&configfs_dirent_lock); ret = configfs_create_link(sl, parent_item->ci_dentry, @@ -91,6 +103,7 @@ static int create_link(struct config_item *parent_item, } } +out: return ret; } @@ -120,6 +133,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna { int ret; struct nameidata nd; + struct configfs_dirent *sd; struct config_item *parent_item; struct config_item *target_item; struct config_item_type *type; @@ -128,9 +142,19 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna if (dentry->d_parent == configfs_sb->s_root) goto out; + sd = dentry->d_parent->d_fsdata; + /* + * Fake invisibility if dir belongs to a group/default groups hierarchy + * being attached + */ + ret = -ENOENT; + if (!configfs_dirent_is_ready(sd)) + goto out; + parent_item = configfs_get_config_item(dentry->d_parent); type = parent_item->ci_type; + ret = -EPERM; if (!type || !type->ct_item_ops || !type->ct_item_ops->allow_link) goto out_put; @@ -141,7 +165,9 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna ret = type->ct_item_ops->allow_link(parent_item, target_item); if (!ret) { + mutex_lock(&configfs_symlink_mutex); ret = create_link(parent_item, target_item, dentry); + mutex_unlock(&configfs_symlink_mutex); if (ret && type->ct_item_ops->drop_link) type->ct_item_ops->drop_link(parent_item, target_item); diff --git a/fs/dcache.c b/fs/dcache.c index 3818d6ab76c..101663d15e9 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -487,6 +487,7 @@ restart: if (!cnt) break; } + cond_resched_lock(&dcache_lock); } } while (!list_empty(&tmp)) { @@ -1219,6 +1220,107 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) return new; } +/** + * d_add_ci - lookup or allocate new dentry with case-exact name + * @inode: the inode case-insensitive lookup has found + * @dentry: the negative dentry that was passed to the parent's lookup func + * @name: the case-exact name to be associated with the returned dentry + * + * This is to avoid filling the dcache with case-insensitive names to the + * same inode, only the actual correct case is stored in the dcache for + * case-insensitive filesystems. + * + * For a case-insensitive lookup match and if the the case-exact dentry + * already exists in in the dcache, use it and return it. + * + * If no entry exists with the exact case name, allocate new dentry with + * the exact case, and return the spliced entry. + */ +struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, + struct qstr *name) +{ + int error; + struct dentry *found; + struct dentry *new; + + /* Does a dentry matching the name exist already? */ + found = d_hash_and_lookup(dentry->d_parent, name); + /* If not, create it now and return */ + if (!found) { + new = d_alloc(dentry->d_parent, name); + if (!new) { + error = -ENOMEM; + goto err_out; + } + found = d_splice_alias(inode, new); + if (found) { + dput(new); + return found; + } + return new; + } + /* Matching dentry exists, check if it is negative. */ + if (found->d_inode) { + if (unlikely(found->d_inode != inode)) { + /* This can't happen because bad inodes are unhashed. */ + BUG_ON(!is_bad_inode(inode)); + BUG_ON(!is_bad_inode(found->d_inode)); + } + /* + * Already have the inode and the dentry attached, decrement + * the reference count to balance the iget() done + * earlier on. We found the dentry using d_lookup() so it + * cannot be disconnected and thus we do not need to worry + * about any NFS/disconnectedness issues here. + */ + iput(inode); + return found; + } + /* + * Negative dentry: instantiate it unless the inode is a directory and + * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), + * in which case d_move() that in place of the found dentry. + */ + if (!S_ISDIR(inode->i_mode)) { + /* Not a directory; everything is easy. */ + d_instantiate(found, inode); + return found; + } + spin_lock(&dcache_lock); + if (list_empty(&inode->i_dentry)) { + /* + * Directory without a 'disconnected' dentry; we need to do + * d_instantiate() by hand because it takes dcache_lock which + * we already hold. + */ + list_add(&found->d_alias, &inode->i_dentry); + found->d_inode = inode; + spin_unlock(&dcache_lock); + security_d_instantiate(found, inode); + return found; + } + /* + * Directory with a 'disconnected' dentry; get a reference to the + * 'disconnected' dentry. + */ + new = list_entry(inode->i_dentry.next, struct dentry, d_alias); + dget_locked(new); + spin_unlock(&dcache_lock); + /* Do security vodoo. */ + security_d_instantiate(found, inode); + /* Move new in place of found. */ + d_move(new, found); + /* Balance the iget() we did above. */ + iput(inode); + /* Throw away found. */ + dput(found); + /* Use new as the actual dentry. */ + return new; + +err_out: + iput(inode); + return ERR_PTR(error); +} /** * d_lookup - search for a dentry @@ -2253,6 +2355,7 @@ EXPORT_SYMBOL(d_path); EXPORT_SYMBOL(d_prune_aliases); EXPORT_SYMBOL(d_rehash); EXPORT_SYMBOL(d_splice_alias); +EXPORT_SYMBOL(d_add_ci); EXPORT_SYMBOL(d_validate); EXPORT_SYMBOL(dget_locked); EXPORT_SYMBOL(dput); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 285b64a8b06..488eb424f66 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -29,7 +29,7 @@ #define DEVPTS_DEFAULT_MODE 0600 extern int pty_limit; /* Config limit on Unix98 ptys */ -static DEFINE_IDR(allocated_ptys); +static DEFINE_IDA(allocated_ptys); static DEFINE_MUTEX(allocated_ptys_lock); static struct vfsmount *devpts_mnt; @@ -180,24 +180,24 @@ static struct dentry *get_node(int num) int devpts_new_index(void) { int index; - int idr_ret; + int ida_ret; retry: - if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) { + if (!ida_pre_get(&allocated_ptys, GFP_KERNEL)) { return -ENOMEM; } mutex_lock(&allocated_ptys_lock); - idr_ret = idr_get_new(&allocated_ptys, NULL, &index); - if (idr_ret < 0) { + ida_ret = ida_get_new(&allocated_ptys, &index); + if (ida_ret < 0) { mutex_unlock(&allocated_ptys_lock); - if (idr_ret == -EAGAIN) + if (ida_ret == -EAGAIN) goto retry; return -EIO; } if (index >= pty_limit) { - idr_remove(&allocated_ptys, index); + ida_remove(&allocated_ptys, index); mutex_unlock(&allocated_ptys_lock); return -EIO; } @@ -208,7 +208,7 @@ retry: void devpts_kill_index(int idx) { mutex_lock(&allocated_ptys_lock); - idr_remove(&allocated_ptys, idx); + ida_remove(&allocated_ptys, idx); mutex_unlock(&allocated_ptys_lock); } diff --git a/fs/direct-io.c b/fs/direct-io.c index 9e81addbd6e..9606ee848fd 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -150,17 +150,11 @@ static int dio_refill_pages(struct dio *dio) int nr_pages; nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES); - down_read(¤t->mm->mmap_sem); - ret = get_user_pages( - current, /* Task for fault acounting */ - current->mm, /* whose pages? */ + ret = get_user_pages_fast( dio->curr_user_address, /* Where from? */ nr_pages, /* How many pages? */ dio->rw == READ, /* Write to memory? */ - 0, /* force (?) */ - &dio->pages[0], - NULL); /* vmas */ - up_read(¤t->mm->mmap_sem); + &dio->pages[0]); /* Put results here */ if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) { struct page *page = ZERO_PAGE(0); diff --git a/fs/dlm/config.c b/fs/dlm/config.c index c4e7d721bd8..89d2fb7b991 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c @@ -2,7 +2,7 @@ ******************************************************************************* ** ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. -** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. +** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. ** ** This copyrighted material is made available to anyone wishing to use, ** modify, copy, or redistribute it subject to the terms and conditions @@ -30,16 +30,16 @@ static struct config_group *space_list; static struct config_group *comm_list; -static struct comm *local_comm; +static struct dlm_comm *local_comm; -struct clusters; -struct cluster; -struct spaces; -struct space; -struct comms; -struct comm; -struct nodes; -struct node; +struct dlm_clusters; +struct dlm_cluster; +struct dlm_spaces; +struct dlm_space; +struct dlm_comms; +struct dlm_comm; +struct dlm_nodes; +struct dlm_node; static struct config_group *make_cluster(struct config_group *, const char *); static void drop_cluster(struct config_group *, struct config_item *); @@ -68,17 +68,22 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, const char *buf, size_t len); -static ssize_t comm_nodeid_read(struct comm *cm, char *buf); -static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len); -static ssize_t comm_local_read(struct comm *cm, char *buf); -static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len); -static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len); -static ssize_t node_nodeid_read(struct node *nd, char *buf); -static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len); -static ssize_t node_weight_read(struct node *nd, char *buf); -static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len); - -struct cluster { +static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf); +static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, + size_t len); +static ssize_t comm_local_read(struct dlm_comm *cm, char *buf); +static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, + size_t len); +static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, + size_t len); +static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf); +static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, + size_t len); +static ssize_t node_weight_read(struct dlm_node *nd, char *buf); +static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, + size_t len); + +struct dlm_cluster { struct config_group group; unsigned int cl_tcp_port; unsigned int cl_buffer_size; @@ -109,11 +114,11 @@ enum { struct cluster_attribute { struct configfs_attribute attr; - ssize_t (*show)(struct cluster *, char *); - ssize_t (*store)(struct cluster *, const char *, size_t); + ssize_t (*show)(struct dlm_cluster *, char *); + ssize_t (*store)(struct dlm_cluster *, const char *, size_t); }; -static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, +static ssize_t cluster_set(struct dlm_cluster *cl, unsigned int *cl_field, int *info_field, int check_zero, const char *buf, size_t len) { @@ -134,12 +139,12 @@ static ssize_t cluster_set(struct cluster *cl, unsigned int *cl_field, } #define CLUSTER_ATTR(name, check_zero) \ -static ssize_t name##_write(struct cluster *cl, const char *buf, size_t len) \ +static ssize_t name##_write(struct dlm_cluster *cl, const char *buf, size_t len) \ { \ return cluster_set(cl, &cl->cl_##name, &dlm_config.ci_##name, \ check_zero, buf, len); \ } \ -static ssize_t name##_read(struct cluster *cl, char *buf) \ +static ssize_t name##_read(struct dlm_cluster *cl, char *buf) \ { \ return snprintf(buf, PAGE_SIZE, "%u\n", cl->cl_##name); \ } \ @@ -181,8 +186,8 @@ enum { struct comm_attribute { struct configfs_attribute attr; - ssize_t (*show)(struct comm *, char *); - ssize_t (*store)(struct comm *, const char *, size_t); + ssize_t (*show)(struct dlm_comm *, char *); + ssize_t (*store)(struct dlm_comm *, const char *, size_t); }; static struct comm_attribute comm_attr_nodeid = { @@ -222,8 +227,8 @@ enum { struct node_attribute { struct configfs_attribute attr; - ssize_t (*show)(struct node *, char *); - ssize_t (*store)(struct node *, const char *, size_t); + ssize_t (*show)(struct dlm_node *, char *); + ssize_t (*store)(struct dlm_node *, const char *, size_t); }; static struct node_attribute node_attr_nodeid = { @@ -248,26 +253,26 @@ static struct configfs_attribute *node_attrs[] = { NULL, }; -struct clusters { +struct dlm_clusters { struct configfs_subsystem subsys; }; -struct spaces { +struct dlm_spaces { struct config_group ss_group; }; -struct space { +struct dlm_space { struct config_group group; struct list_head members; struct mutex members_lock; int members_count; }; -struct comms { +struct dlm_comms { struct config_group cs_group; }; -struct comm { +struct dlm_comm { struct config_item item; int nodeid; int local; @@ -275,11 +280,11 @@ struct comm { struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; }; -struct nodes { +struct dlm_nodes { struct config_group ns_group; }; -struct node { +struct dlm_node { struct config_item item; struct list_head list; /* space->members */ int nodeid; @@ -372,38 +377,40 @@ static struct config_item_type node_type = { .ct_owner = THIS_MODULE, }; -static struct cluster *to_cluster(struct config_item *i) +static struct dlm_cluster *to_cluster(struct config_item *i) { - return i ? container_of(to_config_group(i), struct cluster, group):NULL; + return i ? container_of(to_config_group(i), struct dlm_cluster, group) : + NULL; } -static struct space *to_space(struct config_item *i) +static struct dlm_space *to_space(struct config_item *i) { - return i ? container_of(to_config_group(i), struct space, group) : NULL; + return i ? container_of(to_config_group(i), struct dlm_space, group) : + NULL; } -static struct comm *to_comm(struct config_item *i) +static struct dlm_comm *to_comm(struct config_item *i) { - return i ? container_of(i, struct comm, item) : NULL; + return i ? container_of(i, struct dlm_comm, item) : NULL; } -static struct node *to_node(struct config_item *i) +static struct dlm_node *to_node(struct config_item *i) { - return i ? container_of(i, struct node, item) : NULL; + return i ? container_of(i, struct dlm_node, item) : NULL; } static struct config_group *make_cluster(struct config_group *g, const char *name) { - struct cluster *cl = NULL; - struct spaces *sps = NULL; - struct comms *cms = NULL; + struct dlm_cluster *cl = NULL; + struct dlm_spaces *sps = NULL; + struct dlm_comms *cms = NULL; void *gps = NULL; - cl = kzalloc(sizeof(struct cluster), GFP_KERNEL); + cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL); gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL); - sps = kzalloc(sizeof(struct spaces), GFP_KERNEL); - cms = kzalloc(sizeof(struct comms), GFP_KERNEL); + sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL); + cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL); if (!cl || !gps || !sps || !cms) goto fail; @@ -443,7 +450,7 @@ static struct config_group *make_cluster(struct config_group *g, static void drop_cluster(struct config_group *g, struct config_item *i) { - struct cluster *cl = to_cluster(i); + struct dlm_cluster *cl = to_cluster(i); struct config_item *tmp; int j; @@ -461,20 +468,20 @@ static void drop_cluster(struct config_group *g, struct config_item *i) static void release_cluster(struct config_item *i) { - struct cluster *cl = to_cluster(i); + struct dlm_cluster *cl = to_cluster(i); kfree(cl->group.default_groups); kfree(cl); } static struct config_group *make_space(struct config_group *g, const char *name) { - struct space *sp = NULL; - struct nodes *nds = NULL; + struct dlm_space *sp = NULL; + struct dlm_nodes *nds = NULL; void *gps = NULL; - sp = kzalloc(sizeof(struct space), GFP_KERNEL); + sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL); gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL); - nds = kzalloc(sizeof(struct nodes), GFP_KERNEL); + nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL); if (!sp || !gps || !nds) goto fail; @@ -500,7 +507,7 @@ static struct config_group *make_space(struct config_group *g, const char *name) static void drop_space(struct config_group *g, struct config_item *i) { - struct space *sp = to_space(i); + struct dlm_space *sp = to_space(i); struct config_item *tmp; int j; @@ -517,16 +524,16 @@ static void drop_space(struct config_group *g, struct config_item *i) static void release_space(struct config_item *i) { - struct space *sp = to_space(i); + struct dlm_space *sp = to_space(i); kfree(sp->group.default_groups); kfree(sp); } static struct config_item *make_comm(struct config_group *g, const char *name) { - struct comm *cm; + struct dlm_comm *cm; - cm = kzalloc(sizeof(struct comm), GFP_KERNEL); + cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL); if (!cm) return ERR_PTR(-ENOMEM); @@ -539,7 +546,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name) static void drop_comm(struct config_group *g, struct config_item *i) { - struct comm *cm = to_comm(i); + struct dlm_comm *cm = to_comm(i); if (local_comm == cm) local_comm = NULL; dlm_lowcomms_close(cm->nodeid); @@ -550,16 +557,16 @@ static void drop_comm(struct config_group *g, struct config_item *i) static void release_comm(struct config_item *i) { - struct comm *cm = to_comm(i); + struct dlm_comm *cm = to_comm(i); kfree(cm); } static struct config_item *make_node(struct config_group *g, const char *name) { - struct space *sp = to_space(g->cg_item.ci_parent); - struct node *nd; + struct dlm_space *sp = to_space(g->cg_item.ci_parent); + struct dlm_node *nd; - nd = kzalloc(sizeof(struct node), GFP_KERNEL); + nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); if (!nd) return ERR_PTR(-ENOMEM); @@ -578,8 +585,8 @@ static struct config_item *make_node(struct config_group *g, const char *name) static void drop_node(struct config_group *g, struct config_item *i) { - struct space *sp = to_space(g->cg_item.ci_parent); - struct node *nd = to_node(i); + struct dlm_space *sp = to_space(g->cg_item.ci_parent); + struct dlm_node *nd = to_node(i); mutex_lock(&sp->members_lock); list_del(&nd->list); @@ -591,11 +598,11 @@ static void drop_node(struct config_group *g, struct config_item *i) static void release_node(struct config_item *i) { - struct node *nd = to_node(i); + struct dlm_node *nd = to_node(i); kfree(nd); } -static struct clusters clusters_root = { +static struct dlm_clusters clusters_root = { .subsys = { .su_group = { .cg_item = { @@ -625,7 +632,7 @@ void dlm_config_exit(void) static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, char *buf) { - struct cluster *cl = to_cluster(i); + struct dlm_cluster *cl = to_cluster(i); struct cluster_attribute *cla = container_of(a, struct cluster_attribute, attr); return cla->show ? cla->show(cl, buf) : 0; @@ -635,7 +642,7 @@ static ssize_t store_cluster(struct config_item *i, struct configfs_attribute *a, const char *buf, size_t len) { - struct cluster *cl = to_cluster(i); + struct dlm_cluster *cl = to_cluster(i); struct cluster_attribute *cla = container_of(a, struct cluster_attribute, attr); return cla->store ? cla->store(cl, buf, len) : -EINVAL; @@ -644,7 +651,7 @@ static ssize_t store_cluster(struct config_item *i, static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, char *buf) { - struct comm *cm = to_comm(i); + struct dlm_comm *cm = to_comm(i); struct comm_attribute *cma = container_of(a, struct comm_attribute, attr); return cma->show ? cma->show(cm, buf) : 0; @@ -653,29 +660,31 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, const char *buf, size_t len) { - struct comm *cm = to_comm(i); + struct dlm_comm *cm = to_comm(i); struct comm_attribute *cma = container_of(a, struct comm_attribute, attr); return cma->store ? cma->store(cm, buf, len) : -EINVAL; } -static ssize_t comm_nodeid_read(struct comm *cm, char *buf) +static ssize_t comm_nodeid_read(struct dlm_comm *cm, char *buf) { return sprintf(buf, "%d\n", cm->nodeid); } -static ssize_t comm_nodeid_write(struct comm *cm, const char *buf, size_t len) +static ssize_t comm_nodeid_write(struct dlm_comm *cm, const char *buf, + size_t len) { cm->nodeid = simple_strtol(buf, NULL, 0); return len; } -static ssize_t comm_local_read(struct comm *cm, char *buf) +static ssize_t comm_local_read(struct dlm_comm *cm, char *buf) { return sprintf(buf, "%d\n", cm->local); } -static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) +static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf, + size_t len) { cm->local= simple_strtol(buf, NULL, 0); if (cm->local && !local_comm) @@ -683,7 +692,7 @@ static ssize_t comm_local_write(struct comm *cm, const char *buf, size_t len) return len; } -static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) +static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) { struct sockaddr_storage *addr; @@ -705,7 +714,7 @@ static ssize_t comm_addr_write(struct comm *cm, const char *buf, size_t len) static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, char *buf) { - struct node *nd = to_node(i); + struct dlm_node *nd = to_node(i); struct node_attribute *nda = container_of(a, struct node_attribute, attr); return nda->show ? nda->show(nd, buf) : 0; @@ -714,29 +723,31 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, const char *buf, size_t len) { - struct node *nd = to_node(i); + struct dlm_node *nd = to_node(i); struct node_attribute *nda = container_of(a, struct node_attribute, attr); return nda->store ? nda->store(nd, buf, len) : -EINVAL; } -static ssize_t node_nodeid_read(struct node *nd, char *buf) +static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf) { return sprintf(buf, "%d\n", nd->nodeid); } -static ssize_t node_nodeid_write(struct node *nd, const char *buf, size_t len) +static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf, + size_t len) { nd->nodeid = simple_strtol(buf, NULL, 0); return len; } -static ssize_t node_weight_read(struct node *nd, char *buf) +static ssize_t node_weight_read(struct dlm_node *nd, char *buf) { return sprintf(buf, "%d\n", nd->weight); } -static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) +static ssize_t node_weight_write(struct dlm_node *nd, const char *buf, + size_t len) { nd->weight = simple_strtol(buf, NULL, 0); return len; @@ -746,7 +757,7 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len) * Functions for the dlm to get the info that's been configured */ -static struct space *get_space(char *name) +static struct dlm_space *get_space(char *name) { struct config_item *i; @@ -760,15 +771,15 @@ static struct space *get_space(char *name) return to_space(i); } -static void put_space(struct space *sp) +static void put_space(struct dlm_space *sp) { config_item_put(&sp->group.cg_item); } -static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) +static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) { struct config_item *i; - struct comm *cm = NULL; + struct dlm_comm *cm = NULL; int found = 0; if (!comm_list) @@ -801,7 +812,7 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr) return cm; } -static void put_comm(struct comm *cm) +static void put_comm(struct dlm_comm *cm) { config_item_put(&cm->item); } @@ -810,8 +821,8 @@ static void put_comm(struct comm *cm) int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, int **new_out, int *new_count_out) { - struct space *sp; - struct node *nd; + struct dlm_space *sp; + struct dlm_node *nd; int i = 0, rv = 0, ids_count = 0, new_count = 0; int *ids, *new; @@ -874,8 +885,8 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out, int dlm_node_weight(char *lsname, int nodeid) { - struct space *sp; - struct node *nd; + struct dlm_space *sp; + struct dlm_node *nd; int w = -EEXIST; sp = get_space(lsname); @@ -897,7 +908,7 @@ int dlm_node_weight(char *lsname, int nodeid) int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) { - struct comm *cm = get_comm(nodeid, NULL); + struct dlm_comm *cm = get_comm(nodeid, NULL); if (!cm) return -EEXIST; if (!cm->addr_count) @@ -909,7 +920,7 @@ int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) { - struct comm *cm = get_comm(0, addr); + struct dlm_comm *cm = get_comm(0, addr); if (!cm) return -EEXIST; *nodeid = cm->nodeid; diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 2d3d1027ce2..724ddac9153 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -363,6 +363,7 @@ static int search_rsb_list(struct list_head *head, char *name, int len, if (len == r->res_length && !memcmp(name, r->res_name, len)) goto found; } + *r_ret = NULL; return -EBADR; found: @@ -1782,7 +1783,8 @@ static void grant_pending_locks(struct dlm_rsb *r) list_for_each_entry_safe(lkb, s, &r->res_grantqueue, lkb_statequeue) { if (lkb->lkb_bastfn && lock_requires_bast(lkb, high, cw)) { - if (cw && high == DLM_LOCK_PR) + if (cw && high == DLM_LOCK_PR && + lkb->lkb_grmode == DLM_LOCK_PR) queue_bast(r, lkb, DLM_LOCK_CW); else queue_bast(r, lkb, high); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 637018c891e..3962262f991 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -891,8 +891,10 @@ static void tcp_connect_to_sock(struct connection *con) goto out_err; memset(&saddr, 0, sizeof(saddr)); - if (dlm_nodeid_to_addr(con->nodeid, &saddr)) + if (dlm_nodeid_to_addr(con->nodeid, &saddr)) { + sock_release(sock); goto out_err; + } sock->sk->sk_user_data = con; con->rx_action = receive_from_sock; diff --git a/fs/dlm/user.c b/fs/dlm/user.c index f976f303c19..34f14a14fb4 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -527,8 +527,10 @@ static ssize_t device_write(struct file *file, const char __user *buf, k32buf = (struct dlm_write_request32 *)kbuf; kbuf = kmalloc(count + 1 + (sizeof(struct dlm_write_request) - sizeof(struct dlm_write_request32)), GFP_KERNEL); - if (!kbuf) + if (!kbuf) { + kfree(k32buf); return -ENOMEM; + } if (proc) set_bit(DLM_PROC_FLAGS_COMPAT, &proc->flags); @@ -539,8 +541,10 @@ static ssize_t device_write(struct file *file, const char __user *buf, /* do we really need this? can a write happen after a close? */ if ((kbuf->cmd == DLM_USER_LOCK || kbuf->cmd == DLM_USER_UNLOCK) && - test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags)) - return -EINVAL; + (proc && test_bit(DLM_PROC_FLAGS_CLOSING, &proc->flags))) { + error = -EINVAL; + goto out_free; + } sigfillset(&allsigs); sigprocmask(SIG_BLOCK, &allsigs, &tmpsig); diff --git a/fs/dquot.c b/fs/dquot.c index 1346eebe74c..8ec4d6cc763 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1793,6 +1793,21 @@ static int vfs_quota_on_remount(struct super_block *sb, int type) return ret; } +int vfs_quota_on_path(struct super_block *sb, int type, int format_id, + struct path *path) +{ + int error = security_quota_on(path->dentry); + if (error) + return error; + /* Quota file not on the same filesystem? */ + if (path->mnt->mnt_sb != sb) + error = -EXDEV; + else + error = vfs_quota_on_inode(path->dentry->d_inode, type, + format_id); + return error; +} + /* Actual function called from quotactl() */ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, int remount) @@ -1804,19 +1819,10 @@ int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, return vfs_quota_on_remount(sb, type); error = path_lookup(path, LOOKUP_FOLLOW, &nd); - if (error < 0) - return error; - error = security_quota_on(nd.path.dentry); - if (error) - goto out_path; - /* Quota file not on the same filesystem? */ - if (nd.path.mnt->mnt_sb != sb) - error = -EXDEV; - else - error = vfs_quota_on_inode(nd.path.dentry->d_inode, type, - format_id); -out_path: - path_put(&nd.path); + if (!error) { + error = vfs_quota_on_path(sb, type, format_id, &nd.path); + path_put(&nd.path); + } return error; } @@ -2185,6 +2191,7 @@ EXPORT_SYMBOL(unregister_quota_format); EXPORT_SYMBOL(dqstats); EXPORT_SYMBOL(dq_data_lock); EXPORT_SYMBOL(vfs_quota_on); +EXPORT_SYMBOL(vfs_quota_on_path); EXPORT_SYMBOL(vfs_quota_on_mount); EXPORT_SYMBOL(vfs_quota_off); EXPORT_SYMBOL(vfs_quota_sync); diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 7b99917ffad..06db79d05c1 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -475,8 +475,8 @@ int ecryptfs_encrypt_page(struct page *page) { struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; - char *enc_extent_virt = NULL; - struct page *enc_extent_page; + char *enc_extent_virt; + struct page *enc_extent_page = NULL; loff_t extent_offset; int rc = 0; @@ -492,14 +492,14 @@ int ecryptfs_encrypt_page(struct page *page) page->index); goto out; } - enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); - if (!enc_extent_virt) { + enc_extent_page = alloc_page(GFP_USER); + if (!enc_extent_page) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Error allocating memory for " "encrypted extent\n"); goto out; } - enc_extent_page = virt_to_page(enc_extent_virt); + enc_extent_virt = kmap(enc_extent_page); for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { @@ -527,7 +527,10 @@ int ecryptfs_encrypt_page(struct page *page) } } out: - kfree(enc_extent_virt); + if (enc_extent_page) { + kunmap(enc_extent_page); + __free_page(enc_extent_page); + } return rc; } @@ -609,8 +612,8 @@ int ecryptfs_decrypt_page(struct page *page) { struct inode *ecryptfs_inode; struct ecryptfs_crypt_stat *crypt_stat; - char *enc_extent_virt = NULL; - struct page *enc_extent_page; + char *enc_extent_virt; + struct page *enc_extent_page = NULL; unsigned long extent_offset; int rc = 0; @@ -627,14 +630,14 @@ int ecryptfs_decrypt_page(struct page *page) page->index); goto out; } - enc_extent_virt = kmalloc(PAGE_CACHE_SIZE, GFP_USER); - if (!enc_extent_virt) { + enc_extent_page = alloc_page(GFP_USER); + if (!enc_extent_page) { rc = -ENOMEM; ecryptfs_printk(KERN_ERR, "Error allocating memory for " "encrypted extent\n"); goto out; } - enc_extent_page = virt_to_page(enc_extent_virt); + enc_extent_virt = kmap(enc_extent_page); for (extent_offset = 0; extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size); extent_offset++) { @@ -662,7 +665,10 @@ int ecryptfs_decrypt_page(struct page *page) } } out: - kfree(enc_extent_virt); + if (enc_extent_page) { + kunmap(enc_extent_page); + __free_page(enc_extent_page); + } return rc; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index d755455e3bf..89209f00f9c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -465,7 +465,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, int rc; struct dentry *lower_dentry; struct dentry *lower_dir_dentry; - umode_t mode; char *encoded_symname; int encoded_symlen; struct ecryptfs_crypt_stat *crypt_stat = NULL; @@ -473,7 +472,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, lower_dentry = ecryptfs_dentry_to_lower(dentry); dget(lower_dentry); lower_dir_dentry = lock_parent(lower_dentry); - mode = S_IALLUGO; encoded_symlen = ecryptfs_encode_filename(crypt_stat, symname, strlen(symname), &encoded_symname); @@ -482,7 +480,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry, goto out_lock; } rc = vfs_symlink(lower_dir_dentry->d_inode, lower_dentry, - encoded_symname, mode); + encoded_symname); kfree(encoded_symname); if (rc || !lower_dentry->d_inode) goto out_lock; @@ -830,22 +828,9 @@ out: } static int -ecryptfs_permission(struct inode *inode, int mask, struct nameidata *nd) +ecryptfs_permission(struct inode *inode, int mask) { - int rc; - - if (nd) { - struct vfsmount *vfsmnt_save = nd->path.mnt; - struct dentry *dentry_save = nd->path.dentry; - - nd->path.mnt = ecryptfs_dentry_to_lower_mnt(nd->path.dentry); - nd->path.dentry = ecryptfs_dentry_to_lower(nd->path.dentry); - rc = permission(ecryptfs_inode_to_lower(inode), mask, nd); - nd->path.mnt = vfsmnt_save; - nd->path.dentry = dentry_save; - } else - rc = permission(ecryptfs_inode_to_lower(inode), mask, NULL); - return rc; + return inode_permission(ecryptfs_inode_to_lower(inode), mask); } /** diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 6f403cfba14..448dfd597b5 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -578,7 +578,7 @@ static struct file_system_type ecryptfs_fs_type = { * Initializes the ecryptfs_inode_info_cache when it is created */ static void -inode_info_init_once(struct kmem_cache *cachep, void *vptr) +inode_info_init_once(void *vptr) { struct ecryptfs_inode_info *ei = (struct ecryptfs_inode_info *)vptr; @@ -589,7 +589,7 @@ static struct ecryptfs_cache_info { struct kmem_cache **cache; const char *name; size_t size; - void (*ctor)(struct kmem_cache *cache, void *obj); + void (*ctor)(void *obj); } ecryptfs_cache_infos[] = { { .cache = &ecryptfs_auth_tok_list_item_cache, diff --git a/fs/efs/super.c b/fs/efs/super.c index d733531b55e..567b134fa1f 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -70,7 +70,7 @@ static void efs_destroy_inode(struct inode *inode) kmem_cache_free(efs_inode_cachep, INODE_INFO(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct efs_inode_info *ei = (struct efs_inode_info *) foo; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 0c87474f791..7cc0eb756b5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1041,10 +1041,7 @@ retry: } /* - * It opens an eventpoll file descriptor. The "size" parameter is there - * for historical reasons, when epoll was using an hash instead of an - * RB tree. With the current implementation, the "size" parameter is ignored - * (besides sanity checks). + * Open an eventpoll file descriptor. */ asmlinkage long sys_epoll_create1(int flags) { diff --git a/fs/exec.c b/fs/exec.c index 5e559013e30..32993beecbe 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -32,6 +32,7 @@ #include <linux/swap.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/pagemap.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/key.h> @@ -42,13 +43,13 @@ #include <linux/module.h> #include <linux/namei.h> #include <linux/proc_fs.h> -#include <linux/ptrace.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> +#include <linux/tracehook.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -106,11 +107,17 @@ static inline void put_binfmt(struct linux_binfmt * fmt) */ asmlinkage long sys_uselib(const char __user * library) { - struct file * file; + struct file *file; struct nameidata nd; - int error; - - error = __user_path_lookup_open(library, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); + char *tmp = getname(library); + int error = PTR_ERR(tmp); + + if (!IS_ERR(tmp)) { + error = path_lookup_open(AT_FDCWD, tmp, + LOOKUP_FOLLOW, &nd, + FMODE_READ|FMODE_EXEC); + putname(tmp); + } if (error) goto out; @@ -118,7 +125,11 @@ asmlinkage long sys_uselib(const char __user * library) if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) goto exit; - error = vfs_permission(&nd, MAY_READ | MAY_EXEC); + error = -EACCES; + if (nd.path.mnt->mnt_flags & MNT_NOEXEC) + goto exit; + + error = vfs_permission(&nd, MAY_READ | MAY_EXEC | MAY_OPEN); if (error) goto exit; @@ -656,38 +667,43 @@ EXPORT_SYMBOL(setup_arg_pages); struct file *open_exec(const char *name) { struct nameidata nd; - int err; struct file *file; + int err; - err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, FMODE_READ|FMODE_EXEC); - file = ERR_PTR(err); - - if (!err) { - struct inode *inode = nd.path.dentry->d_inode; - file = ERR_PTR(-EACCES); - if (S_ISREG(inode->i_mode)) { - int err = vfs_permission(&nd, MAY_EXEC); - file = ERR_PTR(err); - if (!err) { - file = nameidata_to_filp(&nd, - O_RDONLY|O_LARGEFILE); - if (!IS_ERR(file)) { - err = deny_write_access(file); - if (err) { - fput(file); - file = ERR_PTR(err); - } - } -out: - return file; - } - } - release_open_intent(&nd); - path_put(&nd.path); + err = path_lookup_open(AT_FDCWD, name, LOOKUP_FOLLOW, &nd, + FMODE_READ|FMODE_EXEC); + if (err) + goto out; + + err = -EACCES; + if (!S_ISREG(nd.path.dentry->d_inode->i_mode)) + goto out_path_put; + + if (nd.path.mnt->mnt_flags & MNT_NOEXEC) + goto out_path_put; + + err = vfs_permission(&nd, MAY_EXEC | MAY_OPEN); + if (err) + goto out_path_put; + + file = nameidata_to_filp(&nd, O_RDONLY|O_LARGEFILE); + if (IS_ERR(file)) + return file; + + err = deny_write_access(file); + if (err) { + fput(file); + goto out; } - goto out; -} + return file; + + out_path_put: + release_open_intent(&nd); + path_put(&nd.path); + out: + return ERR_PTR(err); +} EXPORT_SYMBOL(open_exec); int kernel_read(struct file *file, unsigned long offset, @@ -1071,13 +1087,8 @@ EXPORT_SYMBOL(prepare_binprm); static int unsafe_exec(struct task_struct *p) { - int unsafe = 0; - if (p->ptrace & PT_PTRACED) { - if (p->ptrace & PT_PTRACE_CAP) - unsafe |= LSM_UNSAFE_PTRACE_CAP; - else - unsafe |= LSM_UNSAFE_PTRACE; - } + int unsafe = tracehook_unsafe_exec(p); + if (atomic_read(&p->fs->count) > 1 || atomic_read(&p->files->count) > 1 || atomic_read(&p->sighand->count) > 1) @@ -1214,6 +1225,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) read_unlock(&binfmt_lock); retval = fn(bprm, regs); if (retval >= 0) { + tracehook_report_exec(fmt, bprm, regs); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index e58669e1b87..ae8c4f850b2 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -294,7 +294,7 @@ ext2_check_acl(struct inode *inode, int mask) } int -ext2_permission(struct inode *inode, int mask, struct nameidata *nd) +ext2_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, ext2_check_acl); } diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 0bde85bafe3..b42cf578554 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -58,7 +58,7 @@ static inline int ext2_acl_count(size_t size) #define EXT2_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext2_permission (struct inode *, int, struct nameidata *); +extern int ext2_permission (struct inode *, int); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 384fc0d1dd7..991d6dfeb51 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -791,6 +791,7 @@ const struct address_space_operations ext2_aops = { .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; const struct address_space_operations ext2_aops_xip = { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 31308a3b0b8..fd88c7b43e6 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -159,7 +159,7 @@ static void ext2_destroy_inode(struct inode *inode) kmem_cache_free(ext2_inode_cachep, EXT2_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ext2_inode_info *ei = (struct ext2_inode_info *) foo; diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index a754d184817..b60bb241880 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -299,7 +299,7 @@ ext3_check_acl(struct inode *inode, int mask) } int -ext3_permission(struct inode *inode, int mask, struct nameidata *nd) +ext3_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, ext3_check_acl); } diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 0d1e6279cbf..42da16b8cac 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -58,7 +58,7 @@ static inline int ext3_acl_count(size_t size) #define EXT3_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext3_permission (struct inode *, int, struct nameidata *); +extern int ext3_permission (struct inode *, int); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 3bf07d70b91..507d8689b11 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1767,44 +1767,47 @@ static int ext3_journalled_set_page_dirty(struct page *page) } static const struct address_space_operations ext3_ordered_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_ordered_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_ordered_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_ordered_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_ordered_write_end, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext3_writeback_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_writeback_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_writeback_write_end, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_writeback_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_writeback_write_end, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext3_journalled_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_journalled_writepage, - .sync_page = block_sync_page, - .write_begin = ext3_write_begin, - .write_end = ext3_journalled_write_end, - .set_page_dirty = ext3_journalled_set_page_dirty, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_journalled_writepage, + .sync_page = block_sync_page, + .write_begin = ext3_write_begin, + .write_end = ext3_journalled_write_end, + .set_page_dirty = ext3_journalled_set_page_dirty, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, }; void ext3_set_aops(struct inode *inode) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 615788c6843..f38a5afc39a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -472,7 +472,7 @@ static void ext3_destroy_inode(struct inode *inode) kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; @@ -2810,8 +2810,9 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, journal_unlock_updates(EXT3_SB(sb)->s_journal); } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, remount); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 3c8dab880d9..694ed6fadcc 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -40,34 +40,35 @@ ext4_acl_from_disk(const void *value, size_t size) acl = posix_acl_alloc(count, GFP_NOFS); if (!acl) return ERR_PTR(-ENOMEM); - for (n=0; n < count; n++) { + for (n = 0; n < count; n++) { ext4_acl_entry *entry = (ext4_acl_entry *)value; if ((char *)value + sizeof(ext4_acl_entry_short) > end) goto fail; acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - value = (char *)value + - sizeof(ext4_acl_entry_short); - acl->a_entries[n].e_id = ACL_UNDEFINED_ID; - break; - - case ACL_USER: - case ACL_GROUP: - value = (char *)value + sizeof(ext4_acl_entry); - if ((char *)value > end) - goto fail; - acl->a_entries[n].e_id = - le32_to_cpu(entry->e_id); - break; - - default: + + switch (acl->a_entries[n].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value = (char *)value + + sizeof(ext4_acl_entry_short); + acl->a_entries[n].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value = (char *)value + sizeof(ext4_acl_entry); + if ((char *)value > end) goto fail; + acl->a_entries[n].e_id = + le32_to_cpu(entry->e_id); + break; + + default: + goto fail; } } if (value != end) @@ -96,27 +97,26 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) return ERR_PTR(-ENOMEM); ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); e = (char *)ext_acl + sizeof(ext4_acl_header); - for (n=0; n < acl->a_count; n++) { + for (n = 0; n < acl->a_count; n++) { ext4_acl_entry *entry = (ext4_acl_entry *)e; entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); - switch(acl->a_entries[n].e_tag) { - case ACL_USER: - case ACL_GROUP: - entry->e_id = - cpu_to_le32(acl->a_entries[n].e_id); - e += sizeof(ext4_acl_entry); - break; - - case ACL_USER_OBJ: - case ACL_GROUP_OBJ: - case ACL_MASK: - case ACL_OTHER: - e += sizeof(ext4_acl_entry_short); - break; - - default: - goto fail; + switch (acl->a_entries[n].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); + e += sizeof(ext4_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(ext4_acl_entry_short); + break; + + default: + goto fail; } } return (char *)ext_acl; @@ -167,23 +167,23 @@ ext4_get_acl(struct inode *inode, int type) if (!test_opt(inode->i_sb, POSIX_ACL)) return NULL; - switch(type) { - case ACL_TYPE_ACCESS: - acl = ext4_iget_acl(inode, &ei->i_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; - name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - break; - - case ACL_TYPE_DEFAULT: - acl = ext4_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT4_ACL_NOT_CACHED) - return acl; - name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; - break; - - default: - return ERR_PTR(-EINVAL); + switch (type) { + case ACL_TYPE_ACCESS: + acl = ext4_iget_acl(inode, &ei->i_acl); + if (acl != EXT4_ACL_NOT_CACHED) + return acl; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + acl = ext4_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT4_ACL_NOT_CACHED) + return acl; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + + default: + return ERR_PTR(-EINVAL); } retval = ext4_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { @@ -201,14 +201,14 @@ ext4_get_acl(struct inode *inode, int type) kfree(value); if (!IS_ERR(acl)) { - switch(type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; + switch (type) { + case ACL_TYPE_ACCESS: + ext4_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext4_iset_acl(inode, &ei->i_default_acl, acl); + break; } } return acl; @@ -232,31 +232,31 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - switch(type) { - case ACL_TYPE_ACCESS: - name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; - if (acl) { - mode_t mode = inode->i_mode; - error = posix_acl_equiv_mode(acl, &mode); - if (error < 0) - return error; - else { - inode->i_mode = mode; - ext4_mark_inode_dirty(handle, inode); - if (error == 0) - acl = NULL; - } + switch (type) { + case ACL_TYPE_ACCESS: + name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; + else { + inode->i_mode = mode; + ext4_mark_inode_dirty(handle, inode); + if (error == 0) + acl = NULL; } - break; + } + break; - case ACL_TYPE_DEFAULT: - name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; - if (!S_ISDIR(inode->i_mode)) - return acl ? -EACCES : 0; - break; + case ACL_TYPE_DEFAULT: + name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; - default: - return -EINVAL; + default: + return -EINVAL; } if (acl) { value = ext4_acl_to_disk(acl, &size); @@ -269,14 +269,14 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, kfree(value); if (!error) { - switch(type) { - case ACL_TYPE_ACCESS: - ext4_iset_acl(inode, &ei->i_acl, acl); - break; - - case ACL_TYPE_DEFAULT: - ext4_iset_acl(inode, &ei->i_default_acl, acl); - break; + switch (type) { + case ACL_TYPE_ACCESS: + ext4_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext4_iset_acl(inode, &ei->i_default_acl, acl); + break; } } return error; @@ -299,7 +299,7 @@ ext4_check_acl(struct inode *inode, int mask) } int -ext4_permission(struct inode *inode, int mask, struct nameidata *nd) +ext4_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, ext4_check_acl); } diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 26a5c1abf14..cd2b855a07d 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -58,7 +58,7 @@ static inline int ext4_acl_count(size_t size) #define EXT4_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext4_permission (struct inode *, int, struct nameidata *); +extern int ext4_permission (struct inode *, int); extern int ext4_acl_chmod (struct inode *); extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 495ab21b983..1ae5004e93f 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -314,25 +314,28 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) if (unlikely(!bh)) { ext4_error(sb, __func__, "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %llu", - (int)block_group, (unsigned long long)bitmap_blk); + "block_group = %lu, block_bitmap = %llu", + block_group, bitmap_blk); return NULL; } if (bh_uptodate_or_lock(bh)) return bh; + spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh, block_group, desc); set_buffer_uptodate(bh); unlock_buffer(bh); + spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); return bh; } + spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); if (bh_submit_read(bh) < 0) { put_bh(bh); ext4_error(sb, __func__, "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %llu", - (int)block_group, (unsigned long long)bitmap_blk); + "block_group = %lu, block_bitmap = %llu", + block_group, bitmap_blk); return NULL; } ext4_valid_block_bitmap(sb, desc, block_group, bh); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 303e41cf7b1..6c7924d9e35 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1044,7 +1044,6 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp, /* inode.c */ -void ext4_da_release_space(struct inode *inode, int used, int to_free); int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, struct buffer_head *bh, ext4_fsblk_t blocknr); struct buffer_head *ext4_getblk(handle_t *, struct inode *, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 42c4c0c892e..612c3d2c382 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -99,7 +99,7 @@ static int ext4_ext_journal_restart(handle_t *handle, int needed) if (handle->h_buffer_credits > needed) return 0; err = ext4_journal_extend(handle, needed); - if (err) + if (err <= 0) return err; return ext4_journal_restart(handle, needed); } @@ -1441,7 +1441,7 @@ unsigned int ext4_ext_check_overlap(struct inode *inode, /* * get the next allocated block if the extent in the path - * is before the requested block(s) + * is before the requested block(s) */ if (b2 < b1) { b2 = ext4_ext_next_allocated_block(path); @@ -1910,9 +1910,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, BUG_ON(b != ex_ee_block + ex_ee_len - 1); } - /* at present, extent can't cross block group: */ - /* leaf + bitmap + group desc + sb + inode */ - credits = 5; + /* + * 3 for leaf, sb, and inode plus 2 (bmap and group + * descriptor) for each block group; assume two block + * groups plus ex_ee_len/blocks_per_block_group for + * the worst case + */ + credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb)); if (ex == EXT_FIRST_EXTENT(eh)) { correct_index = 1; credits += (ext_depth(inode)) + 1; @@ -2323,7 +2327,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, unsigned int newdepth; /* If extent has less than EXT4_EXT_ZERO_LEN zerout directly */ if (allocated <= EXT4_EXT_ZERO_LEN) { - /* Mark first half uninitialized. + /* + * iblock == ee_block is handled by the zerouout + * at the beginning. + * Mark first half uninitialized. * Mark second half initialized and zero out the * initialized extent */ @@ -2346,7 +2353,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ex->ee_len = orig_ex.ee_len; ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); - /* zeroed the full extent */ + /* blocks available from iblock */ return allocated; } else if (err) @@ -2374,6 +2381,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, err = PTR_ERR(path); return err; } + /* get the second half extent details */ ex = path[depth].p_ext; err = ext4_ext_get_access(handle, inode, path + depth); @@ -2403,6 +2411,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zeroed the full extent */ + /* blocks available from iblock */ return allocated; } else if (err) @@ -2418,23 +2427,22 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, */ orig_ex.ee_len = cpu_to_le16(ee_len - ext4_ext_get_actual_len(ex3)); - if (newdepth != depth) { - depth = newdepth; - ext4_ext_drop_refs(path); - path = ext4_ext_find_extent(inode, iblock, path); - if (IS_ERR(path)) { - err = PTR_ERR(path); - goto out; - } - eh = path[depth].p_hdr; - ex = path[depth].p_ext; - if (ex2 != &newex) - ex2 = ex; - - err = ext4_ext_get_access(handle, inode, path + depth); - if (err) - goto out; + depth = newdepth; + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, iblock, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out; } + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + if (ex2 != &newex) + ex2 = ex; + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + allocated = max_blocks; /* If extent has less than EXT4_EXT_ZERO_LEN and we are trying @@ -2452,6 +2460,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, ext4_ext_store_pblock(ex, ext_pblock(&orig_ex)); ext4_ext_dirty(handle, inode, path + depth); /* zero out the first half */ + /* blocks available from iblock */ return allocated; } } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index a92eb305344..655e760212b 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -97,34 +97,44 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, * Return buffer_head of bitmap on success or NULL. */ static struct buffer_head * -read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) +ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc *desc; struct buffer_head *bh = NULL; + ext4_fsblk_t bitmap_blk; desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) - goto error_out; - if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); - if (!buffer_uptodate(bh)) { - lock_buffer(bh); - if (!buffer_uptodate(bh)) { - ext4_init_inode_bitmap(sb, bh, block_group, - desc); - set_buffer_uptodate(bh); - } - unlock_buffer(bh); - } - } else { - bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); + return NULL; + bitmap_blk = ext4_inode_bitmap(sb, desc); + bh = sb_getblk(sb, bitmap_blk); + if (unlikely(!bh)) { + ext4_error(sb, __func__, + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %llu", + block_group, bitmap_blk); + return NULL; } - if (!bh) - ext4_error(sb, "read_inode_bitmap", + if (bh_uptodate_or_lock(bh)) + return bh; + + spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); + if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + ext4_init_inode_bitmap(sb, bh, block_group, desc); + set_buffer_uptodate(bh); + unlock_buffer(bh); + spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + return bh; + } + spin_unlock(sb_bgl_lock(EXT4_SB(sb), block_group)); + if (bh_submit_read(bh) < 0) { + put_bh(bh); + ext4_error(sb, __func__, "Cannot read inode bitmap - " "block_group = %lu, inode_bitmap = %llu", - block_group, ext4_inode_bitmap(sb, desc)); -error_out: + block_group, bitmap_blk); + return NULL; + } return bh; } @@ -200,7 +210,7 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) } block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); - bitmap_bh = read_inode_bitmap(sb, block_group); + bitmap_bh = ext4_read_inode_bitmap(sb, block_group); if (!bitmap_bh) goto error_return; @@ -623,7 +633,7 @@ got_group: goto fail; brelse(bitmap_bh); - bitmap_bh = read_inode_bitmap(sb, group); + bitmap_bh = ext4_read_inode_bitmap(sb, group); if (!bitmap_bh) goto fail; @@ -728,7 +738,7 @@ got: /* When marking the block group with * ~EXT4_BG_INODE_UNINIT we don't want to depend - * on the value of bg_itable_unsed even though + * on the value of bg_itable_unused even though * mke2fs could have initialized the same for us. * Instead we calculated the value below */ @@ -891,7 +901,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); - bitmap_bh = read_inode_bitmap(sb, block_group); + bitmap_bh = ext4_read_inode_bitmap(sb, block_group); if (!bitmap_bh) { ext4_warning(sb, __func__, "inode bitmap error for orphan %lu", ino); @@ -969,7 +979,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) continue; desc_count += le16_to_cpu(gdp->bg_free_inodes_count); brelse(bitmap_bh); - bitmap_bh = read_inode_bitmap(sb, i); + bitmap_bh = ext4_read_inode_bitmap(sb, i); if (!bitmap_bh) continue; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 8ca2763df09..59fbbe899ac 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -191,6 +191,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) void ext4_delete_inode (struct inode * inode) { handle_t *handle; + int err; if (ext4_should_order_data(inode)) ext4_begin_ordered_truncate(inode, 0); @@ -199,8 +200,9 @@ void ext4_delete_inode (struct inode * inode) if (is_bad_inode(inode)) goto no_delete; - handle = start_transaction(inode); + handle = ext4_journal_start(inode, blocks_for_truncate(inode)+3); if (IS_ERR(handle)) { + ext4_std_error(inode->i_sb, PTR_ERR(handle)); /* * If we're going to skip the normal cleanup, we still need to * make sure that the in-core orphan linked list is properly @@ -213,8 +215,34 @@ void ext4_delete_inode (struct inode * inode) if (IS_SYNC(inode)) handle->h_sync = 1; inode->i_size = 0; + err = ext4_mark_inode_dirty(handle, inode); + if (err) { + ext4_warning(inode->i_sb, __func__, + "couldn't mark inode dirty (err %d)", err); + goto stop_handle; + } if (inode->i_blocks) ext4_truncate(inode); + + /* + * ext4_ext_truncate() doesn't reserve any slop when it + * restarts journal transactions; therefore there may not be + * enough credits left in the handle to remove the inode from + * the orphan list and set the dtime field. + */ + if (handle->h_buffer_credits < 3) { + err = ext4_journal_extend(handle, 3); + if (err > 0) + err = ext4_journal_restart(handle, 3); + if (err != 0) { + ext4_warning(inode->i_sb, __func__, + "couldn't extend journal (err %d)", err); + stop_handle: + ext4_journal_stop(handle); + goto no_delete; + } + } + /* * Kill off the orphan record which ext4_truncate created. * AKPM: I think this can be inside the above `if'. @@ -952,6 +980,67 @@ out: return err; } +/* + * Calculate the number of metadata blocks need to reserve + * to allocate @blocks for non extent file based file + */ +static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) +{ + int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); + int ind_blks, dind_blks, tind_blks; + + /* number of new indirect blocks needed */ + ind_blks = (blocks + icap - 1) / icap; + + dind_blks = (ind_blks + icap - 1) / icap; + + tind_blks = 1; + + return ind_blks + dind_blks + tind_blks; +} + +/* + * Calculate the number of metadata blocks need to reserve + * to allocate given number of blocks + */ +static int ext4_calc_metadata_amount(struct inode *inode, int blocks) +{ + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_calc_metadata_amount(inode, blocks); + + return ext4_indirect_calc_metadata_amount(inode, blocks); +} + +static void ext4_da_update_reserve_space(struct inode *inode, int used) +{ + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + int total, mdb, mdb_free; + + spin_lock(&EXT4_I(inode)->i_block_reservation_lock); + /* recalculate the number of metablocks still need to be reserved */ + total = EXT4_I(inode)->i_reserved_data_blocks - used; + mdb = ext4_calc_metadata_amount(inode, total); + + /* figure out how many metablocks to release */ + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); + mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; + + /* Account for allocated meta_blocks */ + mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; + + /* update fs free blocks counter for truncate case */ + percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); + + /* update per-inode reservations */ + BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); + EXT4_I(inode)->i_reserved_data_blocks -= used; + + BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); + EXT4_I(inode)->i_reserved_meta_blocks = mdb; + EXT4_I(inode)->i_allocated_meta_blocks = 0; + spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); +} + /* Maximum number of blocks we map for direct IO at once. */ #define DIO_MAX_BLOCKS 4096 /* @@ -965,10 +1054,9 @@ out: /* + * The ext4_get_blocks_wrap() function try to look up the requested blocks, + * and returns if the blocks are already mapped. * - * - * ext4_ext4 get_block() wrapper function - * It will do a look up first, and returns if the blocks already mapped. * Otherwise it takes the write lock of the i_data_sem and allocate blocks * and store the allocated blocks in the result buffer head and mark it * mapped. @@ -1069,7 +1157,7 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, * which were deferred till now */ if ((retval > 0) && buffer_delay(bh)) - ext4_da_release_space(inode, retval, 0); + ext4_da_update_reserve_space(inode, retval); } up_write((&EXT4_I(inode)->i_data_sem)); @@ -1336,12 +1424,8 @@ static int ext4_ordered_write_end(struct file *file, { handle_t *handle = ext4_journal_current_handle(); struct inode *inode = mapping->host; - unsigned from, to; int ret = 0, ret2; - from = pos & (PAGE_CACHE_SIZE - 1); - to = from + len; - ret = ext4_jbd2_file_inode(handle, inode); if (ret == 0) { @@ -1437,36 +1521,6 @@ static int ext4_journalled_write_end(struct file *file, return ret ? ret : copied; } -/* - * Calculate the number of metadata blocks need to reserve - * to allocate @blocks for non extent file based file - */ -static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) -{ - int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb); - int ind_blks, dind_blks, tind_blks; - - /* number of new indirect blocks needed */ - ind_blks = (blocks + icap - 1) / icap; - - dind_blks = (ind_blks + icap - 1) / icap; - - tind_blks = 1; - - return ind_blks + dind_blks + tind_blks; -} - -/* - * Calculate the number of metadata blocks need to reserve - * to allocate given number of blocks - */ -static int ext4_calc_metadata_amount(struct inode *inode, int blocks) -{ - if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) - return ext4_ext_calc_metadata_amount(inode, blocks); - - return ext4_indirect_calc_metadata_amount(inode, blocks); -} static int ext4_da_reserve_space(struct inode *inode, int nrblocks) { @@ -1490,7 +1544,6 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); return -ENOSPC; } - /* reduce fs free blocks counter */ percpu_counter_sub(&sbi->s_freeblocks_counter, total); @@ -1501,35 +1554,31 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) return 0; /* success */ } -void ext4_da_release_space(struct inode *inode, int used, int to_free) +static void ext4_da_release_space(struct inode *inode, int to_free) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); int total, mdb, mdb_free, release; spin_lock(&EXT4_I(inode)->i_block_reservation_lock); /* recalculate the number of metablocks still need to be reserved */ - total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free; + total = EXT4_I(inode)->i_reserved_data_blocks - to_free; mdb = ext4_calc_metadata_amount(inode, total); /* figure out how many metablocks to release */ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; - /* Account for allocated meta_blocks */ - mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; - release = to_free + mdb_free; /* update fs free blocks counter for truncate case */ percpu_counter_add(&sbi->s_freeblocks_counter, release); /* update per-inode reservations */ - BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks); - EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free); + BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); + EXT4_I(inode)->i_reserved_data_blocks -= to_free; BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); EXT4_I(inode)->i_reserved_meta_blocks = mdb; - EXT4_I(inode)->i_allocated_meta_blocks = 0; spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); } @@ -1551,7 +1600,7 @@ static void ext4_da_page_release_reservation(struct page *page, } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - ext4_da_release_space(page->mapping->host, 0, to_release); + ext4_da_release_space(page->mapping->host, to_release); } /* @@ -2280,8 +2329,11 @@ retry: } page = __grab_cache_page(mapping, index); - if (!page) - return -ENOMEM; + if (!page) { + ext4_journal_stop(handle); + ret = -ENOMEM; + goto out; + } *pagep = page; ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, @@ -2806,59 +2858,63 @@ static int ext4_journalled_set_page_dirty(struct page *page) } static const struct address_space_operations ext4_ordered_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_ordered_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_normal_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_ordered_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_writeback_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_normal_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_writeback_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_normal_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_writeback_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_journalled_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_journalled_writepage, - .sync_page = block_sync_page, - .write_begin = ext4_write_begin, - .write_end = ext4_journalled_write_end, - .set_page_dirty = ext4_journalled_set_page_dirty, - .bmap = ext4_bmap, - .invalidatepage = ext4_invalidatepage, - .releasepage = ext4_releasepage, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_journalled_writepage, + .sync_page = block_sync_page, + .write_begin = ext4_write_begin, + .write_end = ext4_journalled_write_end, + .set_page_dirty = ext4_journalled_set_page_dirty, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .is_partially_uptodate = block_is_partially_uptodate, }; static const struct address_space_operations ext4_da_aops = { - .readpage = ext4_readpage, - .readpages = ext4_readpages, - .writepage = ext4_da_writepage, - .writepages = ext4_da_writepages, - .sync_page = block_sync_page, - .write_begin = ext4_da_write_begin, - .write_end = ext4_da_write_end, - .bmap = ext4_bmap, - .invalidatepage = ext4_da_invalidatepage, - .releasepage = ext4_releasepage, - .direct_IO = ext4_direct_IO, - .migratepage = buffer_migrate_page, + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_da_writepage, + .writepages = ext4_da_writepages, + .sync_page = block_sync_page, + .write_begin = ext4_da_write_begin, + .write_end = ext4_da_write_end, + .bmap = ext4_bmap, + .invalidatepage = ext4_da_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, + .migratepage = buffer_migrate_page, + .is_partially_uptodate = block_is_partially_uptodate, }; void ext4_set_aops(struct inode *inode) @@ -3586,6 +3642,16 @@ static int __ext4_get_inode_loc(struct inode *inode, } if (!buffer_uptodate(bh)) { lock_buffer(bh); + + /* + * If the buffer has the write error flag, we have failed + * to write out another inode in the same block. In this + * case, we don't have to read the block because we may + * read the old inode data successfully. + */ + if (buffer_write_io_error(bh) && !buffer_uptodate(bh)) + set_buffer_uptodate(bh); + if (buffer_uptodate(bh)) { /* someone brought it uptodate while we waited */ unlock_buffer(bh); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 8d141a25bbe..865e9ddb44d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -787,13 +787,16 @@ static int ext4_mb_init_cache(struct page *page, char *incore) if (bh_uptodate_or_lock(bh[i])) continue; + spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { ext4_init_block_bitmap(sb, bh[i], first_group + i, desc); set_buffer_uptodate(bh[i]); unlock_buffer(bh[i]); + spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); continue; } + spin_unlock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); get_bh(bh[i]); bh[i]->b_end_io = end_buffer_read_sync; submit_bh(READ, bh[i]); @@ -2477,7 +2480,7 @@ err_freesgi: int ext4_mb_init(struct super_block *sb, int needs_recovery) { struct ext4_sb_info *sbi = EXT4_SB(sb); - unsigned i; + unsigned i, j; unsigned offset; unsigned max; int ret; @@ -2537,7 +2540,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; - i = sizeof(struct ext4_locality_group) * NR_CPUS; + i = sizeof(struct ext4_locality_group) * nr_cpu_ids; sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); if (sbi->s_locality_groups == NULL) { clear_opt(sbi->s_mount_opt, MBALLOC); @@ -2545,11 +2548,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) kfree(sbi->s_mb_maxs); return -ENOMEM; } - for (i = 0; i < NR_CPUS; i++) { + for (i = 0; i < nr_cpu_ids; i++) { struct ext4_locality_group *lg; lg = &sbi->s_locality_groups[i]; mutex_init(&lg->lg_mutex); - INIT_LIST_HEAD(&lg->lg_prealloc_list); + for (j = 0; j < PREALLOC_TB_SIZE; j++) + INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); spin_lock_init(&lg->lg_prealloc_lock); } @@ -3260,6 +3264,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, struct ext4_prealloc_space *pa) { unsigned int len = ac->ac_o_ex.fe_len; + ext4_get_group_no_and_offset(ac->ac_sb, pa->pa_pstart, &ac->ac_b_ex.fe_group, &ac->ac_b_ex.fe_start); @@ -3282,6 +3287,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, static noinline_for_stack int ext4_mb_use_preallocated(struct ext4_allocation_context *ac) { + int order, i; struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); struct ext4_locality_group *lg; struct ext4_prealloc_space *pa; @@ -3322,22 +3328,29 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) lg = ac->ac_lg; if (lg == NULL) return 0; - - rcu_read_lock(); - list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) { - spin_lock(&pa->pa_lock); - if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) { - atomic_inc(&pa->pa_count); - ext4_mb_use_group_pa(ac, pa); + order = fls(ac->ac_o_ex.fe_len) - 1; + if (order > PREALLOC_TB_SIZE - 1) + /* The max size of hash table is PREALLOC_TB_SIZE */ + order = PREALLOC_TB_SIZE - 1; + + for (i = order; i < PREALLOC_TB_SIZE; i++) { + rcu_read_lock(); + list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], + pa_inode_list) { + spin_lock(&pa->pa_lock); + if (pa->pa_deleted == 0 && + pa->pa_free >= ac->ac_o_ex.fe_len) { + atomic_inc(&pa->pa_count); + ext4_mb_use_group_pa(ac, pa); + spin_unlock(&pa->pa_lock); + ac->ac_criteria = 20; + rcu_read_unlock(); + return 1; + } spin_unlock(&pa->pa_lock); - ac->ac_criteria = 20; - rcu_read_unlock(); - return 1; } - spin_unlock(&pa->pa_lock); + rcu_read_unlock(); } - rcu_read_unlock(); - return 0; } @@ -3560,6 +3573,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) pa->pa_free = pa->pa_len; atomic_set(&pa->pa_count, 1); spin_lock_init(&pa->pa_lock); + INIT_LIST_HEAD(&pa->pa_inode_list); pa->pa_deleted = 0; pa->pa_linear = 1; @@ -3580,10 +3594,10 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac) list_add(&pa->pa_group_list, &grp->bb_prealloc_list); ext4_unlock_group(sb, ac->ac_b_ex.fe_group); - spin_lock(pa->pa_obj_lock); - list_add_tail_rcu(&pa->pa_inode_list, &lg->lg_prealloc_list); - spin_unlock(pa->pa_obj_lock); - + /* + * We will later add the new pa to the right bucket + * after updating the pa_free in ext4_mb_release_context + */ return 0; } @@ -3733,20 +3747,23 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { - /* error handling here */ - ext4_mb_release_desc(&e4b); - BUG_ON(bitmap_bh == NULL); + ext4_error(sb, __func__, "Error in reading block " + "bitmap for %lu\n", group); + return 0; } err = ext4_mb_load_buddy(sb, group, &e4b); - BUG_ON(err != 0); /* error handling here */ + if (err) { + ext4_error(sb, __func__, "Error in loading buddy " + "information for %lu\n", group); + put_bh(bitmap_bh); + return 0; + } if (needed == 0) needed = EXT4_BLOCKS_PER_GROUP(sb) + 1; - grp = ext4_get_group_info(sb, group); INIT_LIST_HEAD(&list); - ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); repeat: ext4_lock_group(sb, group); @@ -3903,13 +3920,18 @@ repeat: ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); err = ext4_mb_load_buddy(sb, group, &e4b); - BUG_ON(err != 0); /* error handling here */ + if (err) { + ext4_error(sb, __func__, "Error in loading buddy " + "information for %lu\n", group); + continue; + } bitmap_bh = ext4_read_block_bitmap(sb, group); if (bitmap_bh == NULL) { - /* error handling here */ + ext4_error(sb, __func__, "Error in reading block " + "bitmap for %lu\n", group); ext4_mb_release_desc(&e4b); - BUG_ON(bitmap_bh == NULL); + continue; } ext4_lock_group(sb, group); @@ -4112,22 +4134,168 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, } +static noinline_for_stack void +ext4_mb_discard_lg_preallocations(struct super_block *sb, + struct ext4_locality_group *lg, + int order, int total_entries) +{ + ext4_group_t group = 0; + struct ext4_buddy e4b; + struct list_head discard_list; + struct ext4_prealloc_space *pa, *tmp; + struct ext4_allocation_context *ac; + + mb_debug("discard locality group preallocation\n"); + + INIT_LIST_HEAD(&discard_list); + ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); + + spin_lock(&lg->lg_prealloc_lock); + list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order], + pa_inode_list) { + spin_lock(&pa->pa_lock); + if (atomic_read(&pa->pa_count)) { + /* + * This is the pa that we just used + * for block allocation. So don't + * free that + */ + spin_unlock(&pa->pa_lock); + continue; + } + if (pa->pa_deleted) { + spin_unlock(&pa->pa_lock); + continue; + } + /* only lg prealloc space */ + BUG_ON(!pa->pa_linear); + + /* seems this one can be freed ... */ + pa->pa_deleted = 1; + spin_unlock(&pa->pa_lock); + + list_del_rcu(&pa->pa_inode_list); + list_add(&pa->u.pa_tmp_list, &discard_list); + + total_entries--; + if (total_entries <= 5) { + /* + * we want to keep only 5 entries + * allowing it to grow to 8. This + * mak sure we don't call discard + * soon for this list. + */ + break; + } + } + spin_unlock(&lg->lg_prealloc_lock); + + list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) { + + ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL); + if (ext4_mb_load_buddy(sb, group, &e4b)) { + ext4_error(sb, __func__, "Error in loading buddy " + "information for %lu\n", group); + continue; + } + ext4_lock_group(sb, group); + list_del(&pa->pa_group_list); + ext4_mb_release_group_pa(&e4b, pa, ac); + ext4_unlock_group(sb, group); + + ext4_mb_release_desc(&e4b); + list_del(&pa->u.pa_tmp_list); + call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback); + } + if (ac) + kmem_cache_free(ext4_ac_cachep, ac); +} + +/* + * We have incremented pa_count. So it cannot be freed at this + * point. Also we hold lg_mutex. So no parallel allocation is + * possible from this lg. That means pa_free cannot be updated. + * + * A parallel ext4_mb_discard_group_preallocations is possible. + * which can cause the lg_prealloc_list to be updated. + */ + +static void ext4_mb_add_n_trim(struct ext4_allocation_context *ac) +{ + int order, added = 0, lg_prealloc_count = 1; + struct super_block *sb = ac->ac_sb; + struct ext4_locality_group *lg = ac->ac_lg; + struct ext4_prealloc_space *tmp_pa, *pa = ac->ac_pa; + + order = fls(pa->pa_free) - 1; + if (order > PREALLOC_TB_SIZE - 1) + /* The max size of hash table is PREALLOC_TB_SIZE */ + order = PREALLOC_TB_SIZE - 1; + /* Add the prealloc space to lg */ + rcu_read_lock(); + list_for_each_entry_rcu(tmp_pa, &lg->lg_prealloc_list[order], + pa_inode_list) { + spin_lock(&tmp_pa->pa_lock); + if (tmp_pa->pa_deleted) { + spin_unlock(&pa->pa_lock); + continue; + } + if (!added && pa->pa_free < tmp_pa->pa_free) { + /* Add to the tail of the previous entry */ + list_add_tail_rcu(&pa->pa_inode_list, + &tmp_pa->pa_inode_list); + added = 1; + /* + * we want to count the total + * number of entries in the list + */ + } + spin_unlock(&tmp_pa->pa_lock); + lg_prealloc_count++; + } + if (!added) + list_add_tail_rcu(&pa->pa_inode_list, + &lg->lg_prealloc_list[order]); + rcu_read_unlock(); + + /* Now trim the list to be not more than 8 elements */ + if (lg_prealloc_count > 8) { + ext4_mb_discard_lg_preallocations(sb, lg, + order, lg_prealloc_count); + return; + } + return ; +} + /* * release all resource we used in allocation */ static int ext4_mb_release_context(struct ext4_allocation_context *ac) { - if (ac->ac_pa) { - if (ac->ac_pa->pa_linear) { + struct ext4_prealloc_space *pa = ac->ac_pa; + if (pa) { + if (pa->pa_linear) { /* see comment in ext4_mb_use_group_pa() */ - spin_lock(&ac->ac_pa->pa_lock); - ac->ac_pa->pa_pstart += ac->ac_b_ex.fe_len; - ac->ac_pa->pa_lstart += ac->ac_b_ex.fe_len; - ac->ac_pa->pa_free -= ac->ac_b_ex.fe_len; - ac->ac_pa->pa_len -= ac->ac_b_ex.fe_len; - spin_unlock(&ac->ac_pa->pa_lock); + spin_lock(&pa->pa_lock); + pa->pa_pstart += ac->ac_b_ex.fe_len; + pa->pa_lstart += ac->ac_b_ex.fe_len; + pa->pa_free -= ac->ac_b_ex.fe_len; + pa->pa_len -= ac->ac_b_ex.fe_len; + spin_unlock(&pa->pa_lock); + /* + * We want to add the pa to the right bucket. + * Remove it from the list and while adding + * make sure the list to which we are adding + * doesn't grow big. + */ + if (likely(pa->pa_free)) { + spin_lock(pa->pa_obj_lock); + list_del_rcu(&pa->pa_inode_list); + spin_unlock(pa->pa_obj_lock); + ext4_mb_add_n_trim(ac); + } } - ext4_mb_put_pa(ac, ac->ac_sb, ac->ac_pa); + ext4_mb_put_pa(ac, ac->ac_sb, pa); } if (ac->ac_bitmap_page) page_cache_release(ac->ac_bitmap_page); @@ -4420,11 +4588,15 @@ do_more: count -= overflow; } bitmap_bh = ext4_read_block_bitmap(sb, block_group); - if (!bitmap_bh) + if (!bitmap_bh) { + err = -EIO; goto error_return; + } gdp = ext4_get_group_desc(sb, block_group, &gd_bh); - if (!gdp) + if (!gdp) { + err = -EIO; goto error_return; + } if (in_range(ext4_block_bitmap(sb, gdp), block, count) || in_range(ext4_inode_bitmap(sb, gdp), block, count) || diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index bfe6add46bc..c7c9906c2a7 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -164,11 +164,17 @@ struct ext4_free_extent { * Locality group: * we try to group all related changes together * so that writeback can flush/allocate them together as well + * Size of lg_prealloc_list hash is determined by MB_DEFAULT_GROUP_PREALLOC + * (512). We store prealloc space into the hash based on the pa_free blocks + * order value.ie, fls(pa_free)-1; */ +#define PREALLOC_TB_SIZE 10 struct ext4_locality_group { /* for allocator */ - struct mutex lg_mutex; /* to serialize allocates */ - struct list_head lg_prealloc_list;/* list of preallocations */ + /* to serialize allocates */ + struct mutex lg_mutex; + /* list of preallocations */ + struct list_head lg_prealloc_list[PREALLOC_TB_SIZE]; spinlock_t lg_prealloc_lock; }; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index f000fbe2cd9..0a926516426 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -73,7 +73,7 @@ static int verify_group_input(struct super_block *sb, "Inode bitmap not in group (block %llu)", (unsigned long long)input->inode_bitmap); else if (outside(input->inode_table, start, end) || - outside(itend - 1, start, end)) + outside(itend - 1, start, end)) ext4_warning(sb, __func__, "Inode table not in group (blocks %llu-%llu)", (unsigned long long)input->inode_table, itend - 1); @@ -104,7 +104,7 @@ static int verify_group_input(struct super_block *sb, (unsigned long long)input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || - inside(itend - 1, start, metaend)) + inside(itend - 1, start, metaend)) ext4_warning(sb, __func__, "Inode table (%llu-%llu) overlaps" "GDT table (%llu-%llu)", @@ -158,9 +158,9 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh, if (err) { if ((err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) return err; - if ((err = ext4_journal_get_write_access(handle, bh))) + if ((err = ext4_journal_get_write_access(handle, bh))) return err; - } + } return 0; } @@ -416,11 +416,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); - /* - * If we are not using the primary superblock/GDT copy don't resize, - * because the user tools have no way of handling this. Probably a - * bad time to do it anyways. - */ + /* + * If we are not using the primary superblock/GDT copy don't resize, + * because the user tools have no way of handling this. Probably a + * bad time to do it anyways. + */ if (EXT4_SB(sb)->s_sbh->b_blocknr != le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { ext4_warning(sb, __func__, @@ -507,14 +507,14 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, return 0; exit_inode: - //ext4_journal_release_buffer(handle, iloc.bh); + /* ext4_journal_release_buffer(handle, iloc.bh); */ brelse(iloc.bh); exit_dindj: - //ext4_journal_release_buffer(handle, dind); + /* ext4_journal_release_buffer(handle, dind); */ exit_primary: - //ext4_journal_release_buffer(handle, *primary); + /* ext4_journal_release_buffer(handle, *primary); */ exit_sbh: - //ext4_journal_release_buffer(handle, *primary); + /* ext4_journal_release_buffer(handle, *primary); */ exit_dind: brelse(dind); exit_bh: @@ -818,12 +818,12 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) goto exit_journal; - /* - * We will only either add reserved group blocks to a backup group - * or remove reserved blocks for the first group in a new group block. - * Doing both would be mean more complex code, and sane people don't - * use non-sparse filesystems anymore. This is already checked above. - */ + /* + * We will only either add reserved group blocks to a backup group + * or remove reserved blocks for the first group in a new group block. + * Doing both would be mean more complex code, and sane people don't + * use non-sparse filesystems anymore. This is already checked above. + */ if (gdb_off) { primary = sbi->s_group_desc[gdb_num]; if ((err = ext4_journal_get_write_access(handle, primary))) @@ -835,24 +835,24 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) } else if ((err = add_new_gdb(handle, inode, input, &primary))) goto exit_journal; - /* - * OK, now we've set up the new group. Time to make it active. - * - * Current kernels don't lock all allocations via lock_super(), - * so we have to be safe wrt. concurrent accesses the group - * data. So we need to be careful to set all of the relevant - * group descriptor data etc. *before* we enable the group. - * - * The key field here is sbi->s_groups_count: as long as - * that retains its old value, nobody is going to access the new - * group. - * - * So first we update all the descriptor metadata for the new - * group; then we update the total disk blocks count; then we - * update the groups count to enable the group; then finally we - * update the free space counts so that the system can start - * using the new disk blocks. - */ + /* + * OK, now we've set up the new group. Time to make it active. + * + * Current kernels don't lock all allocations via lock_super(), + * so we have to be safe wrt. concurrent accesses the group + * data. So we need to be careful to set all of the relevant + * group descriptor data etc. *before* we enable the group. + * + * The key field here is sbi->s_groups_count: as long as + * that retains its old value, nobody is going to access the new + * group. + * + * So first we update all the descriptor metadata for the new + * group; then we update the total disk blocks count; then we + * update the groups count to enable the group; then finally we + * update the free space counts so that the system can start + * using the new disk blocks. + */ /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)((char *)primary->b_data + @@ -946,7 +946,8 @@ exit_put: return err; } /* ext4_group_add */ -/* Extend the filesystem to the new number of blocks specified. This entry +/* + * Extend the filesystem to the new number of blocks specified. This entry * point is only used to extend the current filesystem to the end of the last * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" * for emergencies (because it has no dependencies on reserved blocks). @@ -1024,7 +1025,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, o_blocks_count + add, add); /* See if the device is actually as big as what was requested */ - bh = sb_bread(sb, o_blocks_count + add -1); + bh = sb_bread(sb, o_blocks_count + add - 1); if (!bh) { ext4_warning(sb, __func__, "can't read last block, resize aborted"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1cb371dcd60..d5d77958b86 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -49,20 +49,19 @@ static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); static int ext4_create_journal(struct super_block *, struct ext4_super_block *, unsigned int); -static void ext4_commit_super (struct super_block * sb, - struct ext4_super_block * es, - int sync); -static void ext4_mark_recovery_complete(struct super_block * sb, - struct ext4_super_block * es); -static void ext4_clear_journal_err(struct super_block * sb, - struct ext4_super_block * es); +static void ext4_commit_super(struct super_block *sb, + struct ext4_super_block *es, int sync); +static void ext4_mark_recovery_complete(struct super_block *sb, + struct ext4_super_block *es); +static void ext4_clear_journal_err(struct super_block *sb, + struct ext4_super_block *es); static int ext4_sync_fs(struct super_block *sb, int wait); -static const char *ext4_decode_error(struct super_block * sb, int errno, +static const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]); -static int ext4_remount (struct super_block * sb, int * flags, char * data); -static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); +static int ext4_remount(struct super_block *sb, int *flags, char *data); +static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); static void ext4_unlockfs(struct super_block *sb); -static void ext4_write_super (struct super_block * sb); +static void ext4_write_super(struct super_block *sb); static void ext4_write_super_lockfs(struct super_block *sb); @@ -211,15 +210,15 @@ static void ext4_handle_error(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return; - if (!test_opt (sb, ERRORS_CONT)) { + if (!test_opt(sb, ERRORS_CONT)) { journal_t *journal = EXT4_SB(sb)->s_journal; EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; if (journal) jbd2_journal_abort(journal, -EIO); } - if (test_opt (sb, ERRORS_RO)) { - printk (KERN_CRIT "Remounting filesystem read-only\n"); + if (test_opt(sb, ERRORS_RO)) { + printk(KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; } ext4_commit_super(sb, es, 1); @@ -228,13 +227,13 @@ static void ext4_handle_error(struct super_block *sb) sb->s_id); } -void ext4_error (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext4_error(struct super_block *sb, const char *function, + const char *fmt, ...) { va_list args; va_start(args, fmt); - printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); printk("\n"); va_end(args); @@ -242,7 +241,7 @@ void ext4_error (struct super_block * sb, const char * function, ext4_handle_error(sb); } -static const char *ext4_decode_error(struct super_block * sb, int errno, +static const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]) { char *errstr = NULL; @@ -278,8 +277,7 @@ static const char *ext4_decode_error(struct super_block * sb, int errno, /* __ext4_std_error decodes expected errors from journaling functions * automatically and invokes the appropriate error response. */ -void __ext4_std_error (struct super_block * sb, const char * function, - int errno) +void __ext4_std_error(struct super_block *sb, const char *function, int errno) { char nbuf[16]; const char *errstr; @@ -292,8 +290,8 @@ void __ext4_std_error (struct super_block * sb, const char * function, return; errstr = ext4_decode_error(sb, errno, nbuf); - printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", - sb->s_id, function, errstr); + printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", + sb->s_id, function, errstr); ext4_handle_error(sb); } @@ -308,15 +306,15 @@ void __ext4_std_error (struct super_block * sb, const char * function, * case we take the easy way out and panic immediately. */ -void ext4_abort (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext4_abort(struct super_block *sb, const char *function, + const char *fmt, ...) { va_list args; - printk (KERN_CRIT "ext4_abort called.\n"); + printk(KERN_CRIT "ext4_abort called.\n"); va_start(args, fmt); - printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); printk("\n"); va_end(args); @@ -334,8 +332,8 @@ void ext4_abort (struct super_block * sb, const char * function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } -void ext4_warning (struct super_block * sb, const char * function, - const char * fmt, ...) +void ext4_warning(struct super_block *sb, const char *function, + const char *fmt, ...) { va_list args; @@ -496,7 +494,7 @@ static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) } } -static void ext4_put_super (struct super_block * sb) +static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; @@ -595,7 +593,7 @@ static void ext4_destroy_inode(struct inode *inode) kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; @@ -647,7 +645,8 @@ static void ext4_clear_inode(struct inode *inode) &EXT4_I(inode)->jinode); } -static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) +static inline void ext4_show_quota_options(struct seq_file *seq, + struct super_block *sb) { #if defined(CONFIG_QUOTA) struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -822,8 +821,8 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, } #ifdef CONFIG_QUOTA -#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") -#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) +#define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") +#define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) static int ext4_dquot_initialize(struct inode *inode, int type); static int ext4_dquot_drop(struct inode *inode); @@ -991,12 +990,12 @@ static ext4_fsblk_t get_sb_block(void **data) return sb_block; } -static int parse_options (char *options, struct super_block *sb, - unsigned int *inum, unsigned long *journal_devnum, - ext4_fsblk_t *n_blocks_count, int is_remount) +static int parse_options(char *options, struct super_block *sb, + unsigned int *inum, unsigned long *journal_devnum, + ext4_fsblk_t *n_blocks_count, int is_remount) { struct ext4_sb_info *sbi = EXT4_SB(sb); - char * p; + char *p; substring_t args[MAX_OPT_ARGS]; int data_opt = 0; int option; @@ -1009,7 +1008,7 @@ static int parse_options (char *options, struct super_block *sb, if (!options) return 1; - while ((p = strsep (&options, ",")) != NULL) { + while ((p = strsep(&options, ",")) != NULL) { int token; if (!*p) continue; @@ -1017,16 +1016,16 @@ static int parse_options (char *options, struct super_block *sb, token = match_token(p, tokens, args); switch (token) { case Opt_bsd_df: - clear_opt (sbi->s_mount_opt, MINIX_DF); + clear_opt(sbi->s_mount_opt, MINIX_DF); break; case Opt_minix_df: - set_opt (sbi->s_mount_opt, MINIX_DF); + set_opt(sbi->s_mount_opt, MINIX_DF); break; case Opt_grpid: - set_opt (sbi->s_mount_opt, GRPID); + set_opt(sbi->s_mount_opt, GRPID); break; case Opt_nogrpid: - clear_opt (sbi->s_mount_opt, GRPID); + clear_opt(sbi->s_mount_opt, GRPID); break; case Opt_resuid: if (match_int(&args[0], &option)) @@ -1043,41 +1042,41 @@ static int parse_options (char *options, struct super_block *sb, /* *sb_block = match_int(&args[0]); */ break; case Opt_err_panic: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_RO); - set_opt (sbi->s_mount_opt, ERRORS_PANIC); + clear_opt(sbi->s_mount_opt, ERRORS_CONT); + clear_opt(sbi->s_mount_opt, ERRORS_RO); + set_opt(sbi->s_mount_opt, ERRORS_PANIC); break; case Opt_err_ro: - clear_opt (sbi->s_mount_opt, ERRORS_CONT); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_RO); + clear_opt(sbi->s_mount_opt, ERRORS_CONT); + clear_opt(sbi->s_mount_opt, ERRORS_PANIC); + set_opt(sbi->s_mount_opt, ERRORS_RO); break; case Opt_err_cont: - clear_opt (sbi->s_mount_opt, ERRORS_RO); - clear_opt (sbi->s_mount_opt, ERRORS_PANIC); - set_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt(sbi->s_mount_opt, ERRORS_RO); + clear_opt(sbi->s_mount_opt, ERRORS_PANIC); + set_opt(sbi->s_mount_opt, ERRORS_CONT); break; case Opt_nouid32: - set_opt (sbi->s_mount_opt, NO_UID32); + set_opt(sbi->s_mount_opt, NO_UID32); break; case Opt_nocheck: - clear_opt (sbi->s_mount_opt, CHECK); + clear_opt(sbi->s_mount_opt, CHECK); break; case Opt_debug: - set_opt (sbi->s_mount_opt, DEBUG); + set_opt(sbi->s_mount_opt, DEBUG); break; case Opt_oldalloc: - set_opt (sbi->s_mount_opt, OLDALLOC); + set_opt(sbi->s_mount_opt, OLDALLOC); break; case Opt_orlov: - clear_opt (sbi->s_mount_opt, OLDALLOC); + clear_opt(sbi->s_mount_opt, OLDALLOC); break; #ifdef CONFIG_EXT4DEV_FS_XATTR case Opt_user_xattr: - set_opt (sbi->s_mount_opt, XATTR_USER); + set_opt(sbi->s_mount_opt, XATTR_USER); break; case Opt_nouser_xattr: - clear_opt (sbi->s_mount_opt, XATTR_USER); + clear_opt(sbi->s_mount_opt, XATTR_USER); break; #else case Opt_user_xattr: @@ -1115,7 +1114,7 @@ static int parse_options (char *options, struct super_block *sb, "journal on remount\n"); return 0; } - set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); + set_opt(sbi->s_mount_opt, UPDATE_JOURNAL); break; case Opt_journal_inum: if (is_remount) { @@ -1145,7 +1144,7 @@ static int parse_options (char *options, struct super_block *sb, set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); break; case Opt_noload: - set_opt (sbi->s_mount_opt, NOLOAD); + set_opt(sbi->s_mount_opt, NOLOAD); break; case Opt_commit: if (match_int(&args[0], &option)) @@ -1331,7 +1330,7 @@ set_qf_format: "on this filesystem, use tune2fs\n"); return 0; } - set_opt (sbi->s_mount_opt, EXTENTS); + set_opt(sbi->s_mount_opt, EXTENTS); break; case Opt_noextents: /* @@ -1348,7 +1347,7 @@ set_qf_format: "-o noextents options\n"); return 0; } - clear_opt (sbi->s_mount_opt, EXTENTS); + clear_opt(sbi->s_mount_opt, EXTENTS); break; case Opt_i_version: set_opt(sbi->s_mount_opt, I_VERSION); @@ -1374,9 +1373,9 @@ set_qf_format: set_opt(sbi->s_mount_opt, DELALLOC); break; default: - printk (KERN_ERR - "EXT4-fs: Unrecognized mount option \"%s\" " - "or missing value\n", p); + printk(KERN_ERR + "EXT4-fs: Unrecognized mount option \"%s\" " + "or missing value\n", p); return 0; } } @@ -1423,31 +1422,31 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int res = 0; if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { - printk (KERN_ERR "EXT4-fs warning: revision level too high, " - "forcing read-only mode\n"); + printk(KERN_ERR "EXT4-fs warning: revision level too high, " + "forcing read-only mode\n"); res = MS_RDONLY; } if (read_only) return res; if (!(sbi->s_mount_state & EXT4_VALID_FS)) - printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " - "running e2fsck is recommended\n"); + printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " + "running e2fsck is recommended\n"); else if ((sbi->s_mount_state & EXT4_ERROR_FS)) - printk (KERN_WARNING - "EXT4-fs warning: mounting fs with errors, " - "running e2fsck is recommended\n"); + printk(KERN_WARNING + "EXT4-fs warning: mounting fs with errors, " + "running e2fsck is recommended\n"); else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && le16_to_cpu(es->s_mnt_count) >= (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) - printk (KERN_WARNING - "EXT4-fs warning: maximal mount count reached, " - "running e2fsck is recommended\n"); + printk(KERN_WARNING + "EXT4-fs warning: maximal mount count reached, " + "running e2fsck is recommended\n"); else if (le32_to_cpu(es->s_checkinterval) && (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) - printk (KERN_WARNING - "EXT4-fs warning: checktime reached, " - "running e2fsck is recommended\n"); + printk(KERN_WARNING + "EXT4-fs warning: checktime reached, " + "running e2fsck is recommended\n"); #if 0 /* @@@ We _will_ want to clear the valid bit if we find * inconsistencies, to force a fsck at reboot. But for @@ -1506,14 +1505,13 @@ static int ext4_fill_flex_info(struct super_block *sb) flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / groups_per_flex; - sbi->s_flex_groups = kmalloc(flex_group_count * + sbi->s_flex_groups = kzalloc(flex_group_count * sizeof(struct flex_groups), GFP_KERNEL); if (sbi->s_flex_groups == NULL) { - printk(KERN_ERR "EXT4-fs: not enough memory\n"); + printk(KERN_ERR "EXT4-fs: not enough memory for " + "%lu flex groups\n", flex_group_count); goto failed; } - memset(sbi->s_flex_groups, 0, flex_group_count * - sizeof(struct flex_groups)); gdp = ext4_get_group_desc(sb, 1, &bh); block_bitmap = ext4_block_bitmap(sb, gdp) - 1; @@ -1597,16 +1595,14 @@ static int ext4_check_descriptors(struct super_block *sb) (EXT4_BLOCKS_PER_GROUP(sb) - 1); block_bitmap = ext4_block_bitmap(sb, gdp); - if (block_bitmap < first_block || block_bitmap > last_block) - { + if (block_bitmap < first_block || block_bitmap > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " "Block bitmap for group %lu not in group " "(block %llu)!", i, block_bitmap); return 0; } inode_bitmap = ext4_inode_bitmap(sb, gdp); - if (inode_bitmap < first_block || inode_bitmap > last_block) - { + if (inode_bitmap < first_block || inode_bitmap > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " "Inode bitmap for group %lu not in group " "(block %llu)!", i, inode_bitmap); @@ -1614,26 +1610,28 @@ static int ext4_check_descriptors(struct super_block *sb) } inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || - inode_table + sbi->s_itb_per_group - 1 > last_block) - { + inode_table + sbi->s_itb_per_group - 1 > last_block) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " "Inode table for group %lu not in group " "(block %llu)!", i, inode_table); return 0; } + spin_lock(sb_bgl_lock(sbi, i)); if (!ext4_group_desc_csum_verify(sbi, i, gdp)) { printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " "Checksum for group %lu failed (%u!=%u)\n", i, le16_to_cpu(ext4_group_desc_csum(sbi, i, gdp)), le16_to_cpu(gdp->bg_checksum)); - return 0; + if (!(sb->s_flags & MS_RDONLY)) + return 0; } + spin_unlock(sb_bgl_lock(sbi, i)); if (!flexbg_flag) first_block += EXT4_BLOCKS_PER_GROUP(sb); } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); - sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); + sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); return 1; } @@ -1654,8 +1652,8 @@ static int ext4_check_descriptors(struct super_block *sb) * e2fsck was run on this filesystem, and it must have already done the orphan * inode cleanup for us, so we can safely abort without any further action. */ -static void ext4_orphan_cleanup (struct super_block * sb, - struct ext4_super_block * es) +static void ext4_orphan_cleanup(struct super_block *sb, + struct ext4_super_block *es) { unsigned int s_flags = sb->s_flags; int nr_orphans = 0, nr_truncates = 0; @@ -1732,7 +1730,7 @@ static void ext4_orphan_cleanup (struct super_block * sb, iput(inode); /* The delete magic happens here! */ } -#define PLURAL(x) (x), ((x)==1) ? "" : "s" +#define PLURAL(x) (x), ((x) == 1) ? "" : "s" if (nr_orphans) printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", @@ -1899,12 +1897,12 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) return 0; } -static int ext4_fill_super (struct super_block *sb, void *data, int silent) +static int ext4_fill_super(struct super_block *sb, void *data, int silent) __releases(kernel_lock) __acquires(kernel_lock) { - struct buffer_head * bh; + struct buffer_head *bh; struct ext4_super_block *es = NULL; struct ext4_sb_info *sbi; ext4_fsblk_t block; @@ -1953,7 +1951,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } if (!(bh = sb_bread(sb, logical_sb_block))) { - printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); + printk(KERN_ERR "EXT4-fs: unable to read superblock\n"); goto out_fail; } /* @@ -2026,8 +2024,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, DELALLOC); - if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, - NULL, 0)) + if (!parse_options((char *) data, sb, &journal_inum, &journal_devnum, + NULL, 0)) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | @@ -2102,7 +2100,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - brelse (bh); + brelse(bh); logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; offset = do_div(logical_sb_block, blocksize); bh = sb_bread(sb, logical_sb_block); @@ -2114,8 +2112,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); sbi->s_es = es; if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { - printk (KERN_ERR - "EXT4-fs: Magic mismatch, very weird !\n"); + printk(KERN_ERR + "EXT4-fs: Magic mismatch, very weird !\n"); goto failed_mount; } } @@ -2132,9 +2130,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || (sbi->s_inode_size > blocksize)) { - printk (KERN_ERR - "EXT4-fs: unsupported inode size: %d\n", - sbi->s_inode_size); + printk(KERN_ERR + "EXT4-fs: unsupported inode size: %d\n", + sbi->s_inode_size); goto failed_mount; } if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) @@ -2166,20 +2164,20 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_mount_state = le16_to_cpu(es->s_state); sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); - for (i=0; i < 4; i++) + for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; if (sbi->s_blocks_per_group > blocksize * 8) { - printk (KERN_ERR - "EXT4-fs: #blocks per group too big: %lu\n", - sbi->s_blocks_per_group); + printk(KERN_ERR + "EXT4-fs: #blocks per group too big: %lu\n", + sbi->s_blocks_per_group); goto failed_mount; } if (sbi->s_inodes_per_group > blocksize * 8) { - printk (KERN_ERR - "EXT4-fs: #inodes per group too big: %lu\n", - sbi->s_inodes_per_group); + printk(KERN_ERR + "EXT4-fs: #inodes per group too big: %lu\n", + sbi->s_inodes_per_group); goto failed_mount; } @@ -2213,10 +2211,10 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_groups_count = blocks_count; db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); - sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), + sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { - printk (KERN_ERR "EXT4-fs: not enough memory\n"); + printk(KERN_ERR "EXT4-fs: not enough memory\n"); goto failed_mount; } @@ -2226,13 +2224,13 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) block = descriptor_loc(sb, logical_sb_block, i); sbi->s_group_desc[i] = sb_bread(sb, block); if (!sbi->s_group_desc[i]) { - printk (KERN_ERR "EXT4-fs: " - "can't read group descriptor %d\n", i); + printk(KERN_ERR "EXT4-fs: " + "can't read group descriptor %d\n", i); db_count = i; goto failed_mount2; } } - if (!ext4_check_descriptors (sb)) { + if (!ext4_check_descriptors(sb)) { printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); goto failed_mount2; } @@ -2308,11 +2306,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) EXT4_SB(sb)->s_journal->j_failed_commit) { printk(KERN_CRIT "EXT4-fs error (device %s): " "ext4_fill_super: Journal transaction " - "%u is corrupt\n", sb->s_id, + "%u is corrupt\n", sb->s_id, EXT4_SB(sb)->s_journal->j_failed_commit); - if (test_opt (sb, ERRORS_RO)) { - printk (KERN_CRIT - "Mounting filesystem read-only\n"); + if (test_opt(sb, ERRORS_RO)) { + printk(KERN_CRIT + "Mounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); @@ -2332,9 +2330,9 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount3; } else { if (!silent) - printk (KERN_ERR - "ext4: No journal on filesystem on %s\n", - sb->s_id); + printk(KERN_ERR + "ext4: No journal on filesystem on %s\n", + sb->s_id); goto failed_mount3; } @@ -2418,7 +2416,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount4; } - ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); + ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY); /* determine the minimum size of new large inodes, if present */ if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { @@ -2457,12 +2455,12 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; if (needs_recovery) - printk (KERN_INFO "EXT4-fs: recovery complete.\n"); + printk(KERN_INFO "EXT4-fs: recovery complete.\n"); ext4_mark_recovery_complete(sb, es); - printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", - test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": - test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": - "writeback"); + printk(KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", + test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": + test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": + "writeback"); if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " @@ -2575,14 +2573,14 @@ static journal_t *ext4_get_journal(struct super_block *sb, static journal_t *ext4_get_dev_journal(struct super_block *sb, dev_t j_dev) { - struct buffer_head * bh; + struct buffer_head *bh; journal_t *journal; ext4_fsblk_t start; ext4_fsblk_t len; int hblock, blocksize; ext4_fsblk_t sb_block; unsigned long offset; - struct ext4_super_block * es; + struct ext4_super_block *es; struct block_device *bdev; bdev = ext4_blkdev_get(j_dev); @@ -2697,8 +2695,8 @@ static int ext4_load_journal(struct super_block *sb, "unavailable, cannot proceed.\n"); return -EROFS; } - printk (KERN_INFO "EXT4-fs: write access will " - "be enabled during recovery.\n"); + printk(KERN_INFO "EXT4-fs: write access will " + "be enabled during recovery.\n"); } } @@ -2751,8 +2749,8 @@ static int ext4_load_journal(struct super_block *sb, return 0; } -static int ext4_create_journal(struct super_block * sb, - struct ext4_super_block * es, +static int ext4_create_journal(struct super_block *sb, + struct ext4_super_block *es, unsigned int journal_inum) { journal_t *journal; @@ -2793,9 +2791,8 @@ static int ext4_create_journal(struct super_block * sb, return 0; } -static void ext4_commit_super (struct super_block * sb, - struct ext4_super_block * es, - int sync) +static void ext4_commit_super(struct super_block *sb, + struct ext4_super_block *es, int sync) { struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; @@ -2816,8 +2813,8 @@ static void ext4_commit_super (struct super_block * sb, * remounting) the filesystem readonly, then we will end up with a * consistent fs on disk. Record that fact. */ -static void ext4_mark_recovery_complete(struct super_block * sb, - struct ext4_super_block * es) +static void ext4_mark_recovery_complete(struct super_block *sb, + struct ext4_super_block *es) { journal_t *journal = EXT4_SB(sb)->s_journal; @@ -2839,8 +2836,8 @@ static void ext4_mark_recovery_complete(struct super_block * sb, * has recorded an error from a previous lifetime, move that error to the * main filesystem now. */ -static void ext4_clear_journal_err(struct super_block * sb, - struct ext4_super_block * es) +static void ext4_clear_journal_err(struct super_block *sb, + struct ext4_super_block *es) { journal_t *journal; int j_errno; @@ -2865,7 +2862,7 @@ static void ext4_clear_journal_err(struct super_block * sb, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); - ext4_commit_super (sb, es, 1); + ext4_commit_super(sb, es, 1); jbd2_journal_clear_err(journal); } @@ -2898,7 +2895,7 @@ int ext4_force_commit(struct super_block *sb) * This implicitly triggers the writebehind on sync(). */ -static void ext4_write_super (struct super_block * sb) +static void ext4_write_super(struct super_block *sb) { if (mutex_trylock(&sb->s_lock) != 0) BUG(); @@ -2954,13 +2951,14 @@ static void ext4_unlockfs(struct super_block *sb) } } -static int ext4_remount (struct super_block * sb, int * flags, char * data) +static int ext4_remount(struct super_block *sb, int *flags, char *data) { - struct ext4_super_block * es; + struct ext4_super_block *es; struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t n_blocks_count = 0; unsigned long old_sb_flags; struct ext4_mount_options old_opts; + ext4_group_t g; int err; #ifdef CONFIG_QUOTA int i; @@ -3039,6 +3037,26 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) } /* + * Make sure the group descriptor checksums + * are sane. If they aren't, refuse to + * remount r/w. + */ + for (g = 0; g < sbi->s_groups_count; g++) { + struct ext4_group_desc *gdp = + ext4_get_group_desc(sb, g, NULL); + + if (!ext4_group_desc_csum_verify(sbi, g, gdp)) { + printk(KERN_ERR + "EXT4-fs: ext4_remount: " + "Checksum for group %lu failed (%u!=%u)\n", + g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)), + le16_to_cpu(gdp->bg_checksum)); + err = -EINVAL; + goto restore_opts; + } + } + + /* * If we have an unprocessed orphan list hanging * around from a previously readonly bdev mount, * require a full umount/remount for now. @@ -3063,7 +3081,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) sbi->s_mount_state = le16_to_cpu(es->s_state); if ((err = ext4_group_extend(sb, es, n_blocks_count))) goto restore_opts; - if (!ext4_setup_super (sb, es, 0)) + if (!ext4_setup_super(sb, es, 0)) sb->s_flags &= ~MS_RDONLY; } } @@ -3093,7 +3111,7 @@ restore_opts: return err; } -static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) +static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; struct ext4_sb_info *sbi = EXT4_SB(sb); @@ -3331,12 +3349,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, } /* Journaling quota? */ if (EXT4_SB(sb)->s_qf_names[type]) { - /* Quotafile not of fs root? */ + /* Quotafile not in fs root? */ if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) printk(KERN_WARNING "EXT4-fs: Quota file not on filesystem root. " "Journaled quota will not work.\n"); - } + } /* * When we journal data on quota file, we have to flush journal to see @@ -3352,8 +3370,9 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, remount); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 93c5fdcdad2..8954208b489 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1512,7 +1512,7 @@ static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, char *name = entry->e_name; int n; - for (n=0; n < entry->e_name_len; n++) { + for (n = 0; n < entry->e_name_len; n++) { hash = (hash << NAME_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ *name++; diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3a9ecac8d61..3222f51c41c 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -36,7 +36,7 @@ static inline int fat_max_cache(struct inode *inode) static struct kmem_cache *fat_cache_cachep; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct fat_cache *cache = (struct fat_cache *)foo; diff --git a/fs/fat/file.c b/fs/fat/file.c index c672df4036e..ddde37025ca 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -15,6 +15,8 @@ #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/blkdev.h> +#include <linux/fsnotify.h> +#include <linux/security.h> int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -64,6 +66,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; + ia.ia_ctime = current_fs_time(inode->i_sb); if (is_dir) { ia.ia_mode = MSDOS_MKMODE(attr, S_IRWXUGO & ~sbi->options.fs_dmask) @@ -90,11 +93,21 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, } } + /* + * The security check is questionable... We single + * out the RO attribute for checking by the security + * module, just because it maps to a file mode. + */ + err = security_inode_setattr(filp->f_path.dentry, &ia); + if (err) + goto up; + /* This MUST be done before doing anything irreversible... */ - err = notify_change(filp->f_path.dentry, &ia); + err = fat_setattr(filp->f_path.dentry, &ia); if (err) goto up; + fsnotify_change(filp->f_path.dentry, ia.ia_valid); if (sbi->options.sys_immutable) { if (attr & ATTR_SYS) inode->i_flags |= S_IMMUTABLE; @@ -300,6 +313,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) return 0; } +#define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) + int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); @@ -323,9 +338,9 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) /* Check for setting the inode time. */ ia_valid = attr->ia_valid; - if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) { + if (ia_valid & TIMES_SET_FLAGS) { if (fat_allow_set_time(sbi, inode)) - attr->ia_valid &= ~(ATTR_MTIME_SET | ATTR_ATIME_SET); + attr->ia_valid &= ~TIMES_SET_FLAGS; } error = inode_change_ok(inode, attr); diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 23676f9d79c..6d266d793e2 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -498,7 +498,7 @@ static void fat_destroy_inode(struct inode *inode) kmem_cache_free(fat_inode_cachep, MSDOS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct msdos_inode_info *ei = (struct msdos_inode_info *)foo; diff --git a/fs/fcntl.c b/fs/fcntl.c index 9679fcbdeaa..ac4f7db9f13 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -49,82 +49,6 @@ static int get_close_on_exec(unsigned int fd) return res; } -/* - * locate_fd finds a free file descriptor in the open_fds fdset, - * expanding the fd arrays if necessary. Must be called with the - * file_lock held for write. - */ - -static int locate_fd(unsigned int orig_start, int cloexec) -{ - struct files_struct *files = current->files; - unsigned int newfd; - unsigned int start; - int error; - struct fdtable *fdt; - - spin_lock(&files->file_lock); - - error = -EINVAL; - if (orig_start >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out; - -repeat: - fdt = files_fdtable(files); - /* - * Someone might have closed fd's in the range - * orig_start..fdt->next_fd - */ - start = orig_start; - if (start < files->next_fd) - start = files->next_fd; - - newfd = start; - if (start < fdt->max_fds) - newfd = find_next_zero_bit(fdt->open_fds->fds_bits, - fdt->max_fds, start); - - error = -EMFILE; - if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out; - - error = expand_files(files, newfd); - if (error < 0) - goto out; - - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - if (error) - goto repeat; - - if (start <= files->next_fd) - files->next_fd = newfd + 1; - - FD_SET(newfd, fdt->open_fds); - if (cloexec) - FD_SET(newfd, fdt->close_on_exec); - else - FD_CLR(newfd, fdt->close_on_exec); - error = newfd; - -out: - spin_unlock(&files->file_lock); - return error; -} - -static int dupfd(struct file *file, unsigned int start, int cloexec) -{ - int fd = locate_fd(start, cloexec); - if (fd >= 0) - fd_install(fd, file); - else - fput(file); - - return fd; -} - asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) { int err = -EBADF; @@ -135,35 +59,39 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) if ((flags & ~O_CLOEXEC) != 0) return -EINVAL; - spin_lock(&files->file_lock); - if (!(file = fcheck(oldfd))) - goto out_unlock; - err = newfd; - if (newfd == oldfd) - goto out_unlock; - err = -EBADF; - if (newfd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out_unlock; - get_file(file); /* We are now finished with oldfd */ + if (unlikely(oldfd == newfd)) + return -EINVAL; + spin_lock(&files->file_lock); err = expand_files(files, newfd); - if (err < 0) - goto out_fput; - - /* To avoid races with open() and dup(), we will mark the fd as - * in-use in the open-file bitmap throughout the entire dup2() - * process. This is quite safe: do_close() uses the fd array - * entry, not the bitmap, to decide what work needs to be - * done. --sct */ - /* Doesn't work. open() might be there first. --AV */ - - /* Yes. It's a race. In user space. Nothing sane to do */ + file = fcheck(oldfd); + if (unlikely(!file)) + goto Ebadf; + if (unlikely(err < 0)) { + if (err == -EMFILE) + goto Ebadf; + goto out_unlock; + } + /* + * We need to detect attempts to do dup2() over allocated but still + * not finished descriptor. NB: OpenBSD avoids that at the price of + * extra work in their equivalent of fget() - they insert struct + * file immediately after grabbing descriptor, mark it larval if + * more work (e.g. actual opening) is needed and make sure that + * fget() treats larval files as absent. Potentially interesting, + * but while extra work in fget() is trivial, locking implications + * and amount of surgery on open()-related paths in VFS are not. + * FreeBSD fails with -EBADF in the same situation, NetBSD "solution" + * deadlocks in rather amusing ways, AFAICS. All of that is out of + * scope of POSIX or SUS, since neither considers shared descriptor + * tables and this condition does not arise without those. + */ err = -EBUSY; fdt = files_fdtable(files); tofree = fdt->fd[newfd]; if (!tofree && FD_ISSET(newfd, fdt->open_fds)) - goto out_fput; - + goto out_unlock; + get_file(file); rcu_assign_pointer(fdt->fd[newfd], file); FD_SET(newfd, fdt->open_fds); if (flags & O_CLOEXEC) @@ -174,31 +102,41 @@ asmlinkage long sys_dup3(unsigned int oldfd, unsigned int newfd, int flags) if (tofree) filp_close(tofree, files); - err = newfd; -out: - return err; -out_unlock: - spin_unlock(&files->file_lock); - goto out; -out_fput: + return newfd; + +Ebadf: + err = -EBADF; +out_unlock: spin_unlock(&files->file_lock); - fput(file); - goto out; + return err; } asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd) { + if (unlikely(newfd == oldfd)) { /* corner case */ + struct files_struct *files = current->files; + rcu_read_lock(); + if (!fcheck_files(files, oldfd)) + oldfd = -EBADF; + rcu_read_unlock(); + return oldfd; + } return sys_dup3(oldfd, newfd, 0); } asmlinkage long sys_dup(unsigned int fildes) { int ret = -EBADF; - struct file * file = fget(fildes); - - if (file) - ret = dupfd(file, 0, 0); + struct file *file = fget(fildes); + + if (file) { + ret = get_unused_fd(); + if (ret >= 0) + fd_install(ret, file); + else + fput(file); + } return ret; } @@ -321,8 +259,13 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, switch (cmd) { case F_DUPFD: case F_DUPFD_CLOEXEC: - get_file(filp); - err = dupfd(filp, arg, cmd == F_DUPFD_CLOEXEC); + if (arg >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + break; + err = alloc_fd(arg, cmd == F_DUPFD_CLOEXEC ? O_CLOEXEC : 0); + if (err >= 0) { + get_file(filp); + fd_install(err, filp); + } break; case F_GETFD: err = get_close_on_exec(fd) ? FD_CLOEXEC : 0; diff --git a/fs/fifo.c b/fs/fifo.c index 9785e36f81e..987bf941149 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -57,7 +57,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * POSIX.1 says that O_NONBLOCK means return with the FIFO * opened, even when there is no process writing the FIFO. */ - filp->f_op = &read_fifo_fops; + filp->f_op = &read_pipefifo_fops; pipe->r_counter++; if (pipe->readers++ == 0) wake_up_partner(inode); @@ -86,7 +86,7 @@ static int fifo_open(struct inode *inode, struct file *filp) if ((filp->f_flags & O_NONBLOCK) && !pipe->readers) goto err; - filp->f_op = &write_fifo_fops; + filp->f_op = &write_pipefifo_fops; pipe->w_counter++; if (!pipe->writers++) wake_up_partner(inode); @@ -105,7 +105,7 @@ static int fifo_open(struct inode *inode, struct file *filp) * This implementation will NEVER block on a O_RDWR open, since * the process can at least talk to itself. */ - filp->f_op = &rdwr_fifo_fops; + filp->f_op = &rdwr_pipefifo_fops; pipe->readers++; pipe->writers++; @@ -151,5 +151,5 @@ err_nocleanup: * depending on the access mode of the file... */ const struct file_operations def_fifo_fops = { - .open = fifo_open, /* will set read or write pipe_fops */ + .open = fifo_open, /* will set read_ or write_pipefifo_fops */ }; diff --git a/fs/file.c b/fs/file.c index 7b3887e054d..f313314f996 100644 --- a/fs/file.c +++ b/fs/file.c @@ -6,6 +6,7 @@ * Manage the dynamic fd arrays in the process files_struct. */ +#include <linux/module.h> #include <linux/fs.h> #include <linux/mm.h> #include <linux/time.h> @@ -250,9 +251,18 @@ int expand_files(struct files_struct *files, int nr) struct fdtable *fdt; fdt = files_fdtable(files); + + /* + * N.B. For clone tasks sharing a files structure, this test + * will limit the total number of files that can be opened. + */ + if (nr >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) + return -EMFILE; + /* Do we need to expand? */ if (nr < fdt->max_fds) return 0; + /* Can we expand? */ if (nr >= sysctl_nr_open) return -EMFILE; @@ -423,3 +433,63 @@ struct files_struct init_files = { }, .file_lock = __SPIN_LOCK_UNLOCKED(init_task.file_lock), }; + +/* + * allocate a file descriptor, mark it busy. + */ +int alloc_fd(unsigned start, unsigned flags) +{ + struct files_struct *files = current->files; + unsigned int fd; + int error; + struct fdtable *fdt; + + spin_lock(&files->file_lock); +repeat: + fdt = files_fdtable(files); + fd = start; + if (fd < files->next_fd) + fd = files->next_fd; + + if (fd < fdt->max_fds) + fd = find_next_zero_bit(fdt->open_fds->fds_bits, + fdt->max_fds, fd); + + error = expand_files(files, fd); + if (error < 0) + goto out; + + /* + * If we needed to expand the fs array we + * might have blocked - try again. + */ + if (error) + goto repeat; + + if (start <= files->next_fd) + files->next_fd = fd + 1; + + FD_SET(fd, fdt->open_fds); + if (flags & O_CLOEXEC) + FD_SET(fd, fdt->close_on_exec); + else + FD_CLR(fd, fdt->close_on_exec); + error = fd; +#if 1 + /* Sanity check */ + if (rcu_dereference(fdt->fd[fd]) != NULL) { + printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); + rcu_assign_pointer(fdt->fd[fd], NULL); + } +#endif + +out: + spin_unlock(&files->file_lock); + return error; +} + +int get_unused_fd(void) +{ + return alloc_fd(0, 0); +} +EXPORT_SYMBOL(get_unused_fd); diff --git a/fs/file_table.c b/fs/file_table.c index 83084225b4c..f45a4493f9e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -120,7 +120,7 @@ struct file *get_empty_filp(void) tsk = current; INIT_LIST_HEAD(&f->f_u.fu_list); - atomic_set(&f->f_count, 1); + atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); f->f_uid = tsk->fsuid; f->f_gid = tsk->fsgid; @@ -219,7 +219,7 @@ EXPORT_SYMBOL(init_file); void fput(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) + if (atomic_long_dec_and_test(&file->f_count)) __fput(file); } @@ -294,7 +294,7 @@ struct file *fget(unsigned int fd) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (!atomic_inc_not_zero(&file->f_count)) { + if (!atomic_long_inc_not_zero(&file->f_count)) { /* File object ref couldn't be taken */ rcu_read_unlock(); return NULL; @@ -326,7 +326,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) rcu_read_lock(); file = fcheck_files(files, fd); if (file) { - if (atomic_inc_not_zero(&file->f_count)) + if (atomic_long_inc_not_zero(&file->f_count)) *fput_needed = 1; else /* Didn't get the reference, someone's freed */ @@ -341,7 +341,7 @@ struct file *fget_light(unsigned int fd, int *fput_needed) void put_filp(struct file *file) { - if (atomic_dec_and_test(&file->f_count)) { + if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); file_kill(file); file_free(file); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 51d0035ff07..fd03330cade 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -898,7 +898,7 @@ static int fuse_access(struct inode *inode, int mask) return PTR_ERR(req); memset(&inarg, 0, sizeof(inarg)); - inarg.mask = mask; + inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC); req->in.h.opcode = FUSE_ACCESS; req->in.h.nodeid = get_node_id(inode); req->in.numargs = 1; @@ -927,7 +927,7 @@ static int fuse_access(struct inode *inode, int mask) * access request is sent. Execute permission is still checked * locally based on file mode. */ -static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) +static int fuse_permission(struct inode *inode, int mask) { struct fuse_conn *fc = get_fuse_conn(inode); bool refreshed = false; @@ -962,7 +962,7 @@ static int fuse_permission(struct inode *inode, int mask, struct nameidata *nd) exist. So if permissions are revoked this won't be noticed immediately, only after the attribute timeout has expired */ - } else if (nd && (nd->flags & (LOOKUP_ACCESS | LOOKUP_CHDIR))) { + } else if (mask & MAY_ACCESS) { err = fuse_access(inode, mask); } else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { if (!(inode->i_mode & S_IXUGO)) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 67ff2c6a8f6..2bada6bbc31 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -893,7 +893,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; - err = remove_suid(file->f_path.dentry); + err = file_remove_suid(file); if (err) goto out; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7d2f7d6e22e..d2249f174e2 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -956,7 +956,7 @@ static inline void unregister_fuseblk(void) } #endif -static void fuse_inode_init_once(struct kmem_cache *cachep, void *foo) +static void fuse_inode_init_once(void *foo) { struct inode * inode = foo; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 6da0ab355b8..8b0806a3294 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -448,7 +448,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) struct qstr qstr; struct inode *inode; gfs2_str2qstr(&qstr, name); - inode = gfs2_lookupi(dip, &qstr, 1, NULL); + inode = gfs2_lookupi(dip, &qstr, 1); /* gfs2_lookupi has inconsistent callers: vfs * related routines expect NULL for no entry found, * gfs2_lookup_simple callers expect ENOENT @@ -477,7 +477,7 @@ struct inode *gfs2_lookup_simple(struct inode *dip, const char *name) */ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root, struct nameidata *nd) + int is_root) { struct super_block *sb = dir->i_sb; struct gfs2_inode *dip = GFS2_I(dir); @@ -1173,7 +1173,7 @@ int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) break; } - tmp = gfs2_lookupi(dir, &dotdot, 1, NULL); + tmp = gfs2_lookupi(dir, &dotdot, 1); if (IS_ERR(tmp)) { error = PTR_ERR(tmp); break; diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 6074c2506f7..58f9607d6a8 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -83,7 +83,7 @@ int gfs2_inode_refresh(struct gfs2_inode *ip); int gfs2_dinode_dealloc(struct gfs2_inode *inode); int gfs2_change_nlink(struct gfs2_inode *ip, int diff); struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, - int is_root, struct nameidata *nd); + int is_root); struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, unsigned int mode, dev_t dev); int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index bcc668d0fad..bb2cc303ac2 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -24,7 +24,7 @@ #include "util.h" #include "glock.h" -static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) +static void gfs2_init_inode_once(void *foo) { struct gfs2_inode *ip = foo; @@ -33,7 +33,7 @@ static void gfs2_init_inode_once(struct kmem_cache *cachep, void *foo) ip->i_alloc = NULL; } -static void gfs2_init_glock_once(struct kmem_cache *cachep, void *foo) +static void gfs2_init_glock_once(void *foo) { struct gfs2_glock *gl = foo; diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 990d9f4bc46..9cda8536530 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -134,7 +134,7 @@ static struct dentry *gfs2_get_parent(struct dentry *child) struct dentry *dentry; gfs2_str2qstr(&dotdot, ".."); - inode = gfs2_lookupi(child->d_inode, &dotdot, 1, NULL); + inode = gfs2_lookupi(child->d_inode, &dotdot, 1); if (!inode) return ERR_PTR(-ENOENT); diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1e252dfc529..e2c62f73a77 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -74,7 +74,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, return PTR_ERR(inode); } - inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); + inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode) { if (!IS_ERR(inode)) { gfs2_holder_uninit(ghs); @@ -109,7 +109,7 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, dentry->d_op = &gfs2_dops; - inode = gfs2_lookupi(dir, &dentry->d_name, 0, nd); + inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && IS_ERR(inode)) return ERR_CAST(inode); @@ -915,12 +915,6 @@ int gfs2_permission(struct inode *inode, int mask) return error; } -static int gfs2_iop_permission(struct inode *inode, int mask, - struct nameidata *nd) -{ - return gfs2_permission(inode, mask); -} - static int setattr_size(struct inode *inode, struct iattr *attr) { struct gfs2_inode *ip = GFS2_I(inode); @@ -1150,7 +1144,7 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) } const struct inode_operations gfs2_file_iops = { - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, @@ -1169,7 +1163,7 @@ const struct inode_operations gfs2_dir_iops = { .rmdir = gfs2_rmdir, .mknod = gfs2_mknod, .rename = gfs2_rename, - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, @@ -1181,7 +1175,7 @@ const struct inode_operations gfs2_dir_iops = { const struct inode_operations gfs2_symlink_iops = { .readlink = gfs2_readlink, .follow_link = gfs2_follow_link, - .permission = gfs2_iop_permission, + .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, .setxattr = gfs2_setxattr, diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 63a8a902d9d..ca831991cbc 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -389,7 +389,7 @@ int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) break; INIT_LIST_HEAD(&jd->extent_list); - jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1, NULL); + jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { if (!jd->jd_inode) error = -ENOENT; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index dc4ec640e87..7e19835efa2 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -511,8 +511,7 @@ void hfs_clear_inode(struct inode *inode) } } -static int hfs_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int hfs_permission(struct inode *inode, int mask) { if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) return 0; @@ -523,8 +522,6 @@ static int hfs_file_open(struct inode *inode, struct file *file) { if (HFS_IS_RSRC(inode)) inode = HFS_I(inode)->rsrc_inode; - if (atomic_read(&file->f_count) != 1) - return 0; atomic_inc(&HFS_I(inode)->opencnt); return 0; } @@ -535,8 +532,6 @@ static int hfs_file_release(struct inode *inode, struct file *file) if (HFS_IS_RSRC(inode)) inode = HFS_I(inode)->rsrc_inode; - if (atomic_read(&file->f_count) != 0) - return 0; if (atomic_dec_and_test(&HFS_I(inode)->opencnt)) { mutex_lock(&inode->i_mutex); hfs_file_truncate(inode); diff --git a/fs/hfs/super.c b/fs/hfs/super.c index ac2ec5ef66e..4abb1047c68 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -432,7 +432,7 @@ static struct file_system_type hfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfs_init_once(struct kmem_cache *cachep, void *p) +static void hfs_init_once(void *p) { struct hfs_inode_info *i = p; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index cc3b5e24339..b085d64a2b6 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -238,7 +238,7 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); } -static int hfsplus_permission(struct inode *inode, int mask, struct nameidata *nd) +static int hfsplus_permission(struct inode *inode, int mask) { /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, * open_exec has the same test, so it's still not executable, if a x bit @@ -254,8 +254,6 @@ static int hfsplus_file_open(struct inode *inode, struct file *file) { if (HFSPLUS_IS_RSRC(inode)) inode = HFSPLUS_I(inode).rsrc_inode; - if (atomic_read(&file->f_count) != 1) - return 0; atomic_inc(&HFSPLUS_I(inode).opencnt); return 0; } @@ -266,8 +264,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) if (HFSPLUS_IS_RSRC(inode)) inode = HFSPLUS_I(inode).rsrc_inode; - if (atomic_read(&file->f_count) != 0) - return 0; if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { mutex_lock(&inode->i_mutex); hfsplus_file_truncate(inode); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 3859118531c..e834e578c93 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -485,7 +485,7 @@ static struct file_system_type hfsplus_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void hfsplus_init_once(struct kmem_cache *cachep, void *p) +static void hfsplus_init_once(void *p) { struct hfsplus_inode_info *i = p; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 5222345ddcc..d6ecabf4d23 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -822,7 +822,7 @@ int hostfs_rename(struct inode *from_ino, struct dentry *from, return err; } -int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) +int hostfs_permission(struct inode *ino, int desired) { char *name; int r = 0, w = 0, x = 0, err; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index d256559b410..d9c59a77544 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -415,7 +415,7 @@ again: d_drop(dentry); spin_lock(&dentry->d_lock); if (atomic_read(&dentry->d_count) > 1 || - permission(inode, MAY_WRITE, NULL) || + generic_permission(inode, MAY_WRITE, NULL) || !S_ISREG(inode->i_mode) || get_write_access(inode)) { spin_unlock(&dentry->d_lock); diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index f63a699ec65..b8ae9c90ada 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -173,7 +173,7 @@ static void hpfs_destroy_inode(struct inode *inode) kmem_cache_free(hpfs_inode_cachep, hpfs_i(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct hpfs_inode_info *ei = (struct hpfs_inode_info *) foo; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 65077aa90f0..2b3d1828db9 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -655,20 +655,13 @@ static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd); } -int hppfs_permission(struct inode *inode, int mask, struct nameidata *nd) -{ - return generic_permission(inode, mask, NULL); -} - static const struct inode_operations hppfs_dir_iops = { .lookup = hppfs_lookup, - .permission = hppfs_permission, }; static const struct inode_operations hppfs_link_iops = { .readlink = hppfs_readlink, .follow_link = hppfs_follow_link, - .permission = hppfs_permission, }; static struct inode *get_inode(struct super_block *sb, struct dentry *dentry) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index dbd01d262ca..3f58923fb39 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -705,7 +705,7 @@ static const struct address_space_operations hugetlbfs_aops = { }; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo; diff --git a/fs/inode.c b/fs/inode.c index c36d9480335..0487ddba139 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -166,6 +166,7 @@ static struct inode *alloc_inode(struct super_block *sb) mapping_set_gfp_mask(mapping, GFP_HIGHUSER_PAGECACHE); mapping->assoc_mapping = NULL; mapping->backing_dev_info = &default_backing_dev_info; + mapping->writeback_index = 0; /* * If the block_device provides a backing_dev_info for client @@ -209,7 +210,7 @@ void inode_init_once(struct inode *inode) INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); - rwlock_init(&inode->i_data.tree_lock); + spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); INIT_LIST_HEAD(&inode->i_data.private_list); spin_lock_init(&inode->i_data.private_lock); @@ -224,7 +225,7 @@ void inode_init_once(struct inode *inode) EXPORT_SYMBOL(inode_init_once); -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct inode * inode = (struct inode *) foo; diff --git a/fs/inotify_user.c b/fs/inotify_user.c index fe79c25d95d..60249429a25 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -354,20 +354,20 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) } /* - * find_inode - resolve a user-given path to a specific inode and return a nd + * find_inode - resolve a user-given path to a specific inode */ -static int find_inode(const char __user *dirname, struct nameidata *nd, +static int find_inode(const char __user *dirname, struct path *path, unsigned flags) { int error; - error = __user_walk(dirname, flags, nd); + error = user_path_at(AT_FDCWD, dirname, flags, path); if (error) return error; /* you can only watch an inode if you have read permissions on it */ - error = vfs_permission(nd, MAY_READ); + error = inode_permission(path->dentry->d_inode, MAY_READ); if (error) - path_put(&nd->path); + path_put(path); return error; } @@ -650,11 +650,11 @@ asmlinkage long sys_inotify_init(void) return sys_inotify_init1(0); } -asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) +asmlinkage long sys_inotify_add_watch(int fd, const char __user *pathname, u32 mask) { struct inode *inode; struct inotify_device *dev; - struct nameidata nd; + struct path path; struct file *filp; int ret, fput_needed; unsigned flags = 0; @@ -674,12 +674,12 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) if (mask & IN_ONLYDIR) flags |= LOOKUP_DIRECTORY; - ret = find_inode(path, &nd, flags); + ret = find_inode(pathname, &path, flags); if (unlikely(ret)) goto fput_and_out; - /* inode held in place by reference to nd; dev by fget on fd */ - inode = nd.path.dentry->d_inode; + /* inode held in place by reference to path; dev by fget on fd */ + inode = path.dentry->d_inode; dev = filp->private_data; mutex_lock(&dev->up_mutex); @@ -688,7 +688,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask) ret = create_watch(dev, inode, mask); mutex_unlock(&dev->up_mutex); - path_put(&nd.path); + path_put(&path); fput_and_out: fput_light(filp, fput_needed); return ret; diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 044a254d526..26948a6033b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -73,7 +73,7 @@ static void isofs_destroy_inode(struct inode *inode) kmem_cache_free(isofs_inode_cachep, ISOFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct iso_inode_info *ei = foo; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 2eccbfaa1d4..ae08c057e75 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -63,7 +63,7 @@ static void release_buffer_page(struct buffer_head *bh) goto nope; /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto nope; page_cache_get(page); @@ -221,7 +221,7 @@ write_out_data: * blocking lock_buffer(). */ if (buffer_dirty(bh)) { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { BUFFER_TRACE(bh, "needs blocking lock"); spin_unlock(&journal->j_list_lock); /* Write out all data to prevent deadlocks */ @@ -446,7 +446,7 @@ void journal_commit_transaction(journal_t *journal) spin_lock(&journal->j_list_lock); } if (unlikely(!buffer_uptodate(bh))) { - if (TestSetPageLocked(bh->b_page)) { + if (!trylock_page(bh->b_page)) { spin_unlock(&journal->j_list_lock); lock_page(bh->b_page); spin_lock(&journal->j_list_lock); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 8dee3200750..0540ca27a44 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -291,7 +291,7 @@ handle_t *journal_start(journal_t *journal, int nblocks) goto out; } - lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_map_acquire(&handle->h_lockdep_map); out: return handle; @@ -1448,7 +1448,7 @@ int journal_stop(handle_t *handle) spin_unlock(&journal->j_state_lock); } - lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); + lock_map_release(&handle->h_lockdep_map); jbd_free_handle(handle); return err; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f8b3be87322..f2ad061e95e 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -67,7 +67,7 @@ static void release_buffer_page(struct buffer_head *bh) goto nope; /* OK, it's a truncated page */ - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto nope; page_cache_get(page); @@ -262,8 +262,18 @@ static int journal_finish_inode_data_buffers(journal_t *journal, jinode->i_flags |= JI_COMMIT_RUNNING; spin_unlock(&journal->j_list_lock); err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping); - if (!ret) - ret = err; + if (err) { + /* + * Because AS_EIO is cleared by + * wait_on_page_writeback_range(), set it again so + * that user process can get -EIO from fsync(). + */ + set_bit(AS_EIO, + &jinode->i_vfs_inode->i_mapping->flags); + + if (!ret) + ret = err; + } spin_lock(&journal->j_list_lock); jinode->i_flags &= ~JI_COMMIT_RUNNING; wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); @@ -670,8 +680,14 @@ start_journal_io: * commit block, which happens below in such setting. */ err = journal_finish_inode_data_buffers(journal, commit_transaction); - if (err) - jbd2_journal_abort(journal, err); + if (err) { + char b[BDEVNAME_SIZE]; + + printk(KERN_WARNING + "JBD2: Detected IO errors while flushing file data " + "on %s\n", bdevname(journal->j_fs_dev, b)); + err = 0; + } /* Lo and behold: we have just managed to send a transaction to the log. Before we can commit it, wait for the IO so far to diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index b26c6d9fe6a..8207a01c4ed 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -68,7 +68,6 @@ EXPORT_SYMBOL(jbd2_journal_set_features); EXPORT_SYMBOL(jbd2_journal_create); EXPORT_SYMBOL(jbd2_journal_load); EXPORT_SYMBOL(jbd2_journal_destroy); -EXPORT_SYMBOL(jbd2_journal_update_superblock); EXPORT_SYMBOL(jbd2_journal_abort); EXPORT_SYMBOL(jbd2_journal_errno); EXPORT_SYMBOL(jbd2_journal_ack_err); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 4f7cadbb19f..e5d540588fa 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -301,7 +301,7 @@ handle_t *jbd2_journal_start(journal_t *journal, int nblocks) goto out; } - lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); + lock_map_acquire(&handle->h_lockdep_map); out: return handle; } @@ -1279,7 +1279,7 @@ int jbd2_journal_stop(handle_t *handle) spin_unlock(&journal->j_state_lock); } - lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); + lock_map_release(&handle->h_lockdep_map); jbd2_free_handle(handle); return err; diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 4c80404a9ab..d98713777a1 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -314,7 +314,7 @@ static int jffs2_check_acl(struct inode *inode, int mask) return -EAGAIN; } -int jffs2_permission(struct inode *inode, int mask, struct nameidata *nd) +int jffs2_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, jffs2_check_acl); } diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 0bb7f003fd8..8ca058aed38 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -28,7 +28,7 @@ struct jffs2_acl_header { #define JFFS2_ACL_NOT_CACHED ((void *)-1) -extern int jffs2_permission(struct inode *, int, struct nameidata *); +extern int jffs2_permission(struct inode *, int); extern int jffs2_acl_chmod(struct inode *); extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); extern int jffs2_init_acl_post(struct inode *); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index c0c141f6fde..cd219ef5525 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -38,7 +38,7 @@ const struct file_operations jffs2_dir_operations = { .read = generic_read_dir, .readdir = jffs2_readdir, - .ioctl = jffs2_ioctl, + .unlocked_ioctl=jffs2_ioctl, .fsync = jffs2_fsync }; diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 5e920343b2c..5a98aa87c85 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -46,7 +46,7 @@ const struct file_operations jffs2_file_operations = .aio_read = generic_file_aio_read, .write = do_sync_write, .aio_write = generic_file_aio_write, - .ioctl = jffs2_ioctl, + .unlocked_ioctl=jffs2_ioctl, .mmap = generic_file_readonly_mmap, .fsync = jffs2_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/jffs2/ioctl.c b/fs/jffs2/ioctl.c index e2177210f62..9d41f43e47b 100644 --- a/fs/jffs2/ioctl.c +++ b/fs/jffs2/ioctl.c @@ -12,8 +12,7 @@ #include <linux/fs.h> #include "nodelist.h" -int jffs2_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, - unsigned long arg) +long jffs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { /* Later, this will provide for lsattr.jffs2 and chattr.jffs2, which will include compression support etc. */ diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 2cc866cf134..5e194a5c8e2 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -167,7 +167,7 @@ int jffs2_fsync(struct file *, struct dentry *, int); int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg); /* ioctl.c */ -int jffs2_ioctl(struct inode *, struct file *, unsigned int, unsigned long); +long jffs2_ioctl(struct file *, unsigned int, unsigned long); /* symlink.c */ extern const struct inode_operations jffs2_symlink_inode_operations; diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index 629af01e5ad..6caf1e1ee26 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -23,6 +23,8 @@ int jffs2_sum_init(struct jffs2_sb_info *c) { + uint32_t sum_size = max_t(uint32_t, c->sector_size, MAX_SUMMARY_SIZE); + c->summary = kzalloc(sizeof(struct jffs2_summary), GFP_KERNEL); if (!c->summary) { @@ -30,7 +32,7 @@ int jffs2_sum_init(struct jffs2_sb_info *c) return -ENOMEM; } - c->summary->sum_buf = vmalloc(c->sector_size); + c->summary->sum_buf = kmalloc(sum_size, GFP_KERNEL); if (!c->summary->sum_buf) { JFFS2_WARNING("Can't allocate buffer for writing out summary information!\n"); @@ -49,7 +51,7 @@ void jffs2_sum_exit(struct jffs2_sb_info *c) jffs2_sum_disable_collecting(c->summary); - vfree(c->summary->sum_buf); + kfree(c->summary->sum_buf); c->summary->sum_buf = NULL; kfree(c->summary); @@ -665,7 +667,7 @@ crc_err: /* Write summary data to flash - helper function for jffs2_sum_write_sumnode() */ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb, - uint32_t infosize, uint32_t datasize, int padsize) + uint32_t infosize, uint32_t datasize, int padsize) { struct jffs2_raw_summary isum; union jffs2_sum_mem *temp; @@ -676,6 +678,26 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock int ret; size_t retlen; + if (padsize + datasize > MAX_SUMMARY_SIZE) { + /* It won't fit in the buffer. Abort summary for this jeb */ + jffs2_sum_disable_collecting(c->summary); + + JFFS2_WARNING("Summary too big (%d data, %d pad) in eraseblock at %08x\n", + datasize, padsize, jeb->offset); + /* Non-fatal */ + return 0; + } + /* Is there enough space for summary? */ + if (padsize < 0) { + /* don't try to write out summary for this jeb */ + jffs2_sum_disable_collecting(c->summary); + + JFFS2_WARNING("Not enough space for summary, padsize = %d\n", + padsize); + /* Non-fatal */ + return 0; + } + memset(c->summary->sum_buf, 0xff, datasize); memset(&isum, 0, sizeof(isum)); @@ -821,7 +843,7 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c) { int datasize, infosize, padsize; struct jffs2_eraseblock *jeb; - int ret; + int ret = 0; dbg_summary("called\n"); @@ -841,16 +863,6 @@ int jffs2_sum_write_sumnode(struct jffs2_sb_info *c) infosize += padsize; datasize += padsize; - /* Is there enough space for summary? */ - if (padsize < 0) { - /* don't try to write out summary for this jeb */ - jffs2_sum_disable_collecting(c->summary); - - JFFS2_WARNING("Not enough space for summary, padsize = %d\n", padsize); - spin_lock(&c->erase_completion_lock); - return 0; - } - ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize); spin_lock(&c->erase_completion_lock); return ret; diff --git a/fs/jffs2/summary.h b/fs/jffs2/summary.h index 8bf34f2fa5c..60207a2ae95 100644 --- a/fs/jffs2/summary.h +++ b/fs/jffs2/summary.h @@ -13,6 +13,12 @@ #ifndef JFFS2_SUMMARY_H #define JFFS2_SUMMARY_H +/* Limit summary size to 64KiB so that we can kmalloc it. If the summary + is larger than that, we have to just ditch it and avoid using summary + for the eraseblock in question... and it probably doesn't hurt us much + anyway. */ +#define MAX_SUMMARY_SIZE 65536 + #include <linux/uio.h> #include <linux/jffs2.h> diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 7da69eae49e..efd401257ed 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -44,7 +44,7 @@ static void jffs2_destroy_inode(struct inode *inode) kmem_cache_free(jffs2_inode_cachep, JFFS2_INODE_INFO(inode)); } -static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo) +static void jffs2_i_init_once(void *foo) { struct jffs2_inode_info *f = foo; diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 4d84bdc8829..d3e5c33665d 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -140,7 +140,7 @@ static int jfs_check_acl(struct inode *inode, int mask) return -EAGAIN; } -int jfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int jfs_permission(struct inode *inode, int mask) { return generic_permission(inode, mask, jfs_check_acl); } diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 455fa429204..88475f10a38 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,7 +20,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -int jfs_permission(struct inode *, int, struct nameidata *); +int jfs_permission(struct inode *, int); int jfs_init_acl(tid_t, struct inode *, struct inode *); int jfs_setattr(struct dentry *, struct iattr *); diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 854ff0ec574..c350057087d 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -182,7 +182,7 @@ static inline void remove_metapage(struct page *page, struct metapage *mp) #endif -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct metapage *mp = (struct metapage *)foo; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 359c091d896..3630718be39 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -760,7 +760,7 @@ static struct file_system_type jfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct jfs_inode_info *jfs_ip = (struct jfs_inode_info *) foo; diff --git a/fs/libfs.c b/fs/libfs.c index baeb71ee1cd..1add676a19d 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -216,8 +216,8 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name, s->s_flags = MS_NOUSER; s->s_maxbytes = ~0ULL; - s->s_blocksize = 1024; - s->s_blocksize_bits = 10; + s->s_blocksize = PAGE_SIZE; + s->s_blocksize_bits = PAGE_SHIFT; s->s_magic = magic; s->s_op = ops ? ops : &simple_super_operations; s->s_time_gran = 1; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 39944463933..4a714f64515 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -83,7 +83,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; - int rc = rpc_success; + __be32 rc = rpc_success; dprintk("lockd: TEST4 called\n"); resp->cookie = argp->cookie; @@ -116,7 +116,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; - int rc = rpc_success; + __be32 rc = rpc_success; dprintk("lockd: LOCK called\n"); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 76019d2ff72..76262c1986f 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -112,7 +112,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; - int rc = rpc_success; + __be32 rc = rpc_success; dprintk("lockd: TEST called\n"); resp->cookie = argp->cookie; @@ -146,7 +146,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, { struct nlm_host *host; struct nlm_file *file; - int rc = rpc_success; + __be32 rc = rpc_success; dprintk("lockd: LOCK called\n"); diff --git a/fs/locks.c b/fs/locks.c index 01490300f7c..5eb259e3cd3 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -201,7 +201,7 @@ EXPORT_SYMBOL(locks_init_lock); * Initialises the fields of the file lock which are invariant for * free file_locks. */ -static void init_once(struct kmem_cache *cache, void *foo) +static void init_once(void *foo) { struct file_lock *lock = (struct file_lock *) foo; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 523d7371341..d1d1eb84679 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -68,7 +68,7 @@ static void minix_destroy_inode(struct inode *inode) kmem_cache_free(minix_inode_cachep, minix_i(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct minix_inode_info *ei = (struct minix_inode_info *) foo; diff --git a/fs/namei.c b/fs/namei.c index 01e67dddcc3..4ea63ed5e79 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -31,7 +31,6 @@ #include <linux/file.h> #include <linux/fcntl.h> #include <linux/device_cgroup.h> -#include <asm/namei.h> #include <asm/uaccess.h> #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) @@ -185,6 +184,8 @@ int generic_permission(struct inode *inode, int mask, { umode_t mode = inode->i_mode; + mask &= MAY_READ | MAY_WRITE | MAY_EXEC; + if (current->fsuid == inode->i_uid) mode >>= 6; else { @@ -203,7 +204,7 @@ int generic_permission(struct inode *inode, int mask, /* * If the DACs are ok we don't need any capability check. */ - if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask)) + if ((mask & ~mode) == 0) return 0; check_capabilities: @@ -226,13 +227,9 @@ int generic_permission(struct inode *inode, int mask, return -EACCES; } -int permission(struct inode *inode, int mask, struct nameidata *nd) +int inode_permission(struct inode *inode, int mask) { - int retval, submask; - struct vfsmount *mnt = NULL; - - if (nd) - mnt = nd->path.mnt; + int retval; if (mask & MAY_WRITE) { umode_t mode = inode->i_mode; @@ -251,19 +248,9 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) return -EACCES; } - if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) { - /* - * MAY_EXEC on regular files is denied if the fs is mounted - * with the "noexec" flag. - */ - if (mnt && (mnt->mnt_flags & MNT_NOEXEC)) - return -EACCES; - } - /* Ordinary permission routines do not understand MAY_APPEND. */ - submask = mask & ~MAY_APPEND; if (inode->i_op && inode->i_op->permission) { - retval = inode->i_op->permission(inode, submask, nd); + retval = inode->i_op->permission(inode, mask); if (!retval) { /* * Exec permission on a regular file is denied if none @@ -277,7 +264,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) return -EACCES; } } else { - retval = generic_permission(inode, submask, NULL); + retval = generic_permission(inode, mask, NULL); } if (retval) return retval; @@ -286,7 +273,8 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) if (retval) return retval; - return security_inode_permission(inode, mask, nd); + return security_inode_permission(inode, + mask & (MAY_READ|MAY_WRITE|MAY_EXEC|MAY_APPEND)); } /** @@ -301,7 +289,7 @@ int permission(struct inode *inode, int mask, struct nameidata *nd) */ int vfs_permission(struct nameidata *nd, int mask) { - return permission(nd->path.dentry->d_inode, mask, nd); + return inode_permission(nd->path.dentry->d_inode, mask); } /** @@ -318,7 +306,7 @@ int vfs_permission(struct nameidata *nd, int mask) */ int file_permission(struct file *file, int mask) { - return permission(file->f_path.dentry->d_inode, mask, NULL); + return inode_permission(file->f_path.dentry->d_inode, mask); } /* @@ -459,8 +447,7 @@ static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, * short-cut DAC fails, then call permission() to do more * complete permission check. */ -static int exec_permission_lite(struct inode *inode, - struct nameidata *nd) +static int exec_permission_lite(struct inode *inode) { umode_t mode = inode->i_mode; @@ -486,7 +473,7 @@ static int exec_permission_lite(struct inode *inode, return -EACCES; ok: - return security_inode_permission(inode, MAY_EXEC, nd); + return security_inode_permission(inode, MAY_EXEC); } /* @@ -519,7 +506,14 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s */ result = d_lookup(parent, name); if (!result) { - struct dentry * dentry = d_alloc(parent, name); + struct dentry *dentry; + + /* Don't create child dentry for a dead directory. */ + result = ERR_PTR(-ENOENT); + if (IS_DEADDIR(dir)) + goto out_unlock; + + dentry = d_alloc(parent, name); result = ERR_PTR(-ENOMEM); if (dentry) { result = dir->i_op->lookup(dir, dentry, nd); @@ -528,6 +522,7 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s else result = dentry; } +out_unlock: mutex_unlock(&dir->i_mutex); return result; } @@ -545,27 +540,16 @@ static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, s return result; } -static int __emul_lookup_dentry(const char *, struct nameidata *); - /* SMP-safe */ -static __always_inline int +static __always_inline void walk_init_root(const char *name, struct nameidata *nd) { struct fs_struct *fs = current->fs; read_lock(&fs->lock); - if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { - nd->path = fs->altroot; - path_get(&fs->altroot); - read_unlock(&fs->lock); - if (__emul_lookup_dentry(name,nd)) - return 0; - read_lock(&fs->lock); - } nd->path = fs->root; path_get(&fs->root); read_unlock(&fs->lock); - return 1; } /* @@ -606,12 +590,9 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l if (*link == '/') { path_put(&nd->path); - if (!walk_init_root(link, nd)) - /* weird __emul_prefix() stuff did it */ - goto out; + walk_init_root(link, nd); } res = link_path_walk(link, nd); -out: if (nd->depth || res || nd->last_type!=LAST_NORM) return res; /* @@ -889,7 +870,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd) unsigned int c; nd->flags |= LOOKUP_CONTINUE; - err = exec_permission_lite(inode, nd); + err = exec_permission_lite(inode); if (err == -EAGAIN) err = vfs_permission(nd, MAY_EXEC); if (err) @@ -1060,67 +1041,6 @@ static int path_walk(const char *name, struct nameidata *nd) return link_path_walk(name, nd); } -/* - * SMP-safe: Returns 1 and nd will have valid dentry and mnt, if - * everything is done. Returns 0 and drops input nd, if lookup failed; - */ -static int __emul_lookup_dentry(const char *name, struct nameidata *nd) -{ - if (path_walk(name, nd)) - return 0; /* something went wrong... */ - - if (!nd->path.dentry->d_inode || - S_ISDIR(nd->path.dentry->d_inode->i_mode)) { - struct path old_path = nd->path; - struct qstr last = nd->last; - int last_type = nd->last_type; - struct fs_struct *fs = current->fs; - - /* - * NAME was not found in alternate root or it's a directory. - * Try to find it in the normal root: - */ - nd->last_type = LAST_ROOT; - read_lock(&fs->lock); - nd->path = fs->root; - path_get(&fs->root); - read_unlock(&fs->lock); - if (path_walk(name, nd) == 0) { - if (nd->path.dentry->d_inode) { - path_put(&old_path); - return 1; - } - path_put(&nd->path); - } - nd->path = old_path; - nd->last = last; - nd->last_type = last_type; - } - return 1; -} - -void set_fs_altroot(void) -{ - char *emul = __emul_prefix(); - struct nameidata nd; - struct path path = {}, old_path; - int err; - struct fs_struct *fs = current->fs; - - if (!emul) - goto set_it; - err = path_lookup(emul, LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_NOALT, &nd); - if (!err) - path = nd.path; -set_it: - write_lock(&fs->lock); - old_path = fs->altroot; - fs->altroot = path; - write_unlock(&fs->lock); - if (old_path.dentry) - path_put(&old_path); -} - /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ static int do_path_lookup(int dfd, const char *name, unsigned int flags, struct nameidata *nd) @@ -1136,14 +1056,6 @@ static int do_path_lookup(int dfd, const char *name, if (*name=='/') { read_lock(&fs->lock); - if (fs->altroot.dentry && !(nd->flags & LOOKUP_NOALT)) { - nd->path = fs->altroot; - path_get(&fs->altroot); - read_unlock(&fs->lock); - if (__emul_lookup_dentry(name,nd)) - goto out; /* found in altroot */ - read_lock(&fs->lock); - } nd->path = fs->root; path_get(&fs->root); read_unlock(&fs->lock); @@ -1177,7 +1089,6 @@ static int do_path_lookup(int dfd, const char *name, } retval = path_walk(name, nd); -out: if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry && nd->path.dentry->d_inode)) audit_inode(name, nd->path.dentry); @@ -1282,19 +1193,6 @@ static int path_lookup_create(int dfd, const char *name, nd, open_flags, create_mode); } -int __user_path_lookup_open(const char __user *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) -{ - char *tmp = getname(name); - int err = PTR_ERR(tmp); - - if (!IS_ERR(tmp)) { - err = __path_lookup_intent_open(AT_FDCWD, tmp, lookup_flags, nd, open_flags, 0); - putname(tmp); - } - return err; -} - static struct dentry *__lookup_hash(struct qstr *name, struct dentry *base, struct nameidata *nd) { @@ -1317,7 +1215,14 @@ static struct dentry *__lookup_hash(struct qstr *name, dentry = cached_lookup(base, name, nd); if (!dentry) { - struct dentry *new = d_alloc(base, name); + struct dentry *new; + + /* Don't create child dentry for a dead directory. */ + dentry = ERR_PTR(-ENOENT); + if (IS_DEADDIR(inode)) + goto out; + + new = d_alloc(base, name); dentry = ERR_PTR(-ENOMEM); if (!new) goto out; @@ -1340,7 +1245,7 @@ static struct dentry *lookup_hash(struct nameidata *nd) { int err; - err = permission(nd->path.dentry->d_inode, MAY_EXEC, nd); + err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC); if (err) return ERR_PTR(err); return __lookup_hash(&nd->last, nd->path.dentry, nd); @@ -1388,7 +1293,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) if (err) return ERR_PTR(err); - err = permission(base->d_inode, MAY_EXEC, NULL); + err = inode_permission(base->d_inode, MAY_EXEC); if (err) return ERR_PTR(err); return __lookup_hash(&this, base, NULL); @@ -1416,22 +1321,40 @@ struct dentry *lookup_one_noperm(const char *name, struct dentry *base) return __lookup_hash(&this, base, NULL); } -int __user_walk_fd(int dfd, const char __user *name, unsigned flags, - struct nameidata *nd) +int user_path_at(int dfd, const char __user *name, unsigned flags, + struct path *path) { + struct nameidata nd; char *tmp = getname(name); int err = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - err = do_path_lookup(dfd, tmp, flags, nd); + + BUG_ON(flags & LOOKUP_PARENT); + + err = do_path_lookup(dfd, tmp, flags, &nd); putname(tmp); + if (!err) + *path = nd.path; } return err; } -int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) +static int user_path_parent(int dfd, const char __user *path, + struct nameidata *nd, char **name) { - return __user_walk_fd(AT_FDCWD, name, flags, nd); + char *s = getname(path); + int error; + + if (IS_ERR(s)) + return PTR_ERR(s); + + error = do_path_lookup(dfd, s, LOOKUP_PARENT, nd); + if (error) + putname(s); + else + *name = s; + + return error; } /* @@ -1478,7 +1401,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) BUG_ON(victim->d_parent->d_inode != dir); audit_inode_child(victim->d_name.name, victim, dir); - error = permission(dir,MAY_WRITE | MAY_EXEC, NULL); + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); if (error) return error; if (IS_APPEND(dir)) @@ -1508,14 +1431,13 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) * 3. We should have write and exec permissions on dir * 4. We can't do it if dir is immutable (done in permission()) */ -static inline int may_create(struct inode *dir, struct dentry *child, - struct nameidata *nd) +static inline int may_create(struct inode *dir, struct dentry *child) { if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) return -ENOENT; - return permission(dir,MAY_WRITE | MAY_EXEC, nd); + return inode_permission(dir, MAY_WRITE | MAY_EXEC); } /* @@ -1581,7 +1503,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) int vfs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd) { - int error = may_create(dir, dentry, nd); + int error = may_create(dir, dentry); if (error) return error; @@ -1755,7 +1677,7 @@ struct file *do_filp_open(int dfd, const char *pathname, int will_write; int flag = open_to_namei_flags(open_flag); - acc_mode = ACC_MODE(flag); + acc_mode = MAY_OPEN | ACC_MODE(flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) @@ -2025,7 +1947,7 @@ EXPORT_SYMBOL_GPL(lookup_create); int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry); if (error) return error; @@ -2071,20 +1993,18 @@ static int may_mknod(mode_t mode) asmlinkage long sys_mknodat(int dfd, const char __user *filename, int mode, unsigned dev) { - int error = 0; - char * tmp; - struct dentry * dentry; + int error; + char *tmp; + struct dentry *dentry; struct nameidata nd; if (S_ISDIR(mode)) return -EPERM; - tmp = getname(filename); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); + error = user_path_parent(dfd, filename, &nd, &tmp); if (error) - goto out; + return error; + dentry = lookup_create(&nd, 0); if (IS_ERR(dentry)) { error = PTR_ERR(dentry); @@ -2116,7 +2036,6 @@ out_dput: out_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); -out: putname(tmp); return error; @@ -2129,7 +2048,7 @@ asmlinkage long sys_mknod(const char __user *filename, int mode, unsigned dev) int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry); if (error) return error; @@ -2156,14 +2075,10 @@ asmlinkage long sys_mkdirat(int dfd, const char __user *pathname, int mode) struct dentry *dentry; struct nameidata nd; - tmp = getname(pathname); - error = PTR_ERR(tmp); - if (IS_ERR(tmp)) + error = user_path_parent(dfd, pathname, &nd, &tmp); + if (error) goto out_err; - error = do_path_lookup(dfd, tmp, LOOKUP_PARENT, &nd); - if (error) - goto out; dentry = lookup_create(&nd, 1); error = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -2181,7 +2096,6 @@ out_dput: out_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); -out: putname(tmp); out_err: return error; @@ -2259,13 +2173,9 @@ static long do_rmdir(int dfd, const char __user *pathname) struct dentry *dentry; struct nameidata nd; - name = getname(pathname); - if(IS_ERR(name)) - return PTR_ERR(name); - - error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); + error = user_path_parent(dfd, pathname, &nd, &name); if (error) - goto exit; + return error; switch(nd.last_type) { case LAST_DOTDOT: @@ -2294,7 +2204,6 @@ exit2: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); exit1: path_put(&nd.path); -exit: putname(name); return error; } @@ -2343,19 +2252,16 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry) */ static long do_unlinkat(int dfd, const char __user *pathname) { - int error = 0; - char * name; + int error; + char *name; struct dentry *dentry; struct nameidata nd; struct inode *inode = NULL; - name = getname(pathname); - if(IS_ERR(name)) - return PTR_ERR(name); - - error = do_path_lookup(dfd, name, LOOKUP_PARENT, &nd); + error = user_path_parent(dfd, pathname, &nd, &name); if (error) - goto exit; + return error; + error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; @@ -2382,7 +2288,6 @@ static long do_unlinkat(int dfd, const char __user *pathname) iput(inode); /* truncate the inode here */ exit1: path_put(&nd.path); -exit: putname(name); return error; @@ -2408,9 +2313,9 @@ asmlinkage long sys_unlink(const char __user *pathname) return do_unlinkat(AT_FDCWD, pathname); } -int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, int mode) +int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) { - int error = may_create(dir, dentry, NULL); + int error = may_create(dir, dentry); if (error) return error; @@ -2432,23 +2337,20 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname, i asmlinkage long sys_symlinkat(const char __user *oldname, int newdfd, const char __user *newname) { - int error = 0; - char * from; - char * to; + int error; + char *from; + char *to; struct dentry *dentry; struct nameidata nd; from = getname(oldname); - if(IS_ERR(from)) + if (IS_ERR(from)) return PTR_ERR(from); - to = getname(newname); - error = PTR_ERR(to); - if (IS_ERR(to)) - goto out_putname; - error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); + error = user_path_parent(newdfd, newname, &nd, &to); if (error) - goto out; + goto out_putname; + dentry = lookup_create(&nd, 0); error = PTR_ERR(dentry); if (IS_ERR(dentry)) @@ -2457,14 +2359,13 @@ asmlinkage long sys_symlinkat(const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; - error = vfs_symlink(nd.path.dentry->d_inode, dentry, from, S_IALLUGO); + error = vfs_symlink(nd.path.dentry->d_inode, dentry, from); mnt_drop_write(nd.path.mnt); out_dput: dput(dentry); out_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); -out: putname(to); out_putname: putname(from); @@ -2484,7 +2385,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de if (!inode) return -ENOENT; - error = may_create(dir, new_dentry, NULL); + error = may_create(dir, new_dentry); if (error) return error; @@ -2498,19 +2399,19 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de return -EPERM; if (!dir->i_op || !dir->i_op->link) return -EPERM; - if (S_ISDIR(old_dentry->d_inode->i_mode)) + if (S_ISDIR(inode->i_mode)) return -EPERM; error = security_inode_link(old_dentry, dir, new_dentry); if (error) return error; - mutex_lock(&old_dentry->d_inode->i_mutex); + mutex_lock(&inode->i_mutex); DQUOT_INIT(dir); error = dir->i_op->link(old_dentry, dir, new_dentry); - mutex_unlock(&old_dentry->d_inode->i_mutex); + mutex_unlock(&inode->i_mutex); if (!error) - fsnotify_link(dir, old_dentry->d_inode, new_dentry); + fsnotify_link(dir, inode, new_dentry); return error; } @@ -2528,27 +2429,25 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, int flags) { struct dentry *new_dentry; - struct nameidata nd, old_nd; + struct nameidata nd; + struct path old_path; int error; - char * to; + char *to; if ((flags & ~AT_SYMLINK_FOLLOW) != 0) return -EINVAL; - to = getname(newname); - if (IS_ERR(to)) - return PTR_ERR(to); - - error = __user_walk_fd(olddfd, oldname, - flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, - &old_nd); + error = user_path_at(olddfd, oldname, + flags & AT_SYMLINK_FOLLOW ? LOOKUP_FOLLOW : 0, + &old_path); if (error) - goto exit; - error = do_path_lookup(newdfd, to, LOOKUP_PARENT, &nd); + return error; + + error = user_path_parent(newdfd, newname, &nd, &to); if (error) goto out; error = -EXDEV; - if (old_nd.path.mnt != nd.path.mnt) + if (old_path.mnt != nd.path.mnt) goto out_release; new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); @@ -2557,7 +2456,7 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, error = mnt_want_write(nd.path.mnt); if (error) goto out_dput; - error = vfs_link(old_nd.path.dentry, nd.path.dentry->d_inode, new_dentry); + error = vfs_link(old_path.dentry, nd.path.dentry->d_inode, new_dentry); mnt_drop_write(nd.path.mnt); out_dput: dput(new_dentry); @@ -2565,10 +2464,9 @@ out_unlock: mutex_unlock(&nd.path.dentry->d_inode->i_mutex); out_release: path_put(&nd.path); -out: - path_put(&old_nd.path); -exit: putname(to); +out: + path_put(&old_path); return error; } @@ -2621,7 +2519,7 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, * we'll need to flip '..'. */ if (new_dir != old_dir) { - error = permission(old_dentry->d_inode, MAY_WRITE, NULL); + error = inode_permission(old_dentry->d_inode, MAY_WRITE); if (error) return error; } @@ -2696,7 +2594,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, return error; if (!new_dentry->d_inode) - error = may_create(new_dir, new_dentry, NULL); + error = may_create(new_dir, new_dentry); else error = may_delete(new_dir, new_dentry, is_dir); if (error) @@ -2724,20 +2622,22 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, return error; } -static int do_rename(int olddfd, const char *oldname, - int newdfd, const char *newname) +asmlinkage long sys_renameat(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname) { - int error = 0; - struct dentry * old_dir, * new_dir; - struct dentry * old_dentry, *new_dentry; - struct dentry * trap; + struct dentry *old_dir, *new_dir; + struct dentry *old_dentry, *new_dentry; + struct dentry *trap; struct nameidata oldnd, newnd; + char *from; + char *to; + int error; - error = do_path_lookup(olddfd, oldname, LOOKUP_PARENT, &oldnd); + error = user_path_parent(olddfd, oldname, &oldnd, &from); if (error) goto exit; - error = do_path_lookup(newdfd, newname, LOOKUP_PARENT, &newnd); + error = user_path_parent(newdfd, newname, &newnd, &to); if (error) goto exit1; @@ -2799,29 +2699,11 @@ exit3: unlock_rename(new_dir, old_dir); exit2: path_put(&newnd.path); + putname(to); exit1: path_put(&oldnd.path); -exit: - return error; -} - -asmlinkage long sys_renameat(int olddfd, const char __user *oldname, - int newdfd, const char __user *newname) -{ - int error; - char * from; - char * to; - - from = getname(oldname); - if(IS_ERR(from)) - return PTR_ERR(from); - to = getname(newname); - error = PTR_ERR(to); - if (!IS_ERR(to)) { - error = do_rename(olddfd, from, newdfd, to); - putname(to); - } putname(from); +exit: return error; } @@ -2959,8 +2841,7 @@ const struct inode_operations page_symlink_inode_operations = { .put_link = page_put_link, }; -EXPORT_SYMBOL(__user_walk); -EXPORT_SYMBOL(__user_walk_fd); +EXPORT_SYMBOL(user_path_at); EXPORT_SYMBOL(follow_down); EXPORT_SYMBOL(follow_up); EXPORT_SYMBOL(get_write_access); /* binfmt_aout */ @@ -2975,7 +2856,7 @@ EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); EXPORT_SYMBOL(path_lookup); EXPORT_SYMBOL(vfs_path_lookup); -EXPORT_SYMBOL(permission); +EXPORT_SYMBOL(inode_permission); EXPORT_SYMBOL(vfs_permission); EXPORT_SYMBOL(file_permission); EXPORT_SYMBOL(unlock_rename); diff --git a/fs/namespace.c b/fs/namespace.c index 4f6f7635b59..6e283c93b50 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -112,9 +112,13 @@ struct vfsmount *alloc_vfsmnt(const char *name) int err; err = mnt_alloc_id(mnt); - if (err) { - kmem_cache_free(mnt_cache, mnt); - return NULL; + if (err) + goto out_free_cache; + + if (name) { + mnt->mnt_devname = kstrdup(name, GFP_KERNEL); + if (!mnt->mnt_devname) + goto out_free_id; } atomic_set(&mnt->mnt_count, 1); @@ -127,16 +131,14 @@ struct vfsmount *alloc_vfsmnt(const char *name) INIT_LIST_HEAD(&mnt->mnt_slave_list); INIT_LIST_HEAD(&mnt->mnt_slave); atomic_set(&mnt->__mnt_writers, 0); - if (name) { - int size = strlen(name) + 1; - char *newname = kmalloc(size, GFP_KERNEL); - if (newname) { - memcpy(newname, name, size); - mnt->mnt_devname = newname; - } - } } return mnt; + +out_free_id: + mnt_free_id(mnt); +out_free_cache: + kmem_cache_free(mnt_cache, mnt); + return NULL; } /* @@ -309,10 +311,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt) */ if ((atomic_read(&mnt->__mnt_writers) < 0) && !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) { - printk(KERN_DEBUG "leak detected on mount(%p) writers " + WARN(1, KERN_DEBUG "leak detected on mount(%p) writers " "count: %d\n", mnt, atomic_read(&mnt->__mnt_writers)); - WARN_ON(1); /* use the flag to keep the dmesg spam down */ mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT; } @@ -1129,27 +1130,27 @@ static int do_umount(struct vfsmount *mnt, int flags) asmlinkage long sys_umount(char __user * name, int flags) { - struct nameidata nd; + struct path path; int retval; - retval = __user_walk(name, LOOKUP_FOLLOW, &nd); + retval = user_path(name, &path); if (retval) goto out; retval = -EINVAL; - if (nd.path.dentry != nd.path.mnt->mnt_root) + if (path.dentry != path.mnt->mnt_root) goto dput_and_out; - if (!check_mnt(nd.path.mnt)) + if (!check_mnt(path.mnt)) goto dput_and_out; retval = -EPERM; if (!capable(CAP_SYS_ADMIN)) goto dput_and_out; - retval = do_umount(nd.path.mnt, flags); + retval = do_umount(path.mnt, flags); dput_and_out: /* we mustn't call path_put() as that would clear mnt_expiry_mark */ - dput(nd.path.dentry); - mntput_no_expire(nd.path.mnt); + dput(path.dentry); + mntput_no_expire(path.mnt); out: return retval; } @@ -1666,31 +1667,31 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, if (IS_ERR(mnt)) return PTR_ERR(mnt); - return do_add_mount(mnt, nd, mnt_flags, NULL); + return do_add_mount(mnt, &nd->path, mnt_flags, NULL); } /* * add a mount into a namespace's mount tree * - provide the option of adding the new mount to an expiration list */ -int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, +int do_add_mount(struct vfsmount *newmnt, struct path *path, int mnt_flags, struct list_head *fslist) { int err; down_write(&namespace_sem); /* Something was mounted here while we slept */ - while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + while (d_mountpoint(path->dentry) && + follow_down(&path->mnt, &path->dentry)) ; err = -EINVAL; - if (!check_mnt(nd->path.mnt)) + if (!check_mnt(path->mnt)) goto unlock; /* Refuse the same filesystem on the same mount point */ err = -EBUSY; - if (nd->path.mnt->mnt_sb == newmnt->mnt_sb && - nd->path.mnt->mnt_root == nd->path.dentry) + if (path->mnt->mnt_sb == newmnt->mnt_sb && + path->mnt->mnt_root == path->dentry) goto unlock; err = -EINVAL; @@ -1698,7 +1699,7 @@ int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, goto unlock; newmnt->mnt_flags = mnt_flags; - if ((err = graft_tree(newmnt, &nd->path))) + if ((err = graft_tree(newmnt, path))) goto unlock; if (fslist) /* add to the specified expiration list */ @@ -1973,7 +1974,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, struct fs_struct *fs) { struct mnt_namespace *new_ns; - struct vfsmount *rootmnt = NULL, *pwdmnt = NULL, *altrootmnt = NULL; + struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct vfsmount *p, *q; new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); @@ -2016,10 +2017,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, pwdmnt = p; fs->pwd.mnt = mntget(q); } - if (p == fs->altroot.mnt) { - altrootmnt = p; - fs->altroot.mnt = mntget(q); - } } p = next_mnt(p, mnt_ns->root); q = next_mnt(q, new_ns->root); @@ -2030,8 +2027,6 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, mntput(rootmnt); if (pwdmnt) mntput(pwdmnt); - if (altrootmnt) - mntput(altrootmnt); return new_ns; } @@ -2184,28 +2179,26 @@ asmlinkage long sys_pivot_root(const char __user * new_root, const char __user * put_old) { struct vfsmount *tmp; - struct nameidata new_nd, old_nd; - struct path parent_path, root_parent, root; + struct path new, old, parent_path, root_parent, root; int error; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - error = __user_walk(new_root, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, - &new_nd); + error = user_path_dir(new_root, &new); if (error) goto out0; error = -EINVAL; - if (!check_mnt(new_nd.path.mnt)) + if (!check_mnt(new.mnt)) goto out1; - error = __user_walk(put_old, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &old_nd); + error = user_path_dir(put_old, &old); if (error) goto out1; - error = security_sb_pivotroot(&old_nd.path, &new_nd.path); + error = security_sb_pivotroot(&old, &new); if (error) { - path_put(&old_nd.path); + path_put(&old); goto out1; } @@ -2214,69 +2207,69 @@ asmlinkage long sys_pivot_root(const char __user * new_root, path_get(¤t->fs->root); read_unlock(¤t->fs->lock); down_write(&namespace_sem); - mutex_lock(&old_nd.path.dentry->d_inode->i_mutex); + mutex_lock(&old.dentry->d_inode->i_mutex); error = -EINVAL; - if (IS_MNT_SHARED(old_nd.path.mnt) || - IS_MNT_SHARED(new_nd.path.mnt->mnt_parent) || + if (IS_MNT_SHARED(old.mnt) || + IS_MNT_SHARED(new.mnt->mnt_parent) || IS_MNT_SHARED(root.mnt->mnt_parent)) goto out2; if (!check_mnt(root.mnt)) goto out2; error = -ENOENT; - if (IS_DEADDIR(new_nd.path.dentry->d_inode)) + if (IS_DEADDIR(new.dentry->d_inode)) goto out2; - if (d_unhashed(new_nd.path.dentry) && !IS_ROOT(new_nd.path.dentry)) + if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry)) goto out2; - if (d_unhashed(old_nd.path.dentry) && !IS_ROOT(old_nd.path.dentry)) + if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry)) goto out2; error = -EBUSY; - if (new_nd.path.mnt == root.mnt || - old_nd.path.mnt == root.mnt) + if (new.mnt == root.mnt || + old.mnt == root.mnt) goto out2; /* loop, on the same file system */ error = -EINVAL; if (root.mnt->mnt_root != root.dentry) goto out2; /* not a mountpoint */ if (root.mnt->mnt_parent == root.mnt) goto out2; /* not attached */ - if (new_nd.path.mnt->mnt_root != new_nd.path.dentry) + if (new.mnt->mnt_root != new.dentry) goto out2; /* not a mountpoint */ - if (new_nd.path.mnt->mnt_parent == new_nd.path.mnt) + if (new.mnt->mnt_parent == new.mnt) goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ - tmp = old_nd.path.mnt; + tmp = old.mnt; spin_lock(&vfsmount_lock); - if (tmp != new_nd.path.mnt) { + if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) goto out3; /* already mounted on put_old */ - if (tmp->mnt_parent == new_nd.path.mnt) + if (tmp->mnt_parent == new.mnt) break; tmp = tmp->mnt_parent; } - if (!is_subdir(tmp->mnt_mountpoint, new_nd.path.dentry)) + if (!is_subdir(tmp->mnt_mountpoint, new.dentry)) goto out3; - } else if (!is_subdir(old_nd.path.dentry, new_nd.path.dentry)) + } else if (!is_subdir(old.dentry, new.dentry)) goto out3; - detach_mnt(new_nd.path.mnt, &parent_path); + detach_mnt(new.mnt, &parent_path); detach_mnt(root.mnt, &root_parent); /* mount old root on put_old */ - attach_mnt(root.mnt, &old_nd.path); + attach_mnt(root.mnt, &old); /* mount new_root on / */ - attach_mnt(new_nd.path.mnt, &root_parent); + attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); spin_unlock(&vfsmount_lock); - chroot_fs_refs(&root, &new_nd.path); - security_sb_post_pivotroot(&root, &new_nd.path); + chroot_fs_refs(&root, &new); + security_sb_post_pivotroot(&root, &new); error = 0; path_put(&root_parent); path_put(&parent_path); out2: - mutex_unlock(&old_nd.path.dentry->d_inode->i_mutex); + mutex_unlock(&old.dentry->d_inode->i_mutex); up_write(&namespace_sem); path_put(&root); - path_put(&old_nd.path); + path_put(&old); out1: - path_put(&new_nd.path); + path_put(&new); out0: return error; out3: diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 011ef0b6d2d..07e9715b865 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -266,7 +266,7 @@ leave_me:; static int -__ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) +__ncp_lookup_validate(struct dentry *dentry) { struct ncp_server *server; struct dentry *parent; @@ -340,7 +340,7 @@ ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) { int res; lock_kernel(); - res = __ncp_lookup_validate(dentry, nd); + res = __ncp_lookup_validate(dentry); unlock_kernel(); return res; } diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 2e5ab1204de..d642f0e5b36 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -64,7 +64,7 @@ static void ncp_destroy_inode(struct inode *inode) kmem_cache_free(ncp_inode_cachep, NCP_FINFO(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct ncp_inode_info *ei = (struct ncp_inode_info *) foo; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 28a238dab23..74f92b717f7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1884,7 +1884,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) return status; nfs_access_add_cache(inode, &cache); out: - if ((cache.mask & mask) == mask) + if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) return 0; return -EACCES; } @@ -1907,17 +1907,17 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); } -int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int nfs_permission(struct inode *inode, int mask) { struct rpc_cred *cred; int res = 0; nfs_inc_stats(inode, NFSIOS_VFSACCESS); - if (mask == 0) + if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0) goto out; /* Is this sys_access() ? */ - if (nd != NULL && (nd->flags & LOOKUP_ACCESS)) + if (mask & MAY_ACCESS) goto force_lookup; switch (inode->i_mode & S_IFMT) { @@ -1926,8 +1926,7 @@ int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) case S_IFREG: /* NFSv4 has atomic_open... */ if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) - && nd != NULL - && (nd->flags & LOOKUP_OPEN)) + && (mask & MAY_OPEN)) goto out; break; case S_IFDIR: diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index df23f987da6..52daefa2f52 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1242,7 +1242,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) #endif } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct nfs_inode *nfsi = (struct nfs_inode *) foo; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 2f285ef7639..66df08dd1ca 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -129,7 +129,7 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) goto out_err; mntget(mnt); - err = do_add_mount(mnt, nd, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, + err = do_add_mount(mnt, &nd->path, nd->path.mnt->mnt_flags|MNT_SHRINKABLE, &nfs_automount_list); if (err < 0) { mntput(mnt); diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 8478fc25dae..46763d1cd39 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -127,7 +127,7 @@ enum { Opt_err }; -static match_table_t __initconst tokens = { +static match_table_t __initdata tokens = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1b94e3650f5..9abcd2b329f 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1718,9 +1718,9 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) * ones were explicitly specified. Fall back to legacy behavior and * just return success. */ - if ((nfsvers == 4 && options4->version == 1) || - (nfsvers <= 3 && options->version >= 1 && - options->version <= 6)) + if ((nfsvers == 4 && (!options4 || options4->version == 1)) || + (nfsvers <= 3 && (!options || (options->version >= 1 && + options->version <= 6)))) return 0; data = kzalloc(sizeof(*data), GFP_KERNEL); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 3adf8b26646..f089e5839d7 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -95,10 +95,11 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) static void nfs_async_unlink_release(void *calldata) { struct nfs_unlinkdata *data = calldata; + struct super_block *sb = data->dir->i_sb; nfs_dec_sillycount(data->dir); - nfs_sb_deactive(NFS_SERVER(data->dir)); nfs_free_unlinkdata(data); + nfs_sb_deactive(NFS_SB(sb)); } static const struct rpc_call_ops nfs_unlink_ops = { diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 33bfcf09db4..9dc036f1835 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1023,7 +1023,7 @@ exp_export(struct nfsctl_export *nxp) /* Look up the dentry */ err = path_lookup(nxp->ex_path, 0, &nd); if (err) - goto out_unlock; + goto out_put_clp; err = -EINVAL; exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); @@ -1090,9 +1090,9 @@ finish: exp_put(exp); if (fsid_key && !IS_ERR(fsid_key)) cache_put(&fsid_key->h, &svc_expkey_cache); - if (clp) - auth_domain_put(clp); path_put(&nd.path); +out_put_clp: + auth_domain_put(clp); out_unlock: exp_writeunlock(); out: diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index eef1629806f..2e51adac65d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -851,7 +851,7 @@ struct nfsd4_operation { static struct nfsd4_operation nfsd4_ops[]; -static inline char *nfsd4_op_name(unsigned opnum); +static const char *nfsd4_op_name(unsigned opnum); /* * COMPOUND call. @@ -1116,8 +1116,7 @@ static struct nfsd4_operation nfsd4_ops[OP_RELEASE_LOCKOWNER+1] = { }, }; -static inline char * -nfsd4_op_name(unsigned opnum) +static const char *nfsd4_op_name(unsigned opnum) { if (opnum < ARRAY_SIZE(nfsd4_ops)) return nfsd4_ops[opnum].op_name; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 1955a2702e6..c53e65f8f3a 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -12,6 +12,7 @@ #include <linux/time.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/namei.h> #include <linux/fcntl.h> #include <linux/net.h> #include <linux/in.h> diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index f45451eb1e3..ea37c96f044 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -51,7 +51,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) /* make sure parents give x permission to user */ int err; parent = dget_parent(tdentry); - err = permission(parent->d_inode, MAY_EXEC, NULL); + err = inode_permission(parent->d_inode, MAY_EXEC); if (err < 0) { dput(parent); break; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0f4481e0502..18060bed526 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1516,7 +1516,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, struct dentry *dentry, *dnew; __be32 err, cerr; int host_err; - umode_t mode; err = nfserr_noent; if (!flen || !plen) @@ -1535,11 +1534,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (IS_ERR(dnew)) goto out_nfserr; - mode = S_IALLUGO; - /* Only the MODE ATTRibute is even vaguely meaningful */ - if (iap && (iap->ia_valid & ATTR_MODE)) - mode = iap->ia_mode & S_IALLUGO; - host_err = mnt_want_write(fhp->fh_export->ex_path.mnt); if (host_err) goto out_nfserr; @@ -1551,11 +1545,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, else { strncpy(path_alloced, path, plen); path_alloced[plen] = 0; - host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced); kfree(path_alloced); } } else - host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, path); if (!host_err) { if (EX_ISSYNC(fhp->fh_export)) @@ -1959,12 +1953,12 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, return 0; /* This assumes NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */ - err = permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC), NULL); + err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC)); /* Allow read access to binaries even when mode 111 */ if (err == -EACCES && S_ISREG(inode->i_mode) && acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE)) - err = permission(inode, MAY_EXEC, NULL); + err = inode_permission(inode, MAY_EXEC); return err? nfserrno(err) : 0; } diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 00e9ccde8e4..b38f944f066 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1194,7 +1194,7 @@ lock_retry_remap: tbh = bhs[i]; if (!tbh) continue; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); /* The buffer dirty state is now irrelevant, just clean it. */ clear_buffer_dirty(tbh); diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index 33ff314cc50..9669541d011 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -665,7 +665,7 @@ lock_retry_remap: for (i = 0; i < nr_bhs; i++) { struct buffer_head *tbh = bhs[i]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) continue; if (unlikely(buffer_uptodate(tbh))) { unlock_buffer(tbh); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 3c5550cd11d..d020866d423 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2118,7 +2118,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, goto out; if (!count) goto out; - err = remove_suid(file->f_path.dentry); + err = file_remove_suid(file); if (err) goto out; file_update_time(file); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 790defb847e..17d32ca6bc3 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -586,7 +586,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); @@ -779,7 +779,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { struct buffer_head *tbh = bhs[i_bhs]; - if (unlikely(test_set_buffer_locked(tbh))) + if (!trylock_buffer(tbh)) BUG(); BUG_ON(!buffer_uptodate(tbh)); clear_buffer_dirty(tbh); diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 3e76f3b216b..4a46743b507 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3080,7 +3080,7 @@ struct kmem_cache *ntfs_inode_cache; struct kmem_cache *ntfs_big_inode_cache; /* Init once constructor for the inode slab cache. */ -static void ntfs_big_inode_init_once(struct kmem_cache *cachep, void *foo) +static void ntfs_big_inode_init_once(void *foo) { ntfs_inode *ni = (ntfs_inode *)foo; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 1db080135c6..506c24fb507 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -1073,12 +1073,15 @@ static void ocfs2_write_failure(struct inode *inode, for(i = 0; i < wc->w_num_pages; i++) { tmppage = wc->w_pages[i]; - if (ocfs2_should_order_data(inode)) - walk_page_buffers(wc->w_handle, page_buffers(tmppage), - from, to, NULL, - ocfs2_journal_dirty_data); - - block_commit_write(tmppage, from, to); + if (page_has_buffers(tmppage)) { + if (ocfs2_should_order_data(inode)) + walk_page_buffers(wc->w_handle, + page_buffers(tmppage), + from, to, NULL, + ocfs2_journal_dirty_data); + + block_commit_write(tmppage, from, to); + } } } @@ -1901,12 +1904,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping, to = PAGE_CACHE_SIZE; } - if (ocfs2_should_order_data(inode)) - walk_page_buffers(wc->w_handle, page_buffers(tmppage), - from, to, NULL, - ocfs2_journal_dirty_data); - - block_commit_write(tmppage, from, to); + if (page_has_buffers(tmppage)) { + if (ocfs2_should_order_data(inode)) + walk_page_buffers(wc->w_handle, + page_buffers(tmppage), + from, to, NULL, + ocfs2_journal_dirty_data); + block_commit_write(tmppage, from, to); + } } out_write_size: diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index e48aba698b7..533a789c3ef 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -267,8 +267,7 @@ static ssize_t dlmfs_file_write(struct file *filp, return writelen; } -static void dlmfs_init_once(struct kmem_cache *cachep, - void *foo) +static void dlmfs_init_once(void *foo) { struct dlmfs_inode_private *ip = (struct dlmfs_inode_private *) foo; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index e8514e8b6ce..ec2ed15c3da 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1176,7 +1176,7 @@ bail: return err; } -int ocfs2_permission(struct inode *inode, int mask, struct nameidata *nd) +int ocfs2_permission(struct inode *inode, int mask) { int ret; @@ -1766,8 +1766,8 @@ out_inode_unlock: out_rw_unlock: ocfs2_rw_unlock(inode, 1); - mutex_unlock(&inode->i_mutex); out: + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 048ddcaf5c8..1e27b4d017e 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -62,8 +62,7 @@ int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); -int ocfs2_permission(struct inode *inode, int mask, - struct nameidata *nd); +int ocfs2_permission(struct inode *inode, int mask); int ocfs2_should_update_atime(struct inode *inode, struct vfsmount *vfsmnt); diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index a8c19cb3cfd..7a37240f7a3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -57,7 +57,7 @@ static int __ocfs2_recovery_thread(void *arg); static int ocfs2_commit_cache(struct ocfs2_super *osb); static int ocfs2_wait_on_mount(struct ocfs2_super *osb); static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, - int dirty); + int dirty, int replayed); static int ocfs2_trylock_journal(struct ocfs2_super *osb, int slot_num); static int ocfs2_recover_orphans(struct ocfs2_super *osb, @@ -562,8 +562,18 @@ done: return status; } +static void ocfs2_bump_recovery_generation(struct ocfs2_dinode *di) +{ + le32_add_cpu(&(di->id1.journal1.ij_recovery_generation), 1); +} + +static u32 ocfs2_get_recovery_generation(struct ocfs2_dinode *di) +{ + return le32_to_cpu(di->id1.journal1.ij_recovery_generation); +} + static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, - int dirty) + int dirty, int replayed) { int status; unsigned int flags; @@ -593,6 +603,9 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, flags &= ~OCFS2_JOURNAL_DIRTY_FL; fe->id1.journal1.ij_flags = cpu_to_le32(flags); + if (replayed) + ocfs2_bump_recovery_generation(fe); + status = ocfs2_write_block(osb, bh, journal->j_inode); if (status < 0) mlog_errno(status); @@ -667,7 +680,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) * Do not toggle if flush was unsuccessful otherwise * will leave dirty metadata in a "clean" journal */ - status = ocfs2_journal_toggle_dirty(osb, 0); + status = ocfs2_journal_toggle_dirty(osb, 0, 0); if (status < 0) mlog_errno(status); } @@ -710,7 +723,7 @@ static void ocfs2_clear_journal_error(struct super_block *sb, } } -int ocfs2_journal_load(struct ocfs2_journal *journal, int local) +int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) { int status = 0; struct ocfs2_super *osb; @@ -729,7 +742,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local) ocfs2_clear_journal_error(osb->sb, journal->j_journal, osb->slot_num); - status = ocfs2_journal_toggle_dirty(osb, 1); + status = ocfs2_journal_toggle_dirty(osb, 1, replayed); if (status < 0) { mlog_errno(status); goto done; @@ -771,7 +784,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) goto bail; } - status = ocfs2_journal_toggle_dirty(journal->j_osb, 0); + status = ocfs2_journal_toggle_dirty(journal->j_osb, 0, 0); if (status < 0) mlog_errno(status); @@ -1034,6 +1047,12 @@ restart: spin_unlock(&osb->osb_lock); mlog(0, "All nodes recovered\n"); + /* Refresh all journal recovery generations from disk */ + status = ocfs2_check_journals_nolocks(osb); + status = (status == -EROFS) ? 0 : status; + if (status < 0) + mlog_errno(status); + ocfs2_super_unlock(osb, 1); /* We always run recovery on our own orphan dir - the dead @@ -1096,6 +1115,42 @@ out: mlog_exit_void(); } +static int ocfs2_read_journal_inode(struct ocfs2_super *osb, + int slot_num, + struct buffer_head **bh, + struct inode **ret_inode) +{ + int status = -EACCES; + struct inode *inode = NULL; + + BUG_ON(slot_num >= osb->max_slots); + + inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, + slot_num); + if (!inode || is_bad_inode(inode)) { + mlog_errno(status); + goto bail; + } + SET_INODE_JOURNAL(inode); + + status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + status = 0; + +bail: + if (inode) { + if (status || !ret_inode) + iput(inode); + else + *ret_inode = inode; + } + return status; +} + /* Does the actual journal replay and marks the journal inode as * clean. Will only replay if the journal inode is marked dirty. */ static int ocfs2_replay_journal(struct ocfs2_super *osb, @@ -1109,22 +1164,36 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, struct ocfs2_dinode *fe; journal_t *journal = NULL; struct buffer_head *bh = NULL; + u32 slot_reco_gen; - inode = ocfs2_get_system_file_inode(osb, JOURNAL_SYSTEM_INODE, - slot_num); - if (inode == NULL) { - status = -EACCES; + status = ocfs2_read_journal_inode(osb, slot_num, &bh, &inode); + if (status) { mlog_errno(status); goto done; } - if (is_bad_inode(inode)) { - status = -EACCES; - iput(inode); - inode = NULL; - mlog_errno(status); + + fe = (struct ocfs2_dinode *)bh->b_data; + slot_reco_gen = ocfs2_get_recovery_generation(fe); + brelse(bh); + bh = NULL; + + /* + * As the fs recovery is asynchronous, there is a small chance that + * another node mounted (and recovered) the slot before the recovery + * thread could get the lock. To handle that, we dirty read the journal + * inode for that slot to get the recovery generation. If it is + * different than what we expected, the slot has been recovered. + * If not, it needs recovery. + */ + if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) { + mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num, + osb->slot_recovery_generations[slot_num], slot_reco_gen); + osb->slot_recovery_generations[slot_num] = slot_reco_gen; + status = -EBUSY; goto done; } - SET_INODE_JOURNAL(inode); + + /* Continue with recovery as the journal has not yet been recovered */ status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); if (status < 0) { @@ -1138,9 +1207,12 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, fe = (struct ocfs2_dinode *) bh->b_data; flags = le32_to_cpu(fe->id1.journal1.ij_flags); + slot_reco_gen = ocfs2_get_recovery_generation(fe); if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { mlog(0, "No recovery required for node %d\n", node_num); + /* Refresh recovery generation for the slot */ + osb->slot_recovery_generations[slot_num] = slot_reco_gen; goto done; } @@ -1188,6 +1260,11 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, flags &= ~OCFS2_JOURNAL_DIRTY_FL; fe->id1.journal1.ij_flags = cpu_to_le32(flags); + /* Increment recovery generation to indicate successful recovery */ + ocfs2_bump_recovery_generation(fe); + osb->slot_recovery_generations[slot_num] = + ocfs2_get_recovery_generation(fe); + status = ocfs2_write_block(osb, bh, inode); if (status < 0) mlog_errno(status); @@ -1252,6 +1329,13 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, status = ocfs2_replay_journal(osb, node_num, slot_num); if (status < 0) { + if (status == -EBUSY) { + mlog(0, "Skipping recovery for slot %u (node %u) " + "as another node has recovered it\n", slot_num, + node_num); + status = 0; + goto done; + } mlog_errno(status); goto done; } @@ -1334,12 +1418,29 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) { unsigned int node_num; int status, i; + struct buffer_head *bh = NULL; + struct ocfs2_dinode *di; /* This is called with the super block cluster lock, so we * know that the slot map can't change underneath us. */ spin_lock(&osb->osb_lock); for (i = 0; i < osb->max_slots; i++) { + /* Read journal inode to get the recovery generation */ + status = ocfs2_read_journal_inode(osb, i, &bh, NULL); + if (status) { + mlog_errno(status); + goto bail; + } + di = (struct ocfs2_dinode *)bh->b_data; + osb->slot_recovery_generations[i] = + ocfs2_get_recovery_generation(di); + brelse(bh); + bh = NULL; + + mlog(0, "Slot %u recovery generation is %u\n", i, + osb->slot_recovery_generations[i]); + if (i == osb->slot_num) continue; @@ -1603,49 +1704,41 @@ static int ocfs2_commit_thread(void *arg) return 0; } -/* Look for a dirty journal without taking any cluster locks. Used for - * hard readonly access to determine whether the file system journals - * require recovery. */ +/* Reads all the journal inodes without taking any cluster locks. Used + * for hard readonly access to determine whether any journal requires + * recovery. Also used to refresh the recovery generation numbers after + * a journal has been recovered by another node. + */ int ocfs2_check_journals_nolocks(struct ocfs2_super *osb) { int ret = 0; unsigned int slot; - struct buffer_head *di_bh; + struct buffer_head *di_bh = NULL; struct ocfs2_dinode *di; - struct inode *journal = NULL; + int journal_dirty = 0; for(slot = 0; slot < osb->max_slots; slot++) { - journal = ocfs2_get_system_file_inode(osb, - JOURNAL_SYSTEM_INODE, - slot); - if (!journal || is_bad_inode(journal)) { - ret = -EACCES; - mlog_errno(ret); - goto out; - } - - di_bh = NULL; - ret = ocfs2_read_block(osb, OCFS2_I(journal)->ip_blkno, &di_bh, - 0, journal); - if (ret < 0) { + ret = ocfs2_read_journal_inode(osb, slot, &di_bh, NULL); + if (ret) { mlog_errno(ret); goto out; } di = (struct ocfs2_dinode *) di_bh->b_data; + osb->slot_recovery_generations[slot] = + ocfs2_get_recovery_generation(di); + if (le32_to_cpu(di->id1.journal1.ij_flags) & OCFS2_JOURNAL_DIRTY_FL) - ret = -EROFS; + journal_dirty = 1; brelse(di_bh); - if (ret) - break; + di_bh = NULL; } out: - if (journal) - iput(journal); - + if (journal_dirty) + ret = -EROFS; return ret; } diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index db82be2532e..2178ebffa05 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h @@ -161,7 +161,8 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, void ocfs2_journal_shutdown(struct ocfs2_super *osb); int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full); -int ocfs2_journal_load(struct ocfs2_journal *journal, int local); +int ocfs2_journal_load(struct ocfs2_journal *journal, int local, + int replayed); int ocfs2_check_journals_nolocks(struct ocfs2_super *osb); void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 1cb814be8ef..7f625f2b111 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -204,6 +204,8 @@ struct ocfs2_super struct ocfs2_slot_info *slot_info; + u32 *slot_recovery_generations; + spinlock_t node_map_lock; u64 root_blkno; diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 3f194517762..4f619850ccf 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -660,7 +660,10 @@ struct ocfs2_dinode { struct { /* Info for journal system inodes */ __le32 ij_flags; /* Mounted, version, etc. */ - __le32 ij_pad; + __le32 ij_recovery_generation; /* Incremented when the + journal is recovered + after an unclean + shutdown */ } journal1; } id1; /* Inode type dependant 1 */ /*C0*/ union { diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ccecfe5094f..88255d3f52b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1118,7 +1118,7 @@ bail: return status; } -static void ocfs2_inode_init_once(struct kmem_cache *cachep, void *data) +static void ocfs2_inode_init_once(void *data) { struct ocfs2_inode_info *oi = data; @@ -1442,6 +1442,15 @@ static int ocfs2_initialize_super(struct super_block *sb, } mlog(0, "max_slots for this device: %u\n", osb->max_slots); + osb->slot_recovery_generations = + kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), + GFP_KERNEL); + if (!osb->slot_recovery_generations) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + init_waitqueue_head(&osb->osb_wipe_event); osb->osb_orphan_wipes = kcalloc(osb->max_slots, sizeof(*osb->osb_orphan_wipes), @@ -1703,7 +1712,7 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) local = ocfs2_mount_local(osb); /* will play back anything left in the journal. */ - status = ocfs2_journal_load(osb->journal, local); + status = ocfs2_journal_load(osb->journal, local, dirty); if (status < 0) { mlog(ML_ERROR, "ocfs2 journal load failed! %d\n", status); goto finally; @@ -1768,6 +1777,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) ocfs2_free_slot_info(osb); kfree(osb->osb_orphan_wipes); + kfree(osb->slot_recovery_generations); /* FIXME * This belongs in journal shutdown, but because we have to * allocate osb->journal at the start of ocfs2_initalize_osb(), diff --git a/fs/omfs/Makefile b/fs/omfs/Makefile new file mode 100644 index 00000000000..8b82b63f112 --- /dev/null +++ b/fs/omfs/Makefile @@ -0,0 +1,4 @@ + +obj-$(CONFIG_OMFS_FS) += omfs.o + +omfs-y := bitmap.o dir.o file.o inode.o diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c new file mode 100644 index 00000000000..e1c0ec0ae98 --- /dev/null +++ b/fs/omfs/bitmap.c @@ -0,0 +1,193 @@ +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/buffer_head.h> +#include <asm/div64.h> +#include "omfs.h" + +unsigned long omfs_count_free(struct super_block *sb) +{ + unsigned int i; + unsigned long sum = 0; + struct omfs_sb_info *sbi = OMFS_SB(sb); + int nbits = sb->s_blocksize * 8; + + for (i = 0; i < sbi->s_imap_size; i++) + sum += nbits - bitmap_weight(sbi->s_imap[i], nbits); + + return sum; +} + +/* + * Counts the run of zero bits starting at bit up to max. + * It handles the case where a run might spill over a buffer. + * Called with bitmap lock. + */ +static int count_run(unsigned long **addr, int nbits, + int addrlen, int bit, int max) +{ + int count = 0; + int x; + + for (; addrlen > 0; addrlen--, addr++) { + x = find_next_bit(*addr, nbits, bit); + count += x - bit; + + if (x < nbits || count > max) + return min(count, max); + + bit = 0; + } + return min(count, max); +} + +/* + * Sets or clears the run of count bits starting with bit. + * Called with bitmap lock. + */ +static int set_run(struct super_block *sb, int map, + int nbits, int bit, int count, int set) +{ + int i; + int err; + struct buffer_head *bh; + struct omfs_sb_info *sbi = OMFS_SB(sb); + + err = -ENOMEM; + bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map); + if (!bh) + goto out; + + for (i = 0; i < count; i++, bit++) { + if (bit >= nbits) { + bit = 0; + map++; + + mark_buffer_dirty(bh); + brelse(bh); + bh = sb_bread(sb, + clus_to_blk(sbi, sbi->s_bitmap_ino) + map); + if (!bh) + goto out; + } + if (set) { + set_bit(bit, sbi->s_imap[map]); + set_bit(bit, (unsigned long *)bh->b_data); + } else { + clear_bit(bit, sbi->s_imap[map]); + clear_bit(bit, (unsigned long *)bh->b_data); + } + } + mark_buffer_dirty(bh); + brelse(bh); + err = 0; +out: + return err; +} + +/* + * Tries to allocate exactly one block. Returns true if sucessful. + */ +int omfs_allocate_block(struct super_block *sb, u64 block) +{ + struct buffer_head *bh; + struct omfs_sb_info *sbi = OMFS_SB(sb); + int bits_per_entry = 8 * sb->s_blocksize; + unsigned int map, bit; + int ret = 0; + u64 tmp; + + tmp = block; + bit = do_div(tmp, bits_per_entry); + map = tmp; + + mutex_lock(&sbi->s_bitmap_lock); + if (map >= sbi->s_imap_size || test_and_set_bit(bit, sbi->s_imap[map])) + goto out; + + if (sbi->s_bitmap_ino > 0) { + bh = sb_bread(sb, clus_to_blk(sbi, sbi->s_bitmap_ino) + map); + if (!bh) + goto out; + + set_bit(bit, (unsigned long *)bh->b_data); + mark_buffer_dirty(bh); + brelse(bh); + } + ret = 1; +out: + mutex_unlock(&sbi->s_bitmap_lock); + return ret; +} + + +/* + * Tries to allocate a set of blocks. The request size depends on the + * type: for inodes, we must allocate sbi->s_mirrors blocks, and for file + * blocks, we try to allocate sbi->s_clustersize, but can always get away + * with just one block. + */ +int omfs_allocate_range(struct super_block *sb, + int min_request, + int max_request, + u64 *return_block, + int *return_size) +{ + struct omfs_sb_info *sbi = OMFS_SB(sb); + int bits_per_entry = 8 * sb->s_blocksize; + int ret = 0; + int i, run, bit; + + mutex_lock(&sbi->s_bitmap_lock); + for (i = 0; i < sbi->s_imap_size; i++) { + bit = 0; + while (bit < bits_per_entry) { + bit = find_next_zero_bit(sbi->s_imap[i], bits_per_entry, + bit); + + if (bit == bits_per_entry) + break; + + run = count_run(&sbi->s_imap[i], bits_per_entry, + sbi->s_imap_size-i, bit, max_request); + + if (run >= min_request) + goto found; + bit += run; + } + } + ret = -ENOSPC; + goto out; + +found: + *return_block = i * bits_per_entry + bit; + *return_size = run; + ret = set_run(sb, i, bits_per_entry, bit, run, 1); + +out: + mutex_unlock(&sbi->s_bitmap_lock); + return ret; +} + +/* + * Clears count bits starting at a given block. + */ +int omfs_clear_range(struct super_block *sb, u64 block, int count) +{ + struct omfs_sb_info *sbi = OMFS_SB(sb); + int bits_per_entry = 8 * sb->s_blocksize; + u64 tmp; + unsigned int map, bit; + int ret; + + tmp = block; + bit = do_div(tmp, bits_per_entry); + map = tmp; + + if (map >= sbi->s_imap_size) + return 0; + + mutex_lock(&sbi->s_bitmap_lock); + ret = set_run(sb, map, bits_per_entry, bit, count, 0); + mutex_unlock(&sbi->s_bitmap_lock); + return ret; +} diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c new file mode 100644 index 00000000000..c0757e99887 --- /dev/null +++ b/fs/omfs/dir.c @@ -0,0 +1,504 @@ +/* + * OMFS (as used by RIO Karma) directory operations. + * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com> + * Released under GPL v2. + */ + +#include <linux/fs.h> +#include <linux/ctype.h> +#include <linux/buffer_head.h> +#include "omfs.h" + +static int omfs_hash(const char *name, int namelen, int mod) +{ + int i, hash = 0; + for (i = 0; i < namelen; i++) + hash ^= tolower(name[i]) << (i % 24); + return hash % mod; +} + +/* + * Finds the bucket for a given name and reads the containing block; + * *ofs is set to the offset of the first list entry. + */ +static struct buffer_head *omfs_get_bucket(struct inode *dir, + const char *name, int namelen, int *ofs) +{ + int nbuckets = (dir->i_size - OMFS_DIR_START)/8; + int block = clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino); + int bucket = omfs_hash(name, namelen, nbuckets); + + *ofs = OMFS_DIR_START + bucket * 8; + return sb_bread(dir->i_sb, block); +} + +static struct buffer_head *omfs_scan_list(struct inode *dir, u64 block, + const char *name, int namelen, + u64 *prev_block) +{ + struct buffer_head *bh; + struct omfs_inode *oi; + int err = -ENOENT; + *prev_block = ~0; + + while (block != ~0) { + bh = sb_bread(dir->i_sb, + clus_to_blk(OMFS_SB(dir->i_sb), block)); + if (!bh) { + err = -EIO; + goto err; + } + + oi = (struct omfs_inode *) bh->b_data; + if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, block)) { + brelse(bh); + goto err; + } + + if (strncmp(oi->i_name, name, namelen) == 0) + return bh; + + *prev_block = block; + block = be64_to_cpu(oi->i_sibling); + brelse(bh); + } +err: + return ERR_PTR(err); +} + +static struct buffer_head *omfs_find_entry(struct inode *dir, + const char *name, int namelen) +{ + struct buffer_head *bh; + int ofs; + u64 block, dummy; + + bh = omfs_get_bucket(dir, name, namelen, &ofs); + if (!bh) + return ERR_PTR(-EIO); + + block = be64_to_cpu(*((__be64 *) &bh->b_data[ofs])); + brelse(bh); + + return omfs_scan_list(dir, block, name, namelen, &dummy); +} + +int omfs_make_empty(struct inode *inode, struct super_block *sb) +{ + struct omfs_sb_info *sbi = OMFS_SB(sb); + int block = clus_to_blk(sbi, inode->i_ino); + struct buffer_head *bh; + struct omfs_inode *oi; + + bh = sb_bread(sb, block); + if (!bh) + return -ENOMEM; + + memset(bh->b_data, 0, sizeof(struct omfs_inode)); + + if (inode->i_mode & S_IFDIR) { + memset(&bh->b_data[OMFS_DIR_START], 0xff, + sbi->s_sys_blocksize - OMFS_DIR_START); + } else + omfs_make_empty_table(bh, OMFS_EXTENT_START); + + oi = (struct omfs_inode *) bh->b_data; + oi->i_head.h_self = cpu_to_be64(inode->i_ino); + oi->i_sibling = ~cpu_to_be64(0ULL); + + mark_buffer_dirty(bh); + brelse(bh); + return 0; +} + +static int omfs_add_link(struct dentry *dentry, struct inode *inode) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct omfs_inode *oi; + struct buffer_head *bh; + u64 block; + __be64 *entry; + int ofs; + + /* just prepend to head of queue in proper bucket */ + bh = omfs_get_bucket(dir, name, namelen, &ofs); + if (!bh) + goto out; + + entry = (__be64 *) &bh->b_data[ofs]; + block = be64_to_cpu(*entry); + *entry = cpu_to_be64(inode->i_ino); + mark_buffer_dirty(bh); + brelse(bh); + + /* now set the sibling and parent pointers on the new inode */ + bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), inode->i_ino)); + if (!bh) + goto out; + + oi = (struct omfs_inode *) bh->b_data; + memcpy(oi->i_name, name, namelen); + memset(oi->i_name + namelen, 0, OMFS_NAMELEN - namelen); + oi->i_sibling = cpu_to_be64(block); + oi->i_parent = cpu_to_be64(dir->i_ino); + mark_buffer_dirty(bh); + brelse(bh); + + dir->i_ctime = CURRENT_TIME_SEC; + + /* mark affected inodes dirty to rebuild checksums */ + mark_inode_dirty(dir); + mark_inode_dirty(inode); + return 0; +out: + return -ENOMEM; +} + +static int omfs_delete_entry(struct dentry *dentry) +{ + struct inode *dir = dentry->d_parent->d_inode; + struct inode *dirty; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct omfs_inode *oi; + struct buffer_head *bh, *bh2; + __be64 *entry, next; + u64 block, prev; + int ofs; + int err = -ENOMEM; + + /* delete the proper node in the bucket's linked list */ + bh = omfs_get_bucket(dir, name, namelen, &ofs); + if (!bh) + goto out; + + entry = (__be64 *) &bh->b_data[ofs]; + block = be64_to_cpu(*entry); + + bh2 = omfs_scan_list(dir, block, name, namelen, &prev); + if (IS_ERR(bh2)) { + err = PTR_ERR(bh2); + goto out_free_bh; + } + + oi = (struct omfs_inode *) bh2->b_data; + next = oi->i_sibling; + brelse(bh2); + + if (prev != ~0) { + /* found in middle of list, get list ptr */ + brelse(bh); + bh = sb_bread(dir->i_sb, + clus_to_blk(OMFS_SB(dir->i_sb), prev)); + if (!bh) + goto out; + + oi = (struct omfs_inode *) bh->b_data; + entry = &oi->i_sibling; + } + + *entry = next; + mark_buffer_dirty(bh); + + if (prev != ~0) { + dirty = omfs_iget(dir->i_sb, prev); + if (!IS_ERR(dirty)) { + mark_inode_dirty(dirty); + iput(dirty); + } + } + + err = 0; +out_free_bh: + brelse(bh); +out: + return err; +} + +static int omfs_dir_is_empty(struct inode *inode) +{ + int nbuckets = (inode->i_size - OMFS_DIR_START) / 8; + struct buffer_head *bh; + u64 *ptr; + int i; + + bh = sb_bread(inode->i_sb, clus_to_blk(OMFS_SB(inode->i_sb), + inode->i_ino)); + + if (!bh) + return 0; + + ptr = (u64 *) &bh->b_data[OMFS_DIR_START]; + + for (i = 0; i < nbuckets; i++, ptr++) + if (*ptr != ~0) + break; + + brelse(bh); + return *ptr != ~0; +} + +static int omfs_unlink(struct inode *dir, struct dentry *dentry) +{ + int ret; + struct inode *inode = dentry->d_inode; + + ret = omfs_delete_entry(dentry); + if (ret) + goto end_unlink; + + inode_dec_link_count(inode); + mark_inode_dirty(dir); + +end_unlink: + return ret; +} + +static int omfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + int err = -ENOTEMPTY; + struct inode *inode = dentry->d_inode; + + if (omfs_dir_is_empty(inode)) { + err = omfs_unlink(dir, dentry); + if (!err) + inode_dec_link_count(inode); + } + return err; +} + +static int omfs_add_node(struct inode *dir, struct dentry *dentry, int mode) +{ + int err; + struct inode *inode = omfs_new_inode(dir, mode); + + if (IS_ERR(inode)) + return PTR_ERR(inode); + + err = omfs_make_empty(inode, dir->i_sb); + if (err) + goto out_free_inode; + + err = omfs_add_link(dentry, inode); + if (err) + goto out_free_inode; + + d_instantiate(dentry, inode); + return 0; + +out_free_inode: + iput(inode); + return err; +} + +static int omfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) +{ + return omfs_add_node(dir, dentry, mode | S_IFDIR); +} + +static int omfs_create(struct inode *dir, struct dentry *dentry, int mode, + struct nameidata *nd) +{ + return omfs_add_node(dir, dentry, mode | S_IFREG); +} + +static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct buffer_head *bh; + struct inode *inode = NULL; + + if (dentry->d_name.len > OMFS_NAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + bh = omfs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); + if (!IS_ERR(bh)) { + struct omfs_inode *oi = (struct omfs_inode *)bh->b_data; + ino_t ino = be64_to_cpu(oi->i_head.h_self); + brelse(bh); + inode = omfs_iget(dir->i_sb, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + } + d_add(dentry, inode); + return NULL; +} + +/* sanity check block's self pointer */ +int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, + u64 fsblock) +{ + int is_bad; + u64 ino = be64_to_cpu(header->h_self); + is_bad = ((ino != fsblock) || (ino < sbi->s_root_ino) || + (ino > sbi->s_num_blocks)); + + if (is_bad) + printk(KERN_WARNING "omfs: bad hash chain detected\n"); + + return is_bad; +} + +static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, + u64 fsblock, int hindex) +{ + struct inode *dir = filp->f_dentry->d_inode; + struct buffer_head *bh; + struct omfs_inode *oi; + u64 self; + int res = 0; + unsigned char d_type; + + /* follow chain in this bucket */ + while (fsblock != ~0) { + bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), + fsblock)); + if (!bh) + goto out; + + oi = (struct omfs_inode *) bh->b_data; + if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) { + brelse(bh); + goto out; + } + + self = fsblock; + fsblock = be64_to_cpu(oi->i_sibling); + + /* skip visited nodes */ + if (hindex) { + hindex--; + brelse(bh); + continue; + } + + d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG; + + res = filldir(dirent, oi->i_name, strnlen(oi->i_name, + OMFS_NAMELEN), filp->f_pos, self, d_type); + if (res == 0) + filp->f_pos++; + brelse(bh); + } +out: + return res; +} + +static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct inode *new_inode = new_dentry->d_inode; + struct inode *old_inode = old_dentry->d_inode; + struct buffer_head *bh; + int is_dir; + int err; + + is_dir = S_ISDIR(old_inode->i_mode); + + if (new_inode) { + /* overwriting existing file/dir */ + err = -ENOTEMPTY; + if (is_dir && !omfs_dir_is_empty(new_inode)) + goto out; + + err = -ENOENT; + bh = omfs_find_entry(new_dir, new_dentry->d_name.name, + new_dentry->d_name.len); + if (IS_ERR(bh)) + goto out; + brelse(bh); + + err = omfs_unlink(new_dir, new_dentry); + if (err) + goto out; + } + + /* since omfs locates files by name, we need to unlink _before_ + * adding the new link or we won't find the old one */ + inode_inc_link_count(old_inode); + err = omfs_unlink(old_dir, old_dentry); + if (err) { + inode_dec_link_count(old_inode); + goto out; + } + + err = omfs_add_link(new_dentry, old_inode); + if (err) + goto out; + + old_inode->i_ctime = CURRENT_TIME_SEC; +out: + return err; +} + +static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir) +{ + struct inode *dir = filp->f_dentry->d_inode; + struct buffer_head *bh; + loff_t offset, res; + unsigned int hchain, hindex; + int nbuckets; + u64 fsblock; + int ret = -EINVAL; + + if (filp->f_pos >> 32) + goto success; + + switch ((unsigned long) filp->f_pos) { + case 0: + if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0) + goto success; + filp->f_pos++; + /* fall through */ + case 1: + if (filldir(dirent, "..", 2, 1, + parent_ino(filp->f_dentry), DT_DIR) < 0) + goto success; + filp->f_pos = 1 << 20; + /* fall through */ + } + + nbuckets = (dir->i_size - OMFS_DIR_START) / 8; + + /* high 12 bits store bucket + 1 and low 20 bits store hash index */ + hchain = (filp->f_pos >> 20) - 1; + hindex = filp->f_pos & 0xfffff; + + bh = sb_bread(dir->i_sb, clus_to_blk(OMFS_SB(dir->i_sb), dir->i_ino)); + if (!bh) + goto out; + + offset = OMFS_DIR_START + hchain * 8; + + for (; hchain < nbuckets; hchain++, offset += 8) { + fsblock = be64_to_cpu(*((__be64 *) &bh->b_data[offset])); + + res = omfs_fill_chain(filp, dirent, filldir, fsblock, hindex); + hindex = 0; + if (res < 0) + break; + + filp->f_pos = (hchain+2) << 20; + } + brelse(bh); +success: + ret = 0; +out: + return ret; +} + +struct inode_operations omfs_dir_inops = { + .lookup = omfs_lookup, + .mkdir = omfs_mkdir, + .rename = omfs_rename, + .create = omfs_create, + .unlink = omfs_unlink, + .rmdir = omfs_rmdir, +}; + +struct file_operations omfs_dir_operations = { + .read = generic_read_dir, + .readdir = omfs_readdir, +}; diff --git a/fs/omfs/file.c b/fs/omfs/file.c new file mode 100644 index 00000000000..834b2331f6b --- /dev/null +++ b/fs/omfs/file.c @@ -0,0 +1,365 @@ +/* + * OMFS (as used by RIO Karma) file operations. + * Copyright (C) 2005 Bob Copeland <me@bobcopeland.com> + * Released under GPL v2. + */ + +#include <linux/version.h> +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/buffer_head.h> +#include <linux/mpage.h> +#include "omfs.h" + +static int omfs_sync_file(struct file *file, struct dentry *dentry, + int datasync) +{ + struct inode *inode = dentry->d_inode; + int err; + + err = sync_mapping_buffers(inode->i_mapping); + if (!(inode->i_state & I_DIRTY)) + return err; + if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) + return err; + err |= omfs_sync_inode(inode); + return err ? -EIO : 0; +} + +static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset) +{ + return (sbi->s_sys_blocksize - offset - + sizeof(struct omfs_extent)) / + sizeof(struct omfs_extent_entry) + 1; +} + +void omfs_make_empty_table(struct buffer_head *bh, int offset) +{ + struct omfs_extent *oe = (struct omfs_extent *) &bh->b_data[offset]; + + oe->e_next = ~cpu_to_be64(0ULL); + oe->e_extent_count = cpu_to_be32(1), + oe->e_fill = cpu_to_be32(0x22), + oe->e_entry.e_cluster = ~cpu_to_be64(0ULL); + oe->e_entry.e_blocks = ~cpu_to_be64(0ULL); +} + +int omfs_shrink_inode(struct inode *inode) +{ + struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); + struct omfs_extent *oe; + struct omfs_extent_entry *entry; + struct buffer_head *bh; + u64 next, last; + u32 extent_count; + u32 max_extents; + int ret; + + /* traverse extent table, freeing each entry that is greater + * than inode->i_size; + */ + next = inode->i_ino; + + /* only support truncate -> 0 for now */ + ret = -EIO; + if (inode->i_size != 0) + goto out; + + bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); + if (!bh) + goto out; + + oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); + max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START); + + for (;;) { + + if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) + goto out_brelse; + + extent_count = be32_to_cpu(oe->e_extent_count); + + if (extent_count > max_extents) + goto out_brelse; + + last = next; + next = be64_to_cpu(oe->e_next); + entry = &oe->e_entry; + + /* ignore last entry as it is the terminator */ + for (; extent_count > 1; extent_count--) { + u64 start, count; + start = be64_to_cpu(entry->e_cluster); + count = be64_to_cpu(entry->e_blocks); + + omfs_clear_range(inode->i_sb, start, (int) count); + entry++; + } + omfs_make_empty_table(bh, (char *) oe - bh->b_data); + mark_buffer_dirty(bh); + brelse(bh); + + if (last != inode->i_ino) + omfs_clear_range(inode->i_sb, last, sbi->s_mirrors); + + if (next == ~0) + break; + + bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); + if (!bh) + goto out; + oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); + max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT); + } + ret = 0; +out: + return ret; +out_brelse: + brelse(bh); + return ret; +} + +static void omfs_truncate(struct inode *inode) +{ + omfs_shrink_inode(inode); + mark_inode_dirty(inode); +} + +/* + * Add new blocks to the current extent, or create new entries/continuations + * as necessary. + */ +static int omfs_grow_extent(struct inode *inode, struct omfs_extent *oe, + u64 *ret_block) +{ + struct omfs_extent_entry *terminator; + struct omfs_extent_entry *entry = &oe->e_entry; + struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); + u32 extent_count = be32_to_cpu(oe->e_extent_count); + u64 new_block = 0; + u32 max_count; + int new_count; + int ret = 0; + + /* reached the end of the extent table with no blocks mapped. + * there are three possibilities for adding: grow last extent, + * add a new extent to the current extent table, and add a + * continuation inode. in last two cases need an allocator for + * sbi->s_cluster_size + */ + + /* TODO: handle holes */ + + /* should always have a terminator */ + if (extent_count < 1) + return -EIO; + + /* trivially grow current extent, if next block is not taken */ + terminator = entry + extent_count - 1; + if (extent_count > 1) { + entry = terminator-1; + new_block = be64_to_cpu(entry->e_cluster) + + be64_to_cpu(entry->e_blocks); + + if (omfs_allocate_block(inode->i_sb, new_block)) { + entry->e_blocks = + cpu_to_be64(be64_to_cpu(entry->e_blocks) + 1); + terminator->e_blocks = ~(cpu_to_be64( + be64_to_cpu(~terminator->e_blocks) + 1)); + goto out; + } + } + max_count = omfs_max_extents(sbi, OMFS_EXTENT_START); + + /* TODO: add a continuation block here */ + if (be32_to_cpu(oe->e_extent_count) > max_count-1) + return -EIO; + + /* try to allocate a new cluster */ + ret = omfs_allocate_range(inode->i_sb, 1, sbi->s_clustersize, + &new_block, &new_count); + if (ret) + goto out_fail; + + /* copy terminator down an entry */ + entry = terminator; + terminator++; + memcpy(terminator, entry, sizeof(struct omfs_extent_entry)); + + entry->e_cluster = cpu_to_be64(new_block); + entry->e_blocks = cpu_to_be64((u64) new_count); + + terminator->e_blocks = ~(cpu_to_be64( + be64_to_cpu(~terminator->e_blocks) + (u64) new_count)); + + /* write in new entry */ + oe->e_extent_count = cpu_to_be32(1 + be32_to_cpu(oe->e_extent_count)); + +out: + *ret_block = new_block; +out_fail: + return ret; +} + +/* + * Scans across the directory table for a given file block number. + * If block not found, return 0. + */ +static sector_t find_block(struct inode *inode, struct omfs_extent_entry *ent, + sector_t block, int count, int *left) +{ + /* count > 1 because of terminator */ + sector_t searched = 0; + for (; count > 1; count--) { + int numblocks = clus_to_blk(OMFS_SB(inode->i_sb), + be64_to_cpu(ent->e_blocks)); + + if (block >= searched && + block < searched + numblocks) { + /* + * found it at cluster + (block - searched) + * numblocks - (block - searched) is remainder + */ + *left = numblocks - (block - searched); + return clus_to_blk(OMFS_SB(inode->i_sb), + be64_to_cpu(ent->e_cluster)) + + block - searched; + } + searched += numblocks; + ent++; + } + return 0; +} + +static int omfs_get_block(struct inode *inode, sector_t block, + struct buffer_head *bh_result, int create) +{ + struct buffer_head *bh; + sector_t next, offset; + int ret; + u64 new_block; + u32 max_extents; + int extent_count; + struct omfs_extent *oe; + struct omfs_extent_entry *entry; + struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); + int max_blocks = bh_result->b_size >> inode->i_blkbits; + int remain; + + ret = -EIO; + bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino)); + if (!bh) + goto out; + + oe = (struct omfs_extent *)(&bh->b_data[OMFS_EXTENT_START]); + max_extents = omfs_max_extents(sbi, OMFS_EXTENT_START); + next = inode->i_ino; + + for (;;) { + + if (omfs_is_bad(sbi, (struct omfs_header *) bh->b_data, next)) + goto out_brelse; + + extent_count = be32_to_cpu(oe->e_extent_count); + next = be64_to_cpu(oe->e_next); + entry = &oe->e_entry; + + if (extent_count > max_extents) + goto out_brelse; + + offset = find_block(inode, entry, block, extent_count, &remain); + if (offset > 0) { + ret = 0; + map_bh(bh_result, inode->i_sb, offset); + if (remain > max_blocks) + remain = max_blocks; + bh_result->b_size = (remain << inode->i_blkbits); + goto out_brelse; + } + if (next == ~0) + break; + + brelse(bh); + bh = sb_bread(inode->i_sb, clus_to_blk(sbi, next)); + if (!bh) + goto out; + oe = (struct omfs_extent *) (&bh->b_data[OMFS_EXTENT_CONT]); + max_extents = omfs_max_extents(sbi, OMFS_EXTENT_CONT); + } + if (create) { + ret = omfs_grow_extent(inode, oe, &new_block); + if (ret == 0) { + mark_buffer_dirty(bh); + mark_inode_dirty(inode); + map_bh(bh_result, inode->i_sb, + clus_to_blk(sbi, new_block)); + } + } +out_brelse: + brelse(bh); +out: + return ret; +} + +static int omfs_readpage(struct file *file, struct page *page) +{ + return block_read_full_page(page, omfs_get_block); +} + +static int omfs_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, omfs_get_block); +} + +static int omfs_writepage(struct page *page, struct writeback_control *wbc) +{ + return block_write_full_page(page, omfs_get_block, wbc); +} + +static int +omfs_writepages(struct address_space *mapping, struct writeback_control *wbc) +{ + return mpage_writepages(mapping, wbc, omfs_get_block); +} + +static int omfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata) +{ + *pagep = NULL; + return block_write_begin(file, mapping, pos, len, flags, + pagep, fsdata, omfs_get_block); +} + +static sector_t omfs_bmap(struct address_space *mapping, sector_t block) +{ + return generic_block_bmap(mapping, block, omfs_get_block); +} + +struct file_operations omfs_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = generic_file_aio_write, + .mmap = generic_file_mmap, + .fsync = omfs_sync_file, + .splice_read = generic_file_splice_read, +}; + +struct inode_operations omfs_file_inops = { + .truncate = omfs_truncate +}; + +struct address_space_operations omfs_aops = { + .readpage = omfs_readpage, + .readpages = omfs_readpages, + .writepage = omfs_writepage, + .writepages = omfs_writepages, + .sync_page = block_sync_page, + .write_begin = omfs_write_begin, + .write_end = generic_write_end, + .bmap = omfs_bmap, +}; + diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c new file mode 100644 index 00000000000..d29047b1b9b --- /dev/null +++ b/fs/omfs/inode.c @@ -0,0 +1,553 @@ +/* + * Optimized MPEG FS - inode and super operations. + * Copyright (C) 2006 Bob Copeland <me@bobcopeland.com> + * Released under GPL v2. + */ +#include <linux/version.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/vfs.h> +#include <linux/parser.h> +#include <linux/buffer_head.h> +#include <linux/vmalloc.h> +#include <linux/crc-itu-t.h> +#include "omfs.h" + +MODULE_AUTHOR("Bob Copeland <me@bobcopeland.com>"); +MODULE_DESCRIPTION("OMFS (ReplayTV/Karma) Filesystem for Linux"); +MODULE_LICENSE("GPL"); + +struct inode *omfs_new_inode(struct inode *dir, int mode) +{ + struct inode *inode; + u64 new_block; + int err; + int len; + struct omfs_sb_info *sbi = OMFS_SB(dir->i_sb); + + inode = new_inode(dir->i_sb); + if (!inode) + return ERR_PTR(-ENOMEM); + + err = omfs_allocate_range(dir->i_sb, sbi->s_mirrors, sbi->s_mirrors, + &new_block, &len); + if (err) + goto fail; + + inode->i_ino = new_block; + inode->i_mode = mode; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_blocks = 0; + inode->i_mapping->a_ops = &omfs_aops; + + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + switch (mode & S_IFMT) { + case S_IFDIR: + inode->i_op = &omfs_dir_inops; + inode->i_fop = &omfs_dir_operations; + inode->i_size = sbi->s_sys_blocksize; + inc_nlink(inode); + break; + case S_IFREG: + inode->i_op = &omfs_file_inops; + inode->i_fop = &omfs_file_operations; + inode->i_size = 0; + break; + } + + insert_inode_hash(inode); + mark_inode_dirty(inode); + return inode; +fail: + make_bad_inode(inode); + iput(inode); + return ERR_PTR(err); +} + +/* + * Update the header checksums for a dirty inode based on its contents. + * Caller is expected to hold the buffer head underlying oi and mark it + * dirty. + */ +static void omfs_update_checksums(struct omfs_inode *oi) +{ + int xor, i, ofs = 0, count; + u16 crc = 0; + unsigned char *ptr = (unsigned char *) oi; + + count = be32_to_cpu(oi->i_head.h_body_size); + ofs = sizeof(struct omfs_header); + + crc = crc_itu_t(crc, ptr + ofs, count); + oi->i_head.h_crc = cpu_to_be16(crc); + + xor = ptr[0]; + for (i = 1; i < OMFS_XOR_COUNT; i++) + xor ^= ptr[i]; + + oi->i_head.h_check_xor = xor; +} + +static int omfs_write_inode(struct inode *inode, int wait) +{ + struct omfs_inode *oi; + struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); + struct buffer_head *bh, *bh2; + unsigned int block; + u64 ctime; + int i; + int ret = -EIO; + int sync_failed = 0; + + /* get current inode since we may have written sibling ptrs etc. */ + block = clus_to_blk(sbi, inode->i_ino); + bh = sb_bread(inode->i_sb, block); + if (!bh) + goto out; + + oi = (struct omfs_inode *) bh->b_data; + + oi->i_head.h_self = cpu_to_be64(inode->i_ino); + if (S_ISDIR(inode->i_mode)) + oi->i_type = OMFS_DIR; + else if (S_ISREG(inode->i_mode)) + oi->i_type = OMFS_FILE; + else { + printk(KERN_WARNING "omfs: unknown file type: %d\n", + inode->i_mode); + goto out_brelse; + } + + oi->i_head.h_body_size = cpu_to_be32(sbi->s_sys_blocksize - + sizeof(struct omfs_header)); + oi->i_head.h_version = 1; + oi->i_head.h_type = OMFS_INODE_NORMAL; + oi->i_head.h_magic = OMFS_IMAGIC; + oi->i_size = cpu_to_be64(inode->i_size); + + ctime = inode->i_ctime.tv_sec * 1000LL + + ((inode->i_ctime.tv_nsec + 999)/1000); + oi->i_ctime = cpu_to_be64(ctime); + + omfs_update_checksums(oi); + + mark_buffer_dirty(bh); + if (wait) { + sync_dirty_buffer(bh); + if (buffer_req(bh) && !buffer_uptodate(bh)) + sync_failed = 1; + } + + /* if mirroring writes, copy to next fsblock */ + for (i = 1; i < sbi->s_mirrors; i++) { + bh2 = sb_bread(inode->i_sb, block + i * + (sbi->s_blocksize / sbi->s_sys_blocksize)); + if (!bh2) + goto out_brelse; + + memcpy(bh2->b_data, bh->b_data, bh->b_size); + mark_buffer_dirty(bh2); + if (wait) { + sync_dirty_buffer(bh2); + if (buffer_req(bh2) && !buffer_uptodate(bh2)) + sync_failed = 1; + } + brelse(bh2); + } + ret = (sync_failed) ? -EIO : 0; +out_brelse: + brelse(bh); +out: + return ret; +} + +int omfs_sync_inode(struct inode *inode) +{ + return omfs_write_inode(inode, 1); +} + +/* + * called when an entry is deleted, need to clear the bits in the + * bitmaps. + */ +static void omfs_delete_inode(struct inode *inode) +{ + truncate_inode_pages(&inode->i_data, 0); + + if (S_ISREG(inode->i_mode)) { + inode->i_size = 0; + omfs_shrink_inode(inode); + } + + omfs_clear_range(inode->i_sb, inode->i_ino, 2); + clear_inode(inode); +} + +struct inode *omfs_iget(struct super_block *sb, ino_t ino) +{ + struct omfs_sb_info *sbi = OMFS_SB(sb); + struct omfs_inode *oi; + struct buffer_head *bh; + unsigned int block; + u64 ctime; + unsigned long nsecs; + struct inode *inode; + + inode = iget_locked(sb, ino); + if (!inode) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + block = clus_to_blk(sbi, ino); + bh = sb_bread(inode->i_sb, block); + if (!bh) + goto iget_failed; + + oi = (struct omfs_inode *)bh->b_data; + + /* check self */ + if (ino != be64_to_cpu(oi->i_head.h_self)) + goto fail_bh; + + inode->i_uid = sbi->s_uid; + inode->i_gid = sbi->s_gid; + + ctime = be64_to_cpu(oi->i_ctime); + nsecs = do_div(ctime, 1000) * 1000L; + + inode->i_atime.tv_sec = ctime; + inode->i_mtime.tv_sec = ctime; + inode->i_ctime.tv_sec = ctime; + inode->i_atime.tv_nsec = nsecs; + inode->i_mtime.tv_nsec = nsecs; + inode->i_ctime.tv_nsec = nsecs; + + inode->i_mapping->a_ops = &omfs_aops; + + switch (oi->i_type) { + case OMFS_DIR: + inode->i_mode = S_IFDIR | (S_IRWXUGO & ~sbi->s_dmask); + inode->i_op = &omfs_dir_inops; + inode->i_fop = &omfs_dir_operations; + inode->i_size = sbi->s_sys_blocksize; + inc_nlink(inode); + break; + case OMFS_FILE: + inode->i_mode = S_IFREG | (S_IRWXUGO & ~sbi->s_fmask); + inode->i_fop = &omfs_file_operations; + inode->i_size = be64_to_cpu(oi->i_size); + break; + } + brelse(bh); + unlock_new_inode(inode); + return inode; +fail_bh: + brelse(bh); +iget_failed: + iget_failed(inode); + return ERR_PTR(-EIO); +} + +static void omfs_put_super(struct super_block *sb) +{ + struct omfs_sb_info *sbi = OMFS_SB(sb); + kfree(sbi->s_imap); + kfree(sbi); + sb->s_fs_info = NULL; +} + +static int omfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *s = dentry->d_sb; + struct omfs_sb_info *sbi = OMFS_SB(s); + buf->f_type = OMFS_MAGIC; + buf->f_bsize = sbi->s_blocksize; + buf->f_blocks = sbi->s_num_blocks; + buf->f_files = sbi->s_num_blocks; + buf->f_namelen = OMFS_NAMELEN; + + buf->f_bfree = buf->f_bavail = buf->f_ffree = + omfs_count_free(s); + return 0; +} + +static struct super_operations omfs_sops = { + .write_inode = omfs_write_inode, + .delete_inode = omfs_delete_inode, + .put_super = omfs_put_super, + .statfs = omfs_statfs, + .show_options = generic_show_options, +}; + +/* + * For Rio Karma, there is an on-disk free bitmap whose location is + * stored in the root block. For ReplayTV, there is no such free bitmap + * so we have to walk the tree. Both inodes and file data are allocated + * from the same map. This array can be big (300k) so we allocate + * in units of the blocksize. + */ +static int omfs_get_imap(struct super_block *sb) +{ + int bitmap_size; + int array_size; + int count; + struct omfs_sb_info *sbi = OMFS_SB(sb); + struct buffer_head *bh; + unsigned long **ptr; + sector_t block; + + bitmap_size = DIV_ROUND_UP(sbi->s_num_blocks, 8); + array_size = DIV_ROUND_UP(bitmap_size, sb->s_blocksize); + + if (sbi->s_bitmap_ino == ~0ULL) + goto out; + + sbi->s_imap_size = array_size; + sbi->s_imap = kzalloc(array_size * sizeof(unsigned long *), GFP_KERNEL); + if (!sbi->s_imap) + goto nomem; + + block = clus_to_blk(sbi, sbi->s_bitmap_ino); + ptr = sbi->s_imap; + for (count = bitmap_size; count > 0; count -= sb->s_blocksize) { + bh = sb_bread(sb, block++); + if (!bh) + goto nomem_free; + *ptr = kmalloc(sb->s_blocksize, GFP_KERNEL); + if (!*ptr) { + brelse(bh); + goto nomem_free; + } + memcpy(*ptr, bh->b_data, sb->s_blocksize); + if (count < sb->s_blocksize) + memset((void *)*ptr + count, 0xff, + sb->s_blocksize - count); + brelse(bh); + ptr++; + } +out: + return 0; + +nomem_free: + for (count = 0; count < array_size; count++) + kfree(sbi->s_imap[count]); + + kfree(sbi->s_imap); +nomem: + sbi->s_imap = NULL; + sbi->s_imap_size = 0; + return -ENOMEM; +} + +enum { + Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask +}; + +static match_table_t tokens = { + {Opt_uid, "uid=%u"}, + {Opt_gid, "gid=%u"}, + {Opt_umask, "umask=%o"}, + {Opt_dmask, "dmask=%o"}, + {Opt_fmask, "fmask=%o"}, +}; + +static int parse_options(char *options, struct omfs_sb_info *sbi) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + + if (!options) + return 1; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_uid: + if (match_int(&args[0], &option)) + return 0; + sbi->s_uid = option; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return 0; + sbi->s_gid = option; + break; + case Opt_umask: + if (match_octal(&args[0], &option)) + return 0; + sbi->s_fmask = sbi->s_dmask = option; + break; + case Opt_dmask: + if (match_octal(&args[0], &option)) + return 0; + sbi->s_dmask = option; + break; + case Opt_fmask: + if (match_octal(&args[0], &option)) + return 0; + sbi->s_fmask = option; + break; + default: + return 0; + } + } + return 1; +} + +static int omfs_fill_super(struct super_block *sb, void *data, int silent) +{ + struct buffer_head *bh, *bh2; + struct omfs_super_block *omfs_sb; + struct omfs_root_block *omfs_rb; + struct omfs_sb_info *sbi; + struct inode *root; + sector_t start; + int ret = -EINVAL; + + save_mount_options(sb, (char *) data); + + sbi = kzalloc(sizeof(struct omfs_sb_info), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + + sb->s_fs_info = sbi; + + sbi->s_uid = current->uid; + sbi->s_gid = current->gid; + sbi->s_dmask = sbi->s_fmask = current->fs->umask; + + if (!parse_options((char *) data, sbi)) + goto end; + + sb->s_maxbytes = 0xffffffff; + + sb_set_blocksize(sb, 0x200); + + bh = sb_bread(sb, 0); + if (!bh) + goto end; + + omfs_sb = (struct omfs_super_block *)bh->b_data; + + if (omfs_sb->s_magic != cpu_to_be32(OMFS_MAGIC)) { + if (!silent) + printk(KERN_ERR "omfs: Invalid superblock (%x)\n", + omfs_sb->s_magic); + goto out_brelse_bh; + } + sb->s_magic = OMFS_MAGIC; + + sbi->s_num_blocks = be64_to_cpu(omfs_sb->s_num_blocks); + sbi->s_blocksize = be32_to_cpu(omfs_sb->s_blocksize); + sbi->s_mirrors = be32_to_cpu(omfs_sb->s_mirrors); + sbi->s_root_ino = be64_to_cpu(omfs_sb->s_root_block); + sbi->s_sys_blocksize = be32_to_cpu(omfs_sb->s_sys_blocksize); + mutex_init(&sbi->s_bitmap_lock); + + if (sbi->s_sys_blocksize > PAGE_SIZE) { + printk(KERN_ERR "omfs: sysblock size (%d) is out of range\n", + sbi->s_sys_blocksize); + goto out_brelse_bh; + } + + if (sbi->s_blocksize < sbi->s_sys_blocksize || + sbi->s_blocksize > OMFS_MAX_BLOCK_SIZE) { + printk(KERN_ERR "omfs: block size (%d) is out of range\n", + sbi->s_blocksize); + goto out_brelse_bh; + } + + /* + * Use sys_blocksize as the fs block since it is smaller than a + * page while the fs blocksize can be larger. + */ + sb_set_blocksize(sb, sbi->s_sys_blocksize); + + /* + * ...and the difference goes into a shift. sys_blocksize is always + * a power of two factor of blocksize. + */ + sbi->s_block_shift = get_bitmask_order(sbi->s_blocksize) - + get_bitmask_order(sbi->s_sys_blocksize); + + start = clus_to_blk(sbi, be64_to_cpu(omfs_sb->s_root_block)); + bh2 = sb_bread(sb, start); + if (!bh2) + goto out_brelse_bh; + + omfs_rb = (struct omfs_root_block *)bh2->b_data; + + sbi->s_bitmap_ino = be64_to_cpu(omfs_rb->r_bitmap); + sbi->s_clustersize = be32_to_cpu(omfs_rb->r_clustersize); + + if (sbi->s_num_blocks != be64_to_cpu(omfs_rb->r_num_blocks)) { + printk(KERN_ERR "omfs: block count discrepancy between " + "super and root blocks (%llx, %llx)\n", + (unsigned long long)sbi->s_num_blocks, + (unsigned long long)be64_to_cpu(omfs_rb->r_num_blocks)); + goto out_brelse_bh2; + } + + ret = omfs_get_imap(sb); + if (ret) + goto out_brelse_bh2; + + sb->s_op = &omfs_sops; + + root = omfs_iget(sb, be64_to_cpu(omfs_rb->r_root_dir)); + if (IS_ERR(root)) { + ret = PTR_ERR(root); + goto out_brelse_bh2; + } + + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + iput(root); + goto out_brelse_bh2; + } + printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); + + ret = 0; +out_brelse_bh2: + brelse(bh2); +out_brelse_bh: + brelse(bh); +end: + return ret; +} + +static int omfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *m) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m); +} + +static struct file_system_type omfs_fs_type = { + .owner = THIS_MODULE, + .name = "omfs", + .get_sb = omfs_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_omfs_fs(void) +{ + return register_filesystem(&omfs_fs_type); +} + +static void __exit exit_omfs_fs(void) +{ + unregister_filesystem(&omfs_fs_type); +} + +module_init(init_omfs_fs); +module_exit(exit_omfs_fs); diff --git a/fs/omfs/omfs.h b/fs/omfs/omfs.h new file mode 100644 index 00000000000..2bc0f067040 --- /dev/null +++ b/fs/omfs/omfs.h @@ -0,0 +1,67 @@ +#ifndef _OMFS_H +#define _OMFS_H + +#include <linux/module.h> +#include <linux/fs.h> + +#include "omfs_fs.h" + +/* In-memory structures */ +struct omfs_sb_info { + u64 s_num_blocks; + u64 s_bitmap_ino; + u64 s_root_ino; + u32 s_blocksize; + u32 s_mirrors; + u32 s_sys_blocksize; + u32 s_clustersize; + int s_block_shift; + unsigned long **s_imap; + int s_imap_size; + struct mutex s_bitmap_lock; + int s_uid; + int s_gid; + int s_dmask; + int s_fmask; +}; + +/* convert a cluster number to a scaled block number */ +static inline sector_t clus_to_blk(struct omfs_sb_info *sbi, sector_t block) +{ + return block << sbi->s_block_shift; +} + +static inline struct omfs_sb_info *OMFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +/* bitmap.c */ +extern unsigned long omfs_count_free(struct super_block *sb); +extern int omfs_allocate_block(struct super_block *sb, u64 block); +extern int omfs_allocate_range(struct super_block *sb, int min_request, + int max_request, u64 *return_block, int *return_size); +extern int omfs_clear_range(struct super_block *sb, u64 block, int count); + +/* dir.c */ +extern struct file_operations omfs_dir_operations; +extern struct inode_operations omfs_dir_inops; +extern int omfs_make_empty(struct inode *inode, struct super_block *sb); +extern int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, + u64 fsblock); + +/* file.c */ +extern struct file_operations omfs_file_operations; +extern struct inode_operations omfs_file_inops; +extern struct address_space_operations omfs_aops; +extern void omfs_make_empty_table(struct buffer_head *bh, int offset); +extern int omfs_shrink_inode(struct inode *inode); + +/* inode.c */ +extern struct inode *omfs_iget(struct super_block *sb, ino_t inode); +extern struct inode *omfs_new_inode(struct inode *dir, int mode); +extern int omfs_reserve_block(struct super_block *sb, sector_t block); +extern int omfs_find_empty_block(struct super_block *sb, int mode, ino_t *ino); +extern int omfs_sync_inode(struct inode *inode); + +#endif diff --git a/fs/omfs/omfs_fs.h b/fs/omfs/omfs_fs.h new file mode 100644 index 00000000000..12cca245d6e --- /dev/null +++ b/fs/omfs/omfs_fs.h @@ -0,0 +1,80 @@ +#ifndef _OMFS_FS_H +#define _OMFS_FS_H + +/* OMFS On-disk structures */ + +#define OMFS_MAGIC 0xC2993D87 +#define OMFS_IMAGIC 0xD2 + +#define OMFS_DIR 'D' +#define OMFS_FILE 'F' +#define OMFS_INODE_NORMAL 'e' +#define OMFS_INODE_CONTINUATION 'c' +#define OMFS_INODE_SYSTEM 's' +#define OMFS_NAMELEN 256 +#define OMFS_DIR_START 0x1b8 +#define OMFS_EXTENT_START 0x1d0 +#define OMFS_EXTENT_CONT 0x40 +#define OMFS_XOR_COUNT 19 +#define OMFS_MAX_BLOCK_SIZE 8192 + +struct omfs_super_block { + char s_fill1[256]; + __be64 s_root_block; /* block number of omfs_root_block */ + __be64 s_num_blocks; /* total number of FS blocks */ + __be32 s_magic; /* OMFS_MAGIC */ + __be32 s_blocksize; /* size of a block */ + __be32 s_mirrors; /* # of mirrors of system blocks */ + __be32 s_sys_blocksize; /* size of non-data blocks */ +}; + +struct omfs_header { + __be64 h_self; /* FS block where this is located */ + __be32 h_body_size; /* size of useful data after header */ + __be16 h_crc; /* crc-ccitt of body_size bytes */ + char h_fill1[2]; + u8 h_version; /* version, always 1 */ + char h_type; /* OMFS_INODE_X */ + u8 h_magic; /* OMFS_IMAGIC */ + u8 h_check_xor; /* XOR of header bytes before this */ + __be32 h_fill2; +}; + +struct omfs_root_block { + struct omfs_header r_head; /* header */ + __be64 r_fill1; + __be64 r_num_blocks; /* total number of FS blocks */ + __be64 r_root_dir; /* block # of root directory */ + __be64 r_bitmap; /* block # of free space bitmap */ + __be32 r_blocksize; /* size of a block */ + __be32 r_clustersize; /* size allocated for data blocks */ + __be64 r_mirrors; /* # of mirrors of system blocks */ + char r_name[OMFS_NAMELEN]; /* partition label */ +}; + +struct omfs_inode { + struct omfs_header i_head; /* header */ + __be64 i_parent; /* parent containing this inode */ + __be64 i_sibling; /* next inode in hash bucket */ + __be64 i_ctime; /* ctime, in milliseconds */ + char i_fill1[35]; + char i_type; /* OMFS_[DIR,FILE] */ + __be32 i_fill2; + char i_fill3[64]; + char i_name[OMFS_NAMELEN]; /* filename */ + __be64 i_size; /* size of file, in bytes */ +}; + +struct omfs_extent_entry { + __be64 e_cluster; /* start location of a set of blocks */ + __be64 e_blocks; /* number of blocks after e_cluster */ +}; + +struct omfs_extent { + __be64 e_next; /* next extent table location */ + __be32 e_extent_count; /* total # extents in this table */ + __be32 e_fill; + struct omfs_extent_entry e_entry; /* start of extent entries */ +}; + +#endif diff --git a/fs/open.c b/fs/open.c index bb98d2fe809..07da9359481 100644 --- a/fs/open.c +++ b/fs/open.c @@ -122,37 +122,37 @@ static int vfs_statfs64(struct dentry *dentry, struct statfs64 *buf) return 0; } -asmlinkage long sys_statfs(const char __user * path, struct statfs __user * buf) +asmlinkage long sys_statfs(const char __user *pathname, struct statfs __user * buf) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (!error) { struct statfs tmp; - error = vfs_statfs_native(nd.path.dentry, &tmp); + error = vfs_statfs_native(path.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; - path_put(&nd.path); + path_put(&path); } return error; } -asmlinkage long sys_statfs64(const char __user *path, size_t sz, struct statfs64 __user *buf) +asmlinkage long sys_statfs64(const char __user *pathname, size_t sz, struct statfs64 __user *buf) { - struct nameidata nd; + struct path path; long error; if (sz != sizeof(*buf)) return -EINVAL; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (!error) { struct statfs64 tmp; - error = vfs_statfs64(nd.path.dentry, &tmp); + error = vfs_statfs64(path.dentry, &tmp); if (!error && copy_to_user(buf, &tmp, sizeof(tmp))) error = -EFAULT; - path_put(&nd.path); + path_put(&path); } return error; } @@ -223,20 +223,20 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs, return err; } -static long do_sys_truncate(const char __user * path, loff_t length) +static long do_sys_truncate(const char __user *pathname, loff_t length) { - struct nameidata nd; - struct inode * inode; + struct path path; + struct inode *inode; int error; error = -EINVAL; if (length < 0) /* sorry, but loff_t says... */ goto out; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) goto out; - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; /* For directories it's -EISDIR, for other non-regulars - -EINVAL */ error = -EISDIR; @@ -247,16 +247,16 @@ static long do_sys_truncate(const char __user * path, loff_t length) if (!S_ISREG(inode->i_mode)) goto dput_and_out; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto dput_and_out; - error = vfs_permission(&nd, MAY_WRITE); + error = inode_permission(inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + if (IS_APPEND(inode)) goto mnt_drop_write_and_out; error = get_write_access(inode); @@ -274,15 +274,15 @@ static long do_sys_truncate(const char __user * path, loff_t length) error = locks_verify_truncate(inode, NULL, length); if (!error) { DQUOT_INIT(inode); - error = do_truncate(nd.path.dentry, length, 0, NULL); + error = do_truncate(path.dentry, length, 0, NULL); } put_write_and_out: put_write_access(inode); mnt_drop_write_and_out: - mnt_drop_write(nd.path.mnt); + mnt_drop_write(path.mnt); dput_and_out: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -425,7 +425,8 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { - struct nameidata nd; + struct path path; + struct inode *inode; int old_fsuid, old_fsgid; kernel_cap_t uninitialized_var(old_cap); /* !SECURE_NO_SETUID_FIXUP */ int res; @@ -448,7 +449,7 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) * FIXME: There is a race here against sys_capset. The * capabilities can change yet we will restore the old * value below. We should hold task_capabilities_lock, - * but we cannot because user_path_walk can sleep. + * but we cannot because user_path_at can sleep. */ #endif /* ndef CONFIG_SECURITY_FILE_CAPABILITIES */ if (current->uid) @@ -457,14 +458,25 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) old_cap = cap_set_effective(current->cap_permitted); } - res = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); + res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); if (res) goto out; - res = vfs_permission(&nd, mode); + inode = path.dentry->d_inode; + + if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { + /* + * MAY_EXEC on regular files is denied if the fs is mounted + * with the "noexec" flag. + */ + res = -EACCES; + if (path.mnt->mnt_flags & MNT_NOEXEC) + goto out_path_release; + } + + res = inode_permission(inode, mode | MAY_ACCESS); /* SuS v2 requires we report a read only fs too */ - if(res || !(mode & S_IWOTH) || - special_file(nd.path.dentry->d_inode->i_mode)) + if (res || !(mode & S_IWOTH) || special_file(inode->i_mode)) goto out_path_release; /* * This is a rare case where using __mnt_is_readonly() @@ -476,11 +488,11 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) * inherently racy and know that the fs may change * state before we even see this result. */ - if (__mnt_is_readonly(nd.path.mnt)) + if (__mnt_is_readonly(path.mnt)) res = -EROFS; out_path_release: - path_put(&nd.path); + path_put(&path); out: current->fsuid = old_fsuid; current->fsgid = old_fsgid; @@ -498,22 +510,21 @@ asmlinkage long sys_access(const char __user *filename, int mode) asmlinkage long sys_chdir(const char __user * filename) { - struct nameidata nd; + struct path path; int error; - error = __user_walk(filename, - LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_CHDIR, &nd); + error = user_path_dir(filename, &path); if (error) goto out; - error = vfs_permission(&nd, MAY_EXEC); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); if (error) goto dput_and_out; - set_fs_pwd(current->fs, &nd.path); + set_fs_pwd(current->fs, &path); dput_and_out: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -535,7 +546,7 @@ asmlinkage long sys_fchdir(unsigned int fd) if (!S_ISDIR(inode->i_mode)) goto out_putf; - error = file_permission(file, MAY_EXEC); + error = inode_permission(inode, MAY_EXEC | MAY_ACCESS); if (!error) set_fs_pwd(current->fs, &file->f_path); out_putf: @@ -546,14 +557,14 @@ out: asmlinkage long sys_chroot(const char __user * filename) { - struct nameidata nd; + struct path path; int error; - error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + error = user_path_dir(filename, &path); if (error) goto out; - error = vfs_permission(&nd, MAY_EXEC); + error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_ACCESS); if (error) goto dput_and_out; @@ -561,11 +572,10 @@ asmlinkage long sys_chroot(const char __user * filename) if (!capable(CAP_SYS_CHROOT)) goto dput_and_out; - set_fs_root(current->fs, &nd.path); - set_fs_altroot(); + set_fs_root(current->fs, &path); error = 0; dput_and_out: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -590,9 +600,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) err = mnt_want_write(file->f_path.mnt); if (err) goto out_putf; - err = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out_drop_write; mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; @@ -600,8 +607,6 @@ asmlinkage long sys_fchmod(unsigned int fd, mode_t mode) newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; err = notify_change(dentry, &newattrs); mutex_unlock(&inode->i_mutex); - -out_drop_write: mnt_drop_write(file->f_path.mnt); out_putf: fput(file); @@ -612,36 +617,29 @@ out: asmlinkage long sys_fchmodat(int dfd, const char __user *filename, mode_t mode) { - struct nameidata nd; - struct inode * inode; + struct path path; + struct inode *inode; int error; struct iattr newattrs; - error = __user_walk_fd(dfd, filename, LOOKUP_FOLLOW, &nd); + error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); if (error) goto out; - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto dput_and_out; - - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out_drop_write; - mutex_lock(&inode->i_mutex); if (mode == (mode_t) -1) mode = inode->i_mode; newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO); newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; - error = notify_change(nd.path.dentry, &newattrs); + error = notify_change(path.dentry, &newattrs); mutex_unlock(&inode->i_mutex); - -out_drop_write: - mnt_drop_write(nd.path.mnt); + mnt_drop_write(path.mnt); dput_and_out: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -653,18 +651,10 @@ asmlinkage long sys_chmod(const char __user *filename, mode_t mode) static int chown_common(struct dentry * dentry, uid_t user, gid_t group) { - struct inode * inode; + struct inode *inode = dentry->d_inode; int error; struct iattr newattrs; - error = -ENOENT; - if (!(inode = dentry->d_inode)) { - printk(KERN_ERR "chown_common: NULL inode\n"); - goto out; - } - error = -EPERM; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - goto out; newattrs.ia_valid = ATTR_CTIME; if (user != (uid_t) -1) { newattrs.ia_valid |= ATTR_UID; @@ -680,25 +670,25 @@ static int chown_common(struct dentry * dentry, uid_t user, gid_t group) mutex_lock(&inode->i_mutex); error = notify_change(dentry, &newattrs); mutex_unlock(&inode->i_mutex); -out: + return error; } asmlinkage long sys_chown(const char __user * filename, uid_t user, gid_t group) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(filename, &nd); + error = user_path(filename, &path); if (error) goto out; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(nd.path.dentry, user, group); - mnt_drop_write(nd.path.mnt); + error = chown_common(path.dentry, user, group); + mnt_drop_write(path.mnt); out_release: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -706,7 +696,7 @@ out: asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, gid_t group, int flag) { - struct nameidata nd; + struct path path; int error = -EINVAL; int follow; @@ -714,35 +704,35 @@ asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user, goto out; follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; - error = __user_walk_fd(dfd, filename, follow, &nd); + error = user_path_at(dfd, filename, follow, &path); if (error) goto out; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(nd.path.dentry, user, group); - mnt_drop_write(nd.path.mnt); + error = chown_common(path.dentry, user, group); + mnt_drop_write(path.mnt); out_release: - path_put(&nd.path); + path_put(&path); out: return error; } asmlinkage long sys_lchown(const char __user * filename, uid_t user, gid_t group) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk_link(filename, &nd); + error = user_lpath(filename, &path); if (error) goto out; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto out_release; - error = chown_common(nd.path.dentry, user, group); - mnt_drop_write(nd.path.mnt); + error = chown_common(path.dentry, user, group); + mnt_drop_write(path.mnt); out_release: - path_put(&nd.path); + path_put(&path); out: return error; } @@ -973,71 +963,6 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) } EXPORT_SYMBOL(dentry_open); -/* - * Find an empty file descriptor entry, and mark it busy. - */ -int get_unused_fd_flags(int flags) -{ - struct files_struct * files = current->files; - int fd, error; - struct fdtable *fdt; - - error = -EMFILE; - spin_lock(&files->file_lock); - -repeat: - fdt = files_fdtable(files); - fd = find_next_zero_bit(fdt->open_fds->fds_bits, fdt->max_fds, - files->next_fd); - - /* - * N.B. For clone tasks sharing a files structure, this test - * will limit the total number of files that can be opened. - */ - if (fd >= current->signal->rlim[RLIMIT_NOFILE].rlim_cur) - goto out; - - /* Do we need to expand the fd array or fd set? */ - error = expand_files(files, fd); - if (error < 0) - goto out; - - if (error) { - /* - * If we needed to expand the fs array we - * might have blocked - try again. - */ - error = -EMFILE; - goto repeat; - } - - FD_SET(fd, fdt->open_fds); - if (flags & O_CLOEXEC) - FD_SET(fd, fdt->close_on_exec); - else - FD_CLR(fd, fdt->close_on_exec); - files->next_fd = fd + 1; -#if 1 - /* Sanity check */ - if (fdt->fd[fd] != NULL) { - printk(KERN_WARNING "get_unused_fd: slot %d not NULL!\n", fd); - fdt->fd[fd] = NULL; - } -#endif - error = fd; - -out: - spin_unlock(&files->file_lock); - return error; -} - -int get_unused_fd(void) -{ - return get_unused_fd_flags(0); -} - -EXPORT_SYMBOL(get_unused_fd); - static void __put_unused_fd(struct files_struct *files, unsigned int fd) { struct fdtable *fdt = files_fdtable(files); diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index d17b4fd204e..9f5b054f06b 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -430,7 +430,7 @@ static struct file_system_type openprom_fs_type = { .kill_sb = kill_anon_super, }; -static void op_inode_init_once(struct kmem_cache * cachep, void *data) +static void op_inode_init_once(void *data) { struct op_inode_info *oi = (struct op_inode_info *) data; diff --git a/fs/pipe.c b/fs/pipe.c index 10c4e9aa5c4..fcba6542b8d 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -777,45 +777,10 @@ pipe_rdwr_open(struct inode *inode, struct file *filp) /* * The file_operations structs are not static because they * are also used in linux/fs/fifo.c to do operations on FIFOs. + * + * Pipes reuse fifos' file_operations structs. */ -const struct file_operations read_fifo_fops = { - .llseek = no_llseek, - .read = do_sync_read, - .aio_read = pipe_read, - .write = bad_pipe_w, - .poll = pipe_poll, - .unlocked_ioctl = pipe_ioctl, - .open = pipe_read_open, - .release = pipe_read_release, - .fasync = pipe_read_fasync, -}; - -const struct file_operations write_fifo_fops = { - .llseek = no_llseek, - .read = bad_pipe_r, - .write = do_sync_write, - .aio_write = pipe_write, - .poll = pipe_poll, - .unlocked_ioctl = pipe_ioctl, - .open = pipe_write_open, - .release = pipe_write_release, - .fasync = pipe_write_fasync, -}; - -const struct file_operations rdwr_fifo_fops = { - .llseek = no_llseek, - .read = do_sync_read, - .aio_read = pipe_read, - .write = do_sync_write, - .aio_write = pipe_write, - .poll = pipe_poll, - .unlocked_ioctl = pipe_ioctl, - .open = pipe_rdwr_open, - .release = pipe_rdwr_release, - .fasync = pipe_rdwr_fasync, -}; - -static const struct file_operations read_pipe_fops = { +const struct file_operations read_pipefifo_fops = { .llseek = no_llseek, .read = do_sync_read, .aio_read = pipe_read, @@ -827,7 +792,7 @@ static const struct file_operations read_pipe_fops = { .fasync = pipe_read_fasync, }; -static const struct file_operations write_pipe_fops = { +const struct file_operations write_pipefifo_fops = { .llseek = no_llseek, .read = bad_pipe_r, .write = do_sync_write, @@ -839,7 +804,7 @@ static const struct file_operations write_pipe_fops = { .fasync = pipe_write_fasync, }; -static const struct file_operations rdwr_pipe_fops = { +const struct file_operations rdwr_pipefifo_fops = { .llseek = no_llseek, .read = do_sync_read, .aio_read = pipe_read, @@ -927,7 +892,7 @@ static struct inode * get_pipe_inode(void) inode->i_pipe = pipe; pipe->readers = pipe->writers = 1; - inode->i_fop = &rdwr_pipe_fops; + inode->i_fop = &rdwr_pipefifo_fops; /* * Mark the inode dirty from the very beginning, @@ -978,7 +943,7 @@ struct file *create_write_pipe(int flags) d_instantiate(dentry, inode); err = -ENFILE; - f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipe_fops); + f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops); if (!f) goto err_dentry; f->f_mapping = inode->i_mapping; @@ -1020,7 +985,7 @@ struct file *create_read_pipe(struct file *wrf, int flags) f->f_pos = 0; f->f_flags = O_RDONLY | (flags & O_NONBLOCK); - f->f_op = &read_pipe_fops; + f->f_op = &read_pipefifo_fops; f->f_mode = FMODE_READ; f->f_version = 0; diff --git a/fs/proc/array.c b/fs/proc/array.c index 797d775e035..0d6eb33597c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -80,6 +80,7 @@ #include <linux/delayacct.h> #include <linux/seq_file.h> #include <linux/pid_namespace.h> +#include <linux/tracehook.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -168,8 +169,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, rcu_read_lock(); ppid = pid_alive(p) ? task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; - tpid = pid_alive(p) && p->ptrace ? - task_pid_nr_ns(rcu_dereference(p->parent), ns) : 0; + tpid = 0; + if (pid_alive(p)) { + struct task_struct *tracer = tracehook_tracer_task(p); + if (tracer) + tpid = task_pid_nr_ns(tracer, ns); + } seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" diff --git a/fs/proc/base.c b/fs/proc/base.c index a891fe4cb43..a28840b11b8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -53,6 +53,7 @@ #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> +#include <linux/task_io_accounting_ops.h> #include <linux/init.h> #include <linux/capability.h> #include <linux/file.h> @@ -69,6 +70,7 @@ #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> +#include <linux/tracehook.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> @@ -231,10 +233,14 @@ static int check_mem_permission(struct task_struct *task) * If current is actively ptrace'ing, and would also be * permitted to freshly attach with ptrace now, permit it. */ - if (task->parent == current && (task->ptrace & PT_PTRACED) && - task_is_stopped_or_traced(task) && - ptrace_may_access(task, PTRACE_MODE_ATTACH)) - return 0; + if (task_is_stopped_or_traced(task)) { + int match; + rcu_read_lock(); + match = (tracehook_tracer_task(task) == current); + rcu_read_unlock(); + if (match && ptrace_may_access(task, PTRACE_MODE_ATTACH)) + return 0; + } /* * Noone else is allowed. @@ -504,6 +510,26 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) return count; } +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK +static int proc_pid_syscall(struct task_struct *task, char *buffer) +{ + long nr; + unsigned long args[6], sp, pc; + + if (task_current_syscall(task, &nr, args, 6, &sp, &pc)) + return sprintf(buffer, "running\n"); + + if (nr < 0) + return sprintf(buffer, "%ld 0x%lx 0x%lx\n", nr, sp, pc); + + return sprintf(buffer, + "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n", + nr, + args[0], args[1], args[2], args[3], args[4], args[5], + sp, pc); +} +#endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ @@ -1834,8 +1860,7 @@ static const struct file_operations proc_fd_operations = { * /proc/pid/fd needs a special permission handler so that a process can still * access /proc/self/fd after it has executed a setuid(). */ -static int proc_fd_permission(struct inode *inode, int mask, - struct nameidata *nd) +static int proc_fd_permission(struct inode *inode, int mask) { int rv; @@ -2378,53 +2403,18 @@ static int proc_base_fill_cache(struct file *filp, void *dirent, #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { - u64 rchar, wchar, syscr, syscw; - struct task_io_accounting ioac; - - if (!whole) { - rchar = task->rchar; - wchar = task->wchar; - syscr = task->syscr; - syscw = task->syscw; - memcpy(&ioac, &task->ioac, sizeof(ioac)); - } else { - unsigned long flags; - struct task_struct *t = task; - rchar = wchar = syscr = syscw = 0; - memset(&ioac, 0, sizeof(ioac)); - - rcu_read_lock(); - do { - rchar += t->rchar; - wchar += t->wchar; - syscr += t->syscr; - syscw += t->syscw; - - ioac.read_bytes += t->ioac.read_bytes; - ioac.write_bytes += t->ioac.write_bytes; - ioac.cancelled_write_bytes += - t->ioac.cancelled_write_bytes; - t = next_thread(t); - } while (t != task); - rcu_read_unlock(); - - if (lock_task_sighand(task, &flags)) { - struct signal_struct *sig = task->signal; + struct task_io_accounting acct = task->ioac; + unsigned long flags; - rchar += sig->rchar; - wchar += sig->wchar; - syscr += sig->syscr; - syscw += sig->syscw; + if (whole && lock_task_sighand(task, &flags)) { + struct task_struct *t = task; - ioac.read_bytes += sig->ioac.read_bytes; - ioac.write_bytes += sig->ioac.write_bytes; - ioac.cancelled_write_bytes += - sig->ioac.cancelled_write_bytes; + task_io_accounting_add(&acct, &task->signal->ioac); + while_each_thread(task, t) + task_io_accounting_add(&acct, &t->ioac); - unlock_task_sighand(task, &flags); - } + unlock_task_sighand(task, &flags); } - return sprintf(buffer, "rchar: %llu\n" "wchar: %llu\n" @@ -2433,13 +2423,13 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", - (unsigned long long)rchar, - (unsigned long long)wchar, - (unsigned long long)syscr, - (unsigned long long)syscw, - (unsigned long long)ioac.read_bytes, - (unsigned long long)ioac.write_bytes, - (unsigned long long)ioac.cancelled_write_bytes); + (unsigned long long)acct.rchar, + (unsigned long long)acct.wchar, + (unsigned long long)acct.syscr, + (unsigned long long)acct.syscw, + (unsigned long long)acct.read_bytes, + (unsigned long long)acct.write_bytes, + (unsigned long long)acct.cancelled_write_bytes); } static int proc_tid_io_accounting(struct task_struct *task, char *buffer) @@ -2473,6 +2463,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + INF("syscall", S_IRUSR, pid_syscall), +#endif INF("cmdline", S_IRUGO, pid_cmdline), ONE("stat", S_IRUGO, tgid_stat), ONE("statm", S_IRUGO, pid_statm), @@ -2805,6 +2798,9 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif +#ifdef CONFIG_HAVE_ARCH_TRACEHOOK + INF("syscall", S_IRUSR, pid_syscall), +#endif INF("cmdline", S_IRUGO, pid_cmdline), ONE("stat", S_IRUGO, tid_stat), ONE("statm", S_IRUGO, pid_statm), diff --git a/fs/proc/generic.c b/fs/proc/generic.c index bc0a0dd2d84..4fb81e9c94e 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -300,10 +300,10 @@ out: return rtn; } -static DEFINE_IDR(proc_inum_idr); +static DEFINE_IDA(proc_inum_ida); static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ -#define PROC_DYNAMIC_FIRST 0xF0000000UL +#define PROC_DYNAMIC_FIRST 0xF0000000U /* * Return an inode number between PROC_DYNAMIC_FIRST and @@ -311,36 +311,33 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ */ static unsigned int get_inode_number(void) { - int i, inum = 0; + unsigned int i; int error; retry: - if (idr_pre_get(&proc_inum_idr, GFP_KERNEL) == 0) + if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) return 0; spin_lock(&proc_inum_lock); - error = idr_get_new(&proc_inum_idr, NULL, &i); + error = ida_get_new(&proc_inum_ida, &i); spin_unlock(&proc_inum_lock); if (error == -EAGAIN) goto retry; else if (error) return 0; - inum = (i & MAX_ID_MASK) + PROC_DYNAMIC_FIRST; - - /* inum will never be more than 0xf0ffffff, so no check - * for overflow. - */ - - return inum; + if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { + spin_lock(&proc_inum_lock); + ida_remove(&proc_inum_ida, i); + spin_unlock(&proc_inum_lock); + } + return PROC_DYNAMIC_FIRST + i; } static void release_inode_number(unsigned int inum) { - int id = (inum - PROC_DYNAMIC_FIRST) | ~MAX_ID_MASK; - spin_lock(&proc_inum_lock); - idr_remove(&proc_inum_idr, id); + ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); spin_unlock(&proc_inum_lock); } @@ -806,12 +803,9 @@ continue_removing: if (S_ISDIR(de->mode)) parent->nlink--; de->nlink = 0; - if (de->subdir) { - printk(KERN_WARNING "%s: removing non-empty directory " + WARN(de->subdir, KERN_WARNING "%s: removing non-empty directory " "'%s/%s', leaking at least '%s'\n", __func__, de->parent->name, de->name, de->subdir->name); - WARN_ON(1); - } if (atomic_dec_and_test(&de->count)) free_proc_entry(de); } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 02eca2ed9dd..8bb03f056c2 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -17,6 +17,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/smp_lock.h> +#include <linux/sysctl.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -65,6 +66,8 @@ static void proc_delete_inode(struct inode *inode) module_put(de->owner); de_put(de); } + if (PROC_I(inode)->sysctl) + sysctl_head_put(PROC_I(inode)->sysctl); clear_inode(inode); } @@ -84,6 +87,8 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->fd = 0; ei->op.proc_get_link = NULL; ei->pde = NULL; + ei->sysctl = NULL; + ei->sysctl_entry = NULL; inode = &ei->vfs_inode; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; return inode; @@ -94,7 +99,7 @@ static void proc_destroy_inode(struct inode *inode) kmem_cache_free(proc_inode_cachep, PROC_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct proc_inode *ei = (struct proc_inode *) foo; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5acc001d49f..f9a8b892718 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -10,149 +10,110 @@ static struct dentry_operations proc_sys_dentry_operations; static const struct file_operations proc_sys_file_operations; static const struct inode_operations proc_sys_inode_operations; +static const struct file_operations proc_sys_dir_file_operations; +static const struct inode_operations proc_sys_dir_operations; -static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table) -{ - /* Refresh the cached information bits in the inode */ - if (table) { - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_mode = table->mode; - if (table->proc_handler) { - inode->i_mode |= S_IFREG; - inode->i_nlink = 1; - } else { - inode->i_mode |= S_IFDIR; - inode->i_nlink = 0; /* It is too hard to figure out */ - } - } -} - -static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table) +static struct inode *proc_sys_make_inode(struct super_block *sb, + struct ctl_table_header *head, struct ctl_table *table) { struct inode *inode; - struct proc_inode *dir_ei, *ei; - int depth; + struct proc_inode *ei; - inode = new_inode(dir->i_sb); + inode = new_inode(sb); if (!inode) goto out; - /* A directory is always one deeper than it's parent */ - dir_ei = PROC_I(dir); - depth = dir_ei->fd + 1; - + sysctl_head_get(head); ei = PROC_I(inode); - ei->fd = depth; + ei->sysctl = head; + ei->sysctl_entry = table; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - inode->i_op = &proc_sys_inode_operations; - inode->i_fop = &proc_sys_file_operations; inode->i_flags |= S_PRIVATE; /* tell selinux to ignore this inode */ - proc_sys_refresh_inode(inode, table); + inode->i_mode = table->mode; + if (!table->child) { + inode->i_mode |= S_IFREG; + inode->i_op = &proc_sys_inode_operations; + inode->i_fop = &proc_sys_file_operations; + } else { + inode->i_mode |= S_IFDIR; + inode->i_nlink = 0; + inode->i_op = &proc_sys_dir_operations; + inode->i_fop = &proc_sys_dir_file_operations; + } out: return inode; } -static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth) -{ - for (;;) { - struct proc_inode *ei; - - ei = PROC_I(dentry->d_inode); - if (ei->fd == depth) - break; /* found */ - - dentry = dentry->d_parent; - } - return dentry; -} - -static struct ctl_table *proc_sys_lookup_table_one(struct ctl_table *table, - struct qstr *name) +static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) { int len; - for ( ; table->ctl_name || table->procname; table++) { + for ( ; p->ctl_name || p->procname; p++) { - if (!table->procname) + if (!p->procname) continue; - len = strlen(table->procname); + len = strlen(p->procname); if (len != name->len) continue; - if (memcmp(table->procname, name->name, len) != 0) + if (memcmp(p->procname, name->name, len) != 0) continue; /* I have a match */ - return table; + return p; } return NULL; } -static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry, - struct ctl_table *table) +struct ctl_table_header *grab_header(struct inode *inode) { - struct dentry *ancestor; - struct proc_inode *ei; - int depth, i; + if (PROC_I(inode)->sysctl) + return sysctl_head_grab(PROC_I(inode)->sysctl); + else + return sysctl_head_next(NULL); +} - ei = PROC_I(dentry->d_inode); - depth = ei->fd; +static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, + struct nameidata *nd) +{ + struct ctl_table_header *head = grab_header(dir); + struct ctl_table *table = PROC_I(dir)->sysctl_entry; + struct ctl_table_header *h = NULL; + struct qstr *name = &dentry->d_name; + struct ctl_table *p; + struct inode *inode; + struct dentry *err = ERR_PTR(-ENOENT); - if (depth == 0) - return table; + if (IS_ERR(head)) + return ERR_CAST(head); - for (i = 1; table && (i <= depth); i++) { - ancestor = proc_sys_ancestor(dentry, i); - table = proc_sys_lookup_table_one(table, &ancestor->d_name); - if (table) - table = table->child; + if (table && !table->child) { + WARN_ON(1); + goto out; } - return table; - -} -static struct ctl_table *proc_sys_lookup_entry(struct dentry *dparent, - struct qstr *name, - struct ctl_table *table) -{ - table = proc_sys_lookup_table(dparent, table); - if (table) - table = proc_sys_lookup_table_one(table, name); - return table; -} -static struct ctl_table *do_proc_sys_lookup(struct dentry *parent, - struct qstr *name, - struct ctl_table_header **ptr) -{ - struct ctl_table_header *head; - struct ctl_table *table = NULL; + table = table ? table->child : head->ctl_table; - for (head = sysctl_head_next(NULL); head; - head = sysctl_head_next(head)) { - table = proc_sys_lookup_entry(parent, name, head->ctl_table); - if (table) - break; + p = find_in_table(table, name); + if (!p) { + for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { + if (h->attached_to != table) + continue; + p = find_in_table(h->attached_by, name); + if (p) + break; + } } - *ptr = head; - return table; -} - -static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) -{ - struct ctl_table_header *head; - struct inode *inode; - struct dentry *err; - struct ctl_table *table; - err = ERR_PTR(-ENOENT); - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - if (!table) + if (!p) goto out; err = ERR_PTR(-ENOMEM); - inode = proc_sys_make_inode(dir, table); + inode = proc_sys_make_inode(dir->i_sb, h ? h : head, p); + if (h) + sysctl_head_finish(h); + if (!inode) goto out; @@ -168,22 +129,14 @@ out: static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, size_t count, loff_t *ppos, int write) { - struct dentry *dentry = filp->f_dentry; - struct ctl_table_header *head; - struct ctl_table *table; + struct inode *inode = filp->f_path.dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; ssize_t error; size_t res; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - /* Has the sysctl entry disappeared on us? */ - error = -ENOENT; - if (!table) - goto out; - - /* Has the sysctl entry been replaced by a directory? */ - error = -EISDIR; - if (!table->proc_handler) - goto out; + if (IS_ERR(head)) + return PTR_ERR(head); /* * At this point we know that the sysctl was not unregistered @@ -193,6 +146,11 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, if (sysctl_perm(head->root, table, write ? MAY_WRITE : MAY_READ)) goto out; + /* if that can happen at all, it should be -EINVAL, not -EISDIR */ + error = -EINVAL; + if (!table->proc_handler) + goto out; + /* careful: calling conventions are nasty here */ res = count; error = table->proc_handler(table, write, filp, buf, &res, ppos); @@ -218,82 +176,86 @@ static ssize_t proc_sys_write(struct file *filp, const char __user *buf, static int proc_sys_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct ctl_table *table) + filldir_t filldir, + struct ctl_table_header *head, + struct ctl_table *table) { - struct ctl_table_header *head; - struct ctl_table *child_table = NULL; struct dentry *child, *dir = filp->f_path.dentry; struct inode *inode; struct qstr qname; ino_t ino = 0; unsigned type = DT_UNKNOWN; - int ret; qname.name = table->procname; qname.len = strlen(table->procname); qname.hash = full_name_hash(qname.name, qname.len); - /* Suppress duplicates. - * Only fill a directory entry if it is the value that - * an ordinary lookup of that name returns. Hide all - * others. - * - * If we ever cache this translation in the dcache - * I should do a dcache lookup first. But for now - * it is just simpler not to. - */ - ret = 0; - child_table = do_proc_sys_lookup(dir, &qname, &head); - sysctl_head_finish(head); - if (child_table != table) - return 0; - child = d_lookup(dir, &qname); if (!child) { - struct dentry *new; - new = d_alloc(dir, &qname); - if (new) { - inode = proc_sys_make_inode(dir->d_inode, table); - if (!inode) - child = ERR_PTR(-ENOMEM); - else { - new->d_op = &proc_sys_dentry_operations; - d_add(new, inode); + child = d_alloc(dir, &qname); + if (child) { + inode = proc_sys_make_inode(dir->d_sb, head, table); + if (!inode) { + dput(child); + return -ENOMEM; + } else { + child->d_op = &proc_sys_dentry_operations; + d_add(child, inode); } - if (child) - dput(new); - else - child = new; + } else { + return -ENOMEM; } } - if (!child || IS_ERR(child) || !child->d_inode) - goto end_instantiate; inode = child->d_inode; - if (inode) { - ino = inode->i_ino; - type = inode->i_mode >> 12; - } + ino = inode->i_ino; + type = inode->i_mode >> 12; dput(child); -end_instantiate: - if (!ino) - ino= find_inode_number(dir, &qname); - if (!ino) - ino = 1; - return filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); + return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); +} + +static int scan(struct ctl_table_header *head, ctl_table *table, + unsigned long *pos, struct file *file, + void *dirent, filldir_t filldir) +{ + + for (; table->ctl_name || table->procname; table++, (*pos)++) { + int res; + + /* Can't do anything without a proc name */ + if (!table->procname) + continue; + + if (*pos < file->f_pos) + continue; + + res = proc_sys_fill_cache(file, dirent, filldir, head, table); + if (res) + return res; + + file->f_pos = *pos + 1; + } + return 0; } static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) { - struct dentry *dentry = filp->f_dentry; + struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; - struct ctl_table_header *head = NULL; - struct ctl_table *table; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_table_header *h = NULL; unsigned long pos; - int ret; + int ret = -EINVAL; + + if (IS_ERR(head)) + return PTR_ERR(head); - ret = -ENOTDIR; - if (!S_ISDIR(inode->i_mode)) + if (table && !table->child) { + WARN_ON(1); goto out; + } + + table = table ? table->child : head->ctl_table; ret = 0; /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ @@ -311,30 +273,17 @@ static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) } pos = 2; - /* - Find each instance of the directory - * - Read all entries in each instance - * - Before returning an entry to user space lookup the entry - * by name and if I find a different entry don't return - * this one because it means it is a buried dup. - * For sysctl this should only happen for directory entries. - */ - for (head = sysctl_head_next(NULL); head; head = sysctl_head_next(head)) { - table = proc_sys_lookup_table(dentry, head->ctl_table); + ret = scan(head, table, &pos, filp, dirent, filldir); + if (ret) + goto out; - if (!table) + for (h = sysctl_head_next(NULL); h; h = sysctl_head_next(h)) { + if (h->attached_to != table) continue; - - for (; table->ctl_name || table->procname; table++, pos++) { - /* Can't do anything without a proc name */ - if (!table->procname) - continue; - - if (pos < filp->f_pos) - continue; - - if (proc_sys_fill_cache(filp, dirent, filldir, table) < 0) - goto out; - filp->f_pos = pos + 1; + ret = scan(h, h->attached_by, &pos, filp, dirent, filldir); + if (ret) { + sysctl_head_finish(h); + break; } } ret = 1; @@ -343,53 +292,24 @@ out: return ret; } -static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *nd) +static int proc_sys_permission(struct inode *inode, int mask) { /* * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. */ - struct ctl_table_header *head; - struct ctl_table *table; - struct dentry *dentry; - int mode; - int depth; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; int error; - head = NULL; - depth = PROC_I(inode)->fd; - - /* First check the cached permissions, in case we don't have - * enough information to lookup the sysctl table entry. - */ - error = -EACCES; - mode = inode->i_mode; - - if (current->euid == 0) - mode >>= 6; - else if (in_group_p(0)) - mode >>= 3; - - if ((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask) - error = 0; - - /* If we can't get a sysctl table entry the permission - * checks on the cached mode will have to be enough. - */ - if (!nd || !depth) - goto out; + if (IS_ERR(head)) + return PTR_ERR(head); - dentry = nd->path.dentry; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); + if (!table) /* global root - r-xr-xr-x */ + error = mask & MAY_WRITE ? -EACCES : 0; + else /* Use the permissions on the sysctl table entry */ + error = sysctl_perm(head->root, table, mask); - /* If the entry does not exist deny permission */ - error = -EACCES; - if (!table) - goto out; - - /* Use the permissions on the sysctl table entry */ - error = sysctl_perm(head->root, table, mask); -out: sysctl_head_finish(head); return error; } @@ -409,33 +329,70 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return error; } -/* I'm lazy and don't distinguish between files and directories, - * until access time. - */ +static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + struct ctl_table_header *head = grab_header(inode); + struct ctl_table *table = PROC_I(inode)->sysctl_entry; + + if (IS_ERR(head)) + return PTR_ERR(head); + + generic_fillattr(inode, stat); + if (table) + stat->mode = (stat->mode & S_IFMT) | table->mode; + + sysctl_head_finish(head); + return 0; +} + static const struct file_operations proc_sys_file_operations = { .read = proc_sys_read, .write = proc_sys_write, +}; + +static const struct file_operations proc_sys_dir_file_operations = { .readdir = proc_sys_readdir, }; static const struct inode_operations proc_sys_inode_operations = { + .permission = proc_sys_permission, + .setattr = proc_sys_setattr, + .getattr = proc_sys_getattr, +}; + +static const struct inode_operations proc_sys_dir_operations = { .lookup = proc_sys_lookup, .permission = proc_sys_permission, .setattr = proc_sys_setattr, + .getattr = proc_sys_getattr, }; static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) { - struct ctl_table_header *head; - struct ctl_table *table; - table = do_proc_sys_lookup(dentry->d_parent, &dentry->d_name, &head); - proc_sys_refresh_inode(dentry->d_inode, table); - sysctl_head_finish(head); - return !!table; + return !PROC_I(dentry->d_inode)->sysctl->unregistering; +} + +static int proc_sys_delete(struct dentry *dentry) +{ + return !!PROC_I(dentry->d_inode)->sysctl->unregistering; +} + +static int proc_sys_compare(struct dentry *dir, struct qstr *qstr, + struct qstr *name) +{ + struct dentry *dentry = container_of(qstr, struct dentry, d_name); + if (qstr->len != name->len) + return 1; + if (memcmp(qstr->name, name->name, name->len)) + return 1; + return !sysctl_is_seen(PROC_I(dentry->d_inode)->sysctl); } static struct dentry_operations proc_sys_dentry_operations = { .d_revalidate = proc_sys_revalidate, + .d_delete = proc_sys_delete, + .d_compare = proc_sys_compare, }; static struct proc_dir_entry *proc_sys_root; @@ -443,8 +400,8 @@ static struct proc_dir_entry *proc_sys_root; int proc_sys_init(void) { proc_sys_root = proc_mkdir("sys", NULL); - proc_sys_root->proc_iops = &proc_sys_inode_operations; - proc_sys_root->proc_fops = &proc_sys_file_operations; + proc_sys_root->proc_iops = &proc_sys_dir_operations; + proc_sys_root->proc_fops = &proc_sys_dir_file_operations; proc_sys_root->nlink = 0; return 0; } diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index b31ab78052b..2aad1044b84 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -553,7 +553,7 @@ static void qnx4_destroy_inode(struct inode *inode) kmem_cache_free(qnx4_inode_cachep, qnx4_i(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct qnx4_inode_info *ei = (struct qnx4_inode_info *) foo; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 192269698a8..5699171212a 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2435,7 +2435,7 @@ static int reiserfs_write_full_page(struct page *page, if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { lock_buffer(bh); } else { - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { redirty_page_for_writepage(wbc, page); continue; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c8f60ee183b..c21df71943a 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -627,7 +627,7 @@ static int journal_list_still_alive(struct super_block *s, static void release_buffer_page(struct buffer_head *bh) { struct page *page = bh->b_page; - if (!page->mapping && !TestSetPageLocked(page)) { + if (!page->mapping && trylock_page(page)) { page_cache_get(page); put_bh(bh); if (!page->mapping) @@ -855,7 +855,7 @@ static int write_ordered_buffers(spinlock_t * lock, jh = JH_ENTRY(list->next); bh = jh->bh; get_bh(bh); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!buffer_dirty(bh)) { list_move(&jh->list, &tmp); goto loop_next; @@ -3871,7 +3871,7 @@ int reiserfs_prepare_for_journal(struct super_block *p_s_sb, { PROC_INFO_INC(p_s_sb, journal.prepare); - if (test_set_buffer_locked(bh)) { + if (!trylock_buffer(bh)) { if (!wait) return 0; lock_buffer(bh); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2ec748ba0bd..d318c7e663f 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -27,7 +27,6 @@ #include <linux/mnt_namespace.h> #include <linux/mount.h> #include <linux/namei.h> -#include <linux/quotaops.h> struct file_system_type reiserfs_fs_type; @@ -521,7 +520,7 @@ static void reiserfs_destroy_inode(struct inode *inode) kmem_cache_free(reiserfs_inode_cachep, REISERFS_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct reiserfs_inode_info *ei = (struct reiserfs_inode_info *)foo; @@ -2076,8 +2075,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, return err; /* Quotafile not on the same filesystem? */ if (nd.path.mnt->mnt_sb != sb) { - path_put(&nd.path); - return -EXDEV; + err = -EXDEV; + goto out; } inode = nd.path.dentry->d_inode; /* We must not pack tails for quota files on reiserfs for quota IO to work */ @@ -2087,8 +2086,8 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, reiserfs_warning(sb, "reiserfs: Unpacking tail of quota file failed" " (%d). Cannot turn on quotas.", err); - path_put(&nd.path); - return -EINVAL; + err = -EINVAL; + goto out; } mark_inode_dirty(inode); } @@ -2109,13 +2108,15 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, /* Just start temporary transaction and finish it */ err = journal_begin(&th, sb, 1); if (err) - return err; + goto out; err = journal_end_sync(&th, sb, 1); if (err) - return err; + goto out; } + err = vfs_quota_on_path(sb, type, format_id, &nd.path); +out: path_put(&nd.path); - return vfs_quota_on(sb, type, format_id, path, 0); + return err; } /* Read data from quotafile - avoid pagecache and such because we cannot afford diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index d7c4935c103..bb3cb5b7cdb 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -1250,7 +1250,7 @@ static int reiserfs_check_acl(struct inode *inode, int mask) return error; } -int reiserfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int reiserfs_permission(struct inode *inode, int mask) { /* * We don't do permission checks on the internal objects. diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 3f13d491c7c..60d2f822e87 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -418,7 +418,8 @@ static int romfs_readpage(struct file *file, struct page * page) { struct inode *inode = page->mapping->host; - loff_t offset, avail, readlen; + loff_t offset, size; + unsigned long filled; void *buf; int result = -EIO; @@ -430,21 +431,29 @@ romfs_readpage(struct file *file, struct page * page) /* 32 bit warning -- but not for us :) */ offset = page_offset(page); - if (offset < i_size_read(inode)) { - avail = inode->i_size-offset; - readlen = min_t(unsigned long, avail, PAGE_SIZE); - if (romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen) == readlen) { - if (readlen < PAGE_SIZE) { - memset(buf + readlen,0,PAGE_SIZE-readlen); - } - SetPageUptodate(page); - result = 0; + size = i_size_read(inode); + filled = 0; + result = 0; + if (offset < size) { + unsigned long readlen; + + size -= offset; + readlen = size > PAGE_SIZE ? PAGE_SIZE : size; + + filled = romfs_copyfrom(inode, buf, ROMFS_I(inode)->i_dataoffset+offset, readlen); + + if (filled != readlen) { + SetPageError(page); + filled = 0; + result = -EIO; } } - if (result) { - memset(buf, 0, PAGE_SIZE); - SetPageError(page); - } + + if (filled < PAGE_SIZE) + memset(buf + filled, 0, PAGE_SIZE-filled); + + if (!result) + SetPageUptodate(page); flush_dcache_page(page); unlock_page(page); @@ -577,7 +586,7 @@ static void romfs_destroy_inode(struct inode *inode) kmem_cache_free(romfs_inode_cachep, ROMFS_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct romfs_inode_info *ei = foo; diff --git a/fs/seq_file.c b/fs/seq_file.c index 3f54dbd6c49..5d54205e486 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -443,6 +443,20 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) return -1; } +int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) +{ + size_t len = bitmap_scnprintf_len(nr_bits); + + if (m->count + len < m->size) { + bitmap_scnprintf(m->buf + m->count, m->size - m->count, + bits, nr_bits); + m->count += len; + return 0; + } + m->count = m->size; + return -1; +} + static void *single_start(struct seq_file *p, loff_t *pos) { return NULL + (*pos == 0); diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index 2294783320c..e4f8d51a555 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -408,7 +408,7 @@ smb_file_release(struct inode *inode, struct file * file) * privileges, so we need our own check for this. */ static int -smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) +smb_file_permission(struct inode *inode, int mask) { int mode = inode->i_mode; int error = 0; @@ -417,7 +417,7 @@ smb_file_permission(struct inode *inode, int mask, struct nameidata *nd) /* Look at user permissions */ mode >>= 6; - if ((mode & 7 & mask) != mask) + if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC)) error = -EACCES; return error; } diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 376ef3ee6ed..3528f40ffb0 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -67,7 +67,7 @@ static void smb_destroy_inode(struct inode *inode) kmem_cache_free(smb_inode_cachep, SMB_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct smb_inode_info *ei = (struct smb_inode_info *) foo; diff --git a/fs/splice.c b/fs/splice.c index 399442179d8..1bbc6f4bb09 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -371,7 +371,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, * for an in-flight io page */ if (flags & SPLICE_F_NONBLOCK) { - if (TestSetPageLocked(page)) { + if (!trylock_page(page)) { error = -EAGAIN; break; } @@ -772,7 +772,7 @@ generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, ssize_t ret; int err; - err = remove_suid(out->f_path.dentry); + err = file_remove_suid(out); if (unlikely(err)) return err; @@ -830,7 +830,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ssize_t ret; inode_double_lock(inode, pipe->inode); - ret = remove_suid(out->f_path.dentry); + ret = file_remove_suid(out); if (likely(!ret)) ret = __splice_from_pipe(pipe, &sd, pipe_to_file); inode_double_unlock(inode, pipe->inode); @@ -1161,36 +1161,6 @@ static long do_splice(struct file *in, loff_t __user *off_in, } /* - * Do a copy-from-user while holding the mmap_semaphore for reading, in a - * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem - * for writing) and page faulting on the user memory pointed to by src. - * This assumes that we will very rarely hit the partial != 0 path, or this - * will not be a win. - */ -static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n) -{ - int partial; - - if (!access_ok(VERIFY_READ, src, n)) - return -EFAULT; - - pagefault_disable(); - partial = __copy_from_user_inatomic(dst, src, n); - pagefault_enable(); - - /* - * Didn't copy everything, drop the mmap_sem and do a faulting copy - */ - if (unlikely(partial)) { - up_read(¤t->mm->mmap_sem); - partial = copy_from_user(dst, src, n); - down_read(¤t->mm->mmap_sem); - } - - return partial; -} - -/* * Map an iov into an array of pages and offset/length tupples. With the * partial_page structure, we can map several non-contiguous ranges into * our ones pages[] map instead of splitting that operation into pieces. @@ -1203,8 +1173,6 @@ static int get_iovec_page_array(const struct iovec __user *iov, { int buffers = 0, error = 0; - down_read(¤t->mm->mmap_sem); - while (nr_vecs) { unsigned long off, npages; struct iovec entry; @@ -1213,7 +1181,7 @@ static int get_iovec_page_array(const struct iovec __user *iov, int i; error = -EFAULT; - if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry))) + if (copy_from_user(&entry, iov, sizeof(entry))) break; base = entry.iov_base; @@ -1247,9 +1215,8 @@ static int get_iovec_page_array(const struct iovec __user *iov, if (npages > PIPE_BUFFERS - buffers) npages = PIPE_BUFFERS - buffers; - error = get_user_pages(current, current->mm, - (unsigned long) base, npages, 0, 0, - &pages[buffers], NULL); + error = get_user_pages_fast((unsigned long)base, npages, + 0, &pages[buffers]); if (unlikely(error <= 0)) break; @@ -1288,8 +1255,6 @@ static int get_iovec_page_array(const struct iovec __user *iov, iov++; } - up_read(¤t->mm->mmap_sem); - if (buffers) return buffers; diff --git a/fs/stat.c b/fs/stat.c index 9cf41f719d5..7c46fbeb8b7 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -57,13 +57,13 @@ EXPORT_SYMBOL(vfs_getattr); int vfs_stat_fd(int dfd, char __user *name, struct kstat *stat) { - struct nameidata nd; + struct path path; int error; - error = __user_walk_fd(dfd, name, LOOKUP_FOLLOW, &nd); + error = user_path_at(dfd, name, LOOKUP_FOLLOW, &path); if (!error) { - error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); - path_put(&nd.path); + error = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); } return error; } @@ -77,13 +77,13 @@ EXPORT_SYMBOL(vfs_stat); int vfs_lstat_fd(int dfd, char __user *name, struct kstat *stat) { - struct nameidata nd; + struct path path; int error; - error = __user_walk_fd(dfd, name, 0, &nd); + error = user_path_at(dfd, name, 0, &path); if (!error) { - error = vfs_getattr(nd.path.mnt, nd.path.dentry, stat); - path_put(&nd.path); + error = vfs_getattr(path.mnt, path.dentry, stat); + path_put(&path); } return error; } @@ -291,29 +291,29 @@ asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf) return error; } -asmlinkage long sys_readlinkat(int dfd, const char __user *path, +asmlinkage long sys_readlinkat(int dfd, const char __user *pathname, char __user *buf, int bufsiz) { - struct nameidata nd; + struct path path; int error; if (bufsiz <= 0) return -EINVAL; - error = __user_walk_fd(dfd, path, 0, &nd); + error = user_path_at(dfd, pathname, 0, &path); if (!error) { - struct inode *inode = nd.path.dentry->d_inode; + struct inode *inode = path.dentry->d_inode; error = -EINVAL; if (inode->i_op && inode->i_op->readlink) { - error = security_inode_readlink(nd.path.dentry); + error = security_inode_readlink(path.dentry); if (!error) { - touch_atime(nd.path.mnt, nd.path.dentry); - error = inode->i_op->readlink(nd.path.dentry, + touch_atime(path.mnt, path.dentry); + error = inode->i_op->readlink(path.dentry, buf, bufsiz); } } - path_put(&nd.path); + path_put(&path); } return error; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index c1a7efb310b..aedaeba82ae 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -459,11 +459,8 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) int ret; ret = __sysfs_add_one(acxt, sd); - if (ret == -EEXIST) { - printk(KERN_WARNING "sysfs: duplicate filename '%s' " + WARN(ret == -EEXIST, KERN_WARNING "sysfs: duplicate filename '%s' " "can not be created\n", sd->s_name); - WARN_ON(1); - } return ret; } diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 3f07893ff89..c9e4e5091da 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -337,9 +337,8 @@ static int sysfs_open_file(struct inode *inode, struct file *file) if (kobj->ktype && kobj->ktype->sysfs_ops) ops = kobj->ktype->sysfs_ops; else { - printk(KERN_ERR "missing sysfs attribute operations for " + WARN(1, KERN_ERR "missing sysfs attribute operations for " "kobject: %s\n", kobject_name(kobj)); - WARN_ON(1); goto err_out; } diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index eeba38417b1..fe611949a7f 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -134,9 +134,8 @@ void sysfs_remove_group(struct kobject * kobj, if (grp->name) { sd = sysfs_get_dirent(dir_sd, grp->name); if (!sd) { - printk(KERN_WARNING "sysfs group %p not found for " + WARN(!sd, KERN_WARNING "sysfs group %p not found for " "kobject '%s'\n", grp, kobject_name(kobj)); - WARN_ON(!sd); return; } } else diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index c5d60de0658..df0d435baa4 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -326,7 +326,7 @@ static void sysv_destroy_inode(struct inode *inode) kmem_cache_free(sysv_inode_cachep, SYSV_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *p) +static void init_once(void *p) { struct sysv_inode_info *si = (struct sysv_inode_info *)p; diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index d81fb9ed2b8..15409815747 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -263,8 +263,8 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; - /* And make sure we have twice the index size of space reserved */ - idx_size <<= 1; + /* And make sure we have thrice the index size of space reserved */ + idx_size = idx_size + (idx_size << 1); /* * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes' @@ -388,11 +388,11 @@ static int can_use_rp(struct ubifs_info *c) * This function makes sure UBIFS has enough free eraseblocks for index growth * and data. * - * When budgeting index space, UBIFS reserves twice as more LEBs as the index + * When budgeting index space, UBIFS reserves thrice as many LEBs as the index * would take if it was consolidated and written to the flash. This guarantees * that the "in-the-gaps" commit method always succeeds and UBIFS will always * be able to commit dirty index. So this function basically adds amount of - * budgeted index space to the size of the current index, multiplies this by 2, + * budgeted index space to the size of the current index, multiplies this by 3, * and makes sure this does not exceed the amount of free eraseblocks. * * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: @@ -543,8 +543,16 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) int err, idx_growth, data_growth, dd_growth; struct retries_info ri; + ubifs_assert(req->new_page <= 1); + ubifs_assert(req->dirtied_page <= 1); + ubifs_assert(req->new_dent <= 1); + ubifs_assert(req->mod_dent <= 1); + ubifs_assert(req->new_ino <= 1); + ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); ubifs_assert(req->dirtied_ino <= 4); ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + ubifs_assert(!(req->new_ino_d & 7)); + ubifs_assert(!(req->dirtied_ino_d & 7)); data_growth = calc_data_growth(c, req); dd_growth = calc_dd_growth(c, req); @@ -618,8 +626,16 @@ again: */ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) { + ubifs_assert(req->new_page <= 1); + ubifs_assert(req->dirtied_page <= 1); + ubifs_assert(req->new_dent <= 1); + ubifs_assert(req->mod_dent <= 1); + ubifs_assert(req->new_ino <= 1); + ubifs_assert(req->new_ino_d <= UBIFS_MAX_INO_DATA); ubifs_assert(req->dirtied_ino <= 4); ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4); + ubifs_assert(!(req->new_ino_d & 7)); + ubifs_assert(!(req->dirtied_ino_d & 7)); if (!req->recalculate) { ubifs_assert(req->idx_growth >= 0); ubifs_assert(req->data_growth >= 0); @@ -647,7 +663,11 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) ubifs_assert(c->budg_idx_growth >= 0); ubifs_assert(c->budg_data_growth >= 0); + ubifs_assert(c->budg_dd_growth >= 0); ubifs_assert(c->min_idx_lebs < c->main_lebs); + ubifs_assert(!(c->budg_idx_growth & 7)); + ubifs_assert(!(c->budg_data_growth & 7)); + ubifs_assert(!(c->budg_dd_growth & 7)); spin_unlock(&c->space_lock); } @@ -686,9 +706,10 @@ void ubifs_convert_page_budget(struct ubifs_info *c) void ubifs_release_dirty_inode_budget(struct ubifs_info *c, struct ubifs_inode *ui) { - struct ubifs_budget_req req = {.dd_growth = c->inode_budget, - .dirtied_ino_d = ui->data_len}; + struct ubifs_budget_req req; + memset(&req, 0, sizeof(struct ubifs_budget_req)); + req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); ubifs_release_budget(c, &req); } diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 3b516316c9b..0a6aa2cc78f 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -74,6 +74,7 @@ static int do_commit(struct ubifs_info *c) goto out_up; } + c->cmt_no += 1; err = ubifs_gc_start_commit(c); if (err) goto out_up; @@ -115,7 +116,7 @@ static int do_commit(struct ubifs_info *c) goto out; mutex_lock(&c->mst_mutex); - c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no); + c->mst_node->cmt_no = cpu_to_le64(c->cmt_no); c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); c->mst_node->root_offs = cpu_to_le32(zroot.offs); diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 4e3aaeba4ec..b9cb7747375 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -568,8 +568,8 @@ void dbg_dump_budget_req(const struct ubifs_budget_req *req) void dbg_dump_lstats(const struct ubifs_lp_stats *lst) { spin_lock(&dbg_lock); - printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n", - lst->empty_lebs, lst->idx_lebs); + printk(KERN_DEBUG "(pid %d) Lprops statistics: empty_lebs %d, " + "idx_lebs %d\n", current->pid, lst->empty_lebs, lst->idx_lebs); printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, " "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free, lst->total_dirty); @@ -587,8 +587,8 @@ void dbg_dump_budg(struct ubifs_info *c) struct ubifs_gced_idx_leb *idx_gc; spin_lock(&dbg_lock); - printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, " - "budg_dd_growth %lld, budg_idx_growth %lld\n", + printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " + "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, @@ -634,7 +634,7 @@ void dbg_dump_lprops(struct ubifs_info *c) struct ubifs_lprops lp; struct ubifs_lp_stats lst; - printk(KERN_DEBUG "Dumping LEB properties\n"); + printk(KERN_DEBUG "(pid %d) Dumping LEB properties\n", current->pid); ubifs_get_lp_stats(c, &lst); dbg_dump_lstats(&lst); @@ -655,7 +655,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) if (dbg_failure_mode) return; - printk(KERN_DEBUG "Dumping LEB %d\n", lnum); + printk(KERN_DEBUG "(pid %d) Dumping LEB %d\n", current->pid, lnum); sleb = ubifs_scan(c, lnum, 0, c->dbg_buf); if (IS_ERR(sleb)) { @@ -720,8 +720,8 @@ void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat) { int i; - printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n", - cat, heap->cnt); + printk(KERN_DEBUG "(pid %d) Dumping heap cat %d (%d elements)\n", + current->pid, cat, heap->cnt); for (i = 0; i < heap->cnt; i++) { struct ubifs_lprops *lprops = heap->arr[i]; @@ -736,7 +736,7 @@ void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode, { int i; - printk(KERN_DEBUG "Dumping pnode:\n"); + printk(KERN_DEBUG "(pid %d) Dumping pnode:\n", current->pid); printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n", (size_t)pnode, (size_t)parent, (size_t)pnode->cnext); printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n", @@ -755,7 +755,7 @@ void dbg_dump_tnc(struct ubifs_info *c) int level; printk(KERN_DEBUG "\n"); - printk(KERN_DEBUG "Dumping the TNC tree\n"); + printk(KERN_DEBUG "(pid %d) Dumping the TNC tree\n", current->pid); znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL); level = znode->level; printk(KERN_DEBUG "== Level %d ==\n", level); @@ -2208,16 +2208,17 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, int offset, int len, int dtype) { - int err; + int err, failing; if (in_failure_mode(desc)) return -EIO; - if (do_fail(desc, lnum, 1)) + failing = do_fail(desc, lnum, 1); + if (failing) cut_data(buf, len); err = ubi_leb_write(desc, lnum, buf, offset, len, dtype); if (err) return err; - if (in_failure_mode(desc)) + if (failing) return -EIO; return 0; } diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 3c4f1e93c9e..50315fc5718 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -27,7 +27,7 @@ #define UBIFS_DBG(op) op -#define ubifs_assert(expr) do { \ +#define ubifs_assert(expr) do { \ if (unlikely(!(expr))) { \ printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ __func__, __LINE__, current->pid); \ @@ -73,50 +73,50 @@ const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key); /* - * DBGKEY macros require dbg_lock to be held, which it is in the dbg message + * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message * macros. */ #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) /* General messages */ -#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) +#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) /* Additional journal messages */ -#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) +#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) /* Additional TNC messages */ -#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) +#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) /* Additional lprops messages */ -#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) +#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) /* Additional LEB find messages */ -#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) +#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) /* Additional mount messages */ -#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) +#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) /* Additional I/O messages */ -#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) +#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) /* Additional commit messages */ -#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) +#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) /* Additional budgeting messages */ -#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) +#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) /* Additional log messages */ -#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) +#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) /* Additional gc messages */ -#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) +#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) /* Additional scan messages */ -#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) +#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) /* Additional recovery messages */ -#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) +#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) /* * Debugging message type flags (must match msg_type_names in debug.c). @@ -239,34 +239,23 @@ typedef int (*dbg_leaf_callback)(struct ubifs_info *c, struct ubifs_zbranch *zbr, void *priv); typedef int (*dbg_znode_callback)(struct ubifs_info *c, struct ubifs_znode *znode, void *priv); - int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, dbg_znode_callback znode_cb, void *priv); /* Checking functions */ int dbg_check_lprops(struct ubifs_info *c); - int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); - int dbg_check_cats(struct ubifs_info *c); - int dbg_check_ltab(struct ubifs_info *c); - int dbg_check_synced_i_size(struct inode *inode); - int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); - int dbg_check_tnc(struct ubifs_info *c, int extra); - int dbg_check_idx_size(struct ubifs_info *c, long long idx_size); - int dbg_check_filesystem(struct ubifs_info *c); - void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, int add_pos); - int dbg_check_lprops(struct ubifs_info *c); int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, int row, int col); @@ -329,71 +318,77 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, #else /* !CONFIG_UBIFS_FS_DEBUG */ #define UBIFS_DBG(op) -#define ubifs_assert(expr) ({}) -#define ubifs_assert_cmt_locked(c) + +/* Use "if (0)" to make compiler check arguments even if debugging is off */ +#define ubifs_assert(expr) do { \ + if (0 && (expr)) \ + printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \ + __func__, __LINE__, current->pid); \ +} while (0) + +#define dbg_err(fmt, ...) do { \ + if (0) \ + ubifs_err(fmt, ##__VA_ARGS__); \ +} while (0) + +#define dbg_msg(fmt, ...) do { \ + if (0) \ + printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ + current->pid, __func__, ##__VA_ARGS__); \ +} while (0) + #define dbg_dump_stack() -#define dbg_err(fmt, ...) ({}) -#define dbg_msg(fmt, ...) ({}) -#define dbg_key(c, key, fmt, ...) ({}) - -#define dbg_gen(fmt, ...) ({}) -#define dbg_jnl(fmt, ...) ({}) -#define dbg_tnc(fmt, ...) ({}) -#define dbg_lp(fmt, ...) ({}) -#define dbg_find(fmt, ...) ({}) -#define dbg_mnt(fmt, ...) ({}) -#define dbg_io(fmt, ...) ({}) -#define dbg_cmt(fmt, ...) ({}) -#define dbg_budg(fmt, ...) ({}) -#define dbg_log(fmt, ...) ({}) -#define dbg_gc(fmt, ...) ({}) -#define dbg_scan(fmt, ...) ({}) -#define dbg_rcvry(fmt, ...) ({}) - -#define dbg_ntype(type) "" -#define dbg_cstate(cmt_state) "" -#define dbg_get_key_dump(c, key) ({}) -#define dbg_dump_inode(c, inode) ({}) -#define dbg_dump_node(c, node) ({}) -#define dbg_dump_budget_req(req) ({}) -#define dbg_dump_lstats(lst) ({}) -#define dbg_dump_budg(c) ({}) -#define dbg_dump_lprop(c, lp) ({}) -#define dbg_dump_lprops(c) ({}) -#define dbg_dump_leb(c, lnum) ({}) -#define dbg_dump_znode(c, znode) ({}) -#define dbg_dump_heap(c, heap, cat) ({}) -#define dbg_dump_pnode(c, pnode, parent, iip) ({}) -#define dbg_dump_tnc(c) ({}) -#define dbg_dump_index(c) ({}) +#define ubifs_assert_cmt_locked(c) -#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 +#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) + +#define DBGKEY(key) ((char *)(key)) +#define DBGKEY1(key) ((char *)(key)) + +#define dbg_ntype(type) "" +#define dbg_cstate(cmt_state) "" +#define dbg_get_key_dump(c, key) ({}) +#define dbg_dump_inode(c, inode) ({}) +#define dbg_dump_node(c, node) ({}) +#define dbg_dump_budget_req(req) ({}) +#define dbg_dump_lstats(lst) ({}) +#define dbg_dump_budg(c) ({}) +#define dbg_dump_lprop(c, lp) ({}) +#define dbg_dump_lprops(c) ({}) +#define dbg_dump_leb(c, lnum) ({}) +#define dbg_dump_znode(c, znode) ({}) +#define dbg_dump_heap(c, heap, cat) ({}) +#define dbg_dump_pnode(c, pnode, parent, iip) ({}) +#define dbg_dump_tnc(c) ({}) +#define dbg_dump_index(c) ({}) +#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 #define dbg_old_index_check_init(c, zroot) 0 #define dbg_check_old_index(c, zroot) 0 - #define dbg_check_cats(c) 0 - #define dbg_check_ltab(c) 0 - #define dbg_check_synced_i_size(inode) 0 - #define dbg_check_dir_size(c, dir) 0 - #define dbg_check_tnc(c, x) 0 - #define dbg_check_idx_size(c, idx_size) 0 - #define dbg_check_filesystem(c) 0 - #define dbg_check_heap(c, heap, cat, add_pos) ({}) - #define dbg_check_lprops(c) 0 #define dbg_check_lpt_nodes(c, cnode, row, col) 0 - #define dbg_force_in_the_gaps_enabled 0 #define dbg_force_in_the_gaps() 0 - #define dbg_failure_mode 0 #define dbg_failure_mode_registration(c) ({}) #define dbg_failure_mode_deregistration(c) ({}) diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index e90374be7d3..5c96f1fb701 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -165,7 +165,6 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, } inode->i_ino = ++c->highest_inum; - inode->i_generation = ++c->vfs_gen; /* * The creation sequence number remains with this inode for its * lifetime. All nodes for this inode have a greater sequence number, @@ -220,15 +219,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name); if (err) { - /* - * Do not hash the direntry if parent 'i_nlink' is zero, because - * this has side-effects - '->delete_inode()' call will not be - * called for the parent orphan inode, because 'd_count' of its - * direntry will stay 1 (it'll be negative direntry I guess) - * and prevent 'iput_final()' until the dentry is destroyed due - * to unmount or memory pressure. - */ - if (err == -ENOENT && dir->i_nlink != 0) { + if (err == -ENOENT) { dbg_gen("not found"); goto done; } @@ -525,7 +516,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, struct ubifs_inode *dir_ui = ubifs_inode(dir); int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; /* * Budget request settings: new direntry, changing the target inode, @@ -727,8 +718,7 @@ static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct ubifs_inode *dir_ui = ubifs_inode(dir); struct ubifs_info *c = dir->i_sb->s_fs_info; int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len); - struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .dirtied_ino_d = 1 }; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; /* * Budget request settings: new inode, new direntry and changing parent @@ -789,7 +779,8 @@ static int ubifs_mknod(struct inode *dir, struct dentry *dentry, int sz_change = CALC_DENT_SIZE(dentry->d_name.len); int err, devlen = 0; struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .new_ino_d = devlen, .dirtied_ino = 1 }; + .new_ino_d = ALIGN(devlen, 8), + .dirtied_ino = 1 }; /* * Budget request settings: new inode, new direntry and changing parent @@ -863,7 +854,8 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, int err, len = strlen(symname); int sz_change = CALC_DENT_SIZE(dentry->d_name.len); struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .new_ino_d = len, .dirtied_ino = 1 }; + .new_ino_d = ALIGN(len, 8), + .dirtied_ino = 1 }; /* * Budget request settings: new inode, new direntry and changing parent @@ -1012,7 +1004,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1, .dirtied_ino = 3 }; struct ubifs_budget_req ino_req = { .dirtied_ino = 1, - .dirtied_ino_d = old_inode_ui->data_len }; + .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; struct timespec time; /* diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 005a3b854d9..4071d1cae29 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -53,6 +53,7 @@ #include "ubifs.h" #include <linux/mount.h> +#include <linux/namei.h> static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) @@ -889,7 +890,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, loff_t new_size = attr->ia_size; struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) @@ -940,7 +941,8 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) struct inode *inode = dentry->d_inode; struct ubifs_info *c = inode->i_sb->s_fs_info; - dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid); + dbg_gen("ino %lu, mode %#x, ia_valid %#x", + inode->i_ino, inode->i_mode, attr->ia_valid); err = inode_change_ok(inode, attr); if (err) return err; @@ -1050,7 +1052,7 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode) if (mctime_update_needed(inode, &now)) { int err, release; struct ubifs_budget_req req = { .dirtied_ino = 1, - .dirtied_ino_d = ui->data_len }; + .dirtied_ino_d = ALIGN(ui->data_len, 8) }; err = ubifs_budget_space(c, &req); if (err) @@ -1269,6 +1271,7 @@ struct file_operations ubifs_file_operations = { .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, #ifdef CONFIG_COMPAT .compat_ioctl = ubifs_compat_ioctl, #endif diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 10394c54836..adee7b5ddea 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -290,9 +290,14 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, idx_lp = idx_heap->arr[0]; sum = idx_lp->free + idx_lp->dirty; /* - * Since we reserve twice as more space for the index than it + * Since we reserve thrice as much space for the index than it * actually takes, it does not make sense to pick indexing LEBs - * with less than half LEB of dirty space. + * with less than, say, half LEB of dirty space. May be half is + * not the optimal boundary - this should be tested and + * checked. This boundary should determine how much we use + * in-the-gaps to consolidate the index comparing to how much + * we use garbage collector to consolidate it. The "half" + * criteria just feels to be fine. */ if (sum < min_space || sum < c->half_leb_size) idx_lp = NULL; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 3374f91b670..054363f2b20 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -54,6 +54,20 @@ #include "ubifs.h" /** + * ubifs_ro_mode - switch UBIFS to read read-only mode. + * @c: UBIFS file-system description object + * @err: error code which is the reason of switching to R/O mode + */ +void ubifs_ro_mode(struct ubifs_info *c, int err) +{ + if (!c->ro_media) { + c->ro_media = 1; + ubifs_warn("switched to read-only mode, error %d", err); + dbg_dump_stack(); + } +} + +/** * ubifs_check_node - check node. * @c: UBIFS file-system description object * @buf: node to check diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 283155abe5f..22993f867d1 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -447,13 +447,11 @@ static int get_dent_type(int mode) * @ino: buffer in which to pack inode node * @inode: inode to pack * @last: indicates the last node of the group - * @last_reference: non-zero if this is a deletion inode */ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino, - const struct inode *inode, int last, - int last_reference) + const struct inode *inode, int last) { - int data_len = 0; + int data_len = 0, last_reference = !inode->i_nlink; struct ubifs_inode *ui = ubifs_inode(inode); ino->ch.node_type = UBIFS_INO_NODE; @@ -596,9 +594,9 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, ubifs_prep_grp_node(c, dent, dlen, 0); ino = (void *)dent + aligned_dlen; - pack_inode(c, ino, inode, 0, last_reference); + pack_inode(c, ino, inode, 0); ino = (void *)ino + aligned_ilen; - pack_inode(c, ino, dir, 1, 0); + pack_inode(c, ino, dir, 1); if (last_reference) { err = ubifs_add_orphan(c, inode->i_ino); @@ -606,6 +604,7 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, release_head(c, BASEHD); goto out_finish; } + ui->del_cmtno = c->cmt_no; } err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync); @@ -750,30 +749,25 @@ out_free: * ubifs_jnl_write_inode - flush inode to the journal. * @c: UBIFS file-system description object * @inode: inode to flush - * @deletion: inode has been deleted * * This function writes inode @inode to the journal. If the inode is * synchronous, it also synchronizes the write-buffer. Returns zero in case of * success and a negative error code in case of failure. */ -int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, - int deletion) +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode) { - int err, len, lnum, offs, sync = 0; + int err, lnum, offs; struct ubifs_ino_node *ino; struct ubifs_inode *ui = ubifs_inode(inode); + int sync = 0, len = UBIFS_INO_NODE_SZ, last_reference = !inode->i_nlink; - dbg_jnl("ino %lu%s", inode->i_ino, - deletion ? " (last reference)" : ""); - if (deletion) - ubifs_assert(inode->i_nlink == 0); + dbg_jnl("ino %lu, nlink %u", inode->i_ino, inode->i_nlink); - len = UBIFS_INO_NODE_SZ; /* * If the inode is being deleted, do not write the attached data. No * need to synchronize the write-buffer either. */ - if (!deletion) { + if (!last_reference) { len += ui->data_len; sync = IS_SYNC(inode); } @@ -786,7 +780,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, if (err) goto out_free; - pack_inode(c, ino, inode, 1, deletion); + pack_inode(c, ino, inode, 1); err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync); if (err) goto out_release; @@ -795,7 +789,7 @@ int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, inode->i_ino); release_head(c, BASEHD); - if (deletion) { + if (last_reference) { err = ubifs_tnc_remove_ino(c, inode->i_ino); if (err) goto out_ro; @@ -828,6 +822,65 @@ out_free: } /** + * ubifs_jnl_delete_inode - delete an inode. + * @c: UBIFS file-system description object + * @inode: inode to delete + * + * This function deletes inode @inode which includes removing it from orphans, + * deleting it from TNC and, in some cases, writing a deletion inode to the + * journal. + * + * When regular file inodes are unlinked or a directory inode is removed, the + * 'ubifs_jnl_update()' function writes a corresponding deletion inode and + * direntry to the media, and adds the inode to orphans. After this, when the + * last reference to this inode has been dropped, this function is called. In + * general, it has to write one more deletion inode to the media, because if + * a commit happened between 'ubifs_jnl_update()' and + * 'ubifs_jnl_delete_inode()', the deletion inode is not in the journal + * anymore, and in fact it might not be on the flash anymore, because it might + * have been garbage-collected already. And for optimization reasons UBIFS does + * not read the orphan area if it has been unmounted cleanly, so it would have + * no indication in the journal that there is a deleted inode which has to be + * removed from TNC. + * + * However, if there was no commit between 'ubifs_jnl_update()' and + * 'ubifs_jnl_delete_inode()', then there is no need to write the deletion + * inode to the media for the second time. And this is quite a typical case. + * + * This function returns zero in case of success and a negative error code in + * case of failure. + */ +int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode) +{ + int err; + struct ubifs_inode *ui = ubifs_inode(inode); + + ubifs_assert(inode->i_nlink == 0); + + if (ui->del_cmtno != c->cmt_no) + /* A commit happened for sure */ + return ubifs_jnl_write_inode(c, inode); + + down_read(&c->commit_sem); + /* + * Check commit number again, because the first test has been done + * without @c->commit_sem, so a commit might have happened. + */ + if (ui->del_cmtno != c->cmt_no) { + up_read(&c->commit_sem); + return ubifs_jnl_write_inode(c, inode); + } + + err = ubifs_tnc_remove_ino(c, inode->i_ino); + if (err) + ubifs_ro_mode(c, err); + else + ubifs_delete_orphan(c, inode->i_ino); + up_read(&c->commit_sem); + return err; +} + +/** * ubifs_jnl_rename - rename a directory entry. * @c: UBIFS file-system description object * @old_dir: parent inode of directory entry to rename @@ -917,16 +970,16 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, p = (void *)dent2 + aligned_dlen2; if (new_inode) { - pack_inode(c, p, new_inode, 0, last_reference); + pack_inode(c, p, new_inode, 0); p += ALIGN(ilen, 8); } if (!move) - pack_inode(c, p, old_dir, 1, 0); + pack_inode(c, p, old_dir, 1); else { - pack_inode(c, p, old_dir, 0, 0); + pack_inode(c, p, old_dir, 0); p += ALIGN(plen, 8); - pack_inode(c, p, new_dir, 1, 0); + pack_inode(c, p, new_dir, 1); } if (last_reference) { @@ -935,6 +988,7 @@ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, release_head(c, BASEHD); goto out_finish; } + new_ui->del_cmtno = c->cmt_no; } err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync); @@ -1131,7 +1185,7 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, if (err) goto out_free; - pack_inode(c, ino, inode, 0, 0); + pack_inode(c, ino, inode, 0); ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1); if (dlen) ubifs_prep_grp_node(c, dn, dlen, 1); @@ -1251,9 +1305,9 @@ int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host, ubifs_prep_grp_node(c, xent, xlen, 0); ino = (void *)xent + aligned_xlen; - pack_inode(c, ino, inode, 0, 1); + pack_inode(c, ino, inode, 0); ino = (void *)ino + UBIFS_INO_NODE_SZ; - pack_inode(c, ino, host, 1, 0); + pack_inode(c, ino, host, 1); err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync); if (!sync && !err) @@ -1320,7 +1374,7 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, const struct inode *host) { int err, len1, len2, aligned_len, aligned_len1, lnum, offs; - struct ubifs_inode *host_ui = ubifs_inode(inode); + struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_ino_node *ino; union ubifs_key key; int sync = IS_DIRSYNC(host); @@ -1344,8 +1398,8 @@ int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode, if (err) goto out_free; - pack_inode(c, ino, host, 0, 0); - pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0); + pack_inode(c, ino, host, 0); + pack_inode(c, (void *)ino + aligned_len1, inode, 1); err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0); if (!sync && !err) { diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 36857b9ed59..3e0aa736755 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -317,6 +317,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) return 0; out_unlock: + if (err != -EAGAIN) + ubifs_ro_mode(c, err); mutex_unlock(&c->log_mutex); kfree(ref); kfree(bud); @@ -410,7 +412,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) return -ENOMEM; cs->ch.node_type = UBIFS_CS_NODE; - cs->cmt_no = cpu_to_le64(c->cmt_no + 1); + cs->cmt_no = cpu_to_le64(c->cmt_no); ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0); /* diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 4beccfc256d..87dabf9fe74 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -80,20 +80,6 @@ static inline struct ubifs_inode *ubifs_inode(const struct inode *inode) } /** - * ubifs_ro_mode - switch UBIFS to read read-only mode. - * @c: UBIFS file-system description object - * @err: error code which is the reason of switching to R/O mode - */ -static inline void ubifs_ro_mode(struct ubifs_info *c, int err) -{ - if (!c->ro_media) { - c->ro_media = 1; - ubifs_warn("switched to read-only mode, error %d", err); - dbg_dump_stack(); - } -} - -/** * ubifs_compr_present - check if compressor was compiled in. * @compr_type: compressor type to check * @@ -322,7 +308,7 @@ static inline long long ubifs_reported_space(const struct ubifs_info *c, { int divisor, factor; - divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1); + divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; do_div(free, divisor); diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 3afeb9242c6..02d3462f4d3 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -310,10 +310,10 @@ static int write_orph_node(struct ubifs_info *c, int atomic) c->cmt_orphans -= cnt; spin_unlock(&c->orphan_lock); if (c->cmt_orphans) - orph->cmt_no = cpu_to_le64(c->cmt_no + 1); + orph->cmt_no = cpu_to_le64(c->cmt_no); else /* Mark the last node of the commit */ - orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63)); + orph->cmt_no = cpu_to_le64((c->cmt_no) | (1ULL << 63)); ubifs_assert(c->ohead_offs + len <= c->leb_size); ubifs_assert(c->ohead_lnum >= c->orph_first); ubifs_assert(c->ohead_lnum <= c->orph_last); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 00eb9c68ad0..f71e6b8822c 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -30,7 +30,6 @@ #include <linux/slab.h> #include <linux/module.h> #include <linux/ctype.h> -#include <linux/random.h> #include <linux/kthread.h> #include <linux/parser.h> #include <linux/seq_file.h> @@ -149,7 +148,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) if (err) goto out_invalid; - /* Disable readahead */ + /* Disable read-ahead */ inode->i_mapping->backing_dev_info = &c->bdi; switch (inode->i_mode & S_IFMT) { @@ -278,7 +277,7 @@ static void ubifs_destroy_inode(struct inode *inode) */ static int ubifs_write_inode(struct inode *inode, int wait) { - int err; + int err = 0; struct ubifs_info *c = inode->i_sb->s_fs_info; struct ubifs_inode *ui = ubifs_inode(inode); @@ -299,10 +298,18 @@ static int ubifs_write_inode(struct inode *inode, int wait) return 0; } - dbg_gen("inode %lu", inode->i_ino); - err = ubifs_jnl_write_inode(c, inode, 0); - if (err) - ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); + /* + * As an optimization, do not write orphan inodes to the media just + * because this is not needed. + */ + dbg_gen("inode %lu, mode %#x, nlink %u", + inode->i_ino, (int)inode->i_mode, inode->i_nlink); + if (inode->i_nlink) { + err = ubifs_jnl_write_inode(c, inode); + if (err) + ubifs_err("can't write inode %lu, error %d", + inode->i_ino, err); + } ui->dirty = 0; mutex_unlock(&ui->ui_mutex); @@ -314,8 +321,9 @@ static void ubifs_delete_inode(struct inode *inode) { int err; struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_inode *ui = ubifs_inode(inode); - if (ubifs_inode(inode)->xattr) + if (ui->xattr) /* * Extended attribute inode deletions are fully handled in * 'ubifs_removexattr()'. These inodes are special and have @@ -323,7 +331,7 @@ static void ubifs_delete_inode(struct inode *inode) */ goto out; - dbg_gen("inode %lu", inode->i_ino); + dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); ubifs_assert(!atomic_read(&inode->i_count)); ubifs_assert(inode->i_nlink == 0); @@ -331,15 +339,19 @@ static void ubifs_delete_inode(struct inode *inode) if (is_bad_inode(inode)) goto out; - ubifs_inode(inode)->ui_size = inode->i_size = 0; - err = ubifs_jnl_write_inode(c, inode, 1); + ui->ui_size = inode->i_size = 0; + err = ubifs_jnl_delete_inode(c, inode); if (err) /* * Worst case we have a lost orphan inode wasting space, so a - * simple error message is ok here. + * simple error message is OK here. */ - ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); + ubifs_err("can't delete inode %lu, error %d", + inode->i_ino, err); + out: + if (ui->dirty) + ubifs_release_dirty_inode_budget(c, ui); clear_inode(inode); } @@ -1122,8 +1134,8 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_infos; - ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num, - c->vi.vol_id); + ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", + c->vi.ubi_num, c->vi.vol_id, c->vi.name); if (mounted_read_only) ubifs_msg("mounted read-only"); x = (long long)c->main_lebs * c->leb_size; @@ -1469,6 +1481,7 @@ static void ubifs_put_super(struct super_block *sb) */ ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); ubifs_assert(c->budg_idx_growth == 0); + ubifs_assert(c->budg_dd_growth == 0); ubifs_assert(c->budg_data_growth == 0); /* @@ -1657,7 +1670,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&c->orph_new); c->highest_inum = UBIFS_FIRST_INO; - get_random_bytes(&c->vfs_gen, sizeof(int)); c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM; ubi_get_volume_info(ubi, &c->vi); @@ -1671,10 +1683,10 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) } /* - * UBIFS provids 'backing_dev_info' in order to disable readahead. For + * UBIFS provides 'backing_dev_info' in order to disable read-ahead. For * UBIFS, I/O is not deferred, it is done immediately in readpage, * which means the user would have to wait not just for their own I/O - * but the readahead I/O as well i.e. completely pointless. + * but the read-ahead I/O as well i.e. completely pointless. * * Read-ahead will be disabled because @c->bdi.ra_pages is 0. */ @@ -1841,7 +1853,7 @@ static struct file_system_type ubifs_fs_type = { /* * Inode slab cache constructor. */ -static void inode_slab_ctor(struct kmem_cache *cachep, void *obj) +static void inode_slab_ctor(void *obj) { struct ubifs_inode *ui = obj; inode_init_once(&ui->vfs_inode); diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 8117e65ba2e..8ac76b1c2d5 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -372,26 +372,25 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) written = layout_leb_in_gaps(c, p); if (written < 0) { err = written; - if (err == -ENOSPC) { - if (!dbg_force_in_the_gaps_enabled) { - /* - * Do not print scary warnings if the - * debugging option which forces - * in-the-gaps is enabled. - */ - ubifs_err("out of space"); - spin_lock(&c->space_lock); - dbg_dump_budg(c); - spin_unlock(&c->space_lock); - dbg_dump_lprops(c); - } - /* Try to commit anyway */ - err = 0; - break; + if (err != -ENOSPC) { + kfree(c->gap_lebs); + c->gap_lebs = NULL; + return err; } - kfree(c->gap_lebs); - c->gap_lebs = NULL; - return err; + if (!dbg_force_in_the_gaps_enabled) { + /* + * Do not print scary warnings if the debugging + * option which forces in-the-gaps is enabled. + */ + ubifs_err("out of space"); + spin_lock(&c->space_lock); + dbg_dump_budg(c); + spin_unlock(&c->space_lock); + dbg_dump_lprops(c); + } + /* Try to commit anyway */ + err = 0; + break; } p++; cnt -= written; diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 0cc7da9bed4..bd2121f3426 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -228,10 +228,10 @@ enum { /* Minimum number of orphan area logical eraseblocks */ #define UBIFS_MIN_ORPH_LEBS 1 /* - * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1 + * Minimum number of main area logical eraseblocks (buds, 3 for the index, 1 * for GC, 1 for deletions, and at least 1 for committed data). */ -#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5) +#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 6) /* Minimum number of logical eraseblocks */ #define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \ diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index e4f89f27182..d7f706f7a30 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -20,8 +20,6 @@ * Adrian Hunter */ -/* Implementation version 0.7 */ - #ifndef __UBIFS_H__ #define __UBIFS_H__ @@ -322,6 +320,8 @@ struct ubifs_gced_idx_leb { * struct ubifs_inode - UBIFS in-memory inode description. * @vfs_inode: VFS inode description object * @creat_sqnum: sequence number at time of creation + * @del_cmtno: commit number corresponding to the time the inode was deleted, + * protected by @c->commit_sem; * @xattr_size: summarized size of all extended attributes in bytes * @xattr_cnt: count of extended attributes this inode has * @xattr_names: sum of lengths of all extended attribute names belonging to @@ -373,6 +373,7 @@ struct ubifs_gced_idx_leb { struct ubifs_inode { struct inode vfs_inode; unsigned long long creat_sqnum; + unsigned long long del_cmtno; unsigned int xattr_size; unsigned int xattr_cnt; unsigned int xattr_names; @@ -779,7 +780,7 @@ struct ubifs_compressor { /** * struct ubifs_budget_req - budget requirements of an operation. * - * @fast: non-zero if the budgeting should try to aquire budget quickly and + * @fast: non-zero if the budgeting should try to acquire budget quickly and * should not try to call write-back * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields * have to be re-calculated @@ -805,21 +806,31 @@ struct ubifs_compressor { * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made * dirty by the re-name operation. + * + * Note, UBIFS aligns node lengths to 8-bytes boundary, so the requester has to + * make sure the amount of inode data which contribute to @new_ino_d and + * @dirtied_ino_d fields are aligned. */ struct ubifs_budget_req { unsigned int fast:1; unsigned int recalculate:1; +#ifndef UBIFS_DEBUG unsigned int new_page:1; unsigned int dirtied_page:1; unsigned int new_dent:1; unsigned int mod_dent:1; unsigned int new_ino:1; unsigned int new_ino_d:13; -#ifndef UBIFS_DEBUG unsigned int dirtied_ino:4; unsigned int dirtied_ino_d:15; #else /* Not bit-fields to check for overflows */ + unsigned int new_page; + unsigned int dirtied_page; + unsigned int new_dent; + unsigned int mod_dent; + unsigned int new_ino; + unsigned int new_ino_d; unsigned int dirtied_ino; unsigned int dirtied_ino_d; #endif @@ -860,13 +871,13 @@ struct ubifs_mount_opts { * struct ubifs_info - UBIFS file-system description data structure * (per-superblock). * @vfs_sb: VFS @struct super_block object - * @bdi: backing device info object to make VFS happy and disable readahead + * @bdi: backing device info object to make VFS happy and disable read-ahead * * @highest_inum: highest used inode number - * @vfs_gen: VFS inode generation counter * @max_sqnum: current global sequence number - * @cmt_no: commit number (last successfully completed commit) - * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters + * @cmt_no: commit number of the last successfully completed commit, protected + * by @commit_sem + * @cnt_lock: protects @highest_inum and @max_sqnum counters * @fmt_version: UBIFS on-flash format version * @uuid: UUID from super block * @@ -1103,7 +1114,6 @@ struct ubifs_info { struct backing_dev_info bdi; ino_t highest_inum; - unsigned int vfs_gen; unsigned long long max_sqnum; unsigned long long cmt_no; spinlock_t cnt_lock; @@ -1346,6 +1356,7 @@ extern struct backing_dev_info ubifs_backing_dev_info; extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; /* io.c */ +void ubifs_ro_mode(struct ubifs_info *c, int err); int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len); int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, int dtype); @@ -1399,8 +1410,8 @@ int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir, int deletion, int xent); int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode, const union ubifs_key *key, const void *buf, int len); -int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode, - int last_reference); +int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode); +int ubifs_jnl_delete_inode(struct ubifs_info *c, const struct inode *inode); int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, const struct dentry *old_dentry, const struct inode *new_dir, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 1388a078e1a..649bec78b64 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -61,7 +61,7 @@ /* * Limit the number of extended attributes per inode so that the total size - * (xattr_size) is guaranteeded to fit in an 'unsigned int'. + * (@xattr_size) is guaranteeded to fit in an 'unsigned int'. */ #define MAX_XATTRS_PER_INODE 65535 @@ -103,14 +103,14 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, struct inode *inode; struct ubifs_inode *ui, *host_ui = ubifs_inode(host); struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, - .new_ino_d = size, .dirtied_ino = 1, - .dirtied_ino_d = host_ui->data_len}; + .new_ino_d = ALIGN(size, 8), .dirtied_ino = 1, + .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE) return -ENOSPC; /* * Linux limits the maximum size of the extended attribute names list - * to %XATTR_LIST_MAX. This means we should not allow creating more* + * to %XATTR_LIST_MAX. This means we should not allow creating more * extended attributes if the name list becomes larger. This limitation * is artificial for UBIFS, though. */ @@ -128,7 +128,6 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, goto out_budg; } - mutex_lock(&host_ui->ui_mutex); /* Re-define all operations to be "nothing" */ inode->i_mapping->a_ops = &none_address_operations; inode->i_op = &none_inode_operations; @@ -141,23 +140,19 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, ui->data = kmalloc(size, GFP_NOFS); if (!ui->data) { err = -ENOMEM; - goto out_unlock; + goto out_free; } - memcpy(ui->data, value, size); + inode->i_size = ui->ui_size = size; + ui->data_len = size; + + mutex_lock(&host_ui->ui_mutex); host->i_ctime = ubifs_current_time(host); host_ui->xattr_cnt += 1; host_ui->xattr_size += CALC_DENT_SIZE(nm->len); host_ui->xattr_size += CALC_XATTR_BYTES(size); host_ui->xattr_names += nm->len; - /* - * We do not use i_size_write() because nobody can race with us as we - * are holding host @host->i_mutex - every xattr operation for this - * inode is serialized by it. - */ - inode->i_size = ui->ui_size = size; - ui->data_len = size; err = ubifs_jnl_update(c, host, nm, inode, 0, 1); if (err) goto out_cancel; @@ -172,8 +167,8 @@ out_cancel: host_ui->xattr_cnt -= 1; host_ui->xattr_size -= CALC_DENT_SIZE(nm->len); host_ui->xattr_size -= CALC_XATTR_BYTES(size); -out_unlock: mutex_unlock(&host_ui->ui_mutex); +out_free: make_bad_inode(inode); iput(inode); out_budg: @@ -200,29 +195,28 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_inode *ui = ubifs_inode(inode); struct ubifs_budget_req req = { .dirtied_ino = 2, - .dirtied_ino_d = size + host_ui->data_len }; + .dirtied_ino_d = ALIGN(size, 8) + ALIGN(host_ui->data_len, 8) }; ubifs_assert(ui->data_len == inode->i_size); err = ubifs_budget_space(c, &req); if (err) return err; - mutex_lock(&host_ui->ui_mutex); - host->i_ctime = ubifs_current_time(host); - host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); - host_ui->xattr_size += CALC_XATTR_BYTES(size); - kfree(ui->data); ui->data = kmalloc(size, GFP_NOFS); if (!ui->data) { err = -ENOMEM; - goto out_unlock; + goto out_free; } - memcpy(ui->data, value, size); inode->i_size = ui->ui_size = size; ui->data_len = size; + mutex_lock(&host_ui->ui_mutex); + host->i_ctime = ubifs_current_time(host); + host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len); + host_ui->xattr_size += CALC_XATTR_BYTES(size); + /* * It is important to write the host inode after the xattr inode * because if the host inode gets synchronized (via 'fsync()'), then @@ -240,9 +234,9 @@ static int change_xattr(struct ubifs_info *c, struct inode *host, out_cancel: host_ui->xattr_size -= CALC_XATTR_BYTES(size); host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len); - make_bad_inode(inode); -out_unlock: mutex_unlock(&host_ui->ui_mutex); + make_bad_inode(inode); +out_free: ubifs_release_budget(c, &req); return err; } @@ -312,6 +306,7 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name, host->i_ino, dentry->d_name.len, dentry->d_name.name, size); + ubifs_assert(mutex_is_locked(&host->i_mutex)); if (size > UBIFS_MAX_INO_DATA) return -ERANGE; @@ -384,7 +379,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, if (!xent) return -ENOMEM; - mutex_lock(&host->i_mutex); xent_key_init(c, &key, host->i_ino, &nm); err = ubifs_tnc_lookup_nm(c, &key, xent, &nm); if (err) { @@ -419,7 +413,6 @@ ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, out_iput: iput(inode); out_unlock: - mutex_unlock(&host->i_mutex); kfree(xent); return err; } @@ -449,8 +442,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) return -ERANGE; lowest_xent_key(c, &key, host->i_ino); - - mutex_lock(&host->i_mutex); while (1) { int type; @@ -479,7 +470,6 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) pxent = xent; key_read(c, &xent->key, &key); } - mutex_unlock(&host->i_mutex); kfree(pxent); if (err != -ENOENT) { @@ -497,8 +487,8 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host, int err; struct ubifs_inode *host_ui = ubifs_inode(host); struct ubifs_inode *ui = ubifs_inode(inode); - struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1, - .dirtied_ino_d = host_ui->data_len }; + struct ubifs_budget_req req = { .dirtied_ino = 2, .mod_dent = 1, + .dirtied_ino_d = ALIGN(host_ui->data_len, 8) }; ubifs_assert(ui->data_len == inode->i_size); diff --git a/fs/udf/super.c b/fs/udf/super.c index 44cc702f96c..5698bbf83bb 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -148,7 +148,7 @@ static void udf_destroy_inode(struct inode *inode) kmem_cache_free(udf_inode_cachep, UDF_I(inode)); } -static void init_once(struct kmem_cache *cachep, void *foo) +static void init_once(void *foo) { struct udf_inode_info *ei = (struct udf_inode_info *)foo; diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 227c9d70004..3141969b456 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; - const struct match_token *tp = tokens; + struct match_token *tp = tokens; while (tp->token != Opt_onerror_panic && tp->token != mval) ++tp; @@ -1302,7 +1302,7 @@ static void ufs_destroy_inode(struct inode *inode) kmem_cache_free(ufs_inode_cachep, UFS_I(inode)); } -static void init_once(struct kmem_cache * cachep, void *foo) +static void init_once(void *foo) { struct ufs_inode_info *ei = (struct ufs_inode_info *) foo; diff --git a/fs/utimes.c b/fs/utimes.c index b6b664e7145..6929e3e91d0 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -48,66 +48,22 @@ static bool nsec_valid(long nsec) return nsec >= 0 && nsec <= 999999999; } -/* If times==NULL, set access and modification to current time, - * must be owner or have write permission. - * Else, update from *times, must be owner or super user. - */ -long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags) +static int utimes_common(struct path *path, struct timespec *times) { int error; - struct nameidata nd; - struct dentry *dentry; - struct inode *inode; struct iattr newattrs; - struct file *f = NULL; - struct vfsmount *mnt; - - error = -EINVAL; - if (times && (!nsec_valid(times[0].tv_nsec) || - !nsec_valid(times[1].tv_nsec))) { - goto out; - } - - if (flags & ~AT_SYMLINK_NOFOLLOW) - goto out; - - if (filename == NULL && dfd != AT_FDCWD) { - error = -EINVAL; - if (flags & AT_SYMLINK_NOFOLLOW) - goto out; + struct inode *inode = path->dentry->d_inode; - error = -EBADF; - f = fget(dfd); - if (!f) - goto out; - dentry = f->f_path.dentry; - mnt = f->f_path.mnt; - } else { - error = __user_walk_fd(dfd, filename, (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW, &nd); - if (error) - goto out; - - dentry = nd.path.dentry; - mnt = nd.path.mnt; - } - - inode = dentry->d_inode; - - error = mnt_want_write(mnt); + error = mnt_want_write(path->mnt); if (error) - goto dput_and_out; + goto out; if (times && times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) times = NULL; - /* In most cases, the checks are done in inode_change_ok() */ newattrs.ia_valid = ATTR_CTIME | ATTR_MTIME | ATTR_ATIME; if (times) { - error = -EPERM; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - goto mnt_drop_write_and_out; - if (times[0].tv_nsec == UTIME_OMIT) newattrs.ia_valid &= ~ATTR_ATIME; else if (times[0].tv_nsec != UTIME_NOW) { @@ -123,21 +79,13 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags newattrs.ia_mtime.tv_nsec = times[1].tv_nsec; newattrs.ia_valid |= ATTR_MTIME_SET; } - /* - * For the UTIME_OMIT/UTIME_NOW and UTIME_NOW/UTIME_OMIT - * cases, we need to make an extra check that is not done by - * inode_change_ok(). + * Tell inode_change_ok(), that this is an explicit time + * update, even if neither ATTR_ATIME_SET nor ATTR_MTIME_SET + * were used. */ - if (((times[0].tv_nsec == UTIME_NOW && - times[1].tv_nsec == UTIME_OMIT) - || - (times[0].tv_nsec == UTIME_OMIT && - times[1].tv_nsec == UTIME_NOW)) - && !is_owner_or_cap(inode)) - goto mnt_drop_write_and_out; + newattrs.ia_valid |= ATTR_TIMES_SET; } else { - /* * If times is NULL (or both times are UTIME_NOW), * then we need to check permissions, because @@ -148,21 +96,76 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags goto mnt_drop_write_and_out; if (!is_owner_or_cap(inode)) { - error = permission(inode, MAY_WRITE, NULL); + error = inode_permission(inode, MAY_WRITE); if (error) goto mnt_drop_write_and_out; } } mutex_lock(&inode->i_mutex); - error = notify_change(dentry, &newattrs); + error = notify_change(path->dentry, &newattrs); mutex_unlock(&inode->i_mutex); + mnt_drop_write_and_out: - mnt_drop_write(mnt); -dput_and_out: - if (f) - fput(f); - else - path_put(&nd.path); + mnt_drop_write(path->mnt); +out: + return error; +} + +/* + * do_utimes - change times on filename or file descriptor + * @dfd: open file descriptor, -1 or AT_FDCWD + * @filename: path name or NULL + * @times: new times or NULL + * @flags: zero or more flags (only AT_SYMLINK_NOFOLLOW for the moment) + * + * If filename is NULL and dfd refers to an open file, then operate on + * the file. Otherwise look up filename, possibly using dfd as a + * starting point. + * + * If times==NULL, set access and modification to current time, + * must be owner or have write permission. + * Else, update from *times, must be owner or super user. + */ +long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags) +{ + int error = -EINVAL; + + if (times && (!nsec_valid(times[0].tv_nsec) || + !nsec_valid(times[1].tv_nsec))) { + goto out; + } + + if (flags & ~AT_SYMLINK_NOFOLLOW) + goto out; + + if (filename == NULL && dfd != AT_FDCWD) { + struct file *file; + + if (flags & AT_SYMLINK_NOFOLLOW) + goto out; + + file = fget(dfd); + error = -EBADF; + if (!file) + goto out; + + error = utimes_common(&file->f_path, times); + fput(file); + } else { + struct path path; + int lookup_flags = 0; + + if (!(flags & AT_SYMLINK_NOFOLLOW)) + lookup_flags |= LOOKUP_FOLLOW; + + error = user_path_at(dfd, filename, lookup_flags, &path); + if (error) + goto out; + + error = utimes_common(&path, times); + path_put(&path); + } + out: return error; } diff --git a/fs/xattr.c b/fs/xattr.c index 4706a8b1f49..468377e6653 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -63,7 +63,7 @@ xattr_permission(struct inode *inode, const char *name, int mask) return -EPERM; } - return permission(inode, mask, NULL); + return inode_permission(inode, mask); } int @@ -252,40 +252,40 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value, } asmlinkage long -sys_setxattr(const char __user *path, const char __user *name, +sys_setxattr(const char __user *pathname, const char __user *name, const void __user *value, size_t size, int flags) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) return error; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (!error) { - error = setxattr(nd.path.dentry, name, value, size, flags); - mnt_drop_write(nd.path.mnt); + error = setxattr(path.dentry, name, value, size, flags); + mnt_drop_write(path.mnt); } - path_put(&nd.path); + path_put(&path); return error; } asmlinkage long -sys_lsetxattr(const char __user *path, const char __user *name, +sys_lsetxattr(const char __user *pathname, const char __user *name, const void __user *value, size_t size, int flags) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk_link(path, &nd); + error = user_lpath(pathname, &path); if (error) return error; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (!error) { - error = setxattr(nd.path.dentry, name, value, size, flags); - mnt_drop_write(nd.path.mnt); + error = setxattr(path.dentry, name, value, size, flags); + mnt_drop_write(path.mnt); } - path_put(&nd.path); + path_put(&path); return error; } @@ -350,32 +350,32 @@ getxattr(struct dentry *d, const char __user *name, void __user *value, } asmlinkage ssize_t -sys_getxattr(const char __user *path, const char __user *name, +sys_getxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size) { - struct nameidata nd; + struct path path; ssize_t error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) return error; - error = getxattr(nd.path.dentry, name, value, size); - path_put(&nd.path); + error = getxattr(path.dentry, name, value, size); + path_put(&path); return error; } asmlinkage ssize_t -sys_lgetxattr(const char __user *path, const char __user *name, void __user *value, +sys_lgetxattr(const char __user *pathname, const char __user *name, void __user *value, size_t size) { - struct nameidata nd; + struct path path; ssize_t error; - error = user_path_walk_link(path, &nd); + error = user_lpath(pathname, &path); if (error) return error; - error = getxattr(nd.path.dentry, name, value, size); - path_put(&nd.path); + error = getxattr(path.dentry, name, value, size); + path_put(&path); return error; } @@ -425,30 +425,30 @@ listxattr(struct dentry *d, char __user *list, size_t size) } asmlinkage ssize_t -sys_listxattr(const char __user *path, char __user *list, size_t size) +sys_listxattr(const char __user *pathname, char __user *list, size_t size) { - struct nameidata nd; + struct path path; ssize_t error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) return error; - error = listxattr(nd.path.dentry, list, size); - path_put(&nd.path); + error = listxattr(path.dentry, list, size); + path_put(&path); return error; } asmlinkage ssize_t -sys_llistxattr(const char __user *path, char __user *list, size_t size) +sys_llistxattr(const char __user *pathname, char __user *list, size_t size) { - struct nameidata nd; + struct path path; ssize_t error; - error = user_path_walk_link(path, &nd); + error = user_lpath(pathname, &path); if (error) return error; - error = listxattr(nd.path.dentry, list, size); - path_put(&nd.path); + error = listxattr(path.dentry, list, size); + path_put(&path); return error; } @@ -486,38 +486,38 @@ removexattr(struct dentry *d, const char __user *name) } asmlinkage long -sys_removexattr(const char __user *path, const char __user *name) +sys_removexattr(const char __user *pathname, const char __user *name) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk(path, &nd); + error = user_path(pathname, &path); if (error) return error; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (!error) { - error = removexattr(nd.path.dentry, name); - mnt_drop_write(nd.path.mnt); + error = removexattr(path.dentry, name); + mnt_drop_write(path.mnt); } - path_put(&nd.path); + path_put(&path); return error; } asmlinkage long -sys_lremovexattr(const char __user *path, const char __user *name) +sys_lremovexattr(const char __user *pathname, const char __user *name) { - struct nameidata nd; + struct path path; int error; - error = user_path_walk_link(path, &nd); + error = user_lpath(pathname, &path); if (error) return error; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (!error) { - error = removexattr(nd.path.dentry, name); - mnt_drop_write(nd.path.mnt); + error = removexattr(path.dentry, name); + mnt_drop_write(path.mnt); } - path_put(&nd.path); + path_put(&path); return error; } diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 36ec614e699..737c9a42536 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -106,7 +106,8 @@ xfs-y += $(addprefix $(XFS_LINUX)/, \ xfs_iops.o \ xfs_lrw.o \ xfs_super.o \ - xfs_vnode.o) + xfs_vnode.o \ + xfs_xattr.o) # Objects in support/ xfs-y += $(addprefix support/, \ diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index 9b1bb17a050..1cd3b55ee3d 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -90,7 +90,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize, } void -kmem_free(void *ptr, size_t size) +kmem_free(const void *ptr) { if (!is_vmalloc_addr(ptr)) { kfree(ptr); @@ -100,7 +100,7 @@ kmem_free(void *ptr, size_t size) } void * -kmem_realloc(void *ptr, size_t newsize, size_t oldsize, +kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, unsigned int __nocast flags) { void *new; @@ -110,7 +110,7 @@ kmem_realloc(void *ptr, size_t newsize, size_t oldsize, if (new) memcpy(new, ptr, ((oldsize < newsize) ? oldsize : newsize)); - kmem_free(ptr, oldsize); + kmem_free(ptr); } return new; } diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index 5e956490297..af6843c7ee4 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -57,8 +57,8 @@ kmem_flags_convert(unsigned int __nocast flags) extern void *kmem_alloc(size_t, unsigned int __nocast); extern void *kmem_zalloc(size_t, unsigned int __nocast); extern void *kmem_zalloc_greedy(size_t *, size_t, size_t, unsigned int __nocast); -extern void *kmem_realloc(void *, size_t, size_t, unsigned int __nocast); -extern void kmem_free(void *, size_t); +extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); +extern void kmem_free(const void *); /* * Zone interfaces @@ -79,7 +79,7 @@ kmem_zone_init(int size, char *zone_name) static inline kmem_zone_t * kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, - void (*construct)(kmem_zone_t *, void *)) + void (*construct)(void *)) { return kmem_cache_create(zone_name, size, 0, flags, construct); } diff --git a/fs/xfs/linux-2.6/sema.h b/fs/xfs/linux-2.6/sema.h deleted file mode 100644 index 3abe7e9ceb3..00000000000 --- a/fs/xfs/linux-2.6/sema.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_SUPPORT_SEMA_H__ -#define __XFS_SUPPORT_SEMA_H__ - -#include <linux/time.h> -#include <linux/wait.h> -#include <linux/semaphore.h> -#include <asm/atomic.h> - -/* - * sema_t structure just maps to struct semaphore in Linux kernel. - */ - -typedef struct semaphore sema_t; - -#define initnsema(sp, val, name) sema_init(sp, val) -#define psema(sp, b) down(sp) -#define vsema(sp) up(sp) -#define freesema(sema) do { } while (0) - -static inline int issemalocked(sema_t *sp) -{ - return down_trylock(sp) || (up(sp), 0); -} - -/* - * Map cpsema (try to get the sema) to down_trylock. We need to switch - * the return values since cpsema returns 1 (acquired) 0 (failed) and - * down_trylock returns the reverse 0 (acquired) 1 (failed). - */ -static inline int cpsema(sema_t *sp) -{ - return down_trylock(sp) ? 0 : 1; -} - -#endif /* __XFS_SUPPORT_SEMA_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a55c3b26d84..f42f80a3b1f 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -73,7 +73,6 @@ xfs_page_trace( unsigned long pgoff) { xfs_inode_t *ip; - bhv_vnode_t *vp = vn_from_inode(inode); loff_t isize = i_size_read(inode); loff_t offset = page_offset(page); int delalloc = -1, unmapped = -1, unwritten = -1; @@ -81,7 +80,7 @@ xfs_page_trace( if (page_has_buffers(page)) xfs_count_page_state(page, &delalloc, &unmapped, &unwritten); - ip = xfs_vtoi(vp); + ip = XFS_I(inode); if (!ip->i_rwtrace) return; @@ -409,7 +408,6 @@ xfs_start_buffer_writeback( STATIC void xfs_start_page_writeback( struct page *page, - struct writeback_control *wbc, int clear_dirty, int buffers) { @@ -676,7 +674,7 @@ xfs_probe_cluster( } else pg_offset = PAGE_CACHE_SIZE; - if (page->index == tindex && !TestSetPageLocked(page)) { + if (page->index == tindex && trylock_page(page)) { pg_len = xfs_probe_page(page, pg_offset, mapped); unlock_page(page); } @@ -760,7 +758,7 @@ xfs_convert_page( if (page->index != tindex) goto fail; - if (TestSetPageLocked(page)) + if (!trylock_page(page)) goto fail; if (PageWriteback(page)) goto fail_unlock_page; @@ -858,7 +856,7 @@ xfs_convert_page( done = 1; } } - xfs_start_page_writeback(page, wbc, !page_dirty, count); + xfs_start_page_writeback(page, !page_dirty, count); } return done; @@ -1105,7 +1103,7 @@ xfs_page_state_convert( * that we are writing into for the first time. */ type = IOMAP_NEW; - if (!test_and_set_bit(BH_Lock, &bh->b_state)) { + if (trylock_buffer(bh)) { ASSERT(buffer_mapped(bh)); if (iomap_valid) all_bh = 1; @@ -1130,7 +1128,7 @@ xfs_page_state_convert( SetPageUptodate(page); if (startio) - xfs_start_page_writeback(page, wbc, 1, count); + xfs_start_page_writeback(page, 1, count); if (ioend && iomap_valid) { offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >> diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 98e0e86093b..986061ae1b9 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -58,7 +58,7 @@ xfs_buf_trace( bp, id, (void *)(unsigned long)bp->b_flags, (void *)(unsigned long)bp->b_hold.counter, - (void *)(unsigned long)bp->b_sema.count.counter, + (void *)(unsigned long)bp->b_sema.count, (void *)current, data, ra, (void *)(unsigned long)((bp->b_file_offset>>32) & 0xffffffff), @@ -253,7 +253,7 @@ _xfs_buf_initialize( memset(bp, 0, sizeof(xfs_buf_t)); atomic_set(&bp->b_hold, 1); - init_MUTEX_LOCKED(&bp->b_iodonesema); + init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_list); INIT_LIST_HEAD(&bp->b_hash_list); init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ @@ -310,8 +310,7 @@ _xfs_buf_free_pages( xfs_buf_t *bp) { if (bp->b_pages != bp->b_page_array) { - kmem_free(bp->b_pages, - bp->b_page_count * sizeof(struct page *)); + kmem_free(bp->b_pages); } } @@ -839,6 +838,7 @@ xfs_buf_rele( return; } + ASSERT(atomic_read(&bp->b_hold) > 0); if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { if (bp->b_relse) { atomic_inc(&bp->b_hold); @@ -852,11 +852,6 @@ xfs_buf_rele( spin_unlock(&hash->bh_lock); xfs_buf_free(bp); } - } else { - /* - * Catch reference count leaks - */ - ASSERT(atomic_read(&bp->b_hold) >= 0); } } @@ -1038,7 +1033,7 @@ xfs_buf_ioend( xfs_buf_iodone_work(&bp->b_iodone_work); } } else { - up(&bp->b_iodonesema); + complete(&bp->b_iowait); } } @@ -1276,7 +1271,7 @@ xfs_buf_iowait( XB_TRACE(bp, "iowait", 0); if (atomic_read(&bp->b_io_remaining)) blk_run_address_space(bp->b_target->bt_mapping); - down(&bp->b_iodonesema); + wait_for_completion(&bp->b_iowait); XB_TRACE(bp, "iowaited", (long)bp->b_error); return bp->b_error; } @@ -1398,7 +1393,7 @@ STATIC void xfs_free_bufhash( xfs_buftarg_t *btp) { - kmem_free(btp->bt_hash, (1<<btp->bt_hashshift) * sizeof(xfs_bufhash_t)); + kmem_free(btp->bt_hash); btp->bt_hash = NULL; } @@ -1428,13 +1423,10 @@ xfs_unregister_buftarg( void xfs_free_buftarg( - xfs_buftarg_t *btp, - int external) + xfs_buftarg_t *btp) { xfs_flush_buftarg(btp, 1); xfs_blkdev_issue_flush(btp); - if (external) - xfs_blkdev_put(btp->bt_bdev); xfs_free_bufhash(btp); iput(btp->bt_mapping->host); @@ -1444,7 +1436,7 @@ xfs_free_buftarg( xfs_unregister_buftarg(btp); kthread_stop(btp->bt_task); - kmem_free(btp, sizeof(*btp)); + kmem_free(btp); } STATIC int @@ -1575,7 +1567,7 @@ xfs_alloc_buftarg( return btp; error: - kmem_free(btp, sizeof(*btp)); + kmem_free(btp); return NULL; } @@ -1803,7 +1795,7 @@ int __init xfs_buf_init(void) { #ifdef XFS_BUF_TRACE - xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_SLEEP); + xfs_buf_trace_buf = ktrace_alloc(XFS_BUF_TRACE_SIZE, KM_NOFS); #endif xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf", diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index f948ec7ba9a..fe010995665 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -157,7 +157,7 @@ typedef struct xfs_buf { xfs_buf_iodone_t b_iodone; /* I/O completion function */ xfs_buf_relse_t b_relse; /* releasing function */ xfs_buf_bdstrat_t b_strat; /* pre-write function */ - struct semaphore b_iodonesema; /* Semaphore for I/O waiters */ + struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; void *b_fspriv3; @@ -352,7 +352,7 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0) #define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp) #define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp) -#define XFS_BUF_V_IODONESEMA(bp) up(&bp->b_iodonesema); +#define XFS_BUF_FINISH_IOWAIT(bp) complete(&bp->b_iowait); #define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target)) #define XFS_BUF_TARGET(bp) ((bp)->b_target) @@ -429,7 +429,7 @@ static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp) * Handling of buftargs. */ extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int); -extern void xfs_free_buftarg(xfs_buftarg_t *, int); +extern void xfs_free_buftarg(xfs_buftarg_t *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); extern int xfs_flush_buftarg(xfs_buftarg_t *, int); diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index c672b3238b1..24fd598af84 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -139,7 +139,7 @@ xfs_nfs_get_inode( } xfs_iunlock(ip, XFS_ILOCK_SHARED); - return ip->i_vnode; + return VFS_I(ip); } STATIC struct dentry * @@ -167,7 +167,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, if (!inode) return NULL; if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); result = d_alloc_anon(inode); if (!result) { iput(inode); @@ -198,7 +198,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, if (!inode) return NULL; if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); + return ERR_CAST(inode); result = d_alloc_anon(inode); if (!result) { iput(inode); @@ -215,13 +215,13 @@ xfs_fs_get_parent( struct xfs_inode *cip; struct dentry *parent; - error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip); + error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); if (unlikely(error)) return ERR_PTR(-error); - parent = d_alloc_anon(cip->i_vnode); + parent = d_alloc_anon(VFS_I(cip)); if (unlikely(!parent)) { - iput(cip->i_vnode); + iput(VFS_I(cip)); return ERR_PTR(-ENOMEM); } return parent; diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 1eefe61f0e1..36caa6d957d 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -31,7 +31,7 @@ xfs_tosspages( xfs_off_t last, int fiopt) { - struct address_space *mapping = ip->i_vnode->i_mapping; + struct address_space *mapping = VFS_I(ip)->i_mapping; if (mapping->nrpages) truncate_inode_pages(mapping, first); @@ -44,7 +44,7 @@ xfs_flushinval_pages( xfs_off_t last, int fiopt) { - struct address_space *mapping = ip->i_vnode->i_mapping; + struct address_space *mapping = VFS_I(ip)->i_mapping; int ret = 0; if (mapping->nrpages) { @@ -64,7 +64,7 @@ xfs_flush_pages( uint64_t flags, int fiopt) { - struct address_space *mapping = ip->i_vnode->i_mapping; + struct address_space *mapping = VFS_I(ip)->i_mapping; int ret = 0; int ret2; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index a42ba9d7115..48799ba7e3e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -48,6 +48,8 @@ #include "xfs_dfrag.h" #include "xfs_fsops.h" #include "xfs_vnodeops.h" +#include "xfs_quota.h" +#include "xfs_inode_item.h" #include <linux/capability.h> #include <linux/dcache.h> @@ -84,17 +86,15 @@ xfs_find_handle( switch (cmd) { case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_PATH_TO_HANDLE: { - struct nameidata nd; - int error; - - error = user_path_walk_link((const char __user *)hreq.path, &nd); + struct path path; + int error = user_lpath((const char __user *)hreq.path, &path); if (error) return error; - ASSERT(nd.path.dentry); - ASSERT(nd.path.dentry->d_inode); - inode = igrab(nd.path.dentry->d_inode); - path_put(&nd.path); + ASSERT(path.dentry); + ASSERT(path.dentry->d_inode); + inode = igrab(path.dentry->d_inode); + path_put(&path); break; } @@ -245,7 +245,7 @@ xfs_vget_fsop_handlereq( xfs_iunlock(ip, XFS_ILOCK_SHARED); - *inode = XFS_ITOV(ip); + *inode = VFS_I(ip); return 0; } @@ -470,6 +470,12 @@ xfs_attrlist_by_handle( if (al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); + /* + * Reject flags, only allow namespaces. + */ + if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + return -XFS_ERROR(EINVAL); + error = xfs_vget_fsop_handlereq(mp, parinode, &al_hreq.hreq, &inode); if (error) goto out; @@ -589,7 +595,7 @@ xfs_attrmulti_by_handle( goto out; error = E2BIG; - size = am_hreq.opcount * sizeof(attr_multiop_t); + size = am_hreq.opcount * sizeof(xfs_attr_multiop_t); if (!size || size > 16 * PAGE_SIZE) goto out_vn_rele; @@ -682,9 +688,9 @@ xfs_ioc_space( return -XFS_ERROR(EFAULT); if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - attr_flags |= ATTR_NONBLOCK; + attr_flags |= XFS_ATTR_NONBLOCK; if (ioflags & IO_INVIS) - attr_flags |= ATTR_DMI; + attr_flags |= XFS_ATTR_DMI; error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, NULL, attr_flags); @@ -875,6 +881,322 @@ xfs_ioc_fsgetxattr( return 0; } +STATIC void +xfs_set_diflags( + struct xfs_inode *ip, + unsigned int xflags) +{ + unsigned int di_flags; + + /* can't set PREALLOC this way, just preserve it */ + di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); + if (xflags & XFS_XFLAG_IMMUTABLE) + di_flags |= XFS_DIFLAG_IMMUTABLE; + if (xflags & XFS_XFLAG_APPEND) + di_flags |= XFS_DIFLAG_APPEND; + if (xflags & XFS_XFLAG_SYNC) + di_flags |= XFS_DIFLAG_SYNC; + if (xflags & XFS_XFLAG_NOATIME) + di_flags |= XFS_DIFLAG_NOATIME; + if (xflags & XFS_XFLAG_NODUMP) + di_flags |= XFS_DIFLAG_NODUMP; + if (xflags & XFS_XFLAG_PROJINHERIT) + di_flags |= XFS_DIFLAG_PROJINHERIT; + if (xflags & XFS_XFLAG_NODEFRAG) + di_flags |= XFS_DIFLAG_NODEFRAG; + if (xflags & XFS_XFLAG_FILESTREAM) + di_flags |= XFS_DIFLAG_FILESTREAM; + if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { + if (xflags & XFS_XFLAG_RTINHERIT) + di_flags |= XFS_DIFLAG_RTINHERIT; + if (xflags & XFS_XFLAG_NOSYMLINKS) + di_flags |= XFS_DIFLAG_NOSYMLINKS; + if (xflags & XFS_XFLAG_EXTSZINHERIT) + di_flags |= XFS_DIFLAG_EXTSZINHERIT; + } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { + if (xflags & XFS_XFLAG_REALTIME) + di_flags |= XFS_DIFLAG_REALTIME; + if (xflags & XFS_XFLAG_EXTSIZE) + di_flags |= XFS_DIFLAG_EXTSIZE; + } + + ip->i_d.di_flags = di_flags; +} + +STATIC void +xfs_diflags_to_linux( + struct xfs_inode *ip) +{ + struct inode *inode = VFS_I(ip); + unsigned int xflags = xfs_ip2xflags(ip); + + if (xflags & XFS_XFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (xflags & XFS_XFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (xflags & XFS_XFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (xflags & XFS_XFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; +} + +#define FSX_PROJID 1 +#define FSX_EXTSIZE 2 +#define FSX_XFLAGS 4 +#define FSX_NONBLOCK 8 + +STATIC int +xfs_ioctl_setattr( + xfs_inode_t *ip, + struct fsxattr *fa, + int mask) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + unsigned int lock_flags = 0; + struct xfs_dquot *udqp = NULL, *gdqp = NULL; + struct xfs_dquot *olddquot = NULL; + int code; + + xfs_itrace_entry(ip); + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return XFS_ERROR(EROFS); + if (XFS_FORCED_SHUTDOWN(mp)) + return XFS_ERROR(EIO); + + /* + * If disk quotas is on, we make sure that the dquots do exist on disk, + * before we start any other transactions. Trying to do this later + * is messy. We don't care to take a readlock to look at the ids + * in inode here, because we can't hold it across the trans_reserve. + * If the IDs do change before we take the ilock, we're covered + * because the i_*dquot fields will get updated anyway. + */ + if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) { + code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid, + ip->i_d.di_gid, fa->fsx_projid, + XFS_QMOPT_PQUOTA, &udqp, &gdqp); + if (code) + return code; + } + + /* + * For the other attributes, we acquire the inode lock and + * first do an error checking pass. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); + code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (code) + goto error_return; + + lock_flags = XFS_ILOCK_EXCL; + xfs_ilock(ip, lock_flags); + + /* + * CAP_FOWNER overrides the following restrictions: + * + * The user ID of the calling process must be equal + * to the file owner ID, except in cases where the + * CAP_FSETID capability is applicable. + */ + if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) { + code = XFS_ERROR(EPERM); + goto error_return; + } + + /* + * Do a quota reservation only if projid is actually going to change. + */ + if (mask & FSX_PROJID) { + if (XFS_IS_PQUOTA_ON(mp) && + ip->i_d.di_projid != fa->fsx_projid) { + ASSERT(tp); + code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, + capable(CAP_FOWNER) ? + XFS_QMOPT_FORCE_RES : 0); + if (code) /* out of quota */ + goto error_return; + } + } + + if (mask & FSX_EXTSIZE) { + /* + * Can't change extent size if any extents are allocated. + */ + if (ip->i_d.di_nextents && + ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != + fa->fsx_extsize)) { + code = XFS_ERROR(EINVAL); /* EFBIG? */ + goto error_return; + } + + /* + * Extent size must be a multiple of the appropriate block + * size, if set at all. + */ + if (fa->fsx_extsize != 0) { + xfs_extlen_t size; + + if (XFS_IS_REALTIME_INODE(ip) || + ((mask & FSX_XFLAGS) && + (fa->fsx_xflags & XFS_XFLAG_REALTIME))) { + size = mp->m_sb.sb_rextsize << + mp->m_sb.sb_blocklog; + } else { + size = mp->m_sb.sb_blocksize; + } + + if (fa->fsx_extsize % size) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + } + } + + + if (mask & FSX_XFLAGS) { + /* + * Can't change realtime flag if any extents are allocated. + */ + if ((ip->i_d.di_nextents || ip->i_delayed_blks) && + (XFS_IS_REALTIME_INODE(ip)) != + (fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + code = XFS_ERROR(EINVAL); /* EFBIG? */ + goto error_return; + } + + /* + * If realtime flag is set then must have realtime data. + */ + if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) { + if ((mp->m_sb.sb_rblocks == 0) || + (mp->m_sb.sb_rextsize == 0) || + (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { + code = XFS_ERROR(EINVAL); + goto error_return; + } + } + + /* + * Can't modify an immutable/append-only file unless + * we have appropriate permission. + */ + if ((ip->i_d.di_flags & + (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || + (fa->fsx_xflags & + (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && + !capable(CAP_LINUX_IMMUTABLE)) { + code = XFS_ERROR(EPERM); + goto error_return; + } + } + + xfs_trans_ijoin(tp, ip, lock_flags); + xfs_trans_ihold(tp, ip); + + /* + * Change file ownership. Must be the owner or privileged. + * If the system was configured with the "restricted_chown" + * option, the owner is not permitted to give away the file, + * and can change the group id only to a group of which he + * or she is a member. + */ + if (mask & FSX_PROJID) { + /* + * CAP_FSETID overrides the following restrictions: + * + * The set-user-ID and set-group-ID bits of a file will be + * cleared upon successful return from chown() + */ + if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && + !capable(CAP_FSETID)) + ip->i_d.di_mode &= ~(S_ISUID|S_ISGID); + + /* + * Change the ownerships and register quota modifications + * in the transaction. + */ + if (ip->i_d.di_projid != fa->fsx_projid) { + if (XFS_IS_PQUOTA_ON(mp)) { + olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip, + &ip->i_gdquot, gdqp); + } + ip->i_d.di_projid = fa->fsx_projid; + + /* + * We may have to rev the inode as well as + * the superblock version number since projids didn't + * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. + */ + if (ip->i_d.di_version == XFS_DINODE_VERSION_1) + xfs_bump_ino_vers2(tp, ip); + } + + } + + if (mask & FSX_EXTSIZE) + ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog; + if (mask & FSX_XFLAGS) { + xfs_set_diflags(ip, fa->fsx_xflags); + xfs_diflags_to_linux(ip); + } + + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + xfs_ichgtime(ip, XFS_ICHGTIME_CHG); + + XFS_STATS_INC(xs_ig_attrchg); + + /* + * If this is a synchronous mount, make sure that the + * transaction goes to disk before returning to the user. + * This is slightly sub-optimal in that truncates require + * two sync transactions instead of one for wsync filesystems. + * One for the truncate and one for the timestamps since we + * don't want to change the timestamps unless we're sure the + * truncate worked. Truncates are less than 1% of the laddis + * mix so this probably isn't worth the trouble to optimize. + */ + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + code = xfs_trans_commit(tp, 0); + xfs_iunlock(ip, lock_flags); + + /* + * Release any dquot(s) the inode had kept before chown. + */ + XFS_QM_DQRELE(mp, olddquot); + XFS_QM_DQRELE(mp, udqp); + XFS_QM_DQRELE(mp, gdqp); + + if (code) + return code; + + if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) { + XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, + NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0, + (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0); + } + + return 0; + + error_return: + XFS_QM_DQRELE(mp, udqp); + XFS_QM_DQRELE(mp, gdqp); + xfs_trans_cancel(tp, 0); + if (lock_flags) + xfs_iunlock(ip, lock_flags); + return code; +} + STATIC int xfs_ioc_fssetxattr( xfs_inode_t *ip, @@ -882,31 +1204,16 @@ xfs_ioc_fssetxattr( void __user *arg) { struct fsxattr fa; - struct bhv_vattr *vattr; - int error; - int attr_flags; + unsigned int mask; if (copy_from_user(&fa, arg, sizeof(fa))) return -EFAULT; - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - - attr_flags = 0; + mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID; if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - attr_flags |= ATTR_NONBLOCK; - - vattr->va_mask = XFS_AT_XFLAGS | XFS_AT_EXTSIZE | XFS_AT_PROJID; - vattr->va_xflags = fa.fsx_xflags; - vattr->va_extsize = fa.fsx_extsize; - vattr->va_projid = fa.fsx_projid; + mask |= FSX_NONBLOCK; - error = -xfs_setattr(ip, vattr, attr_flags, NULL); - if (!error) - vn_revalidate(XFS_ITOV(ip)); /* update flags */ - kfree(vattr); - return 0; + return -xfs_ioctl_setattr(ip, &fa, mask); } STATIC int @@ -928,10 +1235,9 @@ xfs_ioc_setxflags( struct file *filp, void __user *arg) { - struct bhv_vattr *vattr; + struct fsxattr fa; unsigned int flags; - int attr_flags; - int error; + unsigned int mask; if (copy_from_user(&flags, arg, sizeof(flags))) return -EFAULT; @@ -941,22 +1247,12 @@ xfs_ioc_setxflags( FS_SYNC_FL)) return -EOPNOTSUPP; - vattr = kmalloc(sizeof(*vattr), GFP_KERNEL); - if (unlikely(!vattr)) - return -ENOMEM; - - attr_flags = 0; + mask = FSX_XFLAGS; if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) - attr_flags |= ATTR_NONBLOCK; - - vattr->va_mask = XFS_AT_XFLAGS; - vattr->va_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); + mask |= FSX_NONBLOCK; + fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip)); - error = -xfs_setattr(ip, vattr, attr_flags, NULL); - if (likely(!error)) - vn_revalidate(XFS_ITOV(ip)); /* update flags */ - kfree(vattr); - return error; + return -xfs_ioctl_setattr(ip, &fa, mask); } STATIC int diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 2bf287ef548..91bcd979242 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -62,7 +62,7 @@ void xfs_synchronize_atime( xfs_inode_t *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = VFS_I(ip); if (inode) { ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; @@ -79,7 +79,7 @@ void xfs_mark_inode_dirty_sync( xfs_inode_t *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = VFS_I(ip); if (inode) mark_inode_dirty_sync(inode); @@ -89,36 +89,31 @@ xfs_mark_inode_dirty_sync( * Change the requested timestamp in the given inode. * We don't lock across timestamp updates, and we don't log them but * we do record the fact that there is dirty information in core. - * - * NOTE -- callers MUST combine XFS_ICHGTIME_MOD or XFS_ICHGTIME_CHG - * with XFS_ICHGTIME_ACC to be sure that access time - * update will take. Calling first with XFS_ICHGTIME_ACC - * and then XFS_ICHGTIME_MOD may fail to modify the access - * timestamp if the filesystem is mounted noacctm. */ void xfs_ichgtime( xfs_inode_t *ip, int flags) { - struct inode *inode = vn_to_inode(XFS_ITOV(ip)); + struct inode *inode = VFS_I(ip); timespec_t tv; + int sync_it = 0; + + tv = current_fs_time(inode->i_sb); - nanotime(&tv); - if (flags & XFS_ICHGTIME_MOD) { + if ((flags & XFS_ICHGTIME_MOD) && + !timespec_equal(&inode->i_mtime, &tv)) { inode->i_mtime = tv; ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; + sync_it = 1; } - if (flags & XFS_ICHGTIME_ACC) { - inode->i_atime = tv; - ip->i_d.di_atime.t_sec = (__int32_t)tv.tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)tv.tv_nsec; - } - if (flags & XFS_ICHGTIME_CHG) { + if ((flags & XFS_ICHGTIME_CHG) && + !timespec_equal(&inode->i_ctime, &tv)) { inode->i_ctime = tv; ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; + sync_it = 1; } /* @@ -130,72 +125,11 @@ xfs_ichgtime( * ensure that the compiler does not reorder the update * of i_update_core above the timestamp updates above. */ - SYNCHRONIZE(); - ip->i_update_core = 1; - if (!(inode->i_state & I_NEW)) + if (sync_it) { + SYNCHRONIZE(); + ip->i_update_core = 1; mark_inode_dirty_sync(inode); -} - -/* - * Variant on the above which avoids querying the system clock - * in situations where we know the Linux inode timestamps have - * just been updated (and so we can update our inode cheaply). - */ -void -xfs_ichgtime_fast( - xfs_inode_t *ip, - struct inode *inode, - int flags) -{ - timespec_t *tvp; - - /* - * Atime updates for read() & friends are handled lazily now, and - * explicit updates must go through xfs_ichgtime() - */ - ASSERT((flags & XFS_ICHGTIME_ACC) == 0); - - if (flags & XFS_ICHGTIME_MOD) { - tvp = &inode->i_mtime; - ip->i_d.di_mtime.t_sec = (__int32_t)tvp->tv_sec; - ip->i_d.di_mtime.t_nsec = (__int32_t)tvp->tv_nsec; - } - if (flags & XFS_ICHGTIME_CHG) { - tvp = &inode->i_ctime; - ip->i_d.di_ctime.t_sec = (__int32_t)tvp->tv_sec; - ip->i_d.di_ctime.t_nsec = (__int32_t)tvp->tv_nsec; } - - /* - * We update the i_update_core field _after_ changing - * the timestamps in order to coordinate properly with - * xfs_iflush() so that we don't lose timestamp updates. - * This keeps us from having to hold the inode lock - * while doing this. We use the SYNCHRONIZE macro to - * ensure that the compiler does not reorder the update - * of i_update_core above the timestamp updates above. - */ - SYNCHRONIZE(); - ip->i_update_core = 1; - if (!(inode->i_state & I_NEW)) - mark_inode_dirty_sync(inode); -} - - -/* - * Pull the link count and size up from the xfs inode to the linux inode - */ -STATIC void -xfs_validate_fields( - struct inode *inode) -{ - struct xfs_inode *ip = XFS_I(inode); - loff_t size; - - /* we're under i_sem so i_size can't change under us */ - size = XFS_ISIZE(ip); - if (i_size_read(inode) != size) - i_size_write(inode, size); } /* @@ -245,8 +179,7 @@ STATIC void xfs_cleanup_inode( struct inode *dir, struct inode *inode, - struct dentry *dentry, - int mode) + struct dentry *dentry) { struct xfs_name teardown; @@ -257,10 +190,7 @@ xfs_cleanup_inode( */ xfs_dentry_to_name(&teardown, dentry); - if (S_ISDIR(mode)) - xfs_rmdir(XFS_I(dir), &teardown, XFS_I(inode)); - else - xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); + xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); iput(inode); } @@ -275,7 +205,7 @@ xfs_vn_mknod( struct xfs_inode *ip = NULL; xfs_acl_t *default_acl = NULL; struct xfs_name name; - attrexists_t test_default_acl = _ACL_DEFAULT_EXISTS; + int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS; int error; /* @@ -320,7 +250,7 @@ xfs_vn_mknod( if (unlikely(error)) goto out_free_acl; - inode = ip->i_vnode; + inode = VFS_I(ip); error = xfs_init_security(inode, dir); if (unlikely(error)) @@ -335,14 +265,11 @@ xfs_vn_mknod( } - if (S_ISDIR(mode)) - xfs_validate_fields(inode); d_instantiate(dentry, inode); - xfs_validate_fields(dir); return -error; out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry, mode); + xfs_cleanup_inode(dir, inode, dentry); out_free_acl: if (default_acl) _ACL_FREE(default_acl); @@ -382,7 +309,7 @@ xfs_vn_lookup( return ERR_PTR(-ENAMETOOLONG); xfs_dentry_to_name(&name, dentry); - error = xfs_lookup(XFS_I(dir), &name, &cip); + error = xfs_lookup(XFS_I(dir), &name, &cip, NULL); if (unlikely(error)) { if (unlikely(error != ENOENT)) return ERR_PTR(-error); @@ -390,7 +317,47 @@ xfs_vn_lookup( return NULL; } - return d_splice_alias(cip->i_vnode, dentry); + return d_splice_alias(VFS_I(cip), dentry); +} + +STATIC struct dentry * +xfs_vn_ci_lookup( + struct inode *dir, + struct dentry *dentry, + struct nameidata *nd) +{ + struct xfs_inode *ip; + struct xfs_name xname; + struct xfs_name ci_name; + struct qstr dname; + int error; + + if (dentry->d_name.len >= MAXNAMELEN) + return ERR_PTR(-ENAMETOOLONG); + + xfs_dentry_to_name(&xname, dentry); + error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name); + if (unlikely(error)) { + if (unlikely(error != ENOENT)) + return ERR_PTR(-error); + /* + * call d_add(dentry, NULL) here when d_drop_negative_children + * is called in xfs_vn_mknod (ie. allow negative dentries + * with CI filesystems). + */ + return NULL; + } + + /* if exact match, just splice and exit */ + if (!ci_name.name) + return d_splice_alias(VFS_I(ip), dentry); + + /* else case-insensitive match... */ + dname.name = ci_name.name; + dname.len = ci_name.len; + dentry = d_add_ci(VFS_I(ip), dentry, &dname); + kmem_free(ci_name.name); + return dentry; } STATIC int @@ -414,7 +381,6 @@ xfs_vn_link( } xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); - xfs_validate_fields(inode); d_instantiate(dentry, inode); return 0; } @@ -424,19 +390,23 @@ xfs_vn_unlink( struct inode *dir, struct dentry *dentry) { - struct inode *inode; struct xfs_name name; int error; - inode = dentry->d_inode; xfs_dentry_to_name(&name, dentry); - error = xfs_remove(XFS_I(dir), &name, XFS_I(inode)); - if (likely(!error)) { - xfs_validate_fields(dir); /* size needs update */ - xfs_validate_fields(inode); - } - return -error; + error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode)); + if (error) + return error; + + /* + * With unlink, the VFS makes the dentry "negative": no inode, + * but still hashed. This is incompatible with case-insensitive + * mode, so invalidate (unhash) the dentry in CI-mode. + */ + if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb)) + d_invalidate(dentry); + return 0; } STATIC int @@ -459,43 +429,22 @@ xfs_vn_symlink( if (unlikely(error)) goto out; - inode = cip->i_vnode; + inode = VFS_I(cip); error = xfs_init_security(inode, dir); if (unlikely(error)) goto out_cleanup_inode; d_instantiate(dentry, inode); - xfs_validate_fields(dir); - xfs_validate_fields(inode); return 0; out_cleanup_inode: - xfs_cleanup_inode(dir, inode, dentry, 0); + xfs_cleanup_inode(dir, inode, dentry); out: return -error; } STATIC int -xfs_vn_rmdir( - struct inode *dir, - struct dentry *dentry) -{ - struct inode *inode = dentry->d_inode; - struct xfs_name name; - int error; - - xfs_dentry_to_name(&name, dentry); - - error = xfs_rmdir(XFS_I(dir), &name, XFS_I(inode)); - if (likely(!error)) { - xfs_validate_fields(inode); - xfs_validate_fields(dir); - } - return -error; -} - -STATIC int xfs_vn_rename( struct inode *odir, struct dentry *odentry, @@ -505,22 +454,13 @@ xfs_vn_rename( struct inode *new_inode = ndentry->d_inode; struct xfs_name oname; struct xfs_name nname; - int error; xfs_dentry_to_name(&oname, odentry); xfs_dentry_to_name(&nname, ndentry); - error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), + return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode), XFS_I(ndir), &nname, new_inode ? XFS_I(new_inode) : NULL); - if (likely(!error)) { - if (new_inode) - xfs_validate_fields(new_inode); - xfs_validate_fields(odir); - if (ndir != odir) - xfs_validate_fields(ndir); - } - return -error; } /* @@ -589,8 +529,7 @@ xfs_check_acl( STATIC int xfs_vn_permission( struct inode *inode, - int mask, - struct nameidata *nd) + int mask) { return generic_permission(inode, mask, xfs_check_acl); } @@ -660,57 +599,9 @@ xfs_vn_getattr( STATIC int xfs_vn_setattr( struct dentry *dentry, - struct iattr *attr) + struct iattr *iattr) { - struct inode *inode = dentry->d_inode; - unsigned int ia_valid = attr->ia_valid; - bhv_vattr_t vattr = { 0 }; - int flags = 0; - int error; - - if (ia_valid & ATTR_UID) { - vattr.va_mask |= XFS_AT_UID; - vattr.va_uid = attr->ia_uid; - } - if (ia_valid & ATTR_GID) { - vattr.va_mask |= XFS_AT_GID; - vattr.va_gid = attr->ia_gid; - } - if (ia_valid & ATTR_SIZE) { - vattr.va_mask |= XFS_AT_SIZE; - vattr.va_size = attr->ia_size; - } - if (ia_valid & ATTR_ATIME) { - vattr.va_mask |= XFS_AT_ATIME; - vattr.va_atime = attr->ia_atime; - inode->i_atime = attr->ia_atime; - } - if (ia_valid & ATTR_MTIME) { - vattr.va_mask |= XFS_AT_MTIME; - vattr.va_mtime = attr->ia_mtime; - } - if (ia_valid & ATTR_CTIME) { - vattr.va_mask |= XFS_AT_CTIME; - vattr.va_ctime = attr->ia_ctime; - } - if (ia_valid & ATTR_MODE) { - vattr.va_mask |= XFS_AT_MODE; - vattr.va_mode = attr->ia_mode; - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) - inode->i_mode &= ~S_ISGID; - } - - if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET)) - flags |= ATTR_UTIME; -#ifdef ATTR_NO_BLOCK - if ((ia_valid & ATTR_NO_BLOCK)) - flags |= ATTR_NONBLOCK; -#endif - - error = xfs_setattr(XFS_I(inode), &vattr, flags, NULL); - if (likely(!error)) - vn_revalidate(vn_from_inode(inode)); - return -error; + return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL); } /* @@ -728,109 +619,6 @@ xfs_vn_truncate( WARN_ON(error); } -STATIC int -xfs_vn_setxattr( - struct dentry *dentry, - const char *name, - const void *data, - size_t size, - int flags) -{ - bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); - char *attr = (char *)name; - attrnames_t *namesp; - int xflags = 0; - int error; - - namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); - if (!namesp) - return -EOPNOTSUPP; - attr += namesp->attr_namelen; - error = namesp->attr_capable(vp, NULL); - if (error) - return error; - - /* Convert Linux syscall to XFS internal ATTR flags */ - if (flags & XATTR_CREATE) - xflags |= ATTR_CREATE; - if (flags & XATTR_REPLACE) - xflags |= ATTR_REPLACE; - xflags |= namesp->attr_flag; - return namesp->attr_set(vp, attr, (void *)data, size, xflags); -} - -STATIC ssize_t -xfs_vn_getxattr( - struct dentry *dentry, - const char *name, - void *data, - size_t size) -{ - bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); - char *attr = (char *)name; - attrnames_t *namesp; - int xflags = 0; - ssize_t error; - - namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); - if (!namesp) - return -EOPNOTSUPP; - attr += namesp->attr_namelen; - error = namesp->attr_capable(vp, NULL); - if (error) - return error; - - /* Convert Linux syscall to XFS internal ATTR flags */ - if (!size) { - xflags |= ATTR_KERNOVAL; - data = NULL; - } - xflags |= namesp->attr_flag; - return namesp->attr_get(vp, attr, (void *)data, size, xflags); -} - -STATIC ssize_t -xfs_vn_listxattr( - struct dentry *dentry, - char *data, - size_t size) -{ - bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); - int error, xflags = ATTR_KERNAMELS; - ssize_t result; - - if (!size) - xflags |= ATTR_KERNOVAL; - xflags |= capable(CAP_SYS_ADMIN) ? ATTR_KERNFULLS : ATTR_KERNORMALS; - - error = attr_generic_list(vp, data, size, xflags, &result); - if (error < 0) - return error; - return result; -} - -STATIC int -xfs_vn_removexattr( - struct dentry *dentry, - const char *name) -{ - bhv_vnode_t *vp = vn_from_inode(dentry->d_inode); - char *attr = (char *)name; - attrnames_t *namesp; - int xflags = 0; - int error; - - namesp = attr_lookup_namespace(attr, attr_namespaces, ATTR_NAMECOUNT); - if (!namesp) - return -EOPNOTSUPP; - attr += namesp->attr_namelen; - error = namesp->attr_capable(vp, NULL); - if (error) - return error; - xflags |= namesp->attr_flag; - return namesp->attr_remove(vp, attr, xflags); -} - STATIC long xfs_vn_fallocate( struct inode *inode, @@ -854,18 +642,18 @@ xfs_vn_fallocate( xfs_ilock(ip, XFS_IOLOCK_EXCL); error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, - 0, NULL, ATTR_NOLOCK); + 0, NULL, XFS_ATTR_NOLOCK); if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) new_size = offset + len; /* Change file size if needed */ if (new_size) { - bhv_vattr_t va; + struct iattr iattr; - va.va_mask = XFS_AT_SIZE; - va.va_size = new_size; - error = xfs_setattr(ip, &va, ATTR_NOLOCK, NULL); + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = new_size; + error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); @@ -873,46 +661,172 @@ out_error: return error; } -const struct inode_operations xfs_inode_operations = { +static const struct inode_operations xfs_inode_operations = { .permission = xfs_vn_permission, .truncate = xfs_vn_truncate, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, - .setxattr = xfs_vn_setxattr, - .getxattr = xfs_vn_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, - .removexattr = xfs_vn_removexattr, .fallocate = xfs_vn_fallocate, }; -const struct inode_operations xfs_dir_inode_operations = { +static const struct inode_operations xfs_dir_inode_operations = { .create = xfs_vn_create, .lookup = xfs_vn_lookup, .link = xfs_vn_link, .unlink = xfs_vn_unlink, .symlink = xfs_vn_symlink, .mkdir = xfs_vn_mkdir, - .rmdir = xfs_vn_rmdir, + /* + * Yes, XFS uses the same method for rmdir and unlink. + * + * There are some subtile differences deeper in the code, + * but we use S_ISDIR to check for those. + */ + .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, .permission = xfs_vn_permission, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, - .setxattr = xfs_vn_setxattr, - .getxattr = xfs_vn_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, - .removexattr = xfs_vn_removexattr, }; -const struct inode_operations xfs_symlink_inode_operations = { +static const struct inode_operations xfs_dir_ci_inode_operations = { + .create = xfs_vn_create, + .lookup = xfs_vn_ci_lookup, + .link = xfs_vn_link, + .unlink = xfs_vn_unlink, + .symlink = xfs_vn_symlink, + .mkdir = xfs_vn_mkdir, + /* + * Yes, XFS uses the same method for rmdir and unlink. + * + * There are some subtile differences deeper in the code, + * but we use S_ISDIR to check for those. + */ + .rmdir = xfs_vn_unlink, + .mknod = xfs_vn_mknod, + .rename = xfs_vn_rename, + .permission = xfs_vn_permission, + .getattr = xfs_vn_getattr, + .setattr = xfs_vn_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, + .listxattr = xfs_vn_listxattr, +}; + +static const struct inode_operations xfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = xfs_vn_follow_link, .put_link = xfs_vn_put_link, .permission = xfs_vn_permission, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, - .setxattr = xfs_vn_setxattr, - .getxattr = xfs_vn_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, - .removexattr = xfs_vn_removexattr, }; + +STATIC void +xfs_diflags_to_iflags( + struct inode *inode, + struct xfs_inode *ip) +{ + if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) + inode->i_flags |= S_IMMUTABLE; + else + inode->i_flags &= ~S_IMMUTABLE; + if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) + inode->i_flags |= S_APPEND; + else + inode->i_flags &= ~S_APPEND; + if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) + inode->i_flags |= S_SYNC; + else + inode->i_flags &= ~S_SYNC; + if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) + inode->i_flags |= S_NOATIME; + else + inode->i_flags &= ~S_NOATIME; +} + +/* + * Initialize the Linux inode, set up the operation vectors and + * unlock the inode. + * + * When reading existing inodes from disk this is called directly + * from xfs_iget, when creating a new inode it is called from + * xfs_ialloc after setting up the inode. + */ +void +xfs_setup_inode( + struct xfs_inode *ip) +{ + struct inode *inode = ip->i_vnode; + + inode->i_mode = ip->i_d.di_mode; + inode->i_nlink = ip->i_d.di_nlink; + inode->i_uid = ip->i_d.di_uid; + inode->i_gid = ip->i_d.di_gid; + + switch (inode->i_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + inode->i_rdev = + MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, + sysv_minor(ip->i_df.if_u2.if_rdev)); + break; + default: + inode->i_rdev = 0; + break; + } + + inode->i_generation = ip->i_d.di_gen; + i_size_write(inode, ip->i_d.di_size); + inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; + inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; + inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; + inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; + inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; + inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + xfs_diflags_to_iflags(inode, ip); + xfs_iflags_clear(ip, XFS_IMODIFIED); + + switch (inode->i_mode & S_IFMT) { + case S_IFREG: + inode->i_op = &xfs_inode_operations; + inode->i_fop = &xfs_file_operations; + inode->i_mapping->a_ops = &xfs_address_space_operations; + break; + case S_IFDIR: + if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) + inode->i_op = &xfs_dir_ci_inode_operations; + else + inode->i_op = &xfs_dir_inode_operations; + inode->i_fop = &xfs_dir_file_operations; + break; + case S_IFLNK: + inode->i_op = &xfs_symlink_inode_operations; + if (!(ip->i_df.if_flags & XFS_IFINLINE)) + inode->i_mapping->a_ops = &xfs_address_space_operations; + break; + default: + inode->i_op = &xfs_inode_operations; + init_special_inode(inode, inode->i_mode, inode->i_rdev); + break; + } + + xfs_iflags_clear(ip, XFS_INEW); + barrier(); + + unlock_new_inode(inode); +} diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index 14d0deb7aff..8b1a1e31dc2 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -18,23 +18,14 @@ #ifndef __XFS_IOPS_H__ #define __XFS_IOPS_H__ -extern const struct inode_operations xfs_inode_operations; -extern const struct inode_operations xfs_dir_inode_operations; -extern const struct inode_operations xfs_symlink_inode_operations; +struct xfs_inode; extern const struct file_operations xfs_file_operations; extern const struct file_operations xfs_dir_file_operations; extern const struct file_operations xfs_invis_file_operations; +extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); -struct xfs_inode; -extern void xfs_ichgtime(struct xfs_inode *, int); -extern void xfs_ichgtime_fast(struct xfs_inode *, struct inode *, int); - -#define xfs_vtoi(vp) \ - ((struct xfs_inode *)vn_to_inode(vp)->i_private) - -#define XFS_I(inode) \ - ((struct xfs_inode *)(inode)->i_private) +extern void xfs_setup_inode(struct xfs_inode *); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 4edc46915b5..cc0f7b3a979 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -45,13 +45,13 @@ #include <mrlock.h> #include <sv.h> #include <mutex.h> -#include <sema.h> #include <time.h> #include <support/ktrace.h> #include <support/debug.h> #include <support/uuid.h> +#include <linux/semaphore.h> #include <linux/mm.h> #include <linux/kernel.h> #include <linux/blkdev.h> @@ -76,6 +76,7 @@ #include <linux/log2.h> #include <linux/spinlock.h> #include <linux/random.h> +#include <linux/ctype.h> #include <asm/page.h> #include <asm/div64.h> @@ -125,8 +126,6 @@ #define current_cpu() (raw_smp_processor_id()) #define current_pid() (current->pid) -#define current_fsuid(cred) (current->fsuid) -#define current_fsgid(cred) (current->fsgid) #define current_test_flags(f) (current->flags & (f)) #define current_set_flags_nested(sp, f) \ (*(sp) = current->flags, current->flags |= (f)) @@ -179,7 +178,7 @@ #define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL) #define xfs_stack_trace() dump_stack() #define xfs_itruncate_data(ip, off) \ - (-vmtruncate(vn_to_inode(XFS_ITOV(ip)), (off))) + (-vmtruncate(VFS_I(ip), (off))) /* Move the kernel do_div definition off to one side */ @@ -299,4 +298,11 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y) return x; } +/* ARM old ABI has some weird alignment/padding */ +#if defined(__arm__) && !defined(__ARM_EABI__) +#define __arch_pack __attribute__((packed)) +#else +#define __arch_pack +#endif + #endif /* __XFS_LINUX__ */ diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 5e3b57516ec..1957e5357d0 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -137,7 +137,7 @@ xfs_iozero( struct address_space *mapping; int status; - mapping = ip->i_vnode->i_mapping; + mapping = VFS_I(ip)->i_mapping; do { unsigned offset, bytes; void *fsdata; @@ -674,9 +674,7 @@ start: */ if (likely(!(ioflags & IO_INVIS) && !mnt_want_write(file->f_path.mnt))) { - file_update_time(file); - xfs_ichgtime_fast(xip, inode, - XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); mnt_drop_write(file->f_path.mnt); } @@ -711,7 +709,7 @@ start: !capable(CAP_FSETID)) { error = xfs_write_clear_setuid(xip); if (likely(!error)) - error = -remove_suid(file->f_path.dentry); + error = -file_remove_suid(file); if (unlikely(error)) { goto out_unlock_internal; } diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index e480b610205..3d5b67c075c 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -98,12 +98,21 @@ xfs_read_xfsstats( return len; } -void +int xfs_init_procfs(void) { if (!proc_mkdir("fs/xfs", NULL)) - return; - create_proc_read_entry("fs/xfs/stat", 0, NULL, xfs_read_xfsstats, NULL); + goto out; + + if (!create_proc_read_entry("fs/xfs/stat", 0, NULL, + xfs_read_xfsstats, NULL)) + goto out_remove_entry; + return 0; + + out_remove_entry: + remove_proc_entry("fs/xfs", NULL); + out: + return -ENOMEM; } void diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h index afd0b0d5fdb..e83820febc9 100644 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ b/fs/xfs/linux-2.6/xfs_stats.h @@ -134,7 +134,7 @@ DECLARE_PER_CPU(struct xfsstats, xfsstats); #define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--) #define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc)) -extern void xfs_init_procfs(void); +extern int xfs_init_procfs(void); extern void xfs_cleanup_procfs(void); @@ -144,8 +144,14 @@ extern void xfs_cleanup_procfs(void); # define XFS_STATS_DEC(count) # define XFS_STATS_ADD(count, inc) -static inline void xfs_init_procfs(void) { }; -static inline void xfs_cleanup_procfs(void) { }; +static inline int xfs_init_procfs(void) +{ + return 0; +} + +static inline void xfs_cleanup_procfs(void) +{ +} #endif /* !CONFIG_PROC_FS */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 742b2c7852c..73c65f19e54 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -52,6 +52,12 @@ #include "xfs_version.h" #include "xfs_log_priv.h" #include "xfs_trans_priv.h" +#include "xfs_filestream.h" +#include "xfs_da_btree.h" +#include "xfs_dir2_trace.h" +#include "xfs_extfree_item.h" +#include "xfs_mru_cache.h" +#include "xfs_inode_item.h" #include <linux/namei.h> #include <linux/init.h> @@ -60,6 +66,7 @@ #include <linux/writeback.h> #include <linux/kthread.h> #include <linux/freezer.h> +#include <linux/parser.h> static struct quotactl_ops xfs_quotactl_operations; static struct super_operations xfs_super_operations; @@ -74,7 +81,10 @@ xfs_args_allocate( { struct xfs_mount_args *args; - args = kmem_zalloc(sizeof(struct xfs_mount_args), KM_SLEEP); + args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL); + if (!args) + return NULL; + args->logbufs = args->logbufsize = -1; strncpy(args->fsname, sb->s_id, MAXNAMELEN); @@ -138,6 +148,23 @@ xfs_args_allocate( #define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */ #define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */ +/* + * Table driven mount option parser. + * + * Currently only used for remount, but it will be used for mount + * in the future, too. + */ +enum { + Opt_barrier, Opt_nobarrier, Opt_err +}; + +static match_table_t tokens = { + {Opt_barrier, "barrier"}, + {Opt_nobarrier, "nobarrier"}, + {Opt_err, NULL} +}; + + STATIC unsigned long suffix_strtoul(char *s, char **endp, unsigned int base) { @@ -314,6 +341,7 @@ xfs_parseargs( args->flags |= XFSMNT_ATTR2; } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { args->flags &= ~XFSMNT_ATTR2; + args->flags |= XFSMNT_NOATTR2; } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { args->flags2 |= XFSMNT2_FILESTREAMS; } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { @@ -553,115 +581,6 @@ xfs_max_file_offset( return (((__uint64_t)pagefactor) << bitshift) - 1; } -STATIC_INLINE void -xfs_set_inodeops( - struct inode *inode) -{ - switch (inode->i_mode & S_IFMT) { - case S_IFREG: - inode->i_op = &xfs_inode_operations; - inode->i_fop = &xfs_file_operations; - inode->i_mapping->a_ops = &xfs_address_space_operations; - break; - case S_IFDIR: - inode->i_op = &xfs_dir_inode_operations; - inode->i_fop = &xfs_dir_file_operations; - break; - case S_IFLNK: - inode->i_op = &xfs_symlink_inode_operations; - if (!(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE)) - inode->i_mapping->a_ops = &xfs_address_space_operations; - break; - default: - inode->i_op = &xfs_inode_operations; - init_special_inode(inode, inode->i_mode, inode->i_rdev); - break; - } -} - -STATIC_INLINE void -xfs_revalidate_inode( - xfs_mount_t *mp, - bhv_vnode_t *vp, - xfs_inode_t *ip) -{ - struct inode *inode = vn_to_inode(vp); - - inode->i_mode = ip->i_d.di_mode; - inode->i_nlink = ip->i_d.di_nlink; - inode->i_uid = ip->i_d.di_uid; - inode->i_gid = ip->i_d.di_gid; - - switch (inode->i_mode & S_IFMT) { - case S_IFBLK: - case S_IFCHR: - inode->i_rdev = - MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff, - sysv_minor(ip->i_df.if_u2.if_rdev)); - break; - default: - inode->i_rdev = 0; - break; - } - - inode->i_generation = ip->i_d.di_gen; - i_size_write(inode, ip->i_d.di_size); - inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec; - inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec; - inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; - inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; - inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; - inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; - if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (ip->i_d.di_flags & XFS_DIFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (ip->i_d.di_flags & XFS_DIFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; - xfs_iflags_clear(ip, XFS_IMODIFIED); -} - -void -xfs_initialize_vnode( - struct xfs_mount *mp, - bhv_vnode_t *vp, - struct xfs_inode *ip) -{ - struct inode *inode = vn_to_inode(vp); - - if (!ip->i_vnode) { - ip->i_vnode = vp; - inode->i_private = ip; - } - - /* - * We need to set the ops vectors, and unlock the inode, but if - * we have been called during the new inode create process, it is - * too early to fill in the Linux inode. We will get called a - * second time once the inode is properly set up, and then we can - * finish our work. - */ - if (ip->i_d.di_mode != 0 && (inode->i_state & I_NEW)) { - xfs_revalidate_inode(mp, vp, ip); - xfs_set_inodeops(inode); - - xfs_iflags_clear(ip, XFS_INEW); - barrier(); - - unlock_new_inode(inode); - } -} - int xfs_blkdev_get( xfs_mount_t *mp, @@ -733,14 +652,6 @@ xfs_mountfs_check_barriers(xfs_mount_t *mp) return; } - if (mp->m_ddev_targp->bt_bdev->bd_disk->queue->ordered == - QUEUE_ORDERED_NONE) { - xfs_fs_cmn_err(CE_NOTE, mp, - "Disabling barriers, not supported by the underlying device"); - mp->m_flags &= ~XFS_MOUNT_BARRIER; - return; - } - if (xfs_readonly_buftarg(mp->m_ddev_targp)) { xfs_fs_cmn_err(CE_NOTE, mp, "Disabling barriers, underlying device is readonly"); @@ -764,6 +675,139 @@ xfs_blkdev_issue_flush( blkdev_issue_flush(buftarg->bt_bdev, NULL); } +STATIC void +xfs_close_devices( + struct xfs_mount *mp) +{ + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { + struct block_device *logdev = mp->m_logdev_targp->bt_bdev; + xfs_free_buftarg(mp->m_logdev_targp); + xfs_blkdev_put(logdev); + } + if (mp->m_rtdev_targp) { + struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev; + xfs_free_buftarg(mp->m_rtdev_targp); + xfs_blkdev_put(rtdev); + } + xfs_free_buftarg(mp->m_ddev_targp); +} + +/* + * The file system configurations are: + * (1) device (partition) with data and internal log + * (2) logical volume with data and log subvolumes. + * (3) logical volume with data, log, and realtime subvolumes. + * + * We only have to handle opening the log and realtime volumes here if + * they are present. The data subvolume has already been opened by + * get_sb_bdev() and is stored in sb->s_bdev. + */ +STATIC int +xfs_open_devices( + struct xfs_mount *mp, + struct xfs_mount_args *args) +{ + struct block_device *ddev = mp->m_super->s_bdev; + struct block_device *logdev = NULL, *rtdev = NULL; + int error; + + /* + * Open real time and log devices - order is important. + */ + if (args->logname[0]) { + error = xfs_blkdev_get(mp, args->logname, &logdev); + if (error) + goto out; + } + + if (args->rtname[0]) { + error = xfs_blkdev_get(mp, args->rtname, &rtdev); + if (error) + goto out_close_logdev; + + if (rtdev == ddev || rtdev == logdev) { + cmn_err(CE_WARN, + "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); + error = EINVAL; + goto out_close_rtdev; + } + } + + /* + * Setup xfs_mount buffer target pointers + */ + error = ENOMEM; + mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0); + if (!mp->m_ddev_targp) + goto out_close_rtdev; + + if (rtdev) { + mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1); + if (!mp->m_rtdev_targp) + goto out_free_ddev_targ; + } + + if (logdev && logdev != ddev) { + mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1); + if (!mp->m_logdev_targp) + goto out_free_rtdev_targ; + } else { + mp->m_logdev_targp = mp->m_ddev_targp; + } + + return 0; + + out_free_rtdev_targ: + if (mp->m_rtdev_targp) + xfs_free_buftarg(mp->m_rtdev_targp); + out_free_ddev_targ: + xfs_free_buftarg(mp->m_ddev_targp); + out_close_rtdev: + if (rtdev) + xfs_blkdev_put(rtdev); + out_close_logdev: + if (logdev && logdev != ddev) + xfs_blkdev_put(logdev); + out: + return error; +} + +/* + * Setup xfs_mount buffer target pointers based on superblock + */ +STATIC int +xfs_setup_devices( + struct xfs_mount *mp) +{ + int error; + + error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, + mp->m_sb.sb_sectsize); + if (error) + return error; + + if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) { + unsigned int log_sector_size = BBSIZE; + + if (xfs_sb_version_hassector(&mp->m_sb)) + log_sector_size = mp->m_sb.sb_logsectsize; + error = xfs_setsize_buftarg(mp->m_logdev_targp, + mp->m_sb.sb_blocksize, + log_sector_size); + if (error) + return error; + } + if (mp->m_rtdev_targp) { + error = xfs_setsize_buftarg(mp->m_rtdev_targp, + mp->m_sb.sb_blocksize, + mp->m_sb.sb_sectsize); + if (error) + return error; + } + + return 0; +} + /* * XFS AIL push thread support */ @@ -826,63 +870,21 @@ STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) { - bhv_vnode_t *vp; - - vp = kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); - if (unlikely(!vp)) - return NULL; - return vn_to_inode(vp); + return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); } STATIC void xfs_fs_destroy_inode( struct inode *inode) { - kmem_zone_free(xfs_vnode_zone, vn_from_inode(inode)); + kmem_zone_free(xfs_vnode_zone, inode); } STATIC void xfs_fs_inode_init_once( - kmem_zone_t *zonep, void *vnode) { - inode_init_once(vn_to_inode((bhv_vnode_t *)vnode)); -} - -STATIC int __init -xfs_init_zones(void) -{ - xfs_vnode_zone = kmem_zone_init_flags(sizeof(bhv_vnode_t), "xfs_vnode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, - xfs_fs_inode_init_once); - if (!xfs_vnode_zone) - goto out; - - xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); - if (!xfs_ioend_zone) - goto out_destroy_vnode_zone; - - xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, - xfs_ioend_zone); - if (!xfs_ioend_pool) - goto out_free_ioend_zone; - return 0; - - out_free_ioend_zone: - kmem_zone_destroy(xfs_ioend_zone); - out_destroy_vnode_zone: - kmem_zone_destroy(xfs_vnode_zone); - out: - return -ENOMEM; -} - -STATIC void -xfs_destroy_zones(void) -{ - mempool_destroy(xfs_ioend_pool); - kmem_zone_destroy(xfs_vnode_zone); - kmem_zone_destroy(xfs_ioend_zone); + inode_init_once((struct inode *)vnode); } /* @@ -987,7 +989,7 @@ void xfs_flush_inode( xfs_inode_t *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = VFS_I(ip); igrab(inode); xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); @@ -1012,7 +1014,7 @@ void xfs_flush_device( xfs_inode_t *ip) { - struct inode *inode = vn_to_inode(XFS_ITOV(ip)); + struct inode *inode = VFS_I(ip); igrab(inode); xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); @@ -1074,7 +1076,7 @@ xfssyncd( list_del(&work->w_list); if (work == &mp->m_sync_work) continue; - kmem_free(work, sizeof(struct bhv_vfs_sync_work)); + kmem_free(work); } } @@ -1082,18 +1084,76 @@ xfssyncd( } STATIC void +xfs_free_fsname( + struct xfs_mount *mp) +{ + kfree(mp->m_fsname); + kfree(mp->m_rtname); + kfree(mp->m_logname); +} + +STATIC void xfs_fs_put_super( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); + struct xfs_inode *rip = mp->m_rootip; + int unmount_event_flags = 0; int error; kthread_stop(mp->m_sync_task); xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); - error = xfs_unmount(mp, 0, NULL); - if (error) - printk("XFS: unmount got error=%d\n", error); + +#ifdef HAVE_DMAPI + if (mp->m_flags & XFS_MOUNT_DMAPI) { + unmount_event_flags = + (mp->m_dmevmask & (1 << DM_EVENT_UNMOUNT)) ? + 0 : DM_FLAGS_UNWANTED; + /* + * Ignore error from dmapi here, first unmount is not allowed + * to fail anyway, and second we wouldn't want to fail a + * unmount because of dmapi. + */ + XFS_SEND_PREUNMOUNT(mp, rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL, + NULL, NULL, 0, 0, unmount_event_flags); + } +#endif + + /* + * Blow away any referenced inode in the filestreams cache. + * This can and will cause log traffic as inodes go inactive + * here. + */ + xfs_filestream_unmount(mp); + + XFS_bflush(mp->m_ddev_targp); + error = xfs_unmount_flush(mp, 0); + WARN_ON(error); + + /* + * If we're forcing a shutdown, typically because of a media error, + * we want to make sure we invalidate dirty pages that belong to + * referenced vnodes as well. + */ + if (XFS_FORCED_SHUTDOWN(mp)) { + error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE); + ASSERT(error != EFSCORRUPTED); + } + + if (mp->m_flags & XFS_MOUNT_DMAPI) { + XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, + unmount_event_flags); + } + + xfs_unmountfs(mp); + xfs_freesb(mp); + xfs_icsb_destroy_counters(mp); + xfs_close_devices(mp); + xfs_qmops_put(mp); + xfs_dmops_put(mp); + xfs_free_fsname(mp); + kfree(mp); } STATIC void @@ -1216,14 +1276,54 @@ xfs_fs_remount( char *options) { struct xfs_mount *mp = XFS_M(sb); - struct xfs_mount_args *args = xfs_args_allocate(sb, 0); - int error; + substring_t args[MAX_OPT_ARGS]; + char *p; - error = xfs_parseargs(mp, options, args, 1); - if (!error) - error = xfs_mntupdate(mp, flags, args); - kmem_free(args, sizeof(*args)); - return -error; + while ((p = strsep(&options, ",")) != NULL) { + int token; + + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_barrier: + mp->m_flags |= XFS_MOUNT_BARRIER; + + /* + * Test if barriers are actually working if we can, + * else delay this check until the filesystem is + * marked writeable. + */ + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) + xfs_mountfs_check_barriers(mp); + break; + case Opt_nobarrier: + mp->m_flags &= ~XFS_MOUNT_BARRIER; + break; + default: + printk(KERN_INFO + "XFS: mount option \"%s\" not supported for remount\n", p); + return -EINVAL; + } + } + + /* rw/ro -> rw */ + if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { + mp->m_flags &= ~XFS_MOUNT_RDONLY; + if (mp->m_flags & XFS_MOUNT_BARRIER) + xfs_mountfs_check_barriers(mp); + } + + /* rw -> ro */ + if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + xfs_filestream_flush(mp); + xfs_sync(mp, SYNC_DATA_QUIESCE); + xfs_attr_quiesce(mp); + mp->m_flags |= XFS_MOUNT_RDONLY; + } + + return 0; } /* @@ -1300,6 +1400,245 @@ xfs_fs_setxquota( Q_XSETPQLIM), id, (caddr_t)fdq); } +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock has _not_ yet been read in. + */ +STATIC int +xfs_start_flags( + struct xfs_mount_args *ap, + struct xfs_mount *mp) +{ + int error; + + /* Values are in BBs */ + if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { + /* + * At this point the superblock has not been read + * in, therefore we do not know the block size. + * Before the mount call ends we will convert + * these to FSBs. + */ + mp->m_dalign = ap->sunit; + mp->m_swidth = ap->swidth; + } + + if (ap->logbufs != -1 && + ap->logbufs != 0 && + (ap->logbufs < XLOG_MIN_ICLOGS || + ap->logbufs > XLOG_MAX_ICLOGS)) { + cmn_err(CE_WARN, + "XFS: invalid logbufs value: %d [not %d-%d]", + ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); + return XFS_ERROR(EINVAL); + } + mp->m_logbufs = ap->logbufs; + if (ap->logbufsize != -1 && + ap->logbufsize != 0 && + (ap->logbufsize < XLOG_MIN_RECORD_BSIZE || + ap->logbufsize > XLOG_MAX_RECORD_BSIZE || + !is_power_of_2(ap->logbufsize))) { + cmn_err(CE_WARN, + "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + ap->logbufsize); + return XFS_ERROR(EINVAL); + } + + error = ENOMEM; + + mp->m_logbsize = ap->logbufsize; + mp->m_fsname_len = strlen(ap->fsname) + 1; + + mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL); + if (!mp->m_fsname) + goto out; + + if (ap->rtname[0]) { + mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL); + if (!mp->m_rtname) + goto out_free_fsname; + + } + + if (ap->logname[0]) { + mp->m_logname = kstrdup(ap->logname, GFP_KERNEL); + if (!mp->m_logname) + goto out_free_rtname; + } + + if (ap->flags & XFSMNT_WSYNC) + mp->m_flags |= XFS_MOUNT_WSYNC; +#if XFS_BIG_INUMS + if (ap->flags & XFSMNT_INO64) { + mp->m_flags |= XFS_MOUNT_INO64; + mp->m_inoadd = XFS_INO64_OFFSET; + } +#endif + if (ap->flags & XFSMNT_RETERR) + mp->m_flags |= XFS_MOUNT_RETERR; + if (ap->flags & XFSMNT_NOALIGN) + mp->m_flags |= XFS_MOUNT_NOALIGN; + if (ap->flags & XFSMNT_SWALLOC) + mp->m_flags |= XFS_MOUNT_SWALLOC; + if (ap->flags & XFSMNT_OSYNCISOSYNC) + mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; + if (ap->flags & XFSMNT_32BITINODES) + mp->m_flags |= XFS_MOUNT_32BITINODES; + + if (ap->flags & XFSMNT_IOSIZE) { + if (ap->iosizelog > XFS_MAX_IO_LOG || + ap->iosizelog < XFS_MIN_IO_LOG) { + cmn_err(CE_WARN, + "XFS: invalid log iosize: %d [not %d-%d]", + ap->iosizelog, XFS_MIN_IO_LOG, + XFS_MAX_IO_LOG); + return XFS_ERROR(EINVAL); + } + + mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; + mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; + } + + if (ap->flags & XFSMNT_IKEEP) + mp->m_flags |= XFS_MOUNT_IKEEP; + if (ap->flags & XFSMNT_DIRSYNC) + mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (ap->flags & XFSMNT_ATTR2) + mp->m_flags |= XFS_MOUNT_ATTR2; + if (ap->flags & XFSMNT_NOATTR2) + mp->m_flags |= XFS_MOUNT_NOATTR2; + + if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + + /* + * no recovery flag requires a read-only mount + */ + if (ap->flags & XFSMNT_NORECOVERY) { + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + cmn_err(CE_WARN, + "XFS: tried to mount a FS read-write without recovery!"); + return XFS_ERROR(EINVAL); + } + mp->m_flags |= XFS_MOUNT_NORECOVERY; + } + + if (ap->flags & XFSMNT_NOUUID) + mp->m_flags |= XFS_MOUNT_NOUUID; + if (ap->flags & XFSMNT_BARRIER) + mp->m_flags |= XFS_MOUNT_BARRIER; + else + mp->m_flags &= ~XFS_MOUNT_BARRIER; + + if (ap->flags2 & XFSMNT2_FILESTREAMS) + mp->m_flags |= XFS_MOUNT_FILESTREAMS; + + if (ap->flags & XFSMNT_DMAPI) + mp->m_flags |= XFS_MOUNT_DMAPI; + return 0; + + + out_free_rtname: + kfree(mp->m_rtname); + out_free_fsname: + kfree(mp->m_fsname); + out: + return error; +} + +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock _has_ now been read in. + */ +STATIC int +xfs_finish_flags( + struct xfs_mount_args *ap, + struct xfs_mount *mp) +{ + int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); + + /* Fail a mount where the logbuf is smaller then the log stripe */ + if (xfs_sb_version_haslogv2(&mp->m_sb)) { + if ((ap->logbufsize <= 0) && + (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { + mp->m_logbsize = mp->m_sb.sb_logsunit; + } else if (ap->logbufsize > 0 && + ap->logbufsize < mp->m_sb.sb_logsunit) { + cmn_err(CE_WARN, + "XFS: logbuf size must be greater than or equal to log stripe size"); + return XFS_ERROR(EINVAL); + } + } else { + /* Fail a mount if the logbuf is larger than 32K */ + if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) { + cmn_err(CE_WARN, + "XFS: logbuf size for version 1 logs must be 16K or 32K"); + return XFS_ERROR(EINVAL); + } + } + + /* + * mkfs'ed attr2 will turn on attr2 mount unless explicitly + * told by noattr2 to turn it off + */ + if (xfs_sb_version_hasattr2(&mp->m_sb) && + !(ap->flags & XFSMNT_NOATTR2)) + mp->m_flags |= XFS_MOUNT_ATTR2; + + /* + * prohibit r/w mounts of read-only filesystems + */ + if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { + cmn_err(CE_WARN, + "XFS: cannot mount a read-only filesystem as read-write"); + return XFS_ERROR(EROFS); + } + + /* + * check for shared mount. + */ + if (ap->flags & XFSMNT_SHARED) { + if (!xfs_sb_version_hasshared(&mp->m_sb)) + return XFS_ERROR(EINVAL); + + /* + * For IRIX 6.5, shared mounts must have the shared + * version bit set, have the persistent readonly + * field set, must be version 0 and can only be mounted + * read-only. + */ + if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) || + (mp->m_sb.sb_shared_vn != 0)) + return XFS_ERROR(EINVAL); + + mp->m_flags |= XFS_MOUNT_SHARED; + + /* + * Shared XFS V0 can't deal with DMI. Return EINVAL. + */ + if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI)) + return XFS_ERROR(EINVAL); + } + + if (ap->flags & XFSMNT_UQUOTA) { + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + if (ap->flags & XFSMNT_UQUOTAENF) + mp->m_qflags |= XFS_UQUOTA_ENFD; + } + + if (ap->flags & XFSMNT_GQUOTA) { + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + if (ap->flags & XFSMNT_GQUOTAENF) + mp->m_qflags |= XFS_OQUOTA_ENFD; + } else if (ap->flags & XFSMNT_PQUOTA) { + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + if (ap->flags & XFSMNT_PQUOTAENF) + mp->m_qflags |= XFS_OQUOTA_ENFD; + } + + return 0; +} + STATIC int xfs_fs_fill_super( struct super_block *sb, @@ -1308,11 +1647,21 @@ xfs_fs_fill_super( { struct inode *root; struct xfs_mount *mp = NULL; - struct xfs_mount_args *args = xfs_args_allocate(sb, silent); - int error; + struct xfs_mount_args *args; + int flags = 0, error = ENOMEM; - mp = xfs_mount_init(); + args = xfs_args_allocate(sb, silent); + if (!args) + return -ENOMEM; + mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); + if (!mp) + goto out_free_args; + + spin_lock_init(&mp->m_sb_lock); + mutex_init(&mp->m_ilock); + mutex_init(&mp->m_growlock); + atomic_set(&mp->m_active_trans, 0); INIT_LIST_HEAD(&mp->m_sync_list); spin_lock_init(&mp->m_sync_lock); init_waitqueue_head(&mp->m_wait_single_sync_task); @@ -1325,16 +1674,60 @@ xfs_fs_fill_super( error = xfs_parseargs(mp, (char *)data, args, 0); if (error) - goto fail_vfsop; + goto out_free_mp; sb_min_blocksize(sb, BBSIZE); + sb->s_xattr = xfs_xattr_handlers; sb->s_export_op = &xfs_export_operations; sb->s_qcop = &xfs_quotactl_operations; sb->s_op = &xfs_super_operations; - error = xfs_mount(mp, args, NULL); + error = xfs_dmops_get(mp, args); + if (error) + goto out_free_mp; + error = xfs_qmops_get(mp, args); + if (error) + goto out_put_dmops; + + if (args->flags & XFSMNT_QUIET) + flags |= XFS_MFSI_QUIET; + + error = xfs_open_devices(mp, args); + if (error) + goto out_put_qmops; + + if (xfs_icsb_init_counters(mp)) + mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; + + /* + * Setup flags based on mount(2) options and then the superblock + */ + error = xfs_start_flags(args, mp); + if (error) + goto out_free_fsname; + error = xfs_readsb(mp, flags); + if (error) + goto out_free_fsname; + error = xfs_finish_flags(args, mp); if (error) - goto fail_vfsop; + goto out_free_sb; + + error = xfs_setup_devices(mp); + if (error) + goto out_free_sb; + + if (mp->m_flags & XFS_MOUNT_BARRIER) + xfs_mountfs_check_barriers(mp); + + error = xfs_filestream_mount(mp); + if (error) + goto out_free_sb; + + error = xfs_mountfs(mp); + if (error) + goto out_filestream_unmount; + + XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname); sb->s_dirt = 1; sb->s_magic = XFS_SB_MAGIC; @@ -1344,7 +1737,7 @@ xfs_fs_fill_super( sb->s_time_gran = 1; set_posix_acl_flag(sb); - root = igrab(mp->m_rootip->i_vnode); + root = igrab(VFS_I(mp->m_rootip)); if (!root) { error = ENOENT; goto fail_unmount; @@ -1369,10 +1762,28 @@ xfs_fs_fill_super( xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); - kmem_free(args, sizeof(*args)); + kfree(args); return 0; -fail_vnrele: + out_filestream_unmount: + xfs_filestream_unmount(mp); + out_free_sb: + xfs_freesb(mp); + out_free_fsname: + xfs_free_fsname(mp); + xfs_icsb_destroy_counters(mp); + xfs_close_devices(mp); + out_put_qmops: + xfs_qmops_put(mp); + out_put_dmops: + xfs_dmops_put(mp); + out_free_mp: + kfree(mp); + out_free_args: + kfree(args); + return -error; + + fail_vnrele: if (sb->s_root) { dput(sb->s_root); sb->s_root = NULL; @@ -1380,12 +1791,20 @@ fail_vnrele: iput(root); } -fail_unmount: - xfs_unmount(mp, 0, NULL); + fail_unmount: + /* + * Blow away any referenced inode in the filestreams cache. + * This can and will cause log traffic as inodes go inactive + * here. + */ + xfs_filestream_unmount(mp); + + XFS_bflush(mp->m_ddev_targp); + error = xfs_unmount_flush(mp, 0); + WARN_ON(error); -fail_vfsop: - kmem_free(args, sizeof(*args)); - return -error; + xfs_unmountfs(mp); + goto out_free_sb; } STATIC int @@ -1430,9 +1849,235 @@ static struct file_system_type xfs_fs_type = { .fs_flags = FS_REQUIRES_DEV, }; +STATIC int __init +xfs_alloc_trace_bufs(void) +{ +#ifdef XFS_ALLOC_TRACE + xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_MAYFAIL); + if (!xfs_alloc_trace_buf) + goto out; +#endif +#ifdef XFS_BMAP_TRACE + xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_MAYFAIL); + if (!xfs_bmap_trace_buf) + goto out_free_alloc_trace; +#endif +#ifdef XFS_BMBT_TRACE + xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL); + if (!xfs_bmbt_trace_buf) + goto out_free_bmap_trace; +#endif +#ifdef XFS_ATTR_TRACE + xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL); + if (!xfs_attr_trace_buf) + goto out_free_bmbt_trace; +#endif +#ifdef XFS_DIR2_TRACE + xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_MAYFAIL); + if (!xfs_dir2_trace_buf) + goto out_free_attr_trace; +#endif + + return 0; + +#ifdef XFS_DIR2_TRACE + out_free_attr_trace: +#endif +#ifdef XFS_ATTR_TRACE + ktrace_free(xfs_attr_trace_buf); + out_free_bmbt_trace: +#endif +#ifdef XFS_BMBT_TRACE + ktrace_free(xfs_bmbt_trace_buf); + out_free_bmap_trace: +#endif +#ifdef XFS_BMAP_TRACE + ktrace_free(xfs_bmap_trace_buf); + out_free_alloc_trace: +#endif +#ifdef XFS_ALLOC_TRACE + ktrace_free(xfs_alloc_trace_buf); + out: +#endif + return -ENOMEM; +} + +STATIC void +xfs_free_trace_bufs(void) +{ +#ifdef XFS_DIR2_TRACE + ktrace_free(xfs_dir2_trace_buf); +#endif +#ifdef XFS_ATTR_TRACE + ktrace_free(xfs_attr_trace_buf); +#endif +#ifdef XFS_BMBT_TRACE + ktrace_free(xfs_bmbt_trace_buf); +#endif +#ifdef XFS_BMAP_TRACE + ktrace_free(xfs_bmap_trace_buf); +#endif +#ifdef XFS_ALLOC_TRACE + ktrace_free(xfs_alloc_trace_buf); +#endif +} + +STATIC int __init +xfs_init_zones(void) +{ + xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode", + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | + KM_ZONE_SPREAD, + xfs_fs_inode_init_once); + if (!xfs_vnode_zone) + goto out; + + xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); + if (!xfs_ioend_zone) + goto out_destroy_vnode_zone; + + xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, + xfs_ioend_zone); + if (!xfs_ioend_pool) + goto out_destroy_ioend_zone; + + xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), + "xfs_log_ticket"); + if (!xfs_log_ticket_zone) + goto out_destroy_ioend_pool; + + xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), + "xfs_bmap_free_item"); + if (!xfs_bmap_free_item_zone) + goto out_destroy_log_ticket_zone; + xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), + "xfs_btree_cur"); + if (!xfs_btree_cur_zone) + goto out_destroy_bmap_free_item_zone; + + xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), + "xfs_da_state"); + if (!xfs_da_state_zone) + goto out_destroy_btree_cur_zone; + + xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); + if (!xfs_dabuf_zone) + goto out_destroy_da_state_zone; + + xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); + if (!xfs_ifork_zone) + goto out_destroy_dabuf_zone; + + xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); + if (!xfs_trans_zone) + goto out_destroy_ifork_zone; + + /* + * The size of the zone allocated buf log item is the maximum + * size possible under XFS. This wastes a little bit of memory, + * but it is much faster. + */ + xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) + + (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) / + NBWORD) * sizeof(int))), "xfs_buf_item"); + if (!xfs_buf_item_zone) + goto out_destroy_trans_zone; + + xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) + + ((XFS_EFD_MAX_FAST_EXTENTS - 1) * + sizeof(xfs_extent_t))), "xfs_efd_item"); + if (!xfs_efd_zone) + goto out_destroy_buf_item_zone; + + xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) + + ((XFS_EFI_MAX_FAST_EXTENTS - 1) * + sizeof(xfs_extent_t))), "xfs_efi_item"); + if (!xfs_efi_zone) + goto out_destroy_efd_zone; + + xfs_inode_zone = + kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | + KM_ZONE_SPREAD, NULL); + if (!xfs_inode_zone) + goto out_destroy_efi_zone; + + xfs_ili_zone = + kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", + KM_ZONE_SPREAD, NULL); + if (!xfs_ili_zone) + goto out_destroy_inode_zone; + +#ifdef CONFIG_XFS_POSIX_ACL + xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl"); + if (!xfs_acl_zone) + goto out_destroy_ili_zone; +#endif + + return 0; + +#ifdef CONFIG_XFS_POSIX_ACL + out_destroy_ili_zone: +#endif + kmem_zone_destroy(xfs_ili_zone); + out_destroy_inode_zone: + kmem_zone_destroy(xfs_inode_zone); + out_destroy_efi_zone: + kmem_zone_destroy(xfs_efi_zone); + out_destroy_efd_zone: + kmem_zone_destroy(xfs_efd_zone); + out_destroy_buf_item_zone: + kmem_zone_destroy(xfs_buf_item_zone); + out_destroy_trans_zone: + kmem_zone_destroy(xfs_trans_zone); + out_destroy_ifork_zone: + kmem_zone_destroy(xfs_ifork_zone); + out_destroy_dabuf_zone: + kmem_zone_destroy(xfs_dabuf_zone); + out_destroy_da_state_zone: + kmem_zone_destroy(xfs_da_state_zone); + out_destroy_btree_cur_zone: + kmem_zone_destroy(xfs_btree_cur_zone); + out_destroy_bmap_free_item_zone: + kmem_zone_destroy(xfs_bmap_free_item_zone); + out_destroy_log_ticket_zone: + kmem_zone_destroy(xfs_log_ticket_zone); + out_destroy_ioend_pool: + mempool_destroy(xfs_ioend_pool); + out_destroy_ioend_zone: + kmem_zone_destroy(xfs_ioend_zone); + out_destroy_vnode_zone: + kmem_zone_destroy(xfs_vnode_zone); + out: + return -ENOMEM; +} + +STATIC void +xfs_destroy_zones(void) +{ +#ifdef CONFIG_XFS_POSIX_ACL + kmem_zone_destroy(xfs_acl_zone); +#endif + kmem_zone_destroy(xfs_ili_zone); + kmem_zone_destroy(xfs_inode_zone); + kmem_zone_destroy(xfs_efi_zone); + kmem_zone_destroy(xfs_efd_zone); + kmem_zone_destroy(xfs_buf_item_zone); + kmem_zone_destroy(xfs_trans_zone); + kmem_zone_destroy(xfs_ifork_zone); + kmem_zone_destroy(xfs_dabuf_zone); + kmem_zone_destroy(xfs_da_state_zone); + kmem_zone_destroy(xfs_btree_cur_zone); + kmem_zone_destroy(xfs_bmap_free_item_zone); + kmem_zone_destroy(xfs_log_ticket_zone); + mempool_destroy(xfs_ioend_pool); + kmem_zone_destroy(xfs_ioend_zone); + kmem_zone_destroy(xfs_vnode_zone); + +} STATIC int __init -init_xfs_fs( void ) +init_xfs_fs(void) { int error; static char message[] __initdata = KERN_INFO \ @@ -1441,42 +2086,73 @@ init_xfs_fs( void ) printk(message); ktrace_init(64); + vn_init(); + xfs_dir_startup(); error = xfs_init_zones(); - if (error < 0) - goto undo_zones; + if (error) + goto out; + + error = xfs_alloc_trace_bufs(); + if (error) + goto out_destroy_zones; + + error = xfs_mru_cache_init(); + if (error) + goto out_free_trace_buffers; + + error = xfs_filestream_init(); + if (error) + goto out_mru_cache_uninit; error = xfs_buf_init(); - if (error < 0) - goto undo_buffers; + if (error) + goto out_filestream_uninit; + + error = xfs_init_procfs(); + if (error) + goto out_buf_terminate; + + error = xfs_sysctl_register(); + if (error) + goto out_cleanup_procfs; - vn_init(); - xfs_init(); - uuid_init(); vfs_initquota(); error = register_filesystem(&xfs_fs_type); if (error) - goto undo_register; + goto out_sysctl_unregister; return 0; -undo_register: + out_sysctl_unregister: + xfs_sysctl_unregister(); + out_cleanup_procfs: + xfs_cleanup_procfs(); + out_buf_terminate: xfs_buf_terminate(); - -undo_buffers: + out_filestream_uninit: + xfs_filestream_uninit(); + out_mru_cache_uninit: + xfs_mru_cache_uninit(); + out_free_trace_buffers: + xfs_free_trace_bufs(); + out_destroy_zones: xfs_destroy_zones(); - -undo_zones: + out: return error; } STATIC void __exit -exit_xfs_fs( void ) +exit_xfs_fs(void) { vfs_exitquota(); unregister_filesystem(&xfs_fs_type); - xfs_cleanup(); + xfs_sysctl_unregister(); + xfs_cleanup_procfs(); xfs_buf_terminate(); + xfs_filestream_uninit(); + xfs_mru_cache_uninit(); + xfs_free_trace_bufs(); xfs_destroy_zones(); ktrace_uninit(); } diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 3efb7c6d330..fe2ef4e6a0f 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -101,18 +101,13 @@ struct block_device; extern __uint64_t xfs_max_file_offset(unsigned int); -extern void xfs_initialize_vnode(struct xfs_mount *mp, bhv_vnode_t *vp, - struct xfs_inode *ip); - extern void xfs_flush_inode(struct xfs_inode *); extern void xfs_flush_device(struct xfs_inode *); -extern int xfs_blkdev_get(struct xfs_mount *, const char *, - struct block_device **); -extern void xfs_blkdev_put(struct block_device *); extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); extern const struct export_operations xfs_export_operations; +extern struct xattr_handler *xfs_xattr_handlers[]; #define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info)) diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index bb997d75c05..7dacb5bbde3 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -259,15 +259,17 @@ static ctl_table xfs_root_table[] = { {} }; -void +int xfs_sysctl_register(void) { xfs_table_header = register_sysctl_table(xfs_root_table); + if (!xfs_table_header) + return -ENOMEM; + return 0; } void xfs_sysctl_unregister(void) { - if (xfs_table_header) - unregister_sysctl_table(xfs_table_header); + unregister_sysctl_table(xfs_table_header); } diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h index 98b97e399d6..4aadb8056c3 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ b/fs/xfs/linux-2.6/xfs_sysctl.h @@ -93,10 +93,10 @@ enum { extern xfs_param_t xfs_params; #ifdef CONFIG_SYSCTL -extern void xfs_sysctl_register(void); +extern int xfs_sysctl_register(void); extern void xfs_sysctl_unregister(void); #else -# define xfs_sysctl_register() do { } while (0) +# define xfs_sysctl_register() (0) # define xfs_sysctl_unregister() do { } while (0) #endif /* CONFIG_SYSCTL */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c index bc7afe00733..b52528bbbff 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ b/fs/xfs/linux-2.6/xfs_vnode.c @@ -33,7 +33,7 @@ /* - * Dedicated vnode inactive/reclaim sync semaphores. + * Dedicated vnode inactive/reclaim sync wait queues. * Prime number of hash buckets since address is used as the key. */ #define NVSYNC 37 @@ -82,74 +82,6 @@ vn_ioerror( xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); } -/* - * Revalidate the Linux inode from the XFS inode. - * Note: i_size _not_ updated; we must hold the inode - * semaphore when doing that - callers responsibility. - */ -int -vn_revalidate( - bhv_vnode_t *vp) -{ - struct inode *inode = vn_to_inode(vp); - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - unsigned long xflags; - - xfs_itrace_entry(ip); - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - xfs_ilock(ip, XFS_ILOCK_SHARED); - inode->i_mode = ip->i_d.di_mode; - inode->i_uid = ip->i_d.di_uid; - inode->i_gid = ip->i_d.di_gid; - inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec; - inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; - inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; - inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; - - xflags = xfs_ip2xflags(ip); - if (xflags & XFS_XFLAG_IMMUTABLE) - inode->i_flags |= S_IMMUTABLE; - else - inode->i_flags &= ~S_IMMUTABLE; - if (xflags & XFS_XFLAG_APPEND) - inode->i_flags |= S_APPEND; - else - inode->i_flags &= ~S_APPEND; - if (xflags & XFS_XFLAG_SYNC) - inode->i_flags |= S_SYNC; - else - inode->i_flags &= ~S_SYNC; - if (xflags & XFS_XFLAG_NOATIME) - inode->i_flags |= S_NOATIME; - else - inode->i_flags &= ~S_NOATIME; - xfs_iunlock(ip, XFS_ILOCK_SHARED); - - xfs_iflags_clear(ip, XFS_IMODIFIED); - return 0; -} - -/* - * Add a reference to a referenced vnode. - */ -bhv_vnode_t * -vn_hold( - bhv_vnode_t *vp) -{ - struct inode *inode; - - XFS_STATS_INC(vn_hold); - - inode = igrab(vn_to_inode(vp)); - ASSERT(inode); - - return vp; -} - #ifdef XFS_INODE_TRACE /* @@ -158,7 +90,7 @@ vn_hold( */ static inline int xfs_icount(struct xfs_inode *ip) { - bhv_vnode_t *vp = XFS_ITOV_NULL(ip); + struct inode *vp = VFS_I(ip); if (vp) return vn_count(vp); diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 25eb2a9e8d9..683ce16210f 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -19,24 +19,9 @@ #define __XFS_VNODE_H__ struct file; -struct bhv_vattr; struct xfs_iomap; struct attrlist_cursor_kern; -typedef struct inode bhv_vnode_t; - -/* - * Vnode to Linux inode mapping. - */ -static inline bhv_vnode_t *vn_from_inode(struct inode *inode) -{ - return inode; -} -static inline struct inode *vn_to_inode(bhv_vnode_t *vnode) -{ - return vnode; -} - /* * Return values for xfs_inactive. A return value of * VN_INACTIVE_NOCACHE implies that the file system behavior @@ -66,87 +51,8 @@ static inline struct inode *vn_to_inode(bhv_vnode_t *vnode) Prevent VM access to the pages until the operation completes. */ -/* - * Vnode attributes. va_mask indicates those attributes the caller - * wants to set or extract. - */ -typedef struct bhv_vattr { - int va_mask; /* bit-mask of attributes present */ - mode_t va_mode; /* file access mode and type */ - xfs_nlink_t va_nlink; /* number of references to file */ - uid_t va_uid; /* owner user id */ - gid_t va_gid; /* owner group id */ - xfs_ino_t va_nodeid; /* file id */ - xfs_off_t va_size; /* file size in bytes */ - u_long va_blocksize; /* blocksize preferred for i/o */ - struct timespec va_atime; /* time of last access */ - struct timespec va_mtime; /* time of last modification */ - struct timespec va_ctime; /* time file changed */ - u_int va_gen; /* generation number of file */ - xfs_dev_t va_rdev; /* device the special file represents */ - __int64_t va_nblocks; /* number of blocks allocated */ - u_long va_xflags; /* random extended file flags */ - u_long va_extsize; /* file extent size */ - u_long va_nextents; /* number of extents in file */ - u_long va_anextents; /* number of attr extents in file */ - prid_t va_projid; /* project id */ -} bhv_vattr_t; - -/* - * setattr or getattr attributes - */ -#define XFS_AT_TYPE 0x00000001 -#define XFS_AT_MODE 0x00000002 -#define XFS_AT_UID 0x00000004 -#define XFS_AT_GID 0x00000008 -#define XFS_AT_FSID 0x00000010 -#define XFS_AT_NODEID 0x00000020 -#define XFS_AT_NLINK 0x00000040 -#define XFS_AT_SIZE 0x00000080 -#define XFS_AT_ATIME 0x00000100 -#define XFS_AT_MTIME 0x00000200 -#define XFS_AT_CTIME 0x00000400 -#define XFS_AT_RDEV 0x00000800 -#define XFS_AT_BLKSIZE 0x00001000 -#define XFS_AT_NBLOCKS 0x00002000 -#define XFS_AT_VCODE 0x00004000 -#define XFS_AT_MAC 0x00008000 -#define XFS_AT_UPDATIME 0x00010000 -#define XFS_AT_UPDMTIME 0x00020000 -#define XFS_AT_UPDCTIME 0x00040000 -#define XFS_AT_ACL 0x00080000 -#define XFS_AT_CAP 0x00100000 -#define XFS_AT_INF 0x00200000 -#define XFS_AT_XFLAGS 0x00400000 -#define XFS_AT_EXTSIZE 0x00800000 -#define XFS_AT_NEXTENTS 0x01000000 -#define XFS_AT_ANEXTENTS 0x02000000 -#define XFS_AT_PROJID 0x04000000 -#define XFS_AT_SIZE_NOPERM 0x08000000 -#define XFS_AT_GENCOUNT 0x10000000 - -#define XFS_AT_ALL (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ - XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ - XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ - XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|XFS_AT_MAC|\ - XFS_AT_ACL|XFS_AT_CAP|XFS_AT_INF|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|\ - XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_PROJID|XFS_AT_GENCOUNT) - -#define XFS_AT_STAT (XFS_AT_TYPE|XFS_AT_MODE|XFS_AT_UID|XFS_AT_GID|\ - XFS_AT_FSID|XFS_AT_NODEID|XFS_AT_NLINK|XFS_AT_SIZE|\ - XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME|XFS_AT_RDEV|\ - XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_PROJID) - -#define XFS_AT_TIMES (XFS_AT_ATIME|XFS_AT_MTIME|XFS_AT_CTIME) - -#define XFS_AT_UPDTIMES (XFS_AT_UPDATIME|XFS_AT_UPDMTIME|XFS_AT_UPDCTIME) - -#define XFS_AT_NOSET (XFS_AT_NLINK|XFS_AT_RDEV|XFS_AT_FSID|XFS_AT_NODEID|\ - XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\ - XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT) extern void vn_init(void); -extern int vn_revalidate(bhv_vnode_t *); /* * Yeah, these don't take vnode anymore at all, all this should be @@ -156,57 +62,52 @@ extern void vn_iowait(struct xfs_inode *ip); extern void vn_iowake(struct xfs_inode *ip); extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); -static inline int vn_count(bhv_vnode_t *vp) +static inline int vn_count(struct inode *vp) { - return atomic_read(&vn_to_inode(vp)->i_count); + return atomic_read(&vp->i_count); } -/* - * Vnode reference counting functions (and macros for compatibility). - */ -extern bhv_vnode_t *vn_hold(bhv_vnode_t *); +#define IHOLD(ip) \ +do { \ + ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ + atomic_inc(&(VFS_I(ip)->i_count)); \ + xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ +} while (0) -#if defined(XFS_INODE_TRACE) -#define VN_HOLD(vp) \ - ((void)vn_hold(vp), \ - xfs_itrace_hold(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address)) -#define VN_RELE(vp) \ - (xfs_itrace_rele(xfs_vtoi(vp), __FILE__, __LINE__, (inst_t *)__return_address), \ - iput(vn_to_inode(vp))) -#else -#define VN_HOLD(vp) ((void)vn_hold(vp)) -#define VN_RELE(vp) (iput(vn_to_inode(vp))) -#endif +#define IRELE(ip) \ +do { \ + xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ + iput(VFS_I(ip)); \ +} while (0) -static inline bhv_vnode_t *vn_grab(bhv_vnode_t *vp) +static inline struct inode *vn_grab(struct inode *vp) { - struct inode *inode = igrab(vn_to_inode(vp)); - return inode ? vn_from_inode(inode) : NULL; + return igrab(vp); } /* * Dealing with bad inodes */ -static inline int VN_BAD(bhv_vnode_t *vp) +static inline int VN_BAD(struct inode *vp) { - return is_bad_inode(vn_to_inode(vp)); + return is_bad_inode(vp); } /* * Extracting atime values in various formats */ -static inline void vn_atime_to_bstime(bhv_vnode_t *vp, xfs_bstime_t *bs_atime) +static inline void vn_atime_to_bstime(struct inode *vp, xfs_bstime_t *bs_atime) { bs_atime->tv_sec = vp->i_atime.tv_sec; bs_atime->tv_nsec = vp->i_atime.tv_nsec; } -static inline void vn_atime_to_timespec(bhv_vnode_t *vp, struct timespec *ts) +static inline void vn_atime_to_timespec(struct inode *vp, struct timespec *ts) { *ts = vp->i_atime; } -static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) +static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt) { *tt = vp->i_atime.tv_sec; } @@ -214,20 +115,11 @@ static inline void vn_atime_to_time_t(bhv_vnode_t *vp, time_t *tt) /* * Some useful predicates. */ -#define VN_MAPPED(vp) mapping_mapped(vn_to_inode(vp)->i_mapping) -#define VN_CACHED(vp) (vn_to_inode(vp)->i_mapping->nrpages) -#define VN_DIRTY(vp) mapping_tagged(vn_to_inode(vp)->i_mapping, \ +#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping) +#define VN_CACHED(vp) (vp->i_mapping->nrpages) +#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \ PAGECACHE_TAG_DIRTY) -/* - * Flags to vop_setattr/getattr. - */ -#define ATTR_UTIME 0x01 /* non-default utime(2) request */ -#define ATTR_DMI 0x08 /* invocation from a DMI function */ -#define ATTR_LAZY 0x80 /* set/get attributes lazily */ -#define ATTR_NONBLOCK 0x100 /* return EAGAIN if operation would block */ -#define ATTR_NOLOCK 0x200 /* Don't grab any conflicting locks */ -#define ATTR_NOSIZETOK 0x400 /* Don't get the SIZE token */ /* * Tracking vnode activity. diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c new file mode 100644 index 00000000000..964621fde6e --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_xattr.c @@ -0,0 +1,330 @@ +/* + * Copyright (C) 2008 Christoph Hellwig. + * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "xfs.h" +#include "xfs_da_btree.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_acl.h" +#include "xfs_vnodeops.h" + +#include <linux/posix_acl_xattr.h> +#include <linux/xattr.h> + + +/* + * ACL handling. Should eventually be moved into xfs_acl.c + */ + +static int +xfs_decode_acl(const char *name) +{ + if (strcmp(name, "posix_acl_access") == 0) + return _ACL_TYPE_ACCESS; + else if (strcmp(name, "posix_acl_default") == 0) + return _ACL_TYPE_DEFAULT; + return -EINVAL; +} + +/* + * Get system extended attributes which at the moment only + * includes Posix ACLs. + */ +static int +xfs_xattr_system_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + int acl; + + acl = xfs_decode_acl(name); + if (acl < 0) + return acl; + + return xfs_acl_vget(inode, buffer, size, acl); +} + +static int +xfs_xattr_system_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + int acl; + + acl = xfs_decode_acl(name); + if (acl < 0) + return acl; + if (flags & XATTR_CREATE) + return -EINVAL; + + if (!value) + return xfs_acl_vremove(inode, acl); + + return xfs_acl_vset(inode, (void *)value, size, acl); +} + +static struct xattr_handler xfs_xattr_system_handler = { + .prefix = XATTR_SYSTEM_PREFIX, + .get = xfs_xattr_system_get, + .set = xfs_xattr_system_set, +}; + + +/* + * Real xattr handling. The only difference between the namespaces is + * a flag passed to the low-level attr code. + */ + +static int +__xfs_xattr_get(struct inode *inode, const char *name, + void *value, size_t size, int xflags) +{ + struct xfs_inode *ip = XFS_I(inode); + int error, asize = size; + + if (strcmp(name, "") == 0) + return -EINVAL; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (!size) { + xflags |= ATTR_KERNOVAL; + value = NULL; + } + + error = -xfs_attr_get(ip, name, value, &asize, xflags); + if (error) + return error; + return asize; +} + +static int +__xfs_xattr_set(struct inode *inode, const char *name, const void *value, + size_t size, int flags, int xflags) +{ + struct xfs_inode *ip = XFS_I(inode); + + if (strcmp(name, "") == 0) + return -EINVAL; + + /* Convert Linux syscall to XFS internal ATTR flags */ + if (flags & XATTR_CREATE) + xflags |= ATTR_CREATE; + if (flags & XATTR_REPLACE) + xflags |= ATTR_REPLACE; + + if (!value) + return -xfs_attr_remove(ip, name, xflags); + return -xfs_attr_set(ip, name, (void *)value, size, xflags); +} + +static int +xfs_xattr_user_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + return __xfs_xattr_get(inode, name, value, size, 0); +} + +static int +xfs_xattr_user_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + return __xfs_xattr_set(inode, name, value, size, flags, 0); +} + +static struct xattr_handler xfs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .get = xfs_xattr_user_get, + .set = xfs_xattr_user_set, +}; + + +static int +xfs_xattr_trusted_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT); +} + +static int +xfs_xattr_trusted_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT); +} + +static struct xattr_handler xfs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .get = xfs_xattr_trusted_get, + .set = xfs_xattr_trusted_set, +}; + + +static int +xfs_xattr_secure_get(struct inode *inode, const char *name, + void *value, size_t size) +{ + return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE); +} + +static int +xfs_xattr_secure_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE); +} + +static struct xattr_handler xfs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .get = xfs_xattr_secure_get, + .set = xfs_xattr_secure_set, +}; + + +struct xattr_handler *xfs_xattr_handlers[] = { + &xfs_xattr_user_handler, + &xfs_xattr_trusted_handler, + &xfs_xattr_security_handler, + &xfs_xattr_system_handler, + NULL +}; + +static unsigned int xfs_xattr_prefix_len(int flags) +{ + if (flags & XFS_ATTR_SECURE) + return sizeof("security"); + else if (flags & XFS_ATTR_ROOT) + return sizeof("trusted"); + else + return sizeof("user"); +} + +static const char *xfs_xattr_prefix(int flags) +{ + if (flags & XFS_ATTR_SECURE) + return xfs_xattr_security_handler.prefix; + else if (flags & XFS_ATTR_ROOT) + return xfs_xattr_trusted_handler.prefix; + else + return xfs_xattr_user_handler.prefix; +} + +static int +xfs_xattr_put_listent(struct xfs_attr_list_context *context, int flags, + char *name, int namelen, int valuelen, char *value) +{ + unsigned int prefix_len = xfs_xattr_prefix_len(flags); + char *offset; + int arraytop; + + ASSERT(context->count >= 0); + + /* + * Only show root namespace entries if we are actually allowed to + * see them. + */ + if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN)) + return 0; + + arraytop = context->count + prefix_len + namelen + 1; + if (arraytop > context->firstu) { + context->count = -1; /* insufficient space */ + return 1; + } + offset = (char *)context->alist + context->count; + strncpy(offset, xfs_xattr_prefix(flags), prefix_len); + offset += prefix_len; + strncpy(offset, name, namelen); /* real name */ + offset += namelen; + *offset = '\0'; + context->count += prefix_len + namelen + 1; + return 0; +} + +static int +xfs_xattr_put_listent_sizes(struct xfs_attr_list_context *context, int flags, + char *name, int namelen, int valuelen, char *value) +{ + context->count += xfs_xattr_prefix_len(flags) + namelen + 1; + return 0; +} + +static int +list_one_attr(const char *name, const size_t len, void *data, + size_t size, ssize_t *result) +{ + char *p = data + *result; + + *result += len; + if (!size) + return 0; + if (*result > size) + return -ERANGE; + + strcpy(p, name); + return 0; +} + +ssize_t +xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size) +{ + struct xfs_attr_list_context context; + struct attrlist_cursor_kern cursor = { 0 }; + struct inode *inode = dentry->d_inode; + int error; + + /* + * First read the regular on-disk attributes. + */ + memset(&context, 0, sizeof(context)); + context.dp = XFS_I(inode); + context.cursor = &cursor; + context.resynch = 1; + context.alist = data; + context.bufsize = size; + context.firstu = context.bufsize; + + if (size) + context.put_listent = xfs_xattr_put_listent; + else + context.put_listent = xfs_xattr_put_listent_sizes; + + xfs_attr_list_int(&context); + if (context.count < 0) + return -ERANGE; + + /* + * Then add the two synthetic ACL attributes. + */ + if (xfs_acl_vhasacl_access(inode)) { + error = list_one_attr(POSIX_ACL_XATTR_ACCESS, + strlen(POSIX_ACL_XATTR_ACCESS) + 1, + data, size, &context.count); + if (error) + return error; + } + + if (xfs_acl_vhasacl_default(inode)) { + error = list_one_attr(POSIX_ACL_XATTR_DEFAULT, + strlen(POSIX_ACL_XATTR_DEFAULT) + 1, + data, size, &context.count); + if (error) + return error; + } + + return context.count; +} diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index 85df3288efd..f2705f2fd43 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c @@ -101,11 +101,18 @@ xfs_qm_dqinit( if (brandnewdquot) { dqp->dq_flnext = dqp->dq_flprev = dqp; mutex_init(&dqp->q_qlock); - initnsema(&dqp->q_flock, 1, "fdq"); sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq"); + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&dqp->q_flush); + complete(&dqp->q_flush); + #ifdef XFS_DQUOT_TRACE - dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_SLEEP); + dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS); xfs_dqtrace_entry(dqp, "DQINIT"); #endif } else { @@ -150,7 +157,6 @@ xfs_qm_dqdestroy( ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp)); mutex_destroy(&dqp->q_qlock); - freesema(&dqp->q_flock); sv_destroy(&dqp->q_pinwait); #ifdef XFS_DQUOT_TRACE @@ -431,7 +437,7 @@ xfs_qm_dqalloc( * when it unlocks the inode. Since we want to keep the quota * inode around, we bump the vnode ref count now. */ - VN_HOLD(XFS_ITOV(quotip)); + IHOLD(quotip); xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); nmaps = 1; @@ -1211,7 +1217,7 @@ xfs_qm_dqflush( int error; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); xfs_dqtrace_entry(dqp, "DQFLUSH"); /* @@ -1348,34 +1354,18 @@ xfs_qm_dqflush_done( xfs_dqfunlock(dqp); } - -int -xfs_qm_dqflock_nowait( - xfs_dquot_t *dqp) -{ - int locked; - - locked = cpsema(&((dqp)->q_flock)); - - /* XXX ifdef these out */ - if (locked) - (dqp)->dq_flags |= XFS_DQ_FLOCKED; - return (locked); -} - - int xfs_qm_dqlock_nowait( xfs_dquot_t *dqp) { - return (mutex_trylock(&((dqp)->q_qlock))); + return mutex_trylock(&dqp->q_qlock); } void xfs_dqlock( xfs_dquot_t *dqp) { - mutex_lock(&(dqp->q_qlock)); + mutex_lock(&dqp->q_qlock); } void @@ -1435,8 +1425,7 @@ xfs_dqlock2( /* ARGSUSED */ int xfs_qm_dqpurge( - xfs_dquot_t *dqp, - uint flags) + xfs_dquot_t *dqp) { xfs_dqhash_t *thishash; xfs_mount_t *mp = dqp->q_mount; @@ -1469,7 +1458,7 @@ xfs_qm_dqpurge( * if we're turning off quotas. Basically, we need this flush * lock, and are willing to block on it. */ - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { /* * Block on the flush lock after nudging dquot buffer, * if it is incore. diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h index 5c371a92e3e..8958d0faf8d 100644 --- a/fs/xfs/quota/xfs_dquot.h +++ b/fs/xfs/quota/xfs_dquot.h @@ -82,7 +82,7 @@ typedef struct xfs_dquot { xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */ xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */ mutex_t q_qlock; /* quota lock */ - sema_t q_flock; /* flush lock */ + struct completion q_flush; /* flush completion queue */ uint q_pincount; /* pin count for this dquot */ sv_t q_pinwait; /* sync var for pinning */ #ifdef XFS_DQUOT_TRACE @@ -113,17 +113,25 @@ XFS_DQ_IS_LOCKED(xfs_dquot_t *dqp) /* - * The following three routines simply manage the q_flock - * semaphore embedded in the dquot. This semaphore synchronizes - * processes attempting to flush the in-core dquot back to disk. + * Manage the q_flush completion queue embedded in the dquot. This completion + * queue synchronizes processes attempting to flush the in-core dquot back to + * disk. */ -#define xfs_dqflock(dqp) { psema(&((dqp)->q_flock), PINOD | PRECALC);\ - (dqp)->dq_flags |= XFS_DQ_FLOCKED; } -#define xfs_dqfunlock(dqp) { ASSERT(issemalocked(&((dqp)->q_flock))); \ - vsema(&((dqp)->q_flock)); \ - (dqp)->dq_flags &= ~(XFS_DQ_FLOCKED); } +static inline void xfs_dqflock(xfs_dquot_t *dqp) +{ + wait_for_completion(&dqp->q_flush); +} + +static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp) +{ + return try_wait_for_completion(&dqp->q_flush); +} + +static inline void xfs_dqfunlock(xfs_dquot_t *dqp) +{ + complete(&dqp->q_flush); +} -#define XFS_DQ_IS_FLUSH_LOCKED(dqp) (issemalocked(&((dqp)->q_flock))) #define XFS_DQ_IS_ON_FREELIST(dqp) ((dqp)->dq_flnext != (dqp)) #define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY) #define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER) @@ -164,10 +172,9 @@ extern void xfs_qm_dqprint(xfs_dquot_t *); extern void xfs_qm_dqdestroy(xfs_dquot_t *); extern int xfs_qm_dqflush(xfs_dquot_t *, uint); -extern int xfs_qm_dqpurge(xfs_dquot_t *, uint); +extern int xfs_qm_dqpurge(xfs_dquot_t *); extern void xfs_qm_dqunpin_wait(xfs_dquot_t *); extern int xfs_qm_dqlock_nowait(xfs_dquot_t *); -extern int xfs_qm_dqflock_nowait(xfs_dquot_t *); extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp); extern void xfs_qm_adjust_dqtimers(xfs_mount_t *, xfs_disk_dquot_t *); diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c index 36e05ca7841..f028644caa5 100644 --- a/fs/xfs/quota/xfs_dquot_item.c +++ b/fs/xfs/quota/xfs_dquot_item.c @@ -151,7 +151,7 @@ xfs_qm_dquot_logitem_push( dqp = logitem->qli_dquot; ASSERT(XFS_DQ_IS_LOCKED(dqp)); - ASSERT(XFS_DQ_IS_FLUSH_LOCKED(dqp)); + ASSERT(!completion_done(&dqp->q_flush)); /* * Since we were able to lock the dquot's flush lock and @@ -245,7 +245,7 @@ xfs_qm_dquot_logitem_pushbuf( * inode flush completed and the inode was taken off the AIL. * So, just get out. */ - if (!issemalocked(&(dqp->q_flock)) || + if (completion_done(&dqp->q_flush) || ((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) { qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); @@ -258,7 +258,7 @@ xfs_qm_dquot_logitem_pushbuf( if (bp != NULL) { if (XFS_BUF_ISDELAYWRITE(bp)) { dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) && - issemalocked(&(dqp->q_flock))); + !completion_done(&dqp->q_flush)); qip->qli_pushbuf_flag = 0; xfs_dqunlock(dqp); @@ -317,7 +317,7 @@ xfs_qm_dquot_logitem_trylock( return (XFS_ITEM_LOCKED); retval = XFS_ITEM_SUCCESS; - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { /* * The dquot is already being flushed. It may have been * flushed delayed write, however, and we don't want to @@ -576,8 +576,8 @@ xfs_qm_qoffend_logitem_committed( * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(qfs->qql_item.li_mountp, (xfs_log_item_t *)qfs); - kmem_free(qfs, sizeof(xfs_qoff_logitem_t)); - kmem_free(qfe, sizeof(xfs_qoff_logitem_t)); + kmem_free(qfs); + kmem_free(qfe); return (xfs_lsn_t)-1; } diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index d31cce1165c..df0ffef9775 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -192,8 +192,8 @@ xfs_qm_destroy( xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i])); xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i])); } - kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t)); - kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t)); + kmem_free(xqm->qm_usr_dqhtable); + kmem_free(xqm->qm_grp_dqhtable); xqm->qm_usr_dqhtable = NULL; xqm->qm_grp_dqhtable = NULL; xqm->qm_dqhashmask = 0; @@ -201,7 +201,7 @@ xfs_qm_destroy( #ifdef DEBUG mutex_destroy(&qcheck_lock); #endif - kmem_free(xqm, sizeof(xfs_qm_t)); + kmem_free(xqm); } /* @@ -310,8 +310,7 @@ xfs_qm_unmount_quotadestroy( */ void xfs_qm_mount_quotas( - xfs_mount_t *mp, - int mfsi_flags) + xfs_mount_t *mp) { int error = 0; uint sbf; @@ -346,8 +345,7 @@ xfs_qm_mount_quotas( /* * If any of the quotas are not consistent, do a quotacheck. */ - if (XFS_QM_NEED_QUOTACHECK(mp) && - !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) { + if (XFS_QM_NEED_QUOTACHECK(mp)) { error = xfs_qm_quotacheck(mp); if (error) { /* Quotacheck failed and disabled quotas. */ @@ -445,11 +443,11 @@ xfs_qm_unmount_quotas( } } if (uqp) { - XFS_PURGE_INODE(uqp); + IRELE(uqp); mp->m_quotainfo->qi_uquotaip = NULL; } if (gqp) { - XFS_PURGE_INODE(gqp); + IRELE(gqp); mp->m_quotainfo->qi_gquotaip = NULL; } out: @@ -484,7 +482,7 @@ again: xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY"); /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { /* * If we can't grab the flush lock then check * to see if the dquot has been flushed delayed @@ -631,7 +629,7 @@ xfs_qm_dqpurge_int( * freelist in INACTIVE state. */ nextdqp = dqp->MPL_NEXT; - nmisses += xfs_qm_dqpurge(dqp, flags); + nmisses += xfs_qm_dqpurge(dqp); dqp = nextdqp; } xfs_qm_mplist_unlock(mp); @@ -1062,7 +1060,7 @@ xfs_qm_sync( /* XXX a sentinel would be better */ recl = XFS_QI_MPLRECLAIMS(mp); - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { if (nowait) { xfs_dqunlock(dqp); continue; @@ -1134,7 +1132,7 @@ xfs_qm_init_quotainfo( * and change the superblock accordingly. */ if ((error = xfs_qm_init_quotainos(mp))) { - kmem_free(qinf, sizeof(xfs_quotainfo_t)); + kmem_free(qinf); mp->m_quotainfo = NULL; return error; } @@ -1240,15 +1238,15 @@ xfs_qm_destroy_quotainfo( xfs_qm_list_destroy(&qi->qi_dqlist); if (qi->qi_uquotaip) { - XFS_PURGE_INODE(qi->qi_uquotaip); + IRELE(qi->qi_uquotaip); qi->qi_uquotaip = NULL; /* paranoia */ } if (qi->qi_gquotaip) { - XFS_PURGE_INODE(qi->qi_gquotaip); + IRELE(qi->qi_gquotaip); qi->qi_gquotaip = NULL; } mutex_destroy(&qi->qi_quotaofflock); - kmem_free(qi, sizeof(xfs_quotainfo_t)); + kmem_free(qi); mp->m_quotainfo = NULL; } @@ -1394,7 +1392,7 @@ xfs_qm_qino_alloc( * locked exclusively and joined to the transaction already. */ ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL)); - VN_HOLD(XFS_ITOV((*ip))); + IHOLD(*ip); /* * Make the changes in the superblock, and log those too. @@ -1623,7 +1621,7 @@ xfs_qm_dqiterate( break; } while (nmaps > 0); - kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map)); + kmem_free(map); return error; } @@ -2079,7 +2077,7 @@ xfs_qm_shake_freelist( * Try to grab the flush lock. If this dquot is in the process of * getting flushed to disk, we don't want to reclaim it. */ - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { xfs_dqunlock(dqp); dqp = dqp->dq_flnext; continue; @@ -2257,7 +2255,7 @@ xfs_qm_dqreclaim_one(void) * Try to grab the flush lock. If this dquot is in the process of * getting flushed to disk, we don't want to reclaim it. */ - if (! xfs_qm_dqflock_nowait(dqp)) { + if (!xfs_dqflock_nowait(dqp)) { xfs_dqunlock(dqp); continue; } diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h index cd2300e374a..44f25349e47 100644 --- a/fs/xfs/quota/xfs_qm.h +++ b/fs/xfs/quota/xfs_qm.h @@ -165,7 +165,7 @@ typedef struct xfs_dquot_acct { #define XFS_QM_RELE(xqm) ((xqm)->qm_nrefs--) extern void xfs_qm_destroy_quotainfo(xfs_mount_t *); -extern void xfs_qm_mount_quotas(xfs_mount_t *, int); +extern void xfs_qm_mount_quotas(xfs_mount_t *); extern int xfs_qm_quotacheck(xfs_mount_t *); extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *); extern int xfs_qm_unmount_quotas(xfs_mount_t *); diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index f4f6c4c861d..eea2e60b456 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -162,7 +162,7 @@ xfs_qm_newmount( * mounting, and get on with the boring life * without disk quotas. */ - xfs_qm_mount_quotas(mp, 0); + xfs_qm_mount_quotas(mp); } else { /* * Clear the quota flags, but remember them. This @@ -184,13 +184,12 @@ STATIC int xfs_qm_endmount( xfs_mount_t *mp, uint needquotamount, - uint quotaflags, - int mfsi_flags) + uint quotaflags) { if (needquotamount) { ASSERT(mp->m_qflags == 0); mp->m_qflags = quotaflags; - xfs_qm_mount_quotas(mp, mfsi_flags); + xfs_qm_mount_quotas(mp); } #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY) diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 768a3b27d2b..1a3b803dfa5 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c @@ -362,11 +362,11 @@ xfs_qm_scall_quotaoff( * if we don't need them anymore. */ if ((dqtype & XFS_QMOPT_UQUOTA) && XFS_QI_UQIP(mp)) { - XFS_PURGE_INODE(XFS_QI_UQIP(mp)); + IRELE(XFS_QI_UQIP(mp)); XFS_QI_UQIP(mp) = NULL; } if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && XFS_QI_GQIP(mp)) { - XFS_PURGE_INODE(XFS_QI_GQIP(mp)); + IRELE(XFS_QI_GQIP(mp)); XFS_QI_GQIP(mp) = NULL; } out_error: @@ -1034,7 +1034,7 @@ xfs_qm_dqrele_all_inodes( { xfs_inode_t *ip, *topino; uint ireclaims; - bhv_vnode_t *vp; + struct inode *vp; boolean_t vnode_refd; ASSERT(mp->m_quotainfo); @@ -1059,7 +1059,7 @@ again: ip = ip->i_mnext; continue; } - vp = XFS_ITOV_NULL(ip); + vp = VFS_I(ip); if (!vp) { ASSERT(ip->i_udquot == NULL); ASSERT(ip->i_gdquot == NULL); @@ -1449,14 +1449,14 @@ xfs_qm_internalqcheck( for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { xfs_dqtest_cmp(d); e = (xfs_dqtest_t *) d->HL_NEXT; - kmem_free(d, sizeof(xfs_dqtest_t)); + kmem_free(d); d = e; } h1 = &qmtest_gdqtab[i]; for (d = (xfs_dqtest_t *) h1->qh_next; d != NULL; ) { xfs_dqtest_cmp(d); e = (xfs_dqtest_t *) d->HL_NEXT; - kmem_free(d, sizeof(xfs_dqtest_t)); + kmem_free(d); d = e; } } @@ -1467,8 +1467,8 @@ xfs_qm_internalqcheck( } else { cmn_err(CE_DEBUG, "******** quotacheck successful! ********"); } - kmem_free(qmtest_udqtab, qmtest_hashmask * sizeof(xfs_dqhash_t)); - kmem_free(qmtest_gdqtab, qmtest_hashmask * sizeof(xfs_dqhash_t)); + kmem_free(qmtest_udqtab); + kmem_free(qmtest_gdqtab); mutex_unlock(&qcheck_lock); return (qmtest_nfails); } diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h index 5e4a40b1c56..c4fcea600bc 100644 --- a/fs/xfs/quota/xfs_quota_priv.h +++ b/fs/xfs/quota/xfs_quota_priv.h @@ -158,9 +158,6 @@ for ((dqp) = (qlist)->qh_next; (dqp) != (xfs_dquot_t *)(qlist); \ #define XFS_IS_SUSER_DQUOT(dqp) \ (!((dqp)->q_core.d_id)) -#define XFS_PURGE_INODE(ip) \ - IRELE(ip); - #define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) diff --git a/fs/xfs/support/ktrace.c b/fs/xfs/support/ktrace.c index 0b75d302508..a34ef05489b 100644 --- a/fs/xfs/support/ktrace.c +++ b/fs/xfs/support/ktrace.c @@ -89,7 +89,7 @@ ktrace_alloc(int nentries, unsigned int __nocast sleep) if (sleep & KM_SLEEP) panic("ktrace_alloc: NULL memory on KM_SLEEP request!"); - kmem_free(ktp, sizeof(*ktp)); + kmem_free(ktp); return NULL; } @@ -126,7 +126,7 @@ ktrace_free(ktrace_t *ktp) } else { entries_size = (int)(ktp->kt_nentries * sizeof(ktrace_entry_t)); - kmem_free(ktp->kt_entries, entries_size); + kmem_free(ktp->kt_entries); } kmem_zone_free(ktrace_hdr_zone, ktp); diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c index 493a6ecf859..5830c040ea7 100644 --- a/fs/xfs/support/uuid.c +++ b/fs/xfs/support/uuid.c @@ -17,7 +17,7 @@ */ #include <xfs.h> -static mutex_t uuid_monitor; +static DEFINE_MUTEX(uuid_monitor); static int uuid_table_size; static uuid_t *uuid_table; @@ -132,9 +132,3 @@ uuid_table_remove(uuid_t *uuid) ASSERT(i < uuid_table_size); mutex_unlock(&uuid_monitor); } - -void __init -uuid_init(void) -{ - mutex_init(&uuid_monitor); -} diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h index b6f5922199b..cff5b607d44 100644 --- a/fs/xfs/support/uuid.h +++ b/fs/xfs/support/uuid.h @@ -22,7 +22,6 @@ typedef struct { unsigned char __u_bits[16]; } uuid_t; -extern void uuid_init(void); extern void uuid_create_nil(uuid_t *uuid); extern int uuid_is_nil(uuid_t *uuid); extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2); diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index ebee3a4f703..b2f639a1416 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -37,15 +37,15 @@ #include <linux/capability.h> #include <linux/posix_acl_xattr.h> -STATIC int xfs_acl_setmode(bhv_vnode_t *, xfs_acl_t *, int *); +STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *); STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *); STATIC void xfs_acl_get_endian(xfs_acl_t *); STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *); STATIC int xfs_acl_invalid(xfs_acl_t *); STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *); -STATIC void xfs_acl_get_attr(bhv_vnode_t *, xfs_acl_t *, int, int, int *); -STATIC void xfs_acl_set_attr(bhv_vnode_t *, xfs_acl_t *, int, int *); -STATIC int xfs_acl_allow_set(bhv_vnode_t *, int); +STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *); +STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *); +STATIC int xfs_acl_allow_set(struct inode *, int); kmem_zone_t *xfs_acl_zone; @@ -55,7 +55,7 @@ kmem_zone_t *xfs_acl_zone; */ int xfs_acl_vhasacl_access( - bhv_vnode_t *vp) + struct inode *vp) { int error; @@ -68,7 +68,7 @@ xfs_acl_vhasacl_access( */ int xfs_acl_vhasacl_default( - bhv_vnode_t *vp) + struct inode *vp) { int error; @@ -207,7 +207,7 @@ posix_acl_xfs_to_xattr( int xfs_acl_vget( - bhv_vnode_t *vp, + struct inode *vp, void *acl, size_t size, int kind) @@ -217,7 +217,6 @@ xfs_acl_vget( posix_acl_xattr_header *ext_acl = acl; int flags = 0; - VN_HOLD(vp); if(size) { if (!(_ACL_ALLOC(xfs_acl))) { error = ENOMEM; @@ -239,11 +238,10 @@ xfs_acl_vget( goto out; } if (kind == _ACL_TYPE_ACCESS) - xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl); + xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl); error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size); } out: - VN_RELE(vp); if(xfs_acl) _ACL_FREE(xfs_acl); return -error; @@ -251,28 +249,26 @@ out: int xfs_acl_vremove( - bhv_vnode_t *vp, + struct inode *vp, int kind) { int error; - VN_HOLD(vp); error = xfs_acl_allow_set(vp, kind); if (!error) { - error = xfs_attr_remove(xfs_vtoi(vp), + error = xfs_attr_remove(XFS_I(vp), kind == _ACL_TYPE_DEFAULT? SGI_ACL_DEFAULT: SGI_ACL_FILE, ATTR_ROOT); if (error == ENOATTR) error = 0; /* 'scool */ } - VN_RELE(vp); return -error; } int xfs_acl_vset( - bhv_vnode_t *vp, + struct inode *vp, void *acl, size_t size, int kind) @@ -298,7 +294,6 @@ xfs_acl_vset( return 0; } - VN_HOLD(vp); error = xfs_acl_allow_set(vp, kind); /* Incoming ACL exists, set file mode based on its value */ @@ -321,7 +316,6 @@ xfs_acl_vset( } out: - VN_RELE(vp); _ACL_FREE(xfs_acl); return -error; } @@ -341,8 +335,7 @@ xfs_acl_iaccess( /* If the file has no ACL return -1. */ rval = sizeof(xfs_acl_t); - if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, - ATTR_ROOT | ATTR_KERNACCESS)) { + if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) { _ACL_FREE(acl); return -1; } @@ -364,7 +357,7 @@ xfs_acl_iaccess( STATIC int xfs_acl_allow_set( - bhv_vnode_t *vp, + struct inode *vp, int kind) { if (vp->i_flags & (S_IMMUTABLE|S_APPEND)) @@ -373,7 +366,7 @@ xfs_acl_allow_set( return ENOTDIR; if (vp->i_sb->s_flags & MS_RDONLY) return EROFS; - if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) + if (XFS_I(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER)) return EPERM; return 0; } @@ -567,7 +560,7 @@ xfs_acl_get_endian( */ STATIC void xfs_acl_get_attr( - bhv_vnode_t *vp, + struct inode *vp, xfs_acl_t *aclp, int kind, int flags, @@ -577,7 +570,7 @@ xfs_acl_get_attr( ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1); flags |= ATTR_ROOT; - *error = xfs_attr_get(xfs_vtoi(vp), + *error = xfs_attr_get(XFS_I(vp), kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE : SGI_ACL_DEFAULT, (char *)aclp, &len, flags); @@ -591,7 +584,7 @@ xfs_acl_get_attr( */ STATIC void xfs_acl_set_attr( - bhv_vnode_t *vp, + struct inode *vp, xfs_acl_t *aclp, int kind, int *error) @@ -616,7 +609,7 @@ xfs_acl_set_attr( INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm); } INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt); - *error = xfs_attr_set(xfs_vtoi(vp), + *error = xfs_attr_set(XFS_I(vp), kind == _ACL_TYPE_ACCESS ? SGI_ACL_FILE: SGI_ACL_DEFAULT, (char *)newacl, len, ATTR_ROOT); @@ -625,7 +618,7 @@ xfs_acl_set_attr( int xfs_acl_vtoacl( - bhv_vnode_t *vp, + struct inode *vp, xfs_acl_t *access_acl, xfs_acl_t *default_acl) { @@ -640,7 +633,7 @@ xfs_acl_vtoacl( if (error) access_acl->acl_cnt = XFS_ACL_NOT_PRESENT; else /* We have a good ACL and the file mode, synchronize. */ - xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl); + xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl); } if (default_acl) { @@ -657,7 +650,7 @@ xfs_acl_vtoacl( */ int xfs_acl_inherit( - bhv_vnode_t *vp, + struct inode *vp, mode_t mode, xfs_acl_t *pdaclp) { @@ -716,11 +709,11 @@ out_error: */ STATIC int xfs_acl_setmode( - bhv_vnode_t *vp, + struct inode *vp, xfs_acl_t *acl, int *basicperms) { - bhv_vattr_t va; + struct iattr iattr; xfs_acl_entry_t *ap; xfs_acl_entry_t *gap = NULL; int i, nomask = 1; @@ -734,25 +727,25 @@ xfs_acl_setmode( * Copy the u::, g::, o::, and m:: bits from the ACL into the * mode. The m:: bits take precedence over the g:: bits. */ - va.va_mask = XFS_AT_MODE; - va.va_mode = xfs_vtoi(vp)->i_d.di_mode; - va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); + iattr.ia_valid = ATTR_MODE; + iattr.ia_mode = XFS_I(vp)->i_d.di_mode; + iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO); ap = acl->acl_entry; for (i = 0; i < acl->acl_cnt; ++i) { switch (ap->ae_tag) { case ACL_USER_OBJ: - va.va_mode |= ap->ae_perm << 6; + iattr.ia_mode |= ap->ae_perm << 6; break; case ACL_GROUP_OBJ: gap = ap; break; case ACL_MASK: /* more than just standard modes */ nomask = 0; - va.va_mode |= ap->ae_perm << 3; + iattr.ia_mode |= ap->ae_perm << 3; *basicperms = 0; break; case ACL_OTHER: - va.va_mode |= ap->ae_perm; + iattr.ia_mode |= ap->ae_perm; break; default: /* more than just standard modes */ *basicperms = 0; @@ -763,9 +756,9 @@ xfs_acl_setmode( /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */ if (gap && nomask) - va.va_mode |= gap->ae_perm << 3; + iattr.ia_mode |= gap->ae_perm << 3; - return xfs_setattr(xfs_vtoi(vp), &va, 0, sys_cred); + return xfs_setattr(XFS_I(vp), &iattr, 0, sys_cred); } /* diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 332a772461c..a4e293b93ef 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -46,6 +46,8 @@ typedef struct xfs_acl { #define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1) #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) +#define _ACL_TYPE_ACCESS 1 +#define _ACL_TYPE_DEFAULT 2 #ifdef CONFIG_XFS_POSIX_ACL @@ -57,17 +59,15 @@ extern struct kmem_zone *xfs_acl_zone; (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) #define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) -extern int xfs_acl_inherit(bhv_vnode_t *, mode_t mode, xfs_acl_t *); +extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *); extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); -extern int xfs_acl_vtoacl(bhv_vnode_t *, xfs_acl_t *, xfs_acl_t *); -extern int xfs_acl_vhasacl_access(bhv_vnode_t *); -extern int xfs_acl_vhasacl_default(bhv_vnode_t *); -extern int xfs_acl_vset(bhv_vnode_t *, void *, size_t, int); -extern int xfs_acl_vget(bhv_vnode_t *, void *, size_t, int); -extern int xfs_acl_vremove(bhv_vnode_t *, int); +extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *); +extern int xfs_acl_vhasacl_access(struct inode *); +extern int xfs_acl_vhasacl_default(struct inode *); +extern int xfs_acl_vset(struct inode *, void *, size_t, int); +extern int xfs_acl_vget(struct inode *, void *, size_t, int); +extern int xfs_acl_vremove(struct inode *, int); -#define _ACL_TYPE_ACCESS 1 -#define _ACL_TYPE_DEFAULT 2 #define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE)) #define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d)) diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h index f9472a2076d..0b3b5efe848 100644 --- a/fs/xfs/xfs_arch.h +++ b/fs/xfs/xfs_arch.h @@ -92,16 +92,6 @@ ((__u8*)(pointer))[1] = (((value) ) & 0xff); \ } -/* define generic INT_ macros */ - -#define INT_GET(reference,arch) \ - (((arch) == ARCH_NOCONVERT) \ - ? \ - (reference) \ - : \ - INT_SWAP((reference),(reference)) \ - ) - /* does not return a value */ #define INT_SET(reference,arch,valueref) \ (__builtin_constant_p(valueref) ? \ @@ -112,64 +102,6 @@ ) \ ) -/* does not return a value */ -#define INT_MOD_EXPR(reference,arch,code) \ - (((arch) == ARCH_NOCONVERT) \ - ? \ - (void)((reference) code) \ - : \ - (void)( \ - (reference) = INT_GET((reference),arch) , \ - ((reference) code), \ - INT_SET(reference, arch, reference) \ - ) \ - ) - -/* does not return a value */ -#define INT_MOD(reference,arch,delta) \ - (void)( \ - INT_MOD_EXPR(reference,arch,+=(delta)) \ - ) - -/* - * INT_COPY - copy a value between two locations with the - * _same architecture_ but _potentially different sizes_ - * - * if the types of the two parameters are equal or they are - * in native architecture, a simple copy is done - * - * otherwise, architecture conversions are done - * - */ - -/* does not return a value */ -#define INT_COPY(dst,src,arch) \ - ( \ - ((sizeof(dst) == sizeof(src)) || ((arch) == ARCH_NOCONVERT)) \ - ? \ - (void)((dst) = (src)) \ - : \ - INT_SET(dst, arch, INT_GET(src, arch)) \ - ) - -/* - * INT_XLATE - copy a value in either direction between two locations - * with different architectures - * - * dir < 0 - copy from memory to buffer (native to arch) - * dir > 0 - copy from buffer to memory (arch to native) - */ - -/* does not return a value */ -#define INT_XLATE(buf,mem,dir,arch) {\ - ASSERT(dir); \ - if (dir>0) { \ - (mem)=INT_GET(buf, arch); \ - } else { \ - INT_SET(buf, arch, mem); \ - } \ -} - /* * In directories inode numbers are stored as unaligned arrays of unsigned * 8bit integers on disk. diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index df151a85918..f7cdc28aff4 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -16,8 +16,6 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ -#include <linux/capability.h> - #include "xfs.h" #include "xfs_fs.h" #include "xfs_types.h" @@ -57,11 +55,6 @@ * Provide the external interfaces to manage attribute lists. */ -#define ATTR_SYSCOUNT 2 -static struct attrnames posix_acl_access; -static struct attrnames posix_acl_default; -static struct attrnames *attr_system_names[ATTR_SYSCOUNT]; - /*======================================================================== * Function prototypes for the kernel. *========================================================================*/ @@ -116,6 +109,17 @@ xfs_attr_name_to_xname( return 0; } +STATIC int +xfs_inode_hasattr( + struct xfs_inode *ip) +{ + if (!XFS_IFORK_Q(ip) || + (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && + ip->i_d.di_anextents == 0)) + return 0; + return 1; +} + /*======================================================================== * Overall external interface routines. *========================================================================*/ @@ -127,10 +131,8 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name, xfs_da_args_t args; int error; - if ((XFS_IFORK_Q(ip) == 0) || - (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_anextents == 0)) - return(ENOATTR); + if (!xfs_inode_hasattr(ip)) + return ENOATTR; /* * Fill in the arg structure for this request. @@ -148,11 +150,7 @@ xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name, /* * Decide on what work routines to call based on the inode size. */ - if (XFS_IFORK_Q(ip) == 0 || - (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - ip->i_d.di_anextents == 0)) { - error = XFS_ERROR(ENOATTR); - } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { + if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { error = xfs_attr_shortform_getvalue(&args); } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) { error = xfs_attr_leaf_get(&args); @@ -196,6 +194,46 @@ xfs_attr_get( return(error); } +/* + * Calculate how many blocks we need for the new attribute, + */ +int +xfs_attr_calc_size( + struct xfs_inode *ip, + int namelen, + int valuelen, + int *local) +{ + struct xfs_mount *mp = ip->i_mount; + int size; + int nblks; + + /* + * Determine space new attribute will use, and if it would be + * "local" or "remote" (note: local != inline). + */ + size = xfs_attr_leaf_newentsize(namelen, valuelen, + mp->m_sb.sb_blocksize, local); + + nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); + if (*local) { + if (size > (mp->m_sb.sb_blocksize >> 1)) { + /* Double split possible */ + nblks *= 2; + } + } else { + /* + * Out of line attribute, cannot double split, but + * make room for the attribute value itself. + */ + uint dblocks = XFS_B_TO_FSB(mp, valuelen); + nblks += dblocks; + nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); + } + + return nblks; +} + STATIC int xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, char *value, int valuelen, int flags) @@ -204,10 +242,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, xfs_fsblock_t firstblock; xfs_bmap_free_t flist; int error, err2, committed; - int local, size; - uint nblks; xfs_mount_t *mp = dp->i_mount; int rsvd = (flags & ATTR_ROOT) != 0; + int local; /* * Attach the dquots to the inode. @@ -241,33 +278,10 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, args.firstblock = &firstblock; args.flist = &flist; args.whichfork = XFS_ATTR_FORK; - args.addname = 1; - args.oknoent = 1; - - /* - * Determine space new attribute will use, and if it would be - * "local" or "remote" (note: local != inline). - */ - size = xfs_attr_leaf_newentsize(name->len, valuelen, - mp->m_sb.sb_blocksize, &local); - - nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK); - if (local) { - if (size > (mp->m_sb.sb_blocksize >> 1)) { - /* Double split possible */ - nblks <<= 1; - } - } else { - uint dblocks = XFS_B_TO_FSB(mp, valuelen); - /* Out of line attribute, cannot double split, but make - * room for the attribute value itself. - */ - nblks += dblocks; - nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK); - } + args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; /* Size is now blocks for attribute data */ - args.total = nblks; + args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local); /* * Start our first transaction of the day. @@ -289,18 +303,17 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, if (rsvd) args.trans->t_flags |= XFS_TRANS_RESERVE; - if ((error = xfs_trans_reserve(args.trans, (uint) nblks, - XFS_ATTRSET_LOG_RES(mp, nblks), - 0, XFS_TRANS_PERM_LOG_RES, - XFS_ATTRSET_LOG_COUNT))) { + if ((error = xfs_trans_reserve(args.trans, args.total, + XFS_ATTRSET_LOG_RES(mp, args.total), 0, + XFS_TRANS_PERM_LOG_RES, XFS_ATTRSET_LOG_COUNT))) { xfs_trans_cancel(args.trans, 0); return(error); } xfs_ilock(dp, XFS_ILOCK_EXCL); - error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0, - rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : - XFS_QMOPT_RES_REGBLKS); + error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0, + rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES : + XFS_QMOPT_RES_REGBLKS); if (error) { xfs_iunlock(dp, XFS_ILOCK_EXCL); xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); @@ -387,7 +400,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name, * Commit the leaf transformation. We'll need another (linked) * transaction to add the new attribute to the leaf. */ - if ((error = xfs_attr_rolltrans(&args.trans, dp))) + + error = xfs_trans_roll(&args.trans, dp); + if (error) goto out; } @@ -529,9 +544,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) /* * Decide on what work routines to call based on the inode size. */ - if (XFS_IFORK_Q(dp) == 0 || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { + if (!xfs_inode_hasattr(dp)) { error = XFS_ERROR(ENOATTR); goto out; } @@ -601,29 +614,33 @@ xfs_attr_remove( return error; xfs_ilock(dp, XFS_ILOCK_SHARED); - if (XFS_IFORK_Q(dp) == 0 || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { + if (!xfs_inode_hasattr(dp)) { xfs_iunlock(dp, XFS_ILOCK_SHARED); - return(XFS_ERROR(ENOATTR)); + return XFS_ERROR(ENOATTR); } xfs_iunlock(dp, XFS_ILOCK_SHARED); return xfs_attr_remove_int(dp, &xname, flags); } -STATIC int +int xfs_attr_list_int(xfs_attr_list_context_t *context) { int error; xfs_inode_t *dp = context->dp; + XFS_STATS_INC(xs_attr_list); + + if (XFS_FORCED_SHUTDOWN(dp->i_mount)) + return EIO; + + xfs_ilock(dp, XFS_ILOCK_SHARED); + xfs_attr_trace_l_c("syscall start", context); + /* * Decide on what work routines to call based on the inode size. */ - if (XFS_IFORK_Q(dp) == 0 || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { + if (!xfs_inode_hasattr(dp)) { error = 0; } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { error = xfs_attr_shortform_list(context); @@ -632,6 +649,10 @@ xfs_attr_list_int(xfs_attr_list_context_t *context) } else { error = xfs_attr_node_list(context); } + + xfs_iunlock(dp, XFS_ILOCK_SHARED); + xfs_attr_trace_l_c("syscall end", context); + return error; } @@ -648,74 +669,50 @@ xfs_attr_list_int(xfs_attr_list_context_t *context) */ /*ARGSUSED*/ STATIC int -xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp, +xfs_attr_put_listent(xfs_attr_list_context_t *context, int flags, char *name, int namelen, int valuelen, char *value) { + struct attrlist *alist = (struct attrlist *)context->alist; attrlist_ent_t *aep; int arraytop; ASSERT(!(context->flags & ATTR_KERNOVAL)); ASSERT(context->count >= 0); ASSERT(context->count < (ATTR_MAX_VALUELEN/8)); - ASSERT(context->firstu >= sizeof(*context->alist)); + ASSERT(context->firstu >= sizeof(*alist)); ASSERT(context->firstu <= context->bufsize); - arraytop = sizeof(*context->alist) + - context->count * sizeof(context->alist->al_offset[0]); + /* + * Only list entries in the right namespace. + */ + if (((context->flags & ATTR_SECURE) == 0) != + ((flags & XFS_ATTR_SECURE) == 0)) + return 0; + if (((context->flags & ATTR_ROOT) == 0) != + ((flags & XFS_ATTR_ROOT) == 0)) + return 0; + + arraytop = sizeof(*alist) + + context->count * sizeof(alist->al_offset[0]); context->firstu -= ATTR_ENTSIZE(namelen); if (context->firstu < arraytop) { xfs_attr_trace_l_c("buffer full", context); - context->alist->al_more = 1; + alist->al_more = 1; context->seen_enough = 1; return 1; } - aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]); + aep = (attrlist_ent_t *)&context->alist[context->firstu]; aep->a_valuelen = valuelen; memcpy(aep->a_name, name, namelen); - aep->a_name[ namelen ] = 0; - context->alist->al_offset[ context->count++ ] = context->firstu; - context->alist->al_count = context->count; + aep->a_name[namelen] = 0; + alist->al_offset[context->count++] = context->firstu; + alist->al_count = context->count; xfs_attr_trace_l_c("add", context); return 0; } -STATIC int -xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp, - char *name, int namelen, - int valuelen, char *value) -{ - char *offset; - int arraytop; - - ASSERT(context->count >= 0); - - arraytop = context->count + namesp->attr_namelen + namelen + 1; - if (arraytop > context->firstu) { - context->count = -1; /* insufficient space */ - return 1; - } - offset = (char *)context->alist + context->count; - strncpy(offset, namesp->attr_name, namesp->attr_namelen); - offset += namesp->attr_namelen; - strncpy(offset, name, namelen); /* real name */ - offset += namelen; - *offset = '\0'; - context->count += namesp->attr_namelen + namelen + 1; - return 0; -} - -/*ARGSUSED*/ -STATIC int -xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp, - char *name, int namelen, - int valuelen, char *value) -{ - context->count += namesp->attr_namelen + namelen + 1; - return 0; -} - /* * Generate a list of extended attribute names and optionally * also value lengths. Positive return value follows the XFS @@ -732,10 +729,9 @@ xfs_attr_list( attrlist_cursor_kern_t *cursor) { xfs_attr_list_context_t context; + struct attrlist *alist; int error; - XFS_STATS_INC(xs_attr_list); - /* * Validate the cursor. */ @@ -756,52 +752,23 @@ xfs_attr_list( /* * Initialize the output buffer. */ + memset(&context, 0, sizeof(context)); context.dp = dp; context.cursor = cursor; - context.count = 0; - context.dupcnt = 0; context.resynch = 1; context.flags = flags; - context.seen_enough = 0; - context.alist = (attrlist_t *)buffer; - context.put_value = 0; - - if (flags & ATTR_KERNAMELS) { - context.bufsize = bufsize; - context.firstu = context.bufsize; - if (flags & ATTR_KERNOVAL) - context.put_listent = xfs_attr_kern_list_sizes; - else - context.put_listent = xfs_attr_kern_list; - } else { - context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */ - context.firstu = context.bufsize; - context.alist->al_count = 0; - context.alist->al_more = 0; - context.alist->al_offset[0] = context.bufsize; - context.put_listent = xfs_attr_put_listent; - } - - if (XFS_FORCED_SHUTDOWN(dp->i_mount)) - return EIO; + context.alist = buffer; + context.bufsize = (bufsize & ~(sizeof(int)-1)); /* align */ + context.firstu = context.bufsize; + context.put_listent = xfs_attr_put_listent; - xfs_ilock(dp, XFS_ILOCK_SHARED); - xfs_attr_trace_l_c("syscall start", &context); + alist = (struct attrlist *)context.alist; + alist->al_count = 0; + alist->al_more = 0; + alist->al_offset[0] = context.bufsize; error = xfs_attr_list_int(&context); - - xfs_iunlock(dp, XFS_ILOCK_SHARED); - xfs_attr_trace_l_c("syscall end", &context); - - if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) { - /* must return negated buffer size or the error */ - if (context.count < 0) - error = XFS_ERROR(ERANGE); - else - error = -context.count; - } else - ASSERT(error >= 0); - + ASSERT(error >= 0); return error; } @@ -816,12 +783,10 @@ xfs_attr_inactive(xfs_inode_t *dp) ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); xfs_ilock(dp, XFS_ILOCK_SHARED); - if ((XFS_IFORK_Q(dp) == 0) || - (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { + if (!xfs_inode_hasattr(dp) || + dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { xfs_iunlock(dp, XFS_ILOCK_SHARED); - return(0); + return 0; } xfs_iunlock(dp, XFS_ILOCK_SHARED); @@ -854,10 +819,8 @@ xfs_attr_inactive(xfs_inode_t *dp) /* * Decide on what work routines to call based on the inode size. */ - if ((XFS_IFORK_Q(dp) == 0) || - (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || - (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && - dp->i_d.di_anextents == 0)) { + if (!xfs_inode_hasattr(dp) || + dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { error = 0; goto out; } @@ -974,7 +937,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) xfs_da_brelse(args->trans, bp); return(retval); } - args->rename = 1; /* an atomic rename */ + args->op_flags |= XFS_DA_OP_RENAME; /* an atomic rename */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; args->rmtblkno2 = args->rmtblkno; @@ -1019,7 +982,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * Commit the current trans (including the inode) and start * a new one. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) return (error); /* @@ -1033,7 +997,8 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * Commit the transaction that added the attr name so that * later routines can manage their own transactions. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) return (error); /* @@ -1054,7 +1019,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) * so that one disappears and one appears atomically. Then we * must remove the "old" attribute/value pair. */ - if (args->rename) { + if (args->op_flags & XFS_DA_OP_RENAME) { /* * In a separate transaction, set the incomplete flag on the * "old" attr and clear the incomplete flag on the "new" attr. @@ -1122,7 +1087,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args) /* * Commit the remove and start the next trans in series. */ - error = xfs_attr_rolltrans(&args->trans, dp); + error = xfs_trans_roll(&args->trans, dp); } else if (args->rmtblkno > 0) { /* @@ -1307,7 +1272,7 @@ restart: } else if (retval == EEXIST) { if (args->flags & ATTR_CREATE) goto out; - args->rename = 1; /* atomic rename op */ + args->op_flags |= XFS_DA_OP_RENAME; /* atomic rename op */ args->blkno2 = args->blkno; /* set 2nd entry info*/ args->index2 = args->index; args->rmtblkno2 = args->rmtblkno; @@ -1353,7 +1318,8 @@ restart: * Commit the node conversion and start the next * trans in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) goto out; goto restart; @@ -1404,7 +1370,8 @@ restart: * Commit the leaf addition or btree split and start the next * trans in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) goto out; /* @@ -1425,7 +1392,7 @@ restart: * so that one disappears and one appears atomically. Then we * must remove the "old" attribute/value pair. */ - if (args->rename) { + if (args->op_flags & XFS_DA_OP_RENAME) { /* * In a separate transaction, set the incomplete flag on the * "old" attr and clear the incomplete flag on the "new" attr. @@ -1504,7 +1471,8 @@ restart: /* * Commit and start the next trans in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) goto out; } else if (args->rmtblkno > 0) { @@ -1636,7 +1604,8 @@ xfs_attr_node_removename(xfs_da_args_t *args) /* * Commit the Btree join operation and start a new trans. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) goto out; } @@ -2137,7 +2106,8 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) /* * Start the next trans in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, dp))) + error = xfs_trans_roll(&args->trans, dp); + if (error) return (error); } @@ -2287,7 +2257,8 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) /* * Close out trans and start the next one in the chain. */ - if ((error = xfs_attr_rolltrans(&args->trans, args->dp))) + error = xfs_trans_roll(&args->trans, args->dp); + if (error) return (error); } return(0); @@ -2300,23 +2271,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args) void xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context) { - xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, - (__psunsigned_t)context->dp, - (__psunsigned_t)context->cursor->hashval, - (__psunsigned_t)context->cursor->blkno, - (__psunsigned_t)context->cursor->offset, - (__psunsigned_t)context->alist, - (__psunsigned_t)context->bufsize, - (__psunsigned_t)context->count, - (__psunsigned_t)context->firstu, - (__psunsigned_t) - ((context->count > 0) && - !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL))) - ? (ATTR_ENTRY(context->alist, - context->count-1)->a_valuelen) - : 0, - (__psunsigned_t)context->dupcnt, - (__psunsigned_t)context->flags, + xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where, context, (__psunsigned_t)NULL, (__psunsigned_t)NULL, (__psunsigned_t)NULL); @@ -2329,23 +2284,7 @@ void xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context, struct xfs_da_intnode *node) { - xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, - (__psunsigned_t)context->dp, - (__psunsigned_t)context->cursor->hashval, - (__psunsigned_t)context->cursor->blkno, - (__psunsigned_t)context->cursor->offset, - (__psunsigned_t)context->alist, - (__psunsigned_t)context->bufsize, - (__psunsigned_t)context->count, - (__psunsigned_t)context->firstu, - (__psunsigned_t) - ((context->count > 0) && - !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL))) - ? (ATTR_ENTRY(context->alist, - context->count-1)->a_valuelen) - : 0, - (__psunsigned_t)context->dupcnt, - (__psunsigned_t)context->flags, + xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where, context, (__psunsigned_t)be16_to_cpu(node->hdr.count), (__psunsigned_t)be32_to_cpu(node->btree[0].hashval), (__psunsigned_t)be32_to_cpu(node->btree[ @@ -2359,23 +2298,7 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context, struct xfs_da_node_entry *btree) { - xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, - (__psunsigned_t)context->dp, - (__psunsigned_t)context->cursor->hashval, - (__psunsigned_t)context->cursor->blkno, - (__psunsigned_t)context->cursor->offset, - (__psunsigned_t)context->alist, - (__psunsigned_t)context->bufsize, - (__psunsigned_t)context->count, - (__psunsigned_t)context->firstu, - (__psunsigned_t) - ((context->count > 0) && - !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL))) - ? (ATTR_ENTRY(context->alist, - context->count-1)->a_valuelen) - : 0, - (__psunsigned_t)context->dupcnt, - (__psunsigned_t)context->flags, + xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where, context, (__psunsigned_t)be32_to_cpu(btree->hashval), (__psunsigned_t)be32_to_cpu(btree->before), (__psunsigned_t)NULL); @@ -2388,23 +2311,7 @@ void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, struct xfs_attr_leafblock *leaf) { - xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, - (__psunsigned_t)context->dp, - (__psunsigned_t)context->cursor->hashval, - (__psunsigned_t)context->cursor->blkno, - (__psunsigned_t)context->cursor->offset, - (__psunsigned_t)context->alist, - (__psunsigned_t)context->bufsize, - (__psunsigned_t)context->count, - (__psunsigned_t)context->firstu, - (__psunsigned_t) - ((context->count > 0) && - !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL))) - ? (ATTR_ENTRY(context->alist, - context->count-1)->a_valuelen) - : 0, - (__psunsigned_t)context->dupcnt, - (__psunsigned_t)context->flags, + xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where, context, (__psunsigned_t)be16_to_cpu(leaf->hdr.count), (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval), (__psunsigned_t)be32_to_cpu(leaf->entries[ @@ -2417,329 +2324,24 @@ xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, */ void xfs_attr_trace_enter(int type, char *where, - __psunsigned_t a2, __psunsigned_t a3, - __psunsigned_t a4, __psunsigned_t a5, - __psunsigned_t a6, __psunsigned_t a7, - __psunsigned_t a8, __psunsigned_t a9, - __psunsigned_t a10, __psunsigned_t a11, - __psunsigned_t a12, __psunsigned_t a13, - __psunsigned_t a14, __psunsigned_t a15) + struct xfs_attr_list_context *context, + __psunsigned_t a13, __psunsigned_t a14, + __psunsigned_t a15) { ASSERT(xfs_attr_trace_buf); ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type), - (void *)where, - (void *)a2, (void *)a3, (void *)a4, - (void *)a5, (void *)a6, (void *)a7, - (void *)a8, (void *)a9, (void *)a10, - (void *)a11, (void *)a12, (void *)a13, - (void *)a14, (void *)a15); + (void *)((__psunsigned_t)where), + (void *)((__psunsigned_t)context->dp), + (void *)((__psunsigned_t)context->cursor->hashval), + (void *)((__psunsigned_t)context->cursor->blkno), + (void *)((__psunsigned_t)context->cursor->offset), + (void *)((__psunsigned_t)context->alist), + (void *)((__psunsigned_t)context->bufsize), + (void *)((__psunsigned_t)context->count), + (void *)((__psunsigned_t)context->firstu), + NULL, + (void *)((__psunsigned_t)context->dupcnt), + (void *)((__psunsigned_t)context->flags), + (void *)a13, (void *)a14, (void *)a15); } #endif /* XFS_ATTR_TRACE */ - - -/*======================================================================== - * System (pseudo) namespace attribute interface routines. - *========================================================================*/ - -STATIC int -posix_acl_access_set( - bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) -{ - return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS); -} - -STATIC int -posix_acl_access_remove( - bhv_vnode_t *vp, char *name, int xflags) -{ - return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS); -} - -STATIC int -posix_acl_access_get( - bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) -{ - return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS); -} - -STATIC int -posix_acl_access_exists( - bhv_vnode_t *vp) -{ - return xfs_acl_vhasacl_access(vp); -} - -STATIC int -posix_acl_default_set( - bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) -{ - return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT); -} - -STATIC int -posix_acl_default_get( - bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) -{ - return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT); -} - -STATIC int -posix_acl_default_remove( - bhv_vnode_t *vp, char *name, int xflags) -{ - return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT); -} - -STATIC int -posix_acl_default_exists( - bhv_vnode_t *vp) -{ - return xfs_acl_vhasacl_default(vp); -} - -static struct attrnames posix_acl_access = { - .attr_name = "posix_acl_access", - .attr_namelen = sizeof("posix_acl_access") - 1, - .attr_get = posix_acl_access_get, - .attr_set = posix_acl_access_set, - .attr_remove = posix_acl_access_remove, - .attr_exists = posix_acl_access_exists, -}; - -static struct attrnames posix_acl_default = { - .attr_name = "posix_acl_default", - .attr_namelen = sizeof("posix_acl_default") - 1, - .attr_get = posix_acl_default_get, - .attr_set = posix_acl_default_set, - .attr_remove = posix_acl_default_remove, - .attr_exists = posix_acl_default_exists, -}; - -static struct attrnames *attr_system_names[] = - { &posix_acl_access, &posix_acl_default }; - - -/*======================================================================== - * Namespace-prefix-style attribute name interface routines. - *========================================================================*/ - -STATIC int -attr_generic_set( - bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) -{ - return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags); -} - -STATIC int -attr_generic_get( - bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) -{ - int error, asize = size; - - error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags); - if (!error) - return asize; - return -error; -} - -STATIC int -attr_generic_remove( - bhv_vnode_t *vp, char *name, int xflags) -{ - return -xfs_attr_remove(xfs_vtoi(vp), name, xflags); -} - -STATIC int -attr_generic_listadd( - attrnames_t *prefix, - attrnames_t *namesp, - void *data, - size_t size, - ssize_t *result) -{ - char *p = data + *result; - - *result += prefix->attr_namelen; - *result += namesp->attr_namelen + 1; - if (!size) - return 0; - if (*result > size) - return -ERANGE; - strcpy(p, prefix->attr_name); - p += prefix->attr_namelen; - strcpy(p, namesp->attr_name); - p += namesp->attr_namelen + 1; - return 0; -} - -STATIC int -attr_system_list( - bhv_vnode_t *vp, - void *data, - size_t size, - ssize_t *result) -{ - attrnames_t *namesp; - int i, error = 0; - - for (i = 0; i < ATTR_SYSCOUNT; i++) { - namesp = attr_system_names[i]; - if (!namesp->attr_exists || !namesp->attr_exists(vp)) - continue; - error = attr_generic_listadd(&attr_system, namesp, - data, size, result); - if (error) - break; - } - return error; -} - -int -attr_generic_list( - bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result) -{ - attrlist_cursor_kern_t cursor = { 0 }; - int error; - - error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor); - if (error > 0) - return -error; - *result = -error; - return attr_system_list(vp, data, size, result); -} - -attrnames_t * -attr_lookup_namespace( - char *name, - struct attrnames **names, - int nnames) -{ - int i; - - for (i = 0; i < nnames; i++) - if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen)) - return names[i]; - return NULL; -} - -/* - * Some checks to prevent people abusing EAs to get over quota: - * - Don't allow modifying user EAs on devices/symlinks; - * - Don't allow modifying user EAs if sticky bit set; - */ -STATIC int -attr_user_capable( - bhv_vnode_t *vp, - cred_t *cred) -{ - struct inode *inode = vn_to_inode(vp); - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) && - (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER)) - return -EPERM; - return 0; -} - -STATIC int -attr_trusted_capable( - bhv_vnode_t *vp, - cred_t *cred) -{ - struct inode *inode = vn_to_inode(vp); - - if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) - return -EPERM; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - return 0; -} - -STATIC int -attr_system_set( - bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) -{ - attrnames_t *namesp; - int error; - - if (xflags & ATTR_CREATE) - return -EINVAL; - - namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT); - if (!namesp) - return -EOPNOTSUPP; - error = namesp->attr_set(vp, name, data, size, xflags); - if (!error) - error = vn_revalidate(vp); - return error; -} - -STATIC int -attr_system_get( - bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags) -{ - attrnames_t *namesp; - - namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT); - if (!namesp) - return -EOPNOTSUPP; - return namesp->attr_get(vp, name, data, size, xflags); -} - -STATIC int -attr_system_remove( - bhv_vnode_t *vp, char *name, int xflags) -{ - attrnames_t *namesp; - - namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT); - if (!namesp) - return -EOPNOTSUPP; - return namesp->attr_remove(vp, name, xflags); -} - -struct attrnames attr_system = { - .attr_name = "system.", - .attr_namelen = sizeof("system.") - 1, - .attr_flag = ATTR_SYSTEM, - .attr_get = attr_system_get, - .attr_set = attr_system_set, - .attr_remove = attr_system_remove, - .attr_capable = (attrcapable_t)fs_noerr, -}; - -struct attrnames attr_trusted = { - .attr_name = "trusted.", - .attr_namelen = sizeof("trusted.") - 1, - .attr_flag = ATTR_ROOT, - .attr_get = attr_generic_get, - .attr_set = attr_generic_set, - .attr_remove = attr_generic_remove, - .attr_capable = attr_trusted_capable, -}; - -struct attrnames attr_secure = { - .attr_name = "security.", - .attr_namelen = sizeof("security.") - 1, - .attr_flag = ATTR_SECURE, - .attr_get = attr_generic_get, - .attr_set = attr_generic_set, - .attr_remove = attr_generic_remove, - .attr_capable = (attrcapable_t)fs_noerr, -}; - -struct attrnames attr_user = { - .attr_name = "user.", - .attr_namelen = sizeof("user.") - 1, - .attr_get = attr_generic_get, - .attr_set = attr_generic_set, - .attr_remove = attr_generic_remove, - .attr_capable = attr_user_capable, -}; - -struct attrnames *attr_namespaces[] = - { &attr_system, &attr_trusted, &attr_secure, &attr_user }; diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h index 6cfc9384fe3..fb3b2a68b9b 100644 --- a/fs/xfs/xfs_attr.h +++ b/fs/xfs/xfs_attr.h @@ -18,9 +18,11 @@ #ifndef __XFS_ATTR_H__ #define __XFS_ATTR_H__ +struct xfs_inode; +struct xfs_da_args; +struct xfs_attr_list_context; + /* - * xfs_attr.h - * * Large attribute lists are structured around Btrees where all the data * elements are in the leaf nodes. Attribute names are hashed into an int, * then that int is used as the index into the Btree. Since the hashval @@ -35,35 +37,6 @@ * External interfaces *========================================================================*/ -struct cred; -struct xfs_attr_list_context; - -typedef int (*attrset_t)(bhv_vnode_t *, char *, void *, size_t, int); -typedef int (*attrget_t)(bhv_vnode_t *, char *, void *, size_t, int); -typedef int (*attrremove_t)(bhv_vnode_t *, char *, int); -typedef int (*attrexists_t)(bhv_vnode_t *); -typedef int (*attrcapable_t)(bhv_vnode_t *, struct cred *); - -typedef struct attrnames { - char * attr_name; - unsigned int attr_namelen; - unsigned int attr_flag; - attrget_t attr_get; - attrset_t attr_set; - attrremove_t attr_remove; - attrexists_t attr_exists; - attrcapable_t attr_capable; -} attrnames_t; - -#define ATTR_NAMECOUNT 4 -extern struct attrnames attr_user; -extern struct attrnames attr_secure; -extern struct attrnames attr_system; -extern struct attrnames attr_trusted; -extern struct attrnames *attr_namespaces[ATTR_NAMECOUNT]; - -extern attrnames_t *attr_lookup_namespace(char *, attrnames_t **, int); -extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *); #define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */ #define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */ @@ -71,16 +44,9 @@ extern int attr_generic_list(bhv_vnode_t *, void *, size_t, int, ssize_t *); #define ATTR_SECURE 0x0008 /* use attrs in security namespace */ #define ATTR_CREATE 0x0010 /* pure create: fail if attr already exists */ #define ATTR_REPLACE 0x0020 /* pure set: fail if attr does not exist */ -#define ATTR_SYSTEM 0x0100 /* use attrs in system (pseudo) namespace */ -#define ATTR_KERNACCESS 0x0400 /* [kernel] iaccess, inode held io-locked */ #define ATTR_KERNOTIME 0x1000 /* [kernel] don't update inode timestamps */ #define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */ -#define ATTR_KERNAMELS 0x4000 /* [kernel] list attr names (simple list) */ - -#define ATTR_KERNORMALS 0x0800 /* [kernel] normal attr list: user+secure */ -#define ATTR_KERNROOTLS 0x8000 /* [kernel] include root in the attr list */ -#define ATTR_KERNFULLS (ATTR_KERNORMALS|ATTR_KERNROOTLS) /* * The maximum size (into the kernel or returned from the kernel) of an @@ -119,22 +85,6 @@ typedef struct attrlist_ent { /* data from attr_list() */ &((char *)buffer)[ ((attrlist_t *)(buffer))->al_offset[index] ]) /* - * Multi-attribute operation vector. - */ -typedef struct attr_multiop { - int am_opcode; /* operation to perform (ATTR_OP_GET, etc.) */ - int am_error; /* [out arg] result of this sub-op (an errno) */ - char *am_attrname; /* attribute name to work with */ - char *am_attrvalue; /* [in/out arg] attribute value (raw bytes) */ - int am_length; /* [in/out arg] length of value */ - int am_flags; /* bitwise OR of attr API flags defined above */ -} attr_multiop_t; - -#define ATTR_OP_GET 1 /* return the indicated attr's value */ -#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */ -#define ATTR_OP_REMOVE 3 /* remove the indicated attr */ - -/* * Kernel-internal version of the attrlist cursor. */ typedef struct attrlist_cursor_kern { @@ -148,20 +98,41 @@ typedef struct attrlist_cursor_kern { /*======================================================================== - * Function prototypes for the kernel. + * Structure used to pass context around among the routines. *========================================================================*/ -struct xfs_inode; -struct attrlist_cursor_kern; -struct xfs_da_args; + +typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int, + char *, int, int, char *); + +typedef struct xfs_attr_list_context { + struct xfs_inode *dp; /* inode */ + struct attrlist_cursor_kern *cursor; /* position in list */ + char *alist; /* output buffer */ + int seen_enough; /* T/F: seen enough of list? */ + ssize_t count; /* num used entries */ + int dupcnt; /* count dup hashvals seen */ + int bufsize; /* total buffer size */ + int firstu; /* first used byte in buffer */ + int flags; /* from VOP call */ + int resynch; /* T/F: resynch with cursor */ + int put_value; /* T/F: need value for listent */ + put_listent_func_t put_listent; /* list output fmt function */ + int index; /* index into output buffer */ +} xfs_attr_list_context_t; + + +/*======================================================================== + * Function prototypes for the kernel. + *========================================================================*/ /* * Overall external interface routines. */ +int xfs_attr_calc_size(struct xfs_inode *, int, int, int *); int xfs_attr_inactive(struct xfs_inode *dp); - -int xfs_attr_shortform_getvalue(struct xfs_da_args *); int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int); int xfs_attr_rmtval_get(struct xfs_da_args *args); +int xfs_attr_list_int(struct xfs_attr_list_context *); #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 303d41e4217..79da6b2ea99 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -94,13 +94,6 @@ STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index); * Namespace helper routines *========================================================================*/ -STATIC_INLINE attrnames_t * -xfs_attr_flags_namesp(int flags) -{ - return ((flags & XFS_ATTR_SECURE) ? &attr_secure: - ((flags & XFS_ATTR_ROOT) ? &attr_trusted : &attr_user)); -} - /* * If namespace bits don't match return 0. * If all match then return 1. @@ -111,25 +104,6 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags) return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags); } -/* - * If namespace bits don't match and we don't have an override for it - * then return 0. - * If all match or are overridable then return 1. - */ -STATIC_INLINE int -xfs_attr_namesp_match_overrides(int arg_flags, int ondisk_flags) -{ - if (((arg_flags & ATTR_SECURE) == 0) != - ((ondisk_flags & XFS_ATTR_SECURE) == 0) && - !(arg_flags & ATTR_KERNORMALS)) - return 0; - if (((arg_flags & ATTR_ROOT) == 0) != - ((ondisk_flags & XFS_ATTR_ROOT) == 0) && - !(arg_flags & ATTR_KERNROOTLS)) - return 0; - return 1; -} - /*======================================================================== * External routines when attribute fork size < XFS_LITINO(mp). @@ -369,9 +343,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) * Fix up the start offset of the attribute fork */ totsize -= size; - if (totsize == sizeof(xfs_attr_sf_hdr_t) && !args->addname && - (mp->m_flags & XFS_MOUNT_ATTR2) && - (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) { + if (totsize == sizeof(xfs_attr_sf_hdr_t) && + !(args->op_flags & XFS_DA_OP_ADDNAME) && + (mp->m_flags & XFS_MOUNT_ATTR2) && + (dp->i_d.di_format != XFS_DINODE_FMT_BTREE)) { /* * Last attribute now removed, revert to original * inode format making all literal area available @@ -389,9 +364,10 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); ASSERT(dp->i_d.di_forkoff); - ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || args->addname || - !(mp->m_flags & XFS_MOUNT_ATTR2) || - dp->i_d.di_format == XFS_DINODE_FMT_BTREE); + ASSERT(totsize > sizeof(xfs_attr_sf_hdr_t) || + (args->op_flags & XFS_DA_OP_ADDNAME) || + !(mp->m_flags & XFS_MOUNT_ATTR2) || + dp->i_d.di_format == XFS_DINODE_FMT_BTREE); dp->i_afp->if_ext_max = XFS_IFORK_ASIZE(dp) / (uint)sizeof(xfs_bmbt_rec_t); dp->i_df.if_ext_max = @@ -531,7 +507,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) nargs.total = args->total; nargs.whichfork = XFS_ATTR_FORK; nargs.trans = args->trans; - nargs.oknoent = 1; + nargs.op_flags = XFS_DA_OP_OKNOENT; sfe = &sf->list[0]; for (i = 0; i < sf->hdr.count; i++) { @@ -555,7 +531,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args) out: if(bp) xfs_da_buf_done(bp); - kmem_free(tmpbuffer, size); + kmem_free(tmpbuffer); return(error); } @@ -624,15 +600,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) (XFS_ISRESET_CURSOR(cursor) && (dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) { for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { - attrnames_t *namesp; - - if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) { - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); - continue; - } - namesp = xfs_attr_flags_namesp(sfe->flags); error = context->put_listent(context, - namesp, + sfe->flags, (char *)sfe->nameval, (int)sfe->namelen, (int)sfe->valuelen, @@ -676,13 +645,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) XFS_ERRLEVEL_LOW, context->dp->i_mount, sfe); xfs_attr_trace_l_c("sf corrupted", context); - kmem_free(sbuf, sbsize); + kmem_free(sbuf); return XFS_ERROR(EFSCORRUPTED); } - if (!xfs_attr_namesp_match_overrides(context->flags, sfe->flags)) { - sfe = XFS_ATTR_SF_NEXTENTRY(sfe); - continue; - } + sbp->entno = i; sbp->hash = xfs_da_hashname((char *)sfe->nameval, sfe->namelen); sbp->name = (char *)sfe->nameval; @@ -717,7 +683,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) } } if (i == nsbuf) { - kmem_free(sbuf, sbsize); + kmem_free(sbuf); xfs_attr_trace_l_c("blk end", context); return(0); } @@ -726,16 +692,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) * Loop putting entries into the user buffer. */ for ( ; i < nsbuf; i++, sbp++) { - attrnames_t *namesp; - - namesp = xfs_attr_flags_namesp(sbp->flags); - if (cursor->hashval != sbp->hash) { cursor->hashval = sbp->hash; cursor->offset = 0; } error = context->put_listent(context, - namesp, + sbp->flags, sbp->name, sbp->namelen, sbp->valuelen, @@ -747,7 +709,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context) cursor->offset++; } - kmem_free(sbuf, sbsize); + kmem_free(sbuf); xfs_attr_trace_l_c("sf E-O-F", context); return(0); } @@ -853,7 +815,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) nargs.total = args->total; nargs.whichfork = XFS_ATTR_FORK; nargs.trans = args->trans; - nargs.oknoent = 1; + nargs.op_flags = XFS_DA_OP_OKNOENT; entry = &leaf->entries[0]; for (i = 0; i < be16_to_cpu(leaf->hdr.count); entry++, i++) { if (entry->flags & XFS_ATTR_INCOMPLETE) @@ -873,7 +835,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff) error = 0; out: - kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount)); + kmem_free(tmpbuffer); return(error); } @@ -1155,7 +1117,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex) entry->hashval = cpu_to_be32(args->hashval); entry->flags = tmp ? XFS_ATTR_LOCAL : 0; entry->flags |= XFS_ATTR_NSP_ARGS_TO_ONDISK(args->flags); - if (args->rename) { + if (args->op_flags & XFS_DA_OP_RENAME) { entry->flags |= XFS_ATTR_INCOMPLETE; if ((args->blkno2 == args->blkno) && (args->index2 <= args->index)) { @@ -1271,7 +1233,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp) be16_to_cpu(hdr_s->count), mp); xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); - kmem_free(tmpbuffer, XFS_LBSIZE(mp)); + kmem_free(tmpbuffer); } /* @@ -1921,7 +1883,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk, be16_to_cpu(drop_hdr->count), mp); } memcpy((char *)save_leaf, (char *)tmp_leaf, state->blocksize); - kmem_free(tmpbuffer, state->blocksize); + kmem_free(tmpbuffer); } xfs_da_log_buf(state->args->trans, save_blk->bp, 0, @@ -2400,8 +2362,6 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) */ retval = 0; for ( ; (i < be16_to_cpu(leaf->hdr.count)); entry++, i++) { - attrnames_t *namesp; - if (be32_to_cpu(entry->hashval) != cursor->hashval) { cursor->hashval = be32_to_cpu(entry->hashval); cursor->offset = 0; @@ -2409,17 +2369,13 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) if (entry->flags & XFS_ATTR_INCOMPLETE) continue; /* skip incomplete entries */ - if (!xfs_attr_namesp_match_overrides(context->flags, entry->flags)) - continue; - - namesp = xfs_attr_flags_namesp(entry->flags); if (entry->flags & XFS_ATTR_LOCAL) { xfs_attr_leaf_name_local_t *name_loc = XFS_ATTR_LEAF_NAME_LOCAL(leaf, i); retval = context->put_listent(context, - namesp, + entry->flags, (char *)name_loc->nameval, (int)name_loc->namelen, be16_to_cpu(name_loc->valuelen), @@ -2446,16 +2402,15 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context) if (retval) return retval; retval = context->put_listent(context, - namesp, + entry->flags, (char *)name_rmt->name, (int)name_rmt->namelen, valuelen, (char*)args.value); - kmem_free(args.value, valuelen); - } - else { + kmem_free(args.value); + } else { retval = context->put_listent(context, - namesp, + entry->flags, (char *)name_rmt->name, (int)name_rmt->namelen, valuelen, @@ -2543,9 +2498,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args) /* * Commit the flag value change and start the next trans in series. */ - error = xfs_attr_rolltrans(&args->trans, args->dp); - - return(error); + return xfs_trans_roll(&args->trans, args->dp); } /* @@ -2592,9 +2545,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args) /* * Commit the flag value change and start the next trans in series. */ - error = xfs_attr_rolltrans(&args->trans, args->dp); - - return(error); + return xfs_trans_roll(&args->trans, args->dp); } /* @@ -2710,7 +2661,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args) /* * Commit the flag value change and start the next trans in series. */ - error = xfs_attr_rolltrans(&args->trans, args->dp); + error = xfs_trans_roll(&args->trans, args->dp); return(error); } @@ -2768,7 +2719,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp) /* * Commit the invalidate and start the next transaction. */ - error = xfs_attr_rolltrans(trans, dp); + error = xfs_trans_roll(trans, dp); return (error); } @@ -2870,7 +2821,8 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp, /* * Atomically commit the whole invalidate stuff. */ - if ((error = xfs_attr_rolltrans(trans, dp))) + error = xfs_trans_roll(trans, dp); + if (error) return (error); } @@ -2954,7 +2906,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp) error = tmp; /* save only the 1st errno */ } - kmem_free((xfs_caddr_t)list, size); + kmem_free((xfs_caddr_t)list); return(error); } @@ -3009,7 +2961,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, /* * Roll to next transaction. */ - if ((error = xfs_attr_rolltrans(trans, dp))) + error = xfs_trans_roll(trans, dp); + if (error) return (error); } @@ -3019,60 +2972,3 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp, return(0); } - - -/* - * Roll from one trans in the sequence of PERMANENT transactions to the next. - */ -int -xfs_attr_rolltrans(xfs_trans_t **transp, xfs_inode_t *dp) -{ - xfs_trans_t *trans; - unsigned int logres, count; - int error; - - /* - * Ensure that the inode is always logged. - */ - trans = *transp; - xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); - - /* - * Copy the critical parameters from one trans to the next. - */ - logres = trans->t_log_res; - count = trans->t_log_count; - *transp = xfs_trans_dup(trans); - - /* - * Commit the current transaction. - * If this commit failed, then it'd just unlock those items that - * are not marked ihold. That also means that a filesystem shutdown - * is in progress. The caller takes the responsibility to cancel - * the duplicate transaction that gets returned. - */ - if ((error = xfs_trans_commit(trans, 0))) - return (error); - - trans = *transp; - - /* - * Reserve space in the log for th next transaction. - * This also pushes items in the "AIL", the list of logged items, - * out to disk if they are taking up space at the tail of the log - * that we want to use. This requires that either nothing be locked - * across this call, or that anything that is locked be logged in - * the prior and the next transactions. - */ - error = xfs_trans_reserve(trans, 0, logres, 0, - XFS_TRANS_PERM_LOG_RES, count); - /* - * Ensure that the inode is in the new transaction and locked. - */ - if (!error) { - xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); - xfs_trans_ihold(trans, dp); - } - return (error); - -} diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index 040f732ce1e..83e9af417ca 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -30,7 +30,7 @@ struct attrlist; struct attrlist_cursor_kern; -struct attrnames; +struct xfs_attr_list_context; struct xfs_dabuf; struct xfs_da_args; struct xfs_da_state; @@ -204,33 +204,6 @@ static inline int xfs_attr_leaf_entsize_local_max(int bsize) return (((bsize) >> 1) + ((bsize) >> 2)); } - -/*======================================================================== - * Structure used to pass context around among the routines. - *========================================================================*/ - - -struct xfs_attr_list_context; - -typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, struct attrnames *, - char *, int, int, char *); - -typedef struct xfs_attr_list_context { - struct xfs_inode *dp; /* inode */ - struct attrlist_cursor_kern *cursor; /* position in list */ - struct attrlist *alist; /* output buffer */ - int seen_enough; /* T/F: seen enough of list? */ - int count; /* num used entries */ - int dupcnt; /* count dup hashvals seen */ - int bufsize; /* total buffer size */ - int firstu; /* first used byte in buffer */ - int flags; /* from VOP call */ - int resynch; /* T/F: resynch with cursor */ - int put_value; /* T/F: need value for listent */ - put_listent_func_t put_listent; /* list output fmt function */ - int index; /* index into output buffer */ -} xfs_attr_list_context_t; - /* * Used to keep a list of "remote value" extents when unlinking an inode. */ @@ -301,6 +274,4 @@ int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp, struct xfs_dabuf *leaf2_bp); int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local); -int xfs_attr_rolltrans(struct xfs_trans **transp, struct xfs_inode *dp); - #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_attr_sf.h b/fs/xfs/xfs_attr_sf.h index f67f917803b..ea22839caed 100644 --- a/fs/xfs/xfs_attr_sf.h +++ b/fs/xfs/xfs_attr_sf.h @@ -97,13 +97,9 @@ void xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context, void xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context, struct xfs_attr_leafblock *leaf); void xfs_attr_trace_enter(int type, char *where, - __psunsigned_t a2, __psunsigned_t a3, - __psunsigned_t a4, __psunsigned_t a5, - __psunsigned_t a6, __psunsigned_t a7, - __psunsigned_t a8, __psunsigned_t a9, - __psunsigned_t a10, __psunsigned_t a11, - __psunsigned_t a12, __psunsigned_t a13, - __psunsigned_t a14, __psunsigned_t a15); + struct xfs_attr_list_context *context, + __psunsigned_t a13, __psunsigned_t a14, + __psunsigned_t a15); #else #define xfs_attr_trace_l_c(w,c) #define xfs_attr_trace_l_cn(w,c,n) diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index fab0b6d5a41..48228848f5a 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -25,109 +25,6 @@ * XFS bit manipulation routines, used in non-realtime code. */ -#ifndef HAVE_ARCH_HIGHBIT -/* - * Index of high bit number in byte, -1 for none set, 0..7 otherwise. - */ -static const char xfs_highbit[256] = { - -1, 0, 1, 1, 2, 2, 2, 2, /* 00 .. 07 */ - 3, 3, 3, 3, 3, 3, 3, 3, /* 08 .. 0f */ - 4, 4, 4, 4, 4, 4, 4, 4, /* 10 .. 17 */ - 4, 4, 4, 4, 4, 4, 4, 4, /* 18 .. 1f */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 20 .. 27 */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 28 .. 2f */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 30 .. 37 */ - 5, 5, 5, 5, 5, 5, 5, 5, /* 38 .. 3f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 40 .. 47 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 48 .. 4f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 50 .. 57 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 58 .. 5f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 60 .. 67 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 68 .. 6f */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 70 .. 77 */ - 6, 6, 6, 6, 6, 6, 6, 6, /* 78 .. 7f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 80 .. 87 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 88 .. 8f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 90 .. 97 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* 98 .. 9f */ - 7, 7, 7, 7, 7, 7, 7, 7, /* a0 .. a7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* a8 .. af */ - 7, 7, 7, 7, 7, 7, 7, 7, /* b0 .. b7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* b8 .. bf */ - 7, 7, 7, 7, 7, 7, 7, 7, /* c0 .. c7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* c8 .. cf */ - 7, 7, 7, 7, 7, 7, 7, 7, /* d0 .. d7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* d8 .. df */ - 7, 7, 7, 7, 7, 7, 7, 7, /* e0 .. e7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* e8 .. ef */ - 7, 7, 7, 7, 7, 7, 7, 7, /* f0 .. f7 */ - 7, 7, 7, 7, 7, 7, 7, 7, /* f8 .. ff */ -}; -#endif - -/* - * xfs_highbit32: get high bit set out of 32-bit argument, -1 if none set. - */ -inline int -xfs_highbit32( - __uint32_t v) -{ -#ifdef HAVE_ARCH_HIGHBIT - return highbit32(v); -#else - int i; - - if (v & 0xffff0000) - if (v & 0xff000000) - i = 24; - else - i = 16; - else if (v & 0x0000ffff) - if (v & 0x0000ff00) - i = 8; - else - i = 0; - else - return -1; - return i + xfs_highbit[(v >> i) & 0xff]; -#endif -} - -/* - * xfs_lowbit64: get low bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_lowbit64( - __uint64_t v) -{ - __uint32_t w = (__uint32_t)v; - int n = 0; - - if (w) { /* lower bits */ - n = ffs(w); - } else { /* upper bits */ - w = (__uint32_t)(v >> 32); - if (w && (n = ffs(w))) - n += 32; - } - return n - 1; -} - -/* - * xfs_highbit64: get high bit set out of 64-bit argument, -1 if none set. - */ -int -xfs_highbit64( - __uint64_t v) -{ - __uint32_t h = (__uint32_t)(v >> 32); - - if (h) - return xfs_highbit32(h) + 32; - return xfs_highbit32((__uint32_t)v); -} - - /* * Return whether bitmap is empty. * Size is number of words in the bitmap, which is padded to word boundary diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index 082641a9782..8e0e463dae2 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -47,13 +47,39 @@ static inline __uint64_t xfs_mask64lo(int n) } /* Get high bit set out of 32-bit argument, -1 if none set */ -extern int xfs_highbit32(__uint32_t v); +static inline int xfs_highbit32(__uint32_t v) +{ + return fls(v) - 1; +} + +/* Get high bit set out of 64-bit argument, -1 if none set */ +static inline int xfs_highbit64(__uint64_t v) +{ + return fls64(v) - 1; +} + +/* Get low bit set out of 32-bit argument, -1 if none set */ +static inline int xfs_lowbit32(__uint32_t v) +{ + unsigned long t = v; + return (v) ? find_first_bit(&t, 32) : -1; +} /* Get low bit set out of 64-bit argument, -1 if none set */ -extern int xfs_lowbit64(__uint64_t v); +static inline int xfs_lowbit64(__uint64_t v) +{ + __uint32_t w = (__uint32_t)v; + int n = 0; -/* Get high bit set out of 64-bit argument, -1 if none set */ -extern int xfs_highbit64(__uint64_t); + if (w) { /* lower bits */ + n = ffs(w); + } else { /* upper bits */ + w = (__uint32_t)(v >> 32); + if (w && (n = ffs(w))) + n += 32; + } + return n - 1; +} /* Return whether bitmap is empty (1 == empty) */ extern int xfs_bitmap_empty(uint *map, uint size); diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 53c259f5a5a..a1aab9275d5 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -384,14 +384,14 @@ xfs_bmap_count_tree( int levelin, int *count); -STATIC int +STATIC void xfs_bmap_count_leaves( xfs_ifork_t *ifp, xfs_extnum_t idx, int numrecs, int *count); -STATIC int +STATIC void xfs_bmap_disk_count_leaves( xfs_extnum_t idx, xfs_bmbt_block_t *block, @@ -428,7 +428,8 @@ xfs_bmap_add_attrfork_btree( cur->bc_private.b.firstblock = *firstblock; if ((error = xfs_bmbt_lookup_ge(cur, 0, 0, 0, &stat))) goto error0; - ASSERT(stat == 1); /* must be at least one entry */ + /* must be at least one entry */ + XFS_WANT_CORRUPTED_GOTO(stat == 1, error0); if ((error = xfs_bmbt_newroot(cur, flags, &stat))) goto error0; if (stat == 0) { @@ -816,13 +817,13 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_startblock, RIGHT.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_delete(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_decrement(cur, 0, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + @@ -860,7 +861,7 @@ xfs_bmap_add_extent_delay_real( LEFT.br_startblock, LEFT.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + @@ -895,7 +896,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_startblock, RIGHT.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff, new->br_startblock, PREV.br_blockcount + @@ -928,11 +929,11 @@ xfs_bmap_add_extent_delay_real( new->br_startblock, new->br_blockcount, &i))) goto done; - ASSERT(i == 0); + XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } *dnew = 0; /* DELTA: The in-core extent described by new changed type. */ @@ -963,7 +964,7 @@ xfs_bmap_add_extent_delay_real( LEFT.br_startblock, LEFT.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + @@ -1004,11 +1005,11 @@ xfs_bmap_add_extent_delay_real( new->br_startblock, new->br_blockcount, &i))) goto done; - ASSERT(i == 0); + XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { @@ -1054,7 +1055,7 @@ xfs_bmap_add_extent_delay_real( RIGHT.br_startblock, RIGHT.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, new->br_blockcount + @@ -1094,11 +1095,11 @@ xfs_bmap_add_extent_delay_real( new->br_startblock, new->br_blockcount, &i))) goto done; - ASSERT(i == 0); + XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { @@ -1149,11 +1150,11 @@ xfs_bmap_add_extent_delay_real( new->br_startblock, new->br_blockcount, &i))) goto done; - ASSERT(i == 0); + XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS && ip->i_d.di_nextents > ip->i_df.if_ext_max) { @@ -1377,19 +1378,19 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_startblock, RIGHT.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_delete(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_decrement(cur, 0, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_delete(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_decrement(cur, 0, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + PREV.br_blockcount + @@ -1426,13 +1427,13 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_delete(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_decrement(cur, 0, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, LEFT.br_startoff, LEFT.br_startblock, LEFT.br_blockcount + PREV.br_blockcount, @@ -1469,13 +1470,13 @@ xfs_bmap_add_extent_unwritten_real( RIGHT.br_startblock, RIGHT.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_delete(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_decrement(cur, 0, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, new->br_blockcount + RIGHT.br_blockcount, @@ -1508,7 +1509,7 @@ xfs_bmap_add_extent_unwritten_real( new->br_startblock, new->br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, new->br_blockcount, newext))) @@ -1549,7 +1550,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff + new->br_blockcount, PREV.br_startblock + new->br_blockcount, @@ -1596,7 +1597,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff + new->br_blockcount, PREV.br_startblock + new->br_blockcount, @@ -1606,7 +1607,7 @@ xfs_bmap_add_extent_unwritten_real( cur->bc_rec.b = *new; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } /* DELTA: One in-core extent is split in two. */ temp = PREV.br_startoff; @@ -1640,7 +1641,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff, PREV.br_startblock, PREV.br_blockcount - new->br_blockcount, @@ -1682,7 +1683,7 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, PREV.br_startoff, PREV.br_startblock, PREV.br_blockcount - new->br_blockcount, @@ -1692,11 +1693,11 @@ xfs_bmap_add_extent_unwritten_real( new->br_startblock, new->br_blockcount, &i))) goto done; - ASSERT(i == 0); + XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = XFS_EXT_NORM; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } /* DELTA: One in-core extent is split in two. */ temp = PREV.br_startoff; @@ -1732,27 +1733,34 @@ xfs_bmap_add_extent_unwritten_real( PREV.br_startblock, PREV.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); /* new right extent - oldext */ if ((error = xfs_bmbt_update(cur, r[1].br_startoff, r[1].br_startblock, r[1].br_blockcount, r[1].br_state))) goto done; /* new left extent - oldext */ - PREV.br_blockcount = - new->br_startoff - PREV.br_startoff; cur->bc_rec.b = PREV; + cur->bc_rec.b.br_blockcount = + new->br_startoff - PREV.br_startoff; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); - if ((error = xfs_bmbt_increment(cur, 0, &i))) + XFS_WANT_CORRUPTED_GOTO(i == 1, done); + /* + * Reset the cursor to the position of the new extent + * we are about to insert as we can't trust it after + * the previous insert. + */ + if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff, + new->br_startblock, new->br_blockcount, + &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 0, done); /* new middle extent - newext */ - cur->bc_rec.b = *new; + cur->bc_rec.b.br_state = new->br_state; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } /* DELTA: One in-core extent is split in three. */ temp = PREV.br_startoff; @@ -2097,13 +2105,13 @@ xfs_bmap_add_extent_hole_real( right.br_startblock, right.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_delete(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_decrement(cur, 0, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, left.br_blockcount + @@ -2139,7 +2147,7 @@ xfs_bmap_add_extent_hole_real( left.br_startblock, left.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, left.br_startoff, left.br_startblock, left.br_blockcount + @@ -2174,7 +2182,7 @@ xfs_bmap_add_extent_hole_real( right.br_startblock, right.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); if ((error = xfs_bmbt_update(cur, new->br_startoff, new->br_startblock, new->br_blockcount + @@ -2208,11 +2216,11 @@ xfs_bmap_add_extent_hole_real( new->br_startblock, new->br_blockcount, &i))) goto done; - ASSERT(i == 0); + XFS_WANT_CORRUPTED_GOTO(i == 0, done); cur->bc_rec.b.br_state = new->br_state; if ((error = xfs_bmbt_insert(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } /* DELTA: A new extent was added in a hole. */ temp = new->br_startoff; @@ -3131,7 +3139,7 @@ xfs_bmap_del_extent( got.br_startblock, got.br_blockcount, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } da_old = da_new = 0; } else { @@ -3164,7 +3172,7 @@ xfs_bmap_del_extent( } if ((error = xfs_bmbt_delete(cur, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); break; case 2: @@ -3268,7 +3276,7 @@ xfs_bmap_del_extent( got.br_startblock, temp, &i))) goto done; - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); /* * Update the btree record back * to the original value. @@ -3289,7 +3297,7 @@ xfs_bmap_del_extent( error = XFS_ERROR(ENOSPC); goto done; } - ASSERT(i == 1); + XFS_WANT_CORRUPTED_GOTO(i == 1, done); } else flags |= XFS_ILOG_FEXT(whichfork); XFS_IFORK_NEXT_SET(ip, whichfork, @@ -3992,7 +4000,7 @@ xfs_bmap_add_attrfork( ip->i_d.di_aformat = XFS_DINODE_FMT_EXTENTS; } ASSERT(ip->i_d.di_anextents == 0); - VN_HOLD(XFS_ITOV(ip)); + IHOLD(ip); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); switch (ip->i_d.di_format) { @@ -5970,7 +5978,7 @@ unlock_and_return: xfs_iunlock_map_shared(ip, lock); xfs_iunlock(ip, XFS_IOLOCK_SHARED); - kmem_free(map, subnex * sizeof(*map)); + kmem_free(map); return error; } @@ -6088,7 +6096,7 @@ xfs_bmap_get_bp( tp = cur->bc_tp; licp = &tp->t_items; while (!bp && licp != NULL) { - if (XFS_LIC_ARE_ALL_FREE(licp)) { + if (xfs_lic_are_all_free(licp)) { licp = licp->lic_next; continue; } @@ -6098,11 +6106,11 @@ xfs_bmap_get_bp( xfs_buf_log_item_t *bip; xfs_buf_t *lbp; - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - lidp = XFS_LIC_SLOT(licp, i); + lidp = xfs_lic_slot(licp, i); lip = lidp->lid_item; if (lip->li_type != XFS_LI_BUF) continue; @@ -6359,13 +6367,9 @@ xfs_bmap_count_blocks( mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); if ( XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ) { - if (unlikely(xfs_bmap_count_leaves(ifp, 0, + xfs_bmap_count_leaves(ifp, 0, ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t), - count) < 0)) { - XFS_ERROR_REPORT("xfs_bmap_count_blocks(1)", - XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } + count); return 0; } @@ -6446,13 +6450,7 @@ xfs_bmap_count_tree( for (;;) { nextbno = be64_to_cpu(block->bb_rightsib); numrecs = be16_to_cpu(block->bb_numrecs); - if (unlikely(xfs_bmap_disk_count_leaves(0, - block, numrecs, count) < 0)) { - xfs_trans_brelse(tp, bp); - XFS_ERROR_REPORT("xfs_bmap_count_tree(2)", - XFS_ERRLEVEL_LOW, mp); - return XFS_ERROR(EFSCORRUPTED); - } + xfs_bmap_disk_count_leaves(0, block, numrecs, count); xfs_trans_brelse(tp, bp); if (nextbno == NULLFSBLOCK) break; @@ -6470,7 +6468,7 @@ xfs_bmap_count_tree( /* * Count leaf blocks given a range of extent records. */ -STATIC int +STATIC void xfs_bmap_count_leaves( xfs_ifork_t *ifp, xfs_extnum_t idx, @@ -6483,14 +6481,13 @@ xfs_bmap_count_leaves( xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b); *count += xfs_bmbt_get_blockcount(frp); } - return 0; } /* * Count leaf blocks given a range of extent records originally * in btree format. */ -STATIC int +STATIC void xfs_bmap_disk_count_leaves( xfs_extnum_t idx, xfs_bmbt_block_t *block, @@ -6504,5 +6501,4 @@ xfs_bmap_disk_count_leaves( frp = XFS_BTREE_REC_ADDR(xfs_bmbt, block, idx + b); *count += xfs_bmbt_disk_get_blockcount(frp); } - return 0; } diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 6ff70cda451..9f3e3a836d1 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -54,12 +54,23 @@ typedef struct xfs_bmap_free_item /* * Header for free extent list. + * + * xbf_low is used by the allocator to activate the lowspace algorithm - + * when free space is running low the extent allocator may choose to + * allocate an extent from an AG without leaving sufficient space for + * a btree split when inserting the new extent. In this case the allocator + * will enable the lowspace algorithm which is supposed to allow further + * allocations (such as btree splits and newroots) to allocate from + * sequential AGs. In order to avoid locking AGs out of order the lowspace + * algorithm will start searching for free space from AG 0. If the correct + * transaction reservations have been made then this algorithm will eventually + * find all the space it needs. */ typedef struct xfs_bmap_free { xfs_bmap_free_item_t *xbf_first; /* list of to-be-free extents */ int xbf_count; /* count of items on list */ - int xbf_low; /* kludge: alloc in low mode */ + int xbf_low; /* alloc in low mode */ } xfs_bmap_free_t; #define XFS_BMAP_MAX_NMAP 4 diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 4f0e849d973..23efad29a5c 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -1493,12 +1493,27 @@ xfs_bmbt_split( left = XFS_BUF_TO_BMBT_BLOCK(lbp); args.fsbno = cur->bc_private.b.firstblock; args.firstblock = args.fsbno; + args.minleft = 0; if (args.fsbno == NULLFSBLOCK) { args.fsbno = lbno; args.type = XFS_ALLOCTYPE_START_BNO; - } else + /* + * Make sure there is sufficient room left in the AG to + * complete a full tree split for an extent insert. If + * we are converting the middle part of an extent then + * we may need space for two tree splits. + * + * We are relying on the caller to make the correct block + * reservation for this operation to succeed. If the + * reservation amount is insufficient then we may fail a + * block allocation here and corrupt the filesystem. + */ + args.minleft = xfs_trans_get_block_res(args.tp); + } else if (cur->bc_private.b.flist->xbf_low) + args.type = XFS_ALLOCTYPE_START_BNO; + else args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.mod = args.minleft = args.alignment = args.total = args.isfl = + args.mod = args.alignment = args.total = args.isfl = args.userdata = args.minalignslop = 0; args.minlen = args.maxlen = args.prod = 1; args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; @@ -1510,6 +1525,21 @@ xfs_bmbt_split( XFS_BMBT_TRACE_CURSOR(cur, ERROR); return error; } + if (args.fsbno == NULLFSBLOCK && args.minleft) { + /* + * Could not find an AG with enough free space to satisfy + * a full btree split. Try again without minleft and if + * successful activate the lowspace algorithm. + */ + args.fsbno = 0; + args.type = XFS_ALLOCTYPE_FIRST_AG; + args.minleft = 0; + if ((error = xfs_alloc_vextent(&args))) { + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + return error; + } + cur->bc_private.b.flist->xbf_low = 1; + } if (args.fsbno == NULLFSBLOCK) { XFS_BMBT_TRACE_CURSOR(cur, EXIT); *stat = 0; @@ -2029,22 +2059,8 @@ xfs_bmbt_increment( * Insert the current record at the point referenced by cur. * * A multi-level split of the tree on insert will invalidate the original - * cursor. It appears, however, that some callers assume that the cursor is - * always valid. Hence if we do a multi-level split we need to revalidate the - * cursor. - * - * When a split occurs, we will see a new cursor returned. Use that as a - * trigger to determine if we need to revalidate the original cursor. If we get - * a split, then use the original irec to lookup up the path of the record we - * just inserted. - * - * Note that the fact that the btree root is in the inode means that we can - * have the level of the tree change without a "split" occurring at the root - * level. What happens is that the root is migrated to an allocated block and - * the inode root is pointed to it. This means a single split can change the - * level of the tree (level 2 -> level 3) and invalidate the old cursor. Hence - * the level change should be accounted as a split so as to correctly trigger a - * revalidation of the old cursor. + * cursor. All callers of this function should assume that the cursor is + * no longer valid and revalidate it. */ int /* error */ xfs_bmbt_insert( @@ -2057,14 +2073,11 @@ xfs_bmbt_insert( xfs_fsblock_t nbno; xfs_btree_cur_t *ncur; xfs_bmbt_rec_t nrec; - xfs_bmbt_irec_t oirec; /* original irec */ xfs_btree_cur_t *pcur; - int splits = 0; XFS_BMBT_TRACE_CURSOR(cur, ENTRY); level = 0; nbno = NULLFSBLOCK; - oirec = cur->bc_rec.b; xfs_bmbt_disk_set_all(&nrec, &cur->bc_rec.b); ncur = NULL; pcur = cur; @@ -2073,13 +2086,11 @@ xfs_bmbt_insert( &i))) { if (pcur != cur) xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR); - goto error0; + XFS_BMBT_TRACE_CURSOR(cur, ERROR); + return error; } XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (pcur != cur && (ncur || nbno == NULLFSBLOCK)) { - /* allocating a new root is effectively a split */ - if (cur->bc_nlevels != pcur->bc_nlevels) - splits++; cur->bc_nlevels = pcur->bc_nlevels; cur->bc_private.b.allocated += pcur->bc_private.b.allocated; @@ -2093,21 +2104,10 @@ xfs_bmbt_insert( xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR); } if (ncur) { - splits++; pcur = ncur; ncur = NULL; } } while (nbno != NULLFSBLOCK); - - if (splits > 1) { - /* revalidate the old cursor as we had a multi-level split */ - error = xfs_bmbt_lookup_eq(cur, oirec.br_startoff, - oirec.br_startblock, oirec.br_blockcount, &i); - if (error) - goto error0; - ASSERT(i == 1); - } - XFS_BMBT_TRACE_CURSOR(cur, EXIT); *stat = i; return 0; @@ -2254,7 +2254,9 @@ xfs_bmbt_newroot( #endif args.fsbno = be64_to_cpu(*pp); args.type = XFS_ALLOCTYPE_START_BNO; - } else + } else if (cur->bc_private.b.flist->xbf_low) + args.type = XFS_ALLOCTYPE_START_BNO; + else args.type = XFS_ALLOCTYPE_NEAR_BNO; if ((error = xfs_alloc_vextent(&args))) { XFS_BMBT_TRACE_CURSOR(cur, ERROR); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index aeb87ca69fc..cc593a84c34 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -46,38 +46,11 @@ kmem_zone_t *xfs_btree_cur_zone; /* * Btree magic numbers. */ -const __uint32_t xfs_magics[XFS_BTNUM_MAX] = -{ +const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }; /* - * Prototypes for internal routines. - */ - -/* - * Checking routine: return maxrecs for the block. - */ -STATIC int /* number of records fitting in block */ -xfs_btree_maxrecs( - xfs_btree_cur_t *cur, /* btree cursor */ - xfs_btree_block_t *block);/* generic btree block pointer */ - -/* - * Internal routines. - */ - -/* - * Retrieve the block pointer from the cursor at the given level. - * This may be a bmap btree root or from a buffer. - */ -STATIC xfs_btree_block_t * /* generic btree block pointer */ -xfs_btree_get_block( - xfs_btree_cur_t *cur, /* btree cursor */ - int level, /* level in btree */ - struct xfs_buf **bpp); /* buffer containing the block */ - -/* * Checking routine: return maxrecs for the block. */ STATIC int /* number of records fitting in block */ @@ -457,35 +430,6 @@ xfs_btree_dup_cursor( } /* - * Change the cursor to point to the first record at the given level. - * Other levels are unaffected. - */ -int /* success=1, failure=0 */ -xfs_btree_firstrec( - xfs_btree_cur_t *cur, /* btree cursor */ - int level) /* level to change */ -{ - xfs_btree_block_t *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ - - /* - * Get the block pointer for this level. - */ - block = xfs_btree_get_block(cur, level, &bp); - xfs_btree_check_block(cur, block, level, bp); - /* - * It's empty, there is no such record. - */ - if (!block->bb_h.bb_numrecs) - return 0; - /* - * Set the ptr value to 1, that's the first record/key. - */ - cur->bc_ptrs[level] = 1; - return 1; -} - -/* * Retrieve the block pointer from the cursor at the given level. * This may be a bmap btree root or from a buffer. */ @@ -626,6 +570,13 @@ xfs_btree_init_cursor( cur->bc_private.a.agbp = agbp; cur->bc_private.a.agno = agno; break; + case XFS_BTNUM_INO: + /* + * Inode allocation btree fields. + */ + cur->bc_private.a.agbp = agbp; + cur->bc_private.a.agno = agno; + break; case XFS_BTNUM_BMAP: /* * Bmap btree fields. @@ -638,13 +589,6 @@ xfs_btree_init_cursor( cur->bc_private.b.flags = 0; cur->bc_private.b.whichfork = whichfork; break; - case XFS_BTNUM_INO: - /* - * Inode allocation btree fields. - */ - cur->bc_private.i.agbp = agbp; - cur->bc_private.i.agno = agno; - break; default: ASSERT(0); } @@ -671,6 +615,35 @@ xfs_btree_islastblock( } /* + * Change the cursor to point to the first record at the given level. + * Other levels are unaffected. + */ +int /* success=1, failure=0 */ +xfs_btree_firstrec( + xfs_btree_cur_t *cur, /* btree cursor */ + int level) /* level to change */ +{ + xfs_btree_block_t *block; /* generic btree block pointer */ + xfs_buf_t *bp; /* buffer containing block */ + + /* + * Get the block pointer for this level. + */ + block = xfs_btree_get_block(cur, level, &bp); + xfs_btree_check_block(cur, block, level, bp); + /* + * It's empty, there is no such record. + */ + if (!block->bb_h.bb_numrecs) + return 0; + /* + * Set the ptr value to 1, that's the first record/key. + */ + cur->bc_ptrs[level] = 1; + return 1; +} + +/* * Change the cursor to point to the last record in the current block * at the given level. Other levels are unaffected. */ @@ -890,12 +863,12 @@ xfs_btree_readahead_core( case XFS_BTNUM_INO: i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, be32_to_cpu(i->bb_leftsib), 1); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) { - xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno, + xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, be32_to_cpu(i->bb_rightsib), 1); rval++; } diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7440b78f9ce..1f528a2a375 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -158,8 +158,8 @@ typedef struct xfs_btree_cur __uint8_t bc_blocklog; /* log2(blocksize) of btree blocks */ xfs_btnum_t bc_btnum; /* identifies which btree type */ union { - struct { /* needed for BNO, CNT */ - struct xfs_buf *agbp; /* agf buffer pointer */ + struct { /* needed for BNO, CNT, INO */ + struct xfs_buf *agbp; /* agf/agi buffer pointer */ xfs_agnumber_t agno; /* ag number */ } a; struct { /* needed for BMAP */ @@ -172,10 +172,6 @@ typedef struct xfs_btree_cur char flags; /* flags */ #define XFS_BTCUR_BPRV_WASDEL 1 /* was delayed */ } b; - struct { /* needed for INO */ - struct xfs_buf *agbp; /* agi buffer pointer */ - xfs_agnumber_t agno; /* ag number */ - } i; } bc_private; /* per-btree type data */ } xfs_btree_cur_t; diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 53a71c62025..608c30c3f76 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -737,7 +737,7 @@ xfs_buf_item_init( bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); bip->bli_format.blf_map_size = map_size; #ifdef XFS_BLI_TRACE - bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_SLEEP); + bip->bli_trace = ktrace_alloc(XFS_BLI_TRACE_SIZE, KM_NOFS); #endif #ifdef XFS_TRANS_DEBUG @@ -889,9 +889,9 @@ xfs_buf_item_relse( } #ifdef XFS_TRANS_DEBUG - kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); + kmem_free(bip->bli_orig); bip->bli_orig = NULL; - kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY); + kmem_free(bip->bli_logged); bip->bli_logged = NULL; #endif /* XFS_TRANS_DEBUG */ @@ -1056,7 +1056,7 @@ xfs_buf_iodone_callbacks( anyway. */ XFS_BUF_SET_BRELSE_FUNC(bp,xfs_buf_error_relse); XFS_BUF_DONE(bp); - XFS_BUF_V_IODONESEMA(bp); + XFS_BUF_FINISH_IOWAIT(bp); } return; } @@ -1138,9 +1138,9 @@ xfs_buf_iodone( xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); #ifdef XFS_TRANS_DEBUG - kmem_free(bip->bli_orig, XFS_BUF_COUNT(bp)); + kmem_free(bip->bli_orig); bip->bli_orig = NULL; - kmem_free(bip->bli_logged, XFS_BUF_COUNT(bp) / NBBY); + kmem_free(bip->bli_logged); bip->bli_logged = NULL; #endif /* XFS_TRANS_DEBUG */ diff --git a/fs/xfs/xfs_clnt.h b/fs/xfs/xfs_clnt.h index d5d1e60ee22..d2ce5dd70d8 100644 --- a/fs/xfs/xfs_clnt.h +++ b/fs/xfs/xfs_clnt.h @@ -78,6 +78,7 @@ struct xfs_mount_args { #define XFSMNT_IOSIZE 0x00002000 /* optimize for I/O size */ #define XFSMNT_OSYNCISOSYNC 0x00004000 /* o_sync is REALLY o_sync */ /* (osyncisdsync is default) */ +#define XFSMNT_NOATTR2 0x00008000 /* turn off ATTR2 EA format */ #define XFSMNT_32BITINODES 0x00200000 /* restrict inodes to 32 * bits of address space */ #define XFSMNT_GQUOTA 0x00400000 /* group quota accounting */ diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 021a8f7e563..9e561a9cefc 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1431,7 +1431,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path, } if (level < 0) { *result = XFS_ERROR(ENOENT); /* we're out of our tree */ - ASSERT(args->oknoent); + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); return(0); } @@ -1530,6 +1530,28 @@ xfs_da_hashname(const uchar_t *name, int namelen) } } +enum xfs_dacmp +xfs_da_compname( + struct xfs_da_args *args, + const char *name, + int len) +{ + return (args->namelen == len && memcmp(args->name, name, len) == 0) ? + XFS_CMP_EXACT : XFS_CMP_DIFFERENT; +} + +static xfs_dahash_t +xfs_default_hashname( + struct xfs_name *name) +{ + return xfs_da_hashname(name->name, name->len); +} + +const struct xfs_nameops xfs_default_nameops = { + .hashname = xfs_default_hashname, + .compname = xfs_da_compname +}; + /* * Add a block to the btree ahead of the file. * Return the new block number to the caller. @@ -1598,7 +1620,7 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) args->firstblock, args->total, &mapp[mapi], &nmap, args->flist, NULL))) { - kmem_free(mapp, sizeof(*mapp) * count); + kmem_free(mapp); return error; } if (nmap < 1) @@ -1620,11 +1642,11 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno) mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != bno + count) { if (mapp != &map) - kmem_free(mapp, sizeof(*mapp) * count); + kmem_free(mapp); return XFS_ERROR(ENOSPC); } if (mapp != &map) - kmem_free(mapp, sizeof(*mapp) * count); + kmem_free(mapp); *new_blkno = (xfs_dablk_t)bno; return 0; } @@ -2090,10 +2112,10 @@ xfs_da_do_buf( } } if (bplist) { - kmem_free(bplist, sizeof(*bplist) * nmap); + kmem_free(bplist); } if (mapp != &map) { - kmem_free(mapp, sizeof(*mapp) * nfsb); + kmem_free(mapp); } if (bpp) *bpp = rbp; @@ -2102,11 +2124,11 @@ exit1: if (bplist) { for (i = 0; i < nbplist; i++) xfs_trans_brelse(trans, bplist[i]); - kmem_free(bplist, sizeof(*bplist) * nmap); + kmem_free(bplist); } exit0: if (mapp != &map) - kmem_free(mapp, sizeof(*mapp) * nfsb); + kmem_free(mapp); if (bpp) *bpp = NULL; return error; @@ -2218,7 +2240,7 @@ xfs_da_state_free(xfs_da_state_t *state) #ifdef XFS_DABUF_DEBUG xfs_dabuf_t *xfs_dabuf_global_list; -spinlock_t xfs_dabuf_global_lock; +static DEFINE_SPINLOCK(xfs_dabuf_global_lock); #endif /* @@ -2315,7 +2337,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf) if (dabuf->dirty) xfs_da_buf_clean(dabuf); if (dabuf->nbuf > 1) - kmem_free(dabuf->data, BBTOB(dabuf->bbcount)); + kmem_free(dabuf->data); #ifdef XFS_DABUF_DEBUG { spin_lock(&xfs_dabuf_global_lock); @@ -2332,7 +2354,7 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf) if (dabuf->nbuf == 1) kmem_zone_free(xfs_dabuf_zone, dabuf); else - kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf)); + kmem_free(dabuf); } /* @@ -2403,7 +2425,7 @@ xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf) for (i = 0; i < nbuf; i++) xfs_trans_brelse(tp, bplist[i]); if (bplist != &bp) - kmem_free(bplist, nbuf * sizeof(*bplist)); + kmem_free(bplist); } /* @@ -2429,7 +2451,7 @@ xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf) for (i = 0; i < nbuf; i++) xfs_trans_binval(tp, bplist[i]); if (bplist != &bp) - kmem_free(bplist, nbuf * sizeof(*bplist)); + kmem_free(bplist); } /* diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index 7facf86f74f..8be0b00ede9 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -99,6 +99,15 @@ typedef struct xfs_da_node_entry xfs_da_node_entry_t; *========================================================================*/ /* + * Search comparison results + */ +enum xfs_dacmp { + XFS_CMP_DIFFERENT, /* names are completely different */ + XFS_CMP_EXACT, /* names are exactly the same */ + XFS_CMP_CASE /* names are same but differ in case */ +}; + +/* * Structure to ease passing around component names. */ typedef struct xfs_da_args { @@ -123,13 +132,20 @@ typedef struct xfs_da_args { int index2; /* index of 2nd attr in blk */ xfs_dablk_t rmtblkno2; /* remote attr value starting blkno */ int rmtblkcnt2; /* remote attr value block count */ - unsigned char justcheck; /* T/F: check for ok with no space */ - unsigned char rename; /* T/F: this is an atomic rename op */ - unsigned char addname; /* T/F: this is an add operation */ - unsigned char oknoent; /* T/F: ok to return ENOENT, else die */ + int op_flags; /* operation flags */ + enum xfs_dacmp cmpresult; /* name compare result for lookups */ } xfs_da_args_t; /* + * Operation flags: + */ +#define XFS_DA_OP_JUSTCHECK 0x0001 /* check for ok with no space */ +#define XFS_DA_OP_RENAME 0x0002 /* this is an atomic rename op */ +#define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */ +#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */ +#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */ + +/* * Structure to describe buffer(s) for a block. * This is needed in the directory version 2 format case, when * multiple non-contiguous fsblocks might be needed to cover one @@ -201,6 +217,14 @@ typedef struct xfs_da_state { (uint)(XFS_DA_LOGOFF(BASE, ADDR)), \ (uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1) +/* + * Name ops for directory and/or attr name operations + */ +struct xfs_nameops { + xfs_dahash_t (*hashname)(struct xfs_name *); + enum xfs_dacmp (*compname)(struct xfs_da_args *, const char *, int); +}; + #ifdef __KERNEL__ /*======================================================================== @@ -249,6 +273,10 @@ int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno, xfs_dabuf_t *dead_buf); uint xfs_da_hashname(const uchar_t *name_string, int name_length); +enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args, + const char *name, int len); + + xfs_da_state_t *xfs_da_state_alloc(void); void xfs_da_state_free(xfs_da_state_t *state); diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 5f3647cb988..760f4c5b516 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -116,7 +116,7 @@ xfs_swapext( out_put_file: fput(file); out_free_sxp: - kmem_free(sxp, sizeof(xfs_swapext_t)); + kmem_free(sxp); out: return error; } @@ -128,10 +128,8 @@ xfs_swap_extents( xfs_swapext_t *sxp) { xfs_mount_t *mp; - xfs_inode_t *ips[2]; xfs_trans_t *tp; xfs_bstat_t *sbp = &sxp->sx_stat; - bhv_vnode_t *vp, *tvp; xfs_ifork_t *tempifp, *ifp, *tifp; int ilf_fields, tilf_fields; static uint lock_flags = XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL; @@ -150,19 +148,8 @@ xfs_swap_extents( } sbp = &sxp->sx_stat; - vp = XFS_ITOV(ip); - tvp = XFS_ITOV(tip); - - /* Lock in i_ino order */ - if (ip->i_ino < tip->i_ino) { - ips[0] = ip; - ips[1] = tip; - } else { - ips[0] = tip; - ips[1] = ip; - } - xfs_lock_inodes(ips, 2, lock_flags); + xfs_lock_two_inodes(ip, tip, lock_flags); locked = 1; /* Verify that both files have the same format */ @@ -184,7 +171,7 @@ xfs_swap_extents( goto error0; } - if (VN_CACHED(tvp) != 0) { + if (VN_CACHED(VFS_I(tip)) != 0) { xfs_inval_cached_trace(tip, 0, -1, 0, -1); error = xfs_flushinval_pages(tip, 0, -1, FI_REMAPF_LOCKED); @@ -193,7 +180,7 @@ xfs_swap_extents( } /* Verify O_DIRECT for ftmp */ - if (VN_CACHED(tvp) != 0) { + if (VN_CACHED(VFS_I(tip)) != 0) { error = XFS_ERROR(EINVAL); goto error0; } @@ -237,7 +224,7 @@ xfs_swap_extents( * vop_read (or write in the case of autogrow) they block on the iolock * until we have switched the extents. */ - if (VN_MAPPED(vp)) { + if (VN_MAPPED(VFS_I(ip))) { error = XFS_ERROR(EBUSY); goto error0; } @@ -265,7 +252,7 @@ xfs_swap_extents( locked = 0; goto error0; } - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); /* * Count the number of extended attribute blocks @@ -350,15 +337,11 @@ xfs_swap_extents( break; } - /* - * Increment vnode ref counts since xfs_trans_commit & - * xfs_trans_cancel will both unlock the inodes and - * decrement the associated ref counts. - */ - VN_HOLD(vp); - VN_HOLD(tvp); + IHOLD(ip); xfs_trans_ijoin(tp, ip, lock_flags); + + IHOLD(tip); xfs_trans_ijoin(tp, tip, lock_flags); xfs_trans_log_inode(tp, ip, ilf_fields); @@ -381,6 +364,6 @@ xfs_swap_extents( xfs_iunlock(tip, lock_flags); } if (tempifp != NULL) - kmem_free(tempifp, sizeof(xfs_ifork_t)); + kmem_free(tempifp); return error; } diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index 7cb26529766..80e0dc51361 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -46,6 +46,54 @@ struct xfs_name xfs_name_dotdot = {"..", 2}; +extern const struct xfs_nameops xfs_default_nameops; + +/* + * ASCII case-insensitive (ie. A-Z) support for directories that was + * used in IRIX. + */ +STATIC xfs_dahash_t +xfs_ascii_ci_hashname( + struct xfs_name *name) +{ + xfs_dahash_t hash; + int i; + + for (i = 0, hash = 0; i < name->len; i++) + hash = tolower(name->name[i]) ^ rol32(hash, 7); + + return hash; +} + +STATIC enum xfs_dacmp +xfs_ascii_ci_compname( + struct xfs_da_args *args, + const char *name, + int len) +{ + enum xfs_dacmp result; + int i; + + if (args->namelen != len) + return XFS_CMP_DIFFERENT; + + result = XFS_CMP_EXACT; + for (i = 0; i < len; i++) { + if (args->name[i] == name[i]) + continue; + if (tolower(args->name[i]) != tolower(name[i])) + return XFS_CMP_DIFFERENT; + result = XFS_CMP_CASE; + } + + return result; +} + +static struct xfs_nameops xfs_ascii_ci_nameops = { + .hashname = xfs_ascii_ci_hashname, + .compname = xfs_ascii_ci_compname, +}; + void xfs_dir_mount( xfs_mount_t *mp) @@ -65,6 +113,10 @@ xfs_dir_mount( (mp->m_dirblksize - (uint)sizeof(xfs_da_node_hdr_t)) / (uint)sizeof(xfs_da_node_entry_t); mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100; + if (xfs_sb_version_hasasciici(&mp->m_sb)) + mp->m_dirnameops = &xfs_ascii_ci_nameops; + else + mp->m_dirnameops = &xfs_default_nameops; } /* @@ -162,9 +214,10 @@ xfs_dir_createname( return rval; XFS_STATS_INC(xs_dir_create); + memset(&args, 0, sizeof(xfs_da_args_t)); args.name = name->name; args.namelen = name->len; - args.hashval = xfs_da_hashname(name->name, name->len); + args.hashval = dp->i_mount->m_dirnameops->hashname(name); args.inumber = inum; args.dp = dp; args.firstblock = first; @@ -172,8 +225,7 @@ xfs_dir_createname( args.total = total; args.whichfork = XFS_DATA_FORK; args.trans = tp; - args.justcheck = 0; - args.addname = args.oknoent = 1; + args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_addname(&args); @@ -191,14 +243,43 @@ xfs_dir_createname( } /* + * If doing a CI lookup and case-insensitive match, dup actual name into + * args.value. Return EEXIST for success (ie. name found) or an error. + */ +int +xfs_dir_cilookup_result( + struct xfs_da_args *args, + const char *name, + int len) +{ + if (args->cmpresult == XFS_CMP_DIFFERENT) + return ENOENT; + if (args->cmpresult != XFS_CMP_CASE || + !(args->op_flags & XFS_DA_OP_CILOOKUP)) + return EEXIST; + + args->value = kmem_alloc(len, KM_MAYFAIL); + if (!args->value) + return ENOMEM; + + memcpy(args->value, name, len); + args->valuelen = len; + return EEXIST; +} + +/* * Lookup a name in a directory, give back the inode number. + * If ci_name is not NULL, returns the actual name in ci_name if it differs + * to name, or ci_name->name is set to NULL for an exact match. */ + int xfs_dir_lookup( xfs_trans_t *tp, xfs_inode_t *dp, struct xfs_name *name, - xfs_ino_t *inum) /* out: inode number */ + xfs_ino_t *inum, /* out: inode number */ + struct xfs_name *ci_name) /* out: actual name if CI match */ { xfs_da_args_t args; int rval; @@ -206,15 +287,17 @@ xfs_dir_lookup( ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); XFS_STATS_INC(xs_dir_lookup); - memset(&args, 0, sizeof(xfs_da_args_t)); + memset(&args, 0, sizeof(xfs_da_args_t)); args.name = name->name; args.namelen = name->len; - args.hashval = xfs_da_hashname(name->name, name->len); + args.hashval = dp->i_mount->m_dirnameops->hashname(name); args.dp = dp; args.whichfork = XFS_DATA_FORK; args.trans = tp; - args.oknoent = 1; + args.op_flags = XFS_DA_OP_OKNOENT; + if (ci_name) + args.op_flags |= XFS_DA_OP_CILOOKUP; if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_lookup(&args); @@ -230,8 +313,13 @@ xfs_dir_lookup( rval = xfs_dir2_node_lookup(&args); if (rval == EEXIST) rval = 0; - if (rval == 0) + if (!rval) { *inum = args.inumber; + if (ci_name) { + ci_name->name = args.value; + ci_name->len = args.valuelen; + } + } return rval; } @@ -255,9 +343,10 @@ xfs_dir_removename( ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); XFS_STATS_INC(xs_dir_remove); + memset(&args, 0, sizeof(xfs_da_args_t)); args.name = name->name; args.namelen = name->len; - args.hashval = xfs_da_hashname(name->name, name->len); + args.hashval = dp->i_mount->m_dirnameops->hashname(name); args.inumber = ino; args.dp = dp; args.firstblock = first; @@ -265,7 +354,6 @@ xfs_dir_removename( args.total = total; args.whichfork = XFS_DATA_FORK; args.trans = tp; - args.justcheck = args.addname = args.oknoent = 0; if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_removename(&args); @@ -338,9 +426,10 @@ xfs_dir_replace( if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) return rval; + memset(&args, 0, sizeof(xfs_da_args_t)); args.name = name->name; args.namelen = name->len; - args.hashval = xfs_da_hashname(name->name, name->len); + args.hashval = dp->i_mount->m_dirnameops->hashname(name); args.inumber = inum; args.dp = dp; args.firstblock = first; @@ -348,7 +437,6 @@ xfs_dir_replace( args.total = total; args.whichfork = XFS_DATA_FORK; args.trans = tp; - args.justcheck = args.addname = args.oknoent = 0; if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_replace(&args); @@ -384,15 +472,16 @@ xfs_dir_canenter( return 0; ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR); - memset(&args, 0, sizeof(xfs_da_args_t)); + memset(&args, 0, sizeof(xfs_da_args_t)); args.name = name->name; args.namelen = name->len; - args.hashval = xfs_da_hashname(name->name, name->len); + args.hashval = dp->i_mount->m_dirnameops->hashname(name); args.dp = dp; args.whichfork = XFS_DATA_FORK; args.trans = tp; - args.justcheck = args.addname = args.oknoent = 1; + args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | + XFS_DA_OP_OKNOENT; if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) rval = xfs_dir2_sf_addname(&args); @@ -493,7 +582,7 @@ xfs_dir2_grow_inode( args->firstblock, args->total, &mapp[mapi], &nmap, args->flist, NULL))) { - kmem_free(mapp, sizeof(*mapp) * count); + kmem_free(mapp); return error; } if (nmap < 1) @@ -525,14 +614,14 @@ xfs_dir2_grow_inode( mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount != bno + count) { if (mapp != &map) - kmem_free(mapp, sizeof(*mapp) * count); + kmem_free(mapp); return XFS_ERROR(ENOSPC); } /* * Done with the temporary mapping table. */ if (mapp != &map) - kmem_free(mapp, sizeof(*mapp) * count); + kmem_free(mapp); *dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); /* * Update file's size if this is the data space and it grew. diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index 6392f939029..1d9ef96f33a 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -74,7 +74,8 @@ extern int xfs_dir_createname(struct xfs_trans *tp, struct xfs_inode *dp, xfs_fsblock_t *first, struct xfs_bmap_free *flist, xfs_extlen_t tot); extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp, - struct xfs_name *name, xfs_ino_t *inum); + struct xfs_name *name, xfs_ino_t *inum, + struct xfs_name *ci_name); extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp, struct xfs_name *name, xfs_ino_t ino, xfs_fsblock_t *first, @@ -99,4 +100,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db, struct xfs_dabuf *bp); +extern int xfs_dir_cilookup_result(struct xfs_da_args *args, const char *name, + int len); + #endif /* __XFS_DIR2_H__ */ diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index fb5a556725b..e2fa0a1d8e9 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -215,7 +215,7 @@ xfs_dir2_block_addname( /* * If this isn't a real add, we're done with the buffer. */ - if (args->justcheck) + if (args->op_flags & XFS_DA_OP_JUSTCHECK) xfs_da_brelse(tp, bp); /* * If we don't have space for the new entry & leaf ... @@ -225,7 +225,7 @@ xfs_dir2_block_addname( * Not trying to actually do anything, or don't have * a space reservation: return no-space. */ - if (args->justcheck || args->total == 0) + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) return XFS_ERROR(ENOSPC); /* * Convert to the next larger format. @@ -240,7 +240,7 @@ xfs_dir2_block_addname( /* * Just checking, and it would work, so say so. */ - if (args->justcheck) + if (args->op_flags & XFS_DA_OP_JUSTCHECK) return 0; needlog = needscan = 0; /* @@ -610,14 +610,15 @@ xfs_dir2_block_lookup( /* * Get the offset from the leaf entry, to point to the data. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); + dep = (xfs_dir2_data_entry_t *)((char *)block + + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); /* - * Fill in inode number, release the block. + * Fill in inode number, CI name if appropriate, release the block. */ args->inumber = be64_to_cpu(dep->inumber); + error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_da_brelse(args->trans, bp); - return XFS_ERROR(EEXIST); + return XFS_ERROR(error); } /* @@ -643,6 +644,7 @@ xfs_dir2_block_lookup_int( int mid; /* binary search current idx */ xfs_mount_t *mp; /* filesystem mount point */ xfs_trans_t *tp; /* transaction pointer */ + enum xfs_dacmp cmp; /* comparison result */ dp = args->dp; tp = args->trans; @@ -673,7 +675,7 @@ xfs_dir2_block_lookup_int( else high = mid - 1; if (low > high) { - ASSERT(args->oknoent); + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); xfs_da_brelse(tp, bp); return XFS_ERROR(ENOENT); } @@ -697,20 +699,31 @@ xfs_dir2_block_lookup_int( dep = (xfs_dir2_data_entry_t *) ((char *)block + xfs_dir2_dataptr_to_off(mp, addr)); /* - * Compare, if it's right give back buffer & entry number. + * Compare name and if it's an exact match, return the index + * and buffer. If it's the first case-insensitive match, store + * the index and buffer and continue looking for an exact match. */ - if (dep->namelen == args->namelen && - dep->name[0] == args->name[0] && - memcmp(dep->name, args->name, args->namelen) == 0) { + cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { + args->cmpresult = cmp; *bpp = bp; *entno = mid; - return 0; + if (cmp == XFS_CMP_EXACT) + return 0; } - } while (++mid < be32_to_cpu(btp->count) && be32_to_cpu(blp[mid].hashval) == hash); + } while (++mid < be32_to_cpu(btp->count) && + be32_to_cpu(blp[mid].hashval) == hash); + + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + /* + * Here, we can only be doing a lookup (not a rename or replace). + * If a case-insensitive match was found earlier, return success. + */ + if (args->cmpresult == XFS_CMP_CASE) + return 0; /* * No match, release the buffer and return ENOENT. */ - ASSERT(args->oknoent); xfs_da_brelse(tp, bp); return XFS_ERROR(ENOENT); } @@ -1033,6 +1046,7 @@ xfs_dir2_sf_to_block( xfs_dir2_sf_t *sfp; /* shortform structure */ __be16 *tagp; /* end of data entry */ xfs_trans_t *tp; /* transaction pointer */ + struct xfs_name name; xfs_dir2_trace_args("sf_to_block", args); dp = args->dp; @@ -1071,7 +1085,7 @@ xfs_dir2_sf_to_block( */ error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno); if (error) { - kmem_free(buf, buf_len); + kmem_free(buf); return error; } /* @@ -1079,7 +1093,7 @@ xfs_dir2_sf_to_block( */ error = xfs_dir2_data_init(args, blkno, &bp); if (error) { - kmem_free(buf, buf_len); + kmem_free(buf); return error; } block = bp->data; @@ -1187,8 +1201,10 @@ xfs_dir2_sf_to_block( tagp = xfs_dir2_data_entry_tag_p(dep); *tagp = cpu_to_be16((char *)dep - (char *)block); xfs_dir2_data_log_entry(tp, bp, dep); - blp[2 + i].hashval = cpu_to_be32(xfs_da_hashname( - (char *)sfep->name, sfep->namelen)); + name.name = sfep->name; + name.len = sfep->namelen; + blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops-> + hashname(&name)); blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, (char *)dep - (char *)block)); offset = (int)((char *)(tagp + 1) - (char *)block); @@ -1198,7 +1214,7 @@ xfs_dir2_sf_to_block( sfep = xfs_dir2_sf_nextentry(sfp, sfep); } /* Done with the temporary buffer */ - kmem_free(buf, buf_len); + kmem_free(buf); /* * Sort the leaf entries by hash value. */ diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index fb8c9e08b23..498f8d69433 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -65,6 +65,7 @@ xfs_dir2_data_check( xfs_mount_t *mp; /* filesystem mount point */ char *p; /* current data position */ int stale; /* count of stale leaves */ + struct xfs_name name; mp = dp->i_mount; d = bp->data; @@ -140,7 +141,9 @@ xfs_dir2_data_check( addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, (xfs_dir2_data_aoff_t) ((char *)dep - (char *)d)); - hash = xfs_da_hashname((char *)dep->name, dep->namelen); + name.name = dep->name; + name.len = dep->namelen; + hash = mp->m_dirnameops->hashname(&name); for (i = 0; i < be32_to_cpu(btp->count); i++) { if (be32_to_cpu(lep[i].address) == addr && be32_to_cpu(lep[i].hashval) == hash) diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index bc52b803d79..93535992cb6 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -263,20 +263,21 @@ xfs_dir2_leaf_addname( * If we don't have enough free bytes but we can make enough * by compacting out stale entries, we'll do that. */ - if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < needbytes && - be16_to_cpu(leaf->hdr.stale) > 1) { + if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < + needbytes && be16_to_cpu(leaf->hdr.stale) > 1) { compact = 1; } /* * Otherwise if we don't have enough free bytes we need to * convert to node form. */ - else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] < - needbytes) { + else if ((char *)bestsp - (char *)&leaf->ents[be16_to_cpu( + leaf->hdr.count)] < needbytes) { /* * Just checking or no space reservation, give up. */ - if (args->justcheck || args->total == 0) { + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || + args->total == 0) { xfs_da_brelse(tp, lbp); return XFS_ERROR(ENOSPC); } @@ -301,7 +302,7 @@ xfs_dir2_leaf_addname( * If just checking, then it will fit unless we needed to allocate * a new data block. */ - if (args->justcheck) { + if (args->op_flags & XFS_DA_OP_JUSTCHECK) { xfs_da_brelse(tp, lbp); return use_block == -1 ? XFS_ERROR(ENOSPC) : 0; } @@ -1110,7 +1111,7 @@ xfs_dir2_leaf_getdents( *offset = XFS_DIR2_MAX_DATAPTR; else *offset = xfs_dir2_byte_to_dataptr(mp, curoff); - kmem_free(map, map_size * sizeof(*map)); + kmem_free(map); if (bp) xfs_da_brelse(NULL, bp); return error; @@ -1298,12 +1299,13 @@ xfs_dir2_leaf_lookup( ((char *)dbp->data + xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); /* - * Return the found inode number. + * Return the found inode number & CI name if appropriate */ args->inumber = be64_to_cpu(dep->inumber); + error = xfs_dir_cilookup_result(args, dep->name, dep->namelen); xfs_da_brelse(tp, dbp); xfs_da_brelse(tp, lbp); - return XFS_ERROR(EEXIST); + return XFS_ERROR(error); } /* @@ -1319,8 +1321,8 @@ xfs_dir2_leaf_lookup_int( int *indexp, /* out: index in leaf block */ xfs_dabuf_t **dbpp) /* out: data buffer */ { - xfs_dir2_db_t curdb; /* current data block number */ - xfs_dabuf_t *dbp; /* data buffer */ + xfs_dir2_db_t curdb = -1; /* current data block number */ + xfs_dabuf_t *dbp = NULL; /* data buffer */ xfs_dir2_data_entry_t *dep; /* data entry */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return code */ @@ -1331,6 +1333,8 @@ xfs_dir2_leaf_lookup_int( xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_db_t newdb; /* new data block number */ xfs_trans_t *tp; /* transaction pointer */ + xfs_dir2_db_t cidb = -1; /* case match data block no. */ + enum xfs_dacmp cmp; /* name compare result */ dp = args->dp; tp = args->trans; @@ -1338,11 +1342,10 @@ xfs_dir2_leaf_lookup_int( /* * Read the leaf block into the buffer. */ - if ((error = - xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, - XFS_DATA_FORK))) { + error = xfs_da_read_buf(tp, dp, mp->m_dirleafblk, -1, &lbp, + XFS_DATA_FORK); + if (error) return error; - } *lbpp = lbp; leaf = lbp->data; xfs_dir2_leaf_check(dp, lbp); @@ -1354,9 +1357,9 @@ xfs_dir2_leaf_lookup_int( * Loop over all the entries with the right hash value * looking to match the name. */ - for (lep = &leaf->ents[index], dbp = NULL, curdb = -1; - index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; - lep++, index++) { + for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(lep->hashval) == args->hashval; + lep++, index++) { /* * Skip over stale leaf entries. */ @@ -1373,10 +1376,10 @@ xfs_dir2_leaf_lookup_int( if (newdb != curdb) { if (dbp) xfs_da_brelse(tp, dbp); - if ((error = - xfs_da_read_buf(tp, dp, - xfs_dir2_db_to_da(mp, newdb), -1, &dbp, - XFS_DATA_FORK))) { + error = xfs_da_read_buf(tp, dp, + xfs_dir2_db_to_da(mp, newdb), + -1, &dbp, XFS_DATA_FORK); + if (error) { xfs_da_brelse(tp, lbp); return error; } @@ -1386,24 +1389,50 @@ xfs_dir2_leaf_lookup_int( /* * Point to the data entry. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)dbp->data + - xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + dep = (xfs_dir2_data_entry_t *)((char *)dbp->data + + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); /* - * If it matches then return it. + * Compare name and if it's an exact match, return the index + * and buffer. If it's the first case-insensitive match, store + * the index and buffer and continue looking for an exact match. */ - if (dep->namelen == args->namelen && - dep->name[0] == args->name[0] && - memcmp(dep->name, args->name, args->namelen) == 0) { - *dbpp = dbp; + cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { + args->cmpresult = cmp; *indexp = index; - return 0; + /* case exact match: return the current buffer. */ + if (cmp == XFS_CMP_EXACT) { + *dbpp = dbp; + return 0; + } + cidb = curdb; } } + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + /* + * Here, we can only be doing a lookup (not a rename or remove). + * If a case-insensitive match was found earlier, re-read the + * appropriate data block if required and return it. + */ + if (args->cmpresult == XFS_CMP_CASE) { + ASSERT(cidb != -1); + if (cidb != curdb) { + xfs_da_brelse(tp, dbp); + error = xfs_da_read_buf(tp, dp, + xfs_dir2_db_to_da(mp, cidb), + -1, &dbp, XFS_DATA_FORK); + if (error) { + xfs_da_brelse(tp, lbp); + return error; + } + } + *dbpp = dbp; + return 0; + } /* * No match found, return ENOENT. */ - ASSERT(args->oknoent); + ASSERT(cidb == -1); if (dbp) xfs_da_brelse(tp, dbp); xfs_da_brelse(tp, lbp); diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 8dade711f09..fa6c3a5ddbc 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -226,7 +226,7 @@ xfs_dir2_leafn_add( ASSERT(index == be16_to_cpu(leaf->hdr.count) || be32_to_cpu(leaf->ents[index].hashval) >= args->hashval); - if (args->justcheck) + if (args->op_flags & XFS_DA_OP_JUSTCHECK) return 0; /* @@ -387,28 +387,26 @@ xfs_dir2_leafn_lasthash( } /* - * Look up a leaf entry in a node-format leaf block. - * If this is an addname then the extrablk in state is a freespace block, - * otherwise it's a data block. + * Look up a leaf entry for space to add a name in a node-format leaf block. + * The extrablk in state is a freespace block. */ -int -xfs_dir2_leafn_lookup_int( +STATIC int +xfs_dir2_leafn_lookup_for_addname( xfs_dabuf_t *bp, /* leaf buffer */ xfs_da_args_t *args, /* operation arguments */ int *indexp, /* out: leaf entry index */ xfs_da_state_t *state) /* state to fill in */ { - xfs_dabuf_t *curbp; /* current data/free buffer */ - xfs_dir2_db_t curdb; /* current data block number */ - xfs_dir2_db_t curfdb; /* current free block number */ - xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_dabuf_t *curbp = NULL; /* current data/free buffer */ + xfs_dir2_db_t curdb = -1; /* current data block number */ + xfs_dir2_db_t curfdb = -1; /* current free block number */ xfs_inode_t *dp; /* incore directory inode */ int error; /* error return value */ int fi; /* free entry index */ - xfs_dir2_free_t *free=NULL; /* free block structure */ + xfs_dir2_free_t *free = NULL; /* free block structure */ int index; /* leaf entry index */ xfs_dir2_leaf_t *leaf; /* leaf structure */ - int length=0; /* length of new data entry */ + int length; /* length of new data entry */ xfs_dir2_leaf_entry_t *lep; /* leaf entry */ xfs_mount_t *mp; /* filesystem mount point */ xfs_dir2_db_t newdb; /* new data block number */ @@ -431,33 +429,20 @@ xfs_dir2_leafn_lookup_int( /* * Do we have a buffer coming in? */ - if (state->extravalid) + if (state->extravalid) { + /* If so, it's a free block buffer, get the block number. */ curbp = state->extrablk.bp; - else - curbp = NULL; - /* - * For addname, it's a free block buffer, get the block number. - */ - if (args->addname) { - curfdb = curbp ? state->extrablk.blkno : -1; - curdb = -1; - length = xfs_dir2_data_entsize(args->namelen); - if ((free = (curbp ? curbp->data : NULL))) - ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); - } - /* - * For others, it's a data block buffer, get the block number. - */ - else { - curfdb = -1; - curdb = curbp ? state->extrablk.blkno : -1; + curfdb = state->extrablk.blkno; + free = curbp->data; + ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC); } + length = xfs_dir2_data_entsize(args->namelen); /* * Loop over leaf entries with the right hash value. */ - for (lep = &leaf->ents[index]; - index < be16_to_cpu(leaf->hdr.count) && be32_to_cpu(lep->hashval) == args->hashval; - lep++, index++) { + for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(lep->hashval) == args->hashval; + lep++, index++) { /* * Skip stale leaf entries. */ @@ -471,161 +456,244 @@ xfs_dir2_leafn_lookup_int( * For addname, we're looking for a place to put the new entry. * We want to use a data block with an entry of equal * hash value to ours if there is one with room. + * + * If this block isn't the data block we already have + * in hand, take a look at it. */ - if (args->addname) { + if (newdb != curdb) { + curdb = newdb; /* - * If this block isn't the data block we already have - * in hand, take a look at it. + * Convert the data block to the free block + * holding its freespace information. */ - if (newdb != curdb) { - curdb = newdb; - /* - * Convert the data block to the free block - * holding its freespace information. - */ - newfdb = xfs_dir2_db_to_fdb(mp, newdb); - /* - * If it's not the one we have in hand, - * read it in. - */ - if (newfdb != curfdb) { - /* - * If we had one before, drop it. - */ - if (curbp) - xfs_da_brelse(tp, curbp); - /* - * Read the free block. - */ - if ((error = xfs_da_read_buf(tp, dp, - xfs_dir2_db_to_da(mp, - newfdb), - -1, &curbp, - XFS_DATA_FORK))) { - return error; - } - free = curbp->data; - ASSERT(be32_to_cpu(free->hdr.magic) == - XFS_DIR2_FREE_MAGIC); - ASSERT((be32_to_cpu(free->hdr.firstdb) % - XFS_DIR2_MAX_FREE_BESTS(mp)) == - 0); - ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); - ASSERT(curdb < - be32_to_cpu(free->hdr.firstdb) + - be32_to_cpu(free->hdr.nvalid)); - } - /* - * Get the index for our entry. - */ - fi = xfs_dir2_db_to_fdindex(mp, curdb); - /* - * If it has room, return it. - */ - if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { - XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", - XFS_ERRLEVEL_LOW, mp); - if (curfdb != newfdb) - xfs_da_brelse(tp, curbp); - return XFS_ERROR(EFSCORRUPTED); - } - curfdb = newfdb; - if (be16_to_cpu(free->bests[fi]) >= length) { - *indexp = index; - state->extravalid = 1; - state->extrablk.bp = curbp; - state->extrablk.blkno = curfdb; - state->extrablk.index = fi; - state->extrablk.magic = - XFS_DIR2_FREE_MAGIC; - ASSERT(args->oknoent); - return XFS_ERROR(ENOENT); - } - } - } - /* - * Not adding a new entry, so we really want to find - * the name given to us. - */ - else { + newfdb = xfs_dir2_db_to_fdb(mp, newdb); /* - * If it's a different data block, go get it. + * If it's not the one we have in hand, read it in. */ - if (newdb != curdb) { + if (newfdb != curfdb) { /* - * If we had a block before, drop it. + * If we had one before, drop it. */ if (curbp) xfs_da_brelse(tp, curbp); /* - * Read the data block. + * Read the free block. */ - if ((error = - xfs_da_read_buf(tp, dp, - xfs_dir2_db_to_da(mp, newdb), -1, - &curbp, XFS_DATA_FORK))) { + error = xfs_da_read_buf(tp, dp, + xfs_dir2_db_to_da(mp, newfdb), + -1, &curbp, XFS_DATA_FORK); + if (error) return error; - } - xfs_dir2_data_check(dp, curbp); - curdb = newdb; + free = curbp->data; + ASSERT(be32_to_cpu(free->hdr.magic) == + XFS_DIR2_FREE_MAGIC); + ASSERT((be32_to_cpu(free->hdr.firstdb) % + XFS_DIR2_MAX_FREE_BESTS(mp)) == 0); + ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb); + ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) + + be32_to_cpu(free->hdr.nvalid)); } /* - * Point to the data entry. + * Get the index for our entry. */ - dep = (xfs_dir2_data_entry_t *) - ((char *)curbp->data + - xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + fi = xfs_dir2_db_to_fdindex(mp, curdb); /* - * Compare the entry, return it if it matches. + * If it has room, return it. */ - if (dep->namelen == args->namelen && - dep->name[0] == args->name[0] && - memcmp(dep->name, args->name, args->namelen) == 0) { - args->inumber = be64_to_cpu(dep->inumber); - *indexp = index; - state->extravalid = 1; - state->extrablk.bp = curbp; - state->extrablk.blkno = curdb; - state->extrablk.index = - (int)((char *)dep - - (char *)curbp->data); - state->extrablk.magic = XFS_DIR2_DATA_MAGIC; - return XFS_ERROR(EEXIST); + if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) { + XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int", + XFS_ERRLEVEL_LOW, mp); + if (curfdb != newfdb) + xfs_da_brelse(tp, curbp); + return XFS_ERROR(EFSCORRUPTED); } + curfdb = newfdb; + if (be16_to_cpu(free->bests[fi]) >= length) + goto out; } } + /* Didn't find any space */ + fi = -1; +out: + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); + if (curbp) { + /* Giving back a free block. */ + state->extravalid = 1; + state->extrablk.bp = curbp; + state->extrablk.index = fi; + state->extrablk.blkno = curfdb; + state->extrablk.magic = XFS_DIR2_FREE_MAGIC; + } else { + state->extravalid = 0; + } /* - * Didn't find a match. - * If we are holding a buffer, give it back in case our caller - * finds it useful. + * Return the index, that will be the insertion point. */ - if ((state->extravalid = (curbp != NULL))) { - state->extrablk.bp = curbp; - state->extrablk.index = -1; + *indexp = index; + return XFS_ERROR(ENOENT); +} + +/* + * Look up a leaf entry in a node-format leaf block. + * The extrablk in state a data block. + */ +STATIC int +xfs_dir2_leafn_lookup_for_entry( + xfs_dabuf_t *bp, /* leaf buffer */ + xfs_da_args_t *args, /* operation arguments */ + int *indexp, /* out: leaf entry index */ + xfs_da_state_t *state) /* state to fill in */ +{ + xfs_dabuf_t *curbp = NULL; /* current data/free buffer */ + xfs_dir2_db_t curdb = -1; /* current data block number */ + xfs_dir2_data_entry_t *dep; /* data block entry */ + xfs_inode_t *dp; /* incore directory inode */ + int error; /* error return value */ + int index; /* leaf entry index */ + xfs_dir2_leaf_t *leaf; /* leaf structure */ + xfs_dir2_leaf_entry_t *lep; /* leaf entry */ + xfs_mount_t *mp; /* filesystem mount point */ + xfs_dir2_db_t newdb; /* new data block number */ + xfs_trans_t *tp; /* transaction pointer */ + enum xfs_dacmp cmp; /* comparison result */ + + dp = args->dp; + tp = args->trans; + mp = dp->i_mount; + leaf = bp->data; + ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC); +#ifdef __KERNEL__ + ASSERT(be16_to_cpu(leaf->hdr.count) > 0); +#endif + xfs_dir2_leafn_check(dp, bp); + /* + * Look up the hash value in the leaf entries. + */ + index = xfs_dir2_leaf_search_hash(args, bp); + /* + * Do we have a buffer coming in? + */ + if (state->extravalid) { + curbp = state->extrablk.bp; + curdb = state->extrablk.blkno; + } + /* + * Loop over leaf entries with the right hash value. + */ + for (lep = &leaf->ents[index]; index < be16_to_cpu(leaf->hdr.count) && + be32_to_cpu(lep->hashval) == args->hashval; + lep++, index++) { /* - * For addname, giving back a free block. + * Skip stale leaf entries. */ - if (args->addname) { - state->extrablk.blkno = curfdb; - state->extrablk.magic = XFS_DIR2_FREE_MAGIC; + if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR) + continue; + /* + * Pull the data block number from the entry. + */ + newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); + /* + * Not adding a new entry, so we really want to find + * the name given to us. + * + * If it's a different data block, go get it. + */ + if (newdb != curdb) { + /* + * If we had a block before that we aren't saving + * for a CI name, drop it + */ + if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT || + curdb != state->extrablk.blkno)) + xfs_da_brelse(tp, curbp); + /* + * If needing the block that is saved with a CI match, + * use it otherwise read in the new data block. + */ + if (args->cmpresult != XFS_CMP_DIFFERENT && + newdb == state->extrablk.blkno) { + ASSERT(state->extravalid); + curbp = state->extrablk.bp; + } else { + error = xfs_da_read_buf(tp, dp, + xfs_dir2_db_to_da(mp, newdb), + -1, &curbp, XFS_DATA_FORK); + if (error) + return error; + } + xfs_dir2_data_check(dp, curbp); + curdb = newdb; } /* - * For other callers, giving back a data block. + * Point to the data entry. */ - else { + dep = (xfs_dir2_data_entry_t *)((char *)curbp->data + + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); + /* + * Compare the entry and if it's an exact match, return + * EEXIST immediately. If it's the first case-insensitive + * match, store the block & inode number and continue looking. + */ + cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen); + if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { + /* If there is a CI match block, drop it */ + if (args->cmpresult != XFS_CMP_DIFFERENT && + curdb != state->extrablk.blkno) + xfs_da_brelse(tp, state->extrablk.bp); + args->cmpresult = cmp; + args->inumber = be64_to_cpu(dep->inumber); + *indexp = index; + state->extravalid = 1; + state->extrablk.bp = curbp; state->extrablk.blkno = curdb; + state->extrablk.index = (int)((char *)dep - + (char *)curbp->data); state->extrablk.magic = XFS_DIR2_DATA_MAGIC; + if (cmp == XFS_CMP_EXACT) + return XFS_ERROR(EEXIST); } } - /* - * Return the final index, that will be the insertion point. - */ + ASSERT(index == be16_to_cpu(leaf->hdr.count) || + (args->op_flags & XFS_DA_OP_OKNOENT)); + if (curbp) { + if (args->cmpresult == XFS_CMP_DIFFERENT) { + /* Giving back last used data block. */ + state->extravalid = 1; + state->extrablk.bp = curbp; + state->extrablk.index = -1; + state->extrablk.blkno = curdb; + state->extrablk.magic = XFS_DIR2_DATA_MAGIC; + } else { + /* If the curbp is not the CI match block, drop it */ + if (state->extrablk.bp != curbp) + xfs_da_brelse(tp, curbp); + } + } else { + state->extravalid = 0; + } *indexp = index; - ASSERT(index == be16_to_cpu(leaf->hdr.count) || args->oknoent); return XFS_ERROR(ENOENT); } /* + * Look up a leaf entry in a node-format leaf block. + * If this is an addname then the extrablk in state is a freespace block, + * otherwise it's a data block. + */ +int +xfs_dir2_leafn_lookup_int( + xfs_dabuf_t *bp, /* leaf buffer */ + xfs_da_args_t *args, /* operation arguments */ + int *indexp, /* out: leaf entry index */ + xfs_da_state_t *state) /* state to fill in */ +{ + if (args->op_flags & XFS_DA_OP_ADDNAME) + return xfs_dir2_leafn_lookup_for_addname(bp, args, indexp, + state); + return xfs_dir2_leafn_lookup_for_entry(bp, args, indexp, state); +} + +/* * Move count leaf entries from source to destination leaf. * Log entries and headers. Stale entries are preserved. */ @@ -823,9 +891,10 @@ xfs_dir2_leafn_rebalance( */ if (!state->inleaf) blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count); - - /* - * Finally sanity check just to make sure we are not returning a negative index + + /* + * Finally sanity check just to make sure we are not returning a + * negative index */ if(blk2->index < 0) { state->inleaf = 1; @@ -1332,7 +1401,7 @@ xfs_dir2_node_addname( /* * It worked, fix the hash values up the btree. */ - if (!args->justcheck) + if (!(args->op_flags & XFS_DA_OP_JUSTCHECK)) xfs_da_fixhashpath(state, &state->path); } else { /* @@ -1515,7 +1584,8 @@ xfs_dir2_node_addname_int( /* * Not allowed to allocate, return failure. */ - if (args->justcheck || args->total == 0) { + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || + args->total == 0) { /* * Drop the freespace buffer unless it came from our * caller. @@ -1661,7 +1731,7 @@ xfs_dir2_node_addname_int( /* * If just checking, we succeeded. */ - if (args->justcheck) { + if (args->op_flags & XFS_DA_OP_JUSTCHECK) { if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL) xfs_da_buf_done(fbp); return 0; @@ -1767,6 +1837,14 @@ xfs_dir2_node_lookup( error = xfs_da_node_lookup_int(state, &rval); if (error) rval = error; + else if (rval == ENOENT && args->cmpresult == XFS_CMP_CASE) { + /* If a CI match, dup the actual name and return EEXIST */ + xfs_dir2_data_entry_t *dep; + + dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp-> + data + state->extrablk.index); + rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen); + } /* * Release the btree blocks and leaf block. */ @@ -1810,9 +1888,8 @@ xfs_dir2_node_removename( * Look up the entry we're deleting, set up the cursor. */ error = xfs_da_node_lookup_int(state, &rval); - if (error) { + if (error) rval = error; - } /* * Didn't find it, upper layer screwed up. */ @@ -1829,9 +1906,8 @@ xfs_dir2_node_removename( */ error = xfs_dir2_leafn_remove(args, blk->bp, blk->index, &state->extrablk, &rval); - if (error) { + if (error) return error; - } /* * Fix the hash values up the btree. */ diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 919d275a1ce..b46af0013ec 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -255,7 +255,7 @@ xfs_dir2_block_to_sf( xfs_dir2_sf_check(args); out: xfs_trans_log_inode(args->trans, dp, logflags); - kmem_free(block, mp->m_dirblksize); + kmem_free(block); return error; } @@ -332,7 +332,7 @@ xfs_dir2_sf_addname( /* * Just checking or no space reservation, it doesn't fit. */ - if (args->justcheck || args->total == 0) + if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0) return XFS_ERROR(ENOSPC); /* * Convert to block form then add the name. @@ -345,7 +345,7 @@ xfs_dir2_sf_addname( /* * Just checking, it fits. */ - if (args->justcheck) + if (args->op_flags & XFS_DA_OP_JUSTCHECK) return 0; /* * Do it the easy way - just add it at the end. @@ -512,7 +512,7 @@ xfs_dir2_sf_addname_hard( sfep = xfs_dir2_sf_nextentry(sfp, sfep); memcpy(sfep, oldsfep, old_isize - nbytes); } - kmem_free(buf, old_isize); + kmem_free(buf); dp->i_d.di_size = new_isize; xfs_dir2_sf_check(args); } @@ -812,8 +812,11 @@ xfs_dir2_sf_lookup( { xfs_inode_t *dp; /* incore directory inode */ int i; /* entry index */ + int error; xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */ xfs_dir2_sf_t *sfp; /* shortform structure */ + enum xfs_dacmp cmp; /* comparison result */ + xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */ xfs_dir2_trace_args("sf_lookup", args); xfs_dir2_sf_check(args); @@ -836,6 +839,7 @@ xfs_dir2_sf_lookup( */ if (args->namelen == 1 && args->name[0] == '.') { args->inumber = dp->i_ino; + args->cmpresult = XFS_CMP_EXACT; return XFS_ERROR(EEXIST); } /* @@ -844,28 +848,41 @@ xfs_dir2_sf_lookup( if (args->namelen == 2 && args->name[0] == '.' && args->name[1] == '.') { args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent); + args->cmpresult = XFS_CMP_EXACT; return XFS_ERROR(EEXIST); } /* * Loop over all the entries trying to match ours. */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->hdr.count; - i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { - if (sfep->namelen == args->namelen && - sfep->name[0] == args->name[0] && - memcmp(args->name, sfep->name, args->namelen) == 0) { - args->inumber = - xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)); - return XFS_ERROR(EEXIST); + ci_sfep = NULL; + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { + /* + * Compare name and if it's an exact match, return the inode + * number. If it's the first case-insensitive match, store the + * inode number and continue looking for an exact match. + */ + cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name, + sfep->namelen); + if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) { + args->cmpresult = cmp; + args->inumber = xfs_dir2_sf_get_inumber(sfp, + xfs_dir2_sf_inumberp(sfep)); + if (cmp == XFS_CMP_EXACT) + return XFS_ERROR(EEXIST); + ci_sfep = sfep; } } + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); /* - * Didn't find it. + * Here, we can only be doing a lookup (not a rename or replace). + * If a case-insensitive match was not found, return ENOENT. */ - ASSERT(args->oknoent); - return XFS_ERROR(ENOENT); + if (!ci_sfep) + return XFS_ERROR(ENOENT); + /* otherwise process the CI match as required by the caller */ + error = xfs_dir_cilookup_result(args, ci_sfep->name, ci_sfep->namelen); + return XFS_ERROR(error); } /* @@ -904,24 +921,21 @@ xfs_dir2_sf_removename( * Loop over the old directory entries. * Find the one we're deleting. */ - for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->hdr.count; - i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { - if (sfep->namelen == args->namelen && - sfep->name[0] == args->name[0] && - memcmp(sfep->name, args->name, args->namelen) == 0) { + for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count; + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { + if (xfs_da_compname(args, sfep->name, sfep->namelen) == + XFS_CMP_EXACT) { ASSERT(xfs_dir2_sf_get_inumber(sfp, - xfs_dir2_sf_inumberp(sfep)) == - args->inumber); + xfs_dir2_sf_inumberp(sfep)) == + args->inumber); break; } } /* * Didn't find it. */ - if (i == sfp->hdr.count) { + if (i == sfp->hdr.count) return XFS_ERROR(ENOENT); - } /* * Calculate sizes. */ @@ -1042,11 +1056,10 @@ xfs_dir2_sf_replace( */ else { for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); - i < sfp->hdr.count; - i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { - if (sfep->namelen == args->namelen && - sfep->name[0] == args->name[0] && - memcmp(args->name, sfep->name, args->namelen) == 0) { + i < sfp->hdr.count; + i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) { + if (xfs_da_compname(args, sfep->name, sfep->namelen) == + XFS_CMP_EXACT) { #if XFS_BIG_INUMS || defined(DEBUG) ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep)); @@ -1061,7 +1074,7 @@ xfs_dir2_sf_replace( * Didn't find it. */ if (i == sfp->hdr.count) { - ASSERT(args->oknoent); + ASSERT(args->op_flags & XFS_DA_OP_OKNOENT); #if XFS_BIG_INUMS if (i8elevated) xfs_dir2_sf_toino4(args); @@ -1174,7 +1187,7 @@ xfs_dir2_sf_toino4( /* * Clean up the inode. */ - kmem_free(buf, oldsize); + kmem_free(buf); dp->i_d.di_size = newsize; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); } @@ -1251,7 +1264,7 @@ xfs_dir2_sf_toino8( /* * Clean up the inode. */ - kmem_free(buf, oldsize); + kmem_free(buf); dp->i_d.di_size = newsize; xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA); } diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h index 005629d702d..deecc9d238f 100644 --- a/fs/xfs/xfs_dir2_sf.h +++ b/fs/xfs/xfs_dir2_sf.h @@ -62,7 +62,7 @@ typedef union { * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t. * Only need 16 bits, this is the byte offset into the single block form. */ -typedef struct { __uint8_t i[2]; } xfs_dir2_sf_off_t; +typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t; /* * The parent directory has a dedicated field, and the self-pointer must @@ -76,14 +76,14 @@ typedef struct xfs_dir2_sf_hdr { __uint8_t count; /* count of entries */ __uint8_t i8count; /* count of 8-byte inode #s */ xfs_dir2_inou_t parent; /* parent dir inode number */ -} xfs_dir2_sf_hdr_t; +} __arch_pack xfs_dir2_sf_hdr_t; typedef struct xfs_dir2_sf_entry { __uint8_t namelen; /* actual name length */ xfs_dir2_sf_off_t offset; /* saved offset */ __uint8_t name[1]; /* name, variable size */ xfs_dir2_inou_t inumber; /* inode number, var. offset */ -} xfs_dir2_sf_entry_t; +} __arch_pack xfs_dir2_sf_entry_t; typedef struct xfs_dir2_sf { xfs_dir2_sf_hdr_t hdr; /* shortform header */ diff --git a/fs/xfs/xfs_dir2_trace.c b/fs/xfs/xfs_dir2_trace.c index f3fb2ffd6f5..6cc7c0c681a 100644 --- a/fs/xfs/xfs_dir2_trace.c +++ b/fs/xfs/xfs_dir2_trace.c @@ -85,7 +85,8 @@ xfs_dir2_trace_args( (void *)((unsigned long)(args->inumber >> 32)), (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)args->justcheck, NULL, NULL); + (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), + NULL, NULL); } void @@ -100,7 +101,7 @@ xfs_dir2_trace_args_b( (void *)((unsigned long)(args->inumber >> 32)), (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)args->justcheck, + (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), (void *)(bp ? bp->bps[0] : NULL), NULL); } @@ -117,7 +118,7 @@ xfs_dir2_trace_args_bb( (void *)((unsigned long)(args->inumber >> 32)), (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)args->justcheck, + (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), (void *)(lbp ? lbp->bps[0] : NULL), (void *)(dbp ? dbp->bps[0] : NULL)); } @@ -157,8 +158,8 @@ xfs_dir2_trace_args_db( (void *)((unsigned long)(args->inumber >> 32)), (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)args->justcheck, (void *)(long)db, - (void *)dbp); + (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), + (void *)(long)db, (void *)dbp); } void @@ -173,7 +174,7 @@ xfs_dir2_trace_args_i( (void *)((unsigned long)(args->inumber >> 32)), (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)args->justcheck, + (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), (void *)((unsigned long)(i >> 32)), (void *)((unsigned long)(i & 0xFFFFFFFF))); } @@ -190,7 +191,8 @@ xfs_dir2_trace_args_s( (void *)((unsigned long)(args->inumber >> 32)), (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)args->justcheck, (void *)(long)s, NULL); + (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), + (void *)(long)s, NULL); } void @@ -208,7 +210,7 @@ xfs_dir2_trace_args_sb( (void *)((unsigned long)(args->inumber >> 32)), (void *)((unsigned long)(args->inumber & 0xFFFFFFFF)), (void *)args->dp, (void *)args->trans, - (void *)(unsigned long)args->justcheck, (void *)(long)s, - (void *)dbp); + (void *)(unsigned long)(args->op_flags & XFS_DA_OP_JUSTCHECK), + (void *)(long)s, (void *)dbp); } #endif /* XFS_DIR2_TRACE */ diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index f71784ab6a6..cdc2d3464a1 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h @@ -166,6 +166,6 @@ typedef enum { #define FILP_DELAY_FLAG(filp) ((filp->f_flags&(O_NDELAY|O_NONBLOCK)) ? \ DM_FLAGS_NDELAY : 0) -#define AT_DELAY_FLAG(f) ((f&ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0) +#define AT_DELAY_FLAG(f) ((f & XFS_ATTR_NONBLOCK) ? DM_FLAGS_NDELAY : 0) #endif /* __XFS_DMAPI_H__ */ diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 05e5365d3c3..f227ecd1a29 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -58,22 +58,11 @@ xfs_error_trap(int e) } return e; } -#endif - -#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) int xfs_etest[XFS_NUM_INJECT_ERROR]; int64_t xfs_etest_fsid[XFS_NUM_INJECT_ERROR]; char * xfs_etest_fsname[XFS_NUM_INJECT_ERROR]; -void -xfs_error_test_init(void) -{ - memset(xfs_etest, 0, sizeof(xfs_etest)); - memset(xfs_etest_fsid, 0, sizeof(xfs_etest_fsid)); - memset(xfs_etest_fsname, 0, sizeof(xfs_etest_fsname)); -} - int xfs_error_test(int error_tag, int *fsidp, char *expression, int line, char *file, unsigned long randfactor) @@ -150,8 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) xfs_etest[i]); xfs_etest[i] = 0; xfs_etest_fsid[i] = 0LL; - kmem_free(xfs_etest_fsname[i], - strlen(xfs_etest_fsname[i]) + 1); + kmem_free(xfs_etest_fsname[i]); xfs_etest_fsname[i] = NULL; } } @@ -163,7 +151,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud) return 0; } -#endif /* DEBUG || INDUCE_IO_ERROR */ +#endif /* DEBUG */ static void xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) @@ -175,7 +163,7 @@ xfs_fs_vcmn_err(int level, xfs_mount_t *mp, char *fmt, va_list ap) newfmt = kmem_alloc(len, KM_SLEEP); sprintf(newfmt, "Filesystem \"%s\": %s", mp->m_fsname, fmt); icmn_err(level, newfmt, ap); - kmem_free(newfmt, len); + kmem_free(newfmt); } else { icmn_err(level, fmt, ap); } diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 6490d2a9f8e..11543f10b0c 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -125,23 +125,14 @@ extern void xfs_corruption_error(char *tag, int level, struct xfs_mount *mp, #define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT -#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) +#ifdef DEBUG extern int xfs_error_test(int, int *, char *, int, char *, unsigned long); -extern void xfs_error_test_init(void); #define XFS_NUM_INJECT_ERROR 10 - -#ifdef __ANSI_CPP__ -#define XFS_TEST_ERROR(expr, mp, tag, rf) \ - ((expr) || \ - xfs_error_test((tag), (mp)->m_fixedfsid, #expr, __LINE__, __FILE__, \ - (rf))) -#else #define XFS_TEST_ERROR(expr, mp, tag, rf) \ ((expr) || \ xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \ (rf))) -#endif /* __ANSI_CPP__ */ extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp); extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); @@ -149,7 +140,7 @@ extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud); #define XFS_TEST_ERROR(expr, mp, tag, rf) (expr) #define xfs_errortag_add(tag, mp) (ENOSYS) #define xfs_errortag_clearall(mp, loud) (ENOSYS) -#endif /* (DEBUG || INDUCE_IO_ERROR) */ +#endif /* DEBUG */ /* * XFS panic tags -- allow a call to xfs_cmn_err() be turned into diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 132bd07b9bb..8aa28f751b2 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -41,8 +41,7 @@ xfs_efi_item_free(xfs_efi_log_item_t *efip) int nexts = efip->efi_format.efi_nextents; if (nexts > XFS_EFI_MAX_FAST_EXTENTS) { - kmem_free(efip, sizeof(xfs_efi_log_item_t) + - (nexts - 1) * sizeof(xfs_extent_t)); + kmem_free(efip); } else { kmem_zone_free(xfs_efi_zone, efip); } @@ -374,8 +373,7 @@ xfs_efd_item_free(xfs_efd_log_item_t *efdp) int nexts = efdp->efd_format.efd_nextents; if (nexts > XFS_EFD_MAX_FAST_EXTENTS) { - kmem_free(efdp, sizeof(xfs_efd_log_item_t) + - (nexts - 1) * sizeof(xfs_extent_t)); + kmem_free(efdp); } else { kmem_zone_free(xfs_efd_zone, efdp); } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 3f3785b1080..f3bb75da384 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -397,10 +397,12 @@ int xfs_filestream_init(void) { item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); + if (!item_zone) + return -ENOMEM; #ifdef XFS_FILESTREAMS_TRACE - xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_SLEEP); + xfs_filestreams_trace_buf = ktrace_alloc(XFS_FSTRM_KTRACE_SIZE, KM_NOFS); #endif - return item_zone ? 0 : -ENOMEM; + return 0; } /* diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 3bed6433d05..01c0cc88d3f 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -239,6 +239,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_LOGV2 0x0100 /* log format version 2 */ #define XFS_FSOP_GEOM_FLAGS_SECTOR 0x0200 /* sector sizes >1BB */ #define XFS_FSOP_GEOM_FLAGS_ATTR2 0x0400 /* inline attributes rework */ +#define XFS_FSOP_GEOM_FLAGS_DIRV2CI 0x1000 /* ASCII only CI names */ #define XFS_FSOP_GEOM_FLAGS_LAZYSB 0x4000 /* lazy superblock counters */ @@ -371,6 +372,9 @@ typedef struct xfs_fsop_attrlist_handlereq { typedef struct xfs_attr_multiop { __u32 am_opcode; +#define ATTR_OP_GET 1 /* return the indicated attr's value */ +#define ATTR_OP_SET 2 /* set/create the indicated attr/value pair */ +#define ATTR_OP_REMOVE 3 /* remove the indicated attr */ __s32 am_error; void __user *am_attrname; void __user *am_attrvalue; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 381ebda4f7b..84583cf73db 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -95,6 +95,8 @@ xfs_fs_geometry( XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) | (xfs_sb_version_hassector(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_SECTOR : 0) | + (xfs_sb_version_hasasciici(&mp->m_sb) ? + XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) | (xfs_sb_version_haslazysbcount(&mp->m_sb) ? XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) | (xfs_sb_version_hasattr2(&mp->m_sb) ? @@ -625,7 +627,7 @@ xfs_fs_goingdown( xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT); thaw_bdev(sb->s_bdev, sb); } - + break; } case XFS_FSOP_GOING_FLAGS_LOGFLUSH: diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index e5310c90e50..83502f3edef 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -181,7 +181,7 @@ xfs_inobt_delrec( * then we can get rid of this level. */ if (numrecs == 1 && level > 0) { - agbp = cur->bc_private.i.agbp; + agbp = cur->bc_private.a.agbp; agi = XFS_BUF_TO_AGI(agbp); /* * pp is still set to the first pointer in the block. @@ -194,7 +194,7 @@ xfs_inobt_delrec( * Free the block. */ if ((error = xfs_free_extent(cur->bc_tp, - XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1))) + XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1))) return error; xfs_trans_binval(cur->bc_tp, bp); xfs_ialloc_log_agi(cur->bc_tp, agbp, @@ -379,7 +379,7 @@ xfs_inobt_delrec( rrecs = be16_to_cpu(right->bb_numrecs); rbp = bp; if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.i.agno, lbno, 0, &lbp, + cur->bc_private.a.agno, lbno, 0, &lbp, XFS_INO_BTREE_REF))) return error; left = XFS_BUF_TO_INOBT_BLOCK(lbp); @@ -401,7 +401,7 @@ xfs_inobt_delrec( lrecs = be16_to_cpu(left->bb_numrecs); lbp = bp; if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.i.agno, rbno, 0, &rbp, + cur->bc_private.a.agno, rbno, 0, &rbp, XFS_INO_BTREE_REF))) return error; right = XFS_BUF_TO_INOBT_BLOCK(rbp); @@ -484,7 +484,7 @@ xfs_inobt_delrec( xfs_buf_t *rrbp; if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, - cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0, + cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, &rrbp, XFS_INO_BTREE_REF))) return error; rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp); @@ -497,7 +497,7 @@ xfs_inobt_delrec( * Free the deleting block. */ if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp, - cur->bc_private.i.agno, rbno), 1))) + cur->bc_private.a.agno, rbno), 1))) return error; xfs_trans_binval(cur->bc_tp, rbp); /* @@ -854,7 +854,7 @@ xfs_inobt_lookup( { xfs_agi_t *agi; /* a.g. inode header */ - agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); + agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); agno = be32_to_cpu(agi->agi_seqno); agbno = be32_to_cpu(agi->agi_root); } @@ -1089,7 +1089,7 @@ xfs_inobt_lshift( * Set up the left neighbor as "left". */ if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib), + cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib), 0, &lbp, XFS_INO_BTREE_REF))) return error; left = XFS_BUF_TO_INOBT_BLOCK(lbp); @@ -1207,10 +1207,10 @@ xfs_inobt_newroot( /* * Get a block & a buffer. */ - agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp); + agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); args.tp = cur->bc_tp; args.mp = cur->bc_mp; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, be32_to_cpu(agi->agi_root)); args.mod = args.minleft = args.alignment = args.total = args.wasdel = args.isfl = args.userdata = args.minalignslop = 0; @@ -1233,7 +1233,7 @@ xfs_inobt_newroot( */ agi->agi_root = cpu_to_be32(args.agbno); be32_add_cpu(&agi->agi_level, 1); - xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp, + xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL); /* * At the previous root level there are now two blocks: the old @@ -1376,7 +1376,7 @@ xfs_inobt_rshift( * Set up the right neighbor as "right". */ if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), + cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0, &rbp, XFS_INO_BTREE_REF))) return error; right = XFS_BUF_TO_INOBT_BLOCK(rbp); @@ -1492,7 +1492,7 @@ xfs_inobt_split( * Allocate the new block. * If we can't do it, we're toast. Give up. */ - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno); + args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno); args.mod = args.minleft = args.alignment = args.total = args.wasdel = args.isfl = args.userdata = args.minalignslop = 0; args.minlen = args.maxlen = args.prod = 1; @@ -1725,7 +1725,7 @@ xfs_inobt_decrement( agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.i.agno, agbno, 0, &bp, + cur->bc_private.a.agno, agbno, 0, &bp, XFS_INO_BTREE_REF))) return error; lev--; @@ -1897,7 +1897,7 @@ xfs_inobt_increment( agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur)); if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp, - cur->bc_private.i.agno, agbno, 0, &bp, + cur->bc_private.a.agno, agbno, 0, &bp, XFS_INO_BTREE_REF))) return error; lev--; diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b07604b94d9..e229e9e001c 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c @@ -216,7 +216,14 @@ finish_inode: mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); init_waitqueue_head(&ip->i_ipin_wait); atomic_set(&ip->i_pincount, 0); - initnsema(&ip->i_flock, 1, "xfsfino"); + + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&ip->i_flush); + complete(&ip->i_flush); if (lock_flags) xfs_ilock(ip, lock_flags); @@ -288,10 +295,17 @@ finish_inode: *ipp = ip; /* + * Set up the Linux with the Linux inode. + */ + ip->i_vnode = inode; + inode->i_private = ip; + + /* * If we have a real type for an on-disk inode, we can set ops(&unlock) * now. If it's a new inode being created, xfs_ialloc will handle it. */ - xfs_initialize_vnode(mp, inode, ip); + if (ip->i_d.di_mode != 0) + xfs_setup_inode(ip); return 0; } @@ -411,10 +425,11 @@ xfs_iput(xfs_inode_t *ip, * Special iput for brand-new inodes that are still locked */ void -xfs_iput_new(xfs_inode_t *ip, - uint lock_flags) +xfs_iput_new( + xfs_inode_t *ip, + uint lock_flags) { - struct inode *inode = ip->i_vnode; + struct inode *inode = VFS_I(ip); xfs_itrace_entry(ip); @@ -775,26 +790,3 @@ xfs_isilocked( } #endif -/* - * The following three routines simply manage the i_flock - * semaphore embedded in the inode. This semaphore synchronizes - * processes attempting to flush the in-core inode back to disk. - */ -void -xfs_iflock(xfs_inode_t *ip) -{ - psema(&(ip->i_flock), PINOD|PLTWAIT); -} - -int -xfs_iflock_nowait(xfs_inode_t *ip) -{ - return (cpsema(&(ip->i_flock))); -} - -void -xfs_ifunlock(xfs_inode_t *ip) -{ - ASSERT(issemalocked(&(ip->i_flock))); - vsema(&(ip->i_flock)); -} diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e569bf5d6cf..00e80df9dd9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -580,8 +580,8 @@ xfs_iformat_extents( xfs_validate_extents(ifp, nex, XFS_EXTFMT_INODE(ip)); for (i = 0; i < nex; i++, dp++) { xfs_bmbt_rec_host_t *ep = xfs_iext_get_ext(ifp, i); - ep->l0 = be64_to_cpu(get_unaligned(&dp->l0)); - ep->l1 = be64_to_cpu(get_unaligned(&dp->l1)); + ep->l0 = get_unaligned_be64(&dp->l0); + ep->l1 = get_unaligned_be64(&dp->l1); } XFS_BMAP_TRACE_EXLIST(ip, nex, whichfork); if (whichfork != XFS_DATA_FORK || @@ -835,22 +835,22 @@ xfs_iread( * Do this before xfs_iformat in case it adds entries. */ #ifdef XFS_INODE_TRACE - ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_SLEEP); + ip->i_trace = ktrace_alloc(INODE_TRACE_SIZE, KM_NOFS); #endif #ifdef XFS_BMAP_TRACE - ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_SLEEP); + ip->i_xtrace = ktrace_alloc(XFS_BMAP_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_BMBT_TRACE - ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_SLEEP); + ip->i_btrace = ktrace_alloc(XFS_BMBT_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_RW_TRACE - ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_SLEEP); + ip->i_rwtrace = ktrace_alloc(XFS_RW_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_ILOCK_TRACE - ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_SLEEP); + ip->i_lock_trace = ktrace_alloc(XFS_ILOCK_KTRACE_SIZE, KM_NOFS); #endif #ifdef XFS_DIR2_TRACE - ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_SLEEP); + ip->i_dir_trace = ktrace_alloc(XFS_DIR2_KTRACE_SIZE, KM_NOFS); #endif /* @@ -1046,9 +1046,9 @@ xfs_ialloc( { xfs_ino_t ino; xfs_inode_t *ip; - bhv_vnode_t *vp; uint flags; int error; + timespec_t tv; /* * Call the space management code to pick @@ -1077,13 +1077,12 @@ xfs_ialloc( } ASSERT(ip != NULL); - vp = XFS_ITOV(ip); ip->i_d.di_mode = (__uint16_t)mode; ip->i_d.di_onlink = 0; ip->i_d.di_nlink = nlink; ASSERT(ip->i_d.di_nlink == nlink); - ip->i_d.di_uid = current_fsuid(cr); - ip->i_d.di_gid = current_fsgid(cr); + ip->i_d.di_uid = current_fsuid(); + ip->i_d.di_gid = current_fsgid(); ip->i_d.di_projid = prid; memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); @@ -1130,7 +1129,13 @@ xfs_ialloc( ip->i_size = 0; ip->i_d.di_nextents = 0; ASSERT(ip->i_d.di_nblocks == 0); - xfs_ichgtime(ip, XFS_ICHGTIME_CHG|XFS_ICHGTIME_ACC|XFS_ICHGTIME_MOD); + + nanotime(&tv); + ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; + ip->i_d.di_atime = ip->i_d.di_mtime; + ip->i_d.di_ctime = ip->i_d.di_mtime; + /* * di_gen will have been taken care of in xfs_iread. */ @@ -1220,7 +1225,7 @@ xfs_ialloc( xfs_trans_log_inode(tp, ip, flags); /* now that we have an i_mode we can setup inode ops and unlock */ - xfs_initialize_vnode(tp->t_mountp, vp, ip); + xfs_setup_inode(ip); *ipp = ip; return 0; @@ -1399,7 +1404,6 @@ xfs_itruncate_start( xfs_fsize_t last_byte; xfs_off_t toss_start; xfs_mount_t *mp; - bhv_vnode_t *vp; int error = 0; ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); @@ -1408,7 +1412,6 @@ xfs_itruncate_start( (flags == XFS_ITRUNC_MAYBE)); mp = ip->i_mount; - vp = XFS_ITOV(ip); /* wait for the completion of any pending DIOs */ if (new_size < ip->i_size) @@ -1457,7 +1460,7 @@ xfs_itruncate_start( #ifdef DEBUG if (new_size == 0) { - ASSERT(VN_CACHED(vp) == 0); + ASSERT(VN_CACHED(VFS_I(ip)) == 0); } #endif return error; @@ -1763,67 +1766,6 @@ xfs_itruncate_finish( return 0; } - -/* - * xfs_igrow_start - * - * Do the first part of growing a file: zero any data in the last - * block that is beyond the old EOF. We need to do this before - * the inode is joined to the transaction to modify the i_size. - * That way we can drop the inode lock and call into the buffer - * cache to get the buffer mapping the EOF. - */ -int -xfs_igrow_start( - xfs_inode_t *ip, - xfs_fsize_t new_size, - cred_t *credp) -{ - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - ASSERT(new_size > ip->i_size); - - /* - * Zero any pages that may have been created by - * xfs_write_file() beyond the end of the file - * and any blocks between the old and new file sizes. - */ - return xfs_zero_eof(ip, new_size, ip->i_size); -} - -/* - * xfs_igrow_finish - * - * This routine is called to extend the size of a file. - * The inode must have both the iolock and the ilock locked - * for update and it must be a part of the current transaction. - * The xfs_igrow_start() function must have been called previously. - * If the change_flag is not zero, the inode change timestamp will - * be updated. - */ -void -xfs_igrow_finish( - xfs_trans_t *tp, - xfs_inode_t *ip, - xfs_fsize_t new_size, - int change_flag) -{ - ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL)); - ASSERT(ip->i_transp == tp); - ASSERT(new_size > ip->i_size); - - /* - * Update the file size. Update the inode change timestamp - * if change_flag set. - */ - ip->i_d.di_size = new_size; - ip->i_size = new_size; - if (change_flag) - xfs_ichgtime(ip, XFS_ICHGTIME_CHG); - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - -} - - /* * This is called when the inode's link count goes to 0. * We place the on-disk inode on a list in the AGI. It @@ -2258,7 +2200,7 @@ xfs_ifree_cluster( xfs_trans_binval(tp, bp); } - kmem_free(ip_found, ninodes * sizeof(xfs_inode_t *)); + kmem_free(ip_found); xfs_put_perag(mp, pag); } @@ -2470,7 +2412,7 @@ xfs_iroot_realloc( (int)new_size); memcpy(np, op, new_max * (uint)sizeof(xfs_dfsbno_t)); } - kmem_free(ifp->if_broot, ifp->if_broot_bytes); + kmem_free(ifp->if_broot); ifp->if_broot = new_broot; ifp->if_broot_bytes = (int)new_size; ASSERT(ifp->if_broot_bytes <= @@ -2514,7 +2456,7 @@ xfs_idata_realloc( if (new_size == 0) { if (ifp->if_u1.if_data != ifp->if_u2.if_inline_data) { - kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); + kmem_free(ifp->if_u1.if_data); } ifp->if_u1.if_data = NULL; real_size = 0; @@ -2529,7 +2471,7 @@ xfs_idata_realloc( ASSERT(ifp->if_real_bytes != 0); memcpy(ifp->if_u2.if_inline_data, ifp->if_u1.if_data, new_size); - kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); + kmem_free(ifp->if_u1.if_data); ifp->if_u1.if_data = ifp->if_u2.if_inline_data; } real_size = 0; @@ -2636,7 +2578,7 @@ xfs_idestroy_fork( ifp = XFS_IFORK_PTR(ip, whichfork); if (ifp->if_broot != NULL) { - kmem_free(ifp->if_broot, ifp->if_broot_bytes); + kmem_free(ifp->if_broot); ifp->if_broot = NULL; } @@ -2650,7 +2592,7 @@ xfs_idestroy_fork( if ((ifp->if_u1.if_data != ifp->if_u2.if_inline_data) && (ifp->if_u1.if_data != NULL)) { ASSERT(ifp->if_real_bytes != 0); - kmem_free(ifp->if_u1.if_data, ifp->if_real_bytes); + kmem_free(ifp->if_u1.if_data); ifp->if_u1.if_data = NULL; ifp->if_real_bytes = 0; } @@ -2691,7 +2633,6 @@ xfs_idestroy( xfs_idestroy_fork(ip, XFS_ATTR_FORK); mrfree(&ip->i_lock); mrfree(&ip->i_iolock); - freesema(&ip->i_flock); #ifdef XFS_INODE_TRACE ktrace_free(ip->i_trace); @@ -3058,7 +2999,7 @@ xfs_iflush_cluster( out_free: read_unlock(&pag->pag_ici_lock); - kmem_free(ilist, ilist_size); + kmem_free(ilist); return 0; @@ -3102,17 +3043,17 @@ cluster_corrupt_out: * Unlocks the flush lock */ xfs_iflush_abort(iq); - kmem_free(ilist, ilist_size); + kmem_free(ilist); return XFS_ERROR(EFSCORRUPTED); } /* * xfs_iflush() will write a modified inode's changes out to the * inode's on disk home. The caller must have the inode lock held - * in at least shared mode and the inode flush semaphore must be - * held as well. The inode lock will still be held upon return from + * in at least shared mode and the inode flush completion must be + * active as well. The inode lock will still be held upon return from * the call and the caller is free to unlock it. - * The inode flush lock will be unlocked when the inode reaches the disk. + * The inode flush will be completed when the inode reaches the disk. * The flags indicate how the inode's buffer should be written out. */ int @@ -3131,7 +3072,7 @@ xfs_iflush( XFS_STATS_INC(xs_iflush_count); ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - ASSERT(issemalocked(&(ip->i_flock))); + ASSERT(!completion_done(&ip->i_flush)); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3143,8 +3084,6 @@ xfs_iflush( * flush lock and do nothing. */ if (xfs_inode_clean(ip)) { - ASSERT((iip != NULL) ? - !(iip->ili_item.li_flags & XFS_LI_IN_AIL) : 1); xfs_ifunlock(ip); return 0; } @@ -3296,7 +3235,7 @@ xfs_iflush_int( #endif ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)); - ASSERT(issemalocked(&(ip->i_flock))); + ASSERT(!completion_done(&ip->i_flush)); ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || ip->i_d.di_nextents > ip->i_df.if_ext_max); @@ -3528,7 +3467,6 @@ xfs_iflush_all( xfs_mount_t *mp) { xfs_inode_t *ip; - bhv_vnode_t *vp; again: XFS_MOUNT_ILOCK(mp); @@ -3543,14 +3481,13 @@ xfs_iflush_all( continue; } - vp = XFS_ITOV_NULL(ip); - if (!vp) { + if (!VFS_I(ip)) { XFS_MOUNT_IUNLOCK(mp); xfs_finish_reclaim(ip, 0, XFS_IFLUSH_ASYNC); goto again; } - ASSERT(vn_count(vp) == 0); + ASSERT(vn_count(VFS_I(ip)) == 0); ip = ip->i_mnext; } while (ip != mp->m_inodes); @@ -3770,7 +3707,7 @@ xfs_iext_add_indirect_multi( * (all extents past */ if (nex2) { byte_diff = nex2 * sizeof(xfs_bmbt_rec_t); - nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_SLEEP); + nex2_ep = (xfs_bmbt_rec_t *) kmem_alloc(byte_diff, KM_NOFS); memmove(nex2_ep, &erp->er_extbuf[idx], byte_diff); erp->er_extcount -= nex2; xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -nex2); @@ -3836,7 +3773,7 @@ xfs_iext_add_indirect_multi( erp = xfs_iext_irec_new(ifp, erp_idx); } memmove(&erp->er_extbuf[i], nex2_ep, byte_diff); - kmem_free(nex2_ep, byte_diff); + kmem_free(nex2_ep); erp->er_extcount += nex2; xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, nex2); } @@ -4070,8 +4007,7 @@ xfs_iext_realloc_direct( ifp->if_u1.if_extents = kmem_realloc(ifp->if_u1.if_extents, rnew_size, - ifp->if_real_bytes, - KM_SLEEP); + ifp->if_real_bytes, KM_NOFS); } if (rnew_size > ifp->if_real_bytes) { memset(&ifp->if_u1.if_extents[ifp->if_bytes / @@ -4112,7 +4048,7 @@ xfs_iext_direct_to_inline( */ memcpy(ifp->if_u2.if_inline_ext, ifp->if_u1.if_extents, nextents * sizeof(xfs_bmbt_rec_t)); - kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); + kmem_free(ifp->if_u1.if_extents); ifp->if_u1.if_extents = ifp->if_u2.if_inline_ext; ifp->if_real_bytes = 0; } @@ -4130,7 +4066,7 @@ xfs_iext_inline_to_direct( xfs_ifork_t *ifp, /* inode fork pointer */ int new_size) /* number of extents in file */ { - ifp->if_u1.if_extents = kmem_alloc(new_size, KM_SLEEP); + ifp->if_u1.if_extents = kmem_alloc(new_size, KM_NOFS); memset(ifp->if_u1.if_extents, 0, new_size); if (ifp->if_bytes) { memcpy(ifp->if_u1.if_extents, ifp->if_u2.if_inline_ext, @@ -4162,7 +4098,7 @@ xfs_iext_realloc_indirect( } else { ifp->if_u1.if_ext_irec = (xfs_ext_irec_t *) kmem_realloc(ifp->if_u1.if_ext_irec, - new_size, size, KM_SLEEP); + new_size, size, KM_NOFS); } } @@ -4186,7 +4122,7 @@ xfs_iext_indirect_to_direct( ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); ep = ifp->if_u1.if_ext_irec->er_extbuf; - kmem_free(ifp->if_u1.if_ext_irec, sizeof(xfs_ext_irec_t)); + kmem_free(ifp->if_u1.if_ext_irec); ifp->if_flags &= ~XFS_IFEXTIREC; ifp->if_u1.if_extents = ep; ifp->if_bytes = size; @@ -4212,7 +4148,7 @@ xfs_iext_destroy( } ifp->if_flags &= ~XFS_IFEXTIREC; } else if (ifp->if_real_bytes) { - kmem_free(ifp->if_u1.if_extents, ifp->if_real_bytes); + kmem_free(ifp->if_u1.if_extents); } else if (ifp->if_bytes) { memset(ifp->if_u2.if_inline_ext, 0, XFS_INLINE_EXTS * sizeof(xfs_bmbt_rec_t)); @@ -4404,11 +4340,10 @@ xfs_iext_irec_init( nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t); ASSERT(nextents <= XFS_LINEAR_EXTS); - erp = (xfs_ext_irec_t *) - kmem_alloc(sizeof(xfs_ext_irec_t), KM_SLEEP); + erp = kmem_alloc(sizeof(xfs_ext_irec_t), KM_NOFS); if (nextents == 0) { - ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + ifp->if_u1.if_extents = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); } else if (!ifp->if_real_bytes) { xfs_iext_inline_to_direct(ifp, XFS_IEXT_BUFSZ); } else if (ifp->if_real_bytes < XFS_IEXT_BUFSZ) { @@ -4456,7 +4391,7 @@ xfs_iext_irec_new( /* Initialize new extent record */ erp = ifp->if_u1.if_ext_irec; - erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_SLEEP); + erp[erp_idx].er_extbuf = kmem_alloc(XFS_IEXT_BUFSZ, KM_NOFS); ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; memset(erp[erp_idx].er_extbuf, 0, XFS_IEXT_BUFSZ); erp[erp_idx].er_extcount = 0; @@ -4483,7 +4418,7 @@ xfs_iext_irec_remove( if (erp->er_extbuf) { xfs_iext_irec_update_extoffs(ifp, erp_idx + 1, -erp->er_extcount); - kmem_free(erp->er_extbuf, XFS_IEXT_BUFSZ); + kmem_free(erp->er_extbuf); } /* Compact extent records */ erp = ifp->if_u1.if_ext_irec; @@ -4501,8 +4436,7 @@ xfs_iext_irec_remove( xfs_iext_realloc_indirect(ifp, nlists * sizeof(xfs_ext_irec_t)); } else { - kmem_free(ifp->if_u1.if_ext_irec, - sizeof(xfs_ext_irec_t)); + kmem_free(ifp->if_u1.if_ext_irec); } ifp->if_real_bytes = nlists * XFS_IEXT_BUFSZ; } @@ -4571,7 +4505,7 @@ xfs_iext_irec_compact_pages( * so er_extoffs don't get modified in * xfs_iext_irec_remove. */ - kmem_free(erp_next->er_extbuf, XFS_IEXT_BUFSZ); + kmem_free(erp_next->er_extbuf); erp_next->er_extbuf = NULL; xfs_iext_irec_remove(ifp, erp_idx + 1); nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; @@ -4596,40 +4530,63 @@ xfs_iext_irec_compact_full( int nlists; /* number of irec's (ex lists) */ ASSERT(ifp->if_flags & XFS_IFEXTIREC); + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; erp = ifp->if_u1.if_ext_irec; ep = &erp->er_extbuf[erp->er_extcount]; erp_next = erp + 1; ep_next = erp_next->er_extbuf; + while (erp_idx < nlists - 1) { + /* + * Check how many extent records are available in this irec. + * If there is none skip the whole exercise. + */ ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; - ext_diff = MIN(ext_avail, erp_next->er_extcount); - memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); - erp->er_extcount += ext_diff; - erp_next->er_extcount -= ext_diff; - /* Remove next page */ - if (erp_next->er_extcount == 0) { + if (ext_avail) { + /* - * Free page before removing extent record - * so er_extoffs don't get modified in - * xfs_iext_irec_remove. + * Copy over as many as possible extent records into + * the previous page. */ - kmem_free(erp_next->er_extbuf, - erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); - erp_next->er_extbuf = NULL; - xfs_iext_irec_remove(ifp, erp_idx + 1); - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - /* Update next page */ - } else { - /* Move rest of page up to become next new page */ - memmove(erp_next->er_extbuf, ep_next, - erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); - ep_next = erp_next->er_extbuf; - memset(&ep_next[erp_next->er_extcount], 0, - (XFS_LINEAR_EXTS - erp_next->er_extcount) * - sizeof(xfs_bmbt_rec_t)); + ext_diff = MIN(ext_avail, erp_next->er_extcount); + memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); + erp->er_extcount += ext_diff; + erp_next->er_extcount -= ext_diff; + + /* + * If the next irec is empty now we can simply + * remove it. + */ + if (erp_next->er_extcount == 0) { + /* + * Free page before removing extent record + * so er_extoffs don't get modified in + * xfs_iext_irec_remove. + */ + kmem_free(erp_next->er_extbuf); + erp_next->er_extbuf = NULL; + xfs_iext_irec_remove(ifp, erp_idx + 1); + erp = &ifp->if_u1.if_ext_irec[erp_idx]; + nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; + + /* + * If the next irec is not empty move up the content + * that has not been copied to the previous page to + * the beggining of this one. + */ + } else { + memmove(erp_next->er_extbuf, &ep_next[ext_diff], + erp_next->er_extcount * + sizeof(xfs_bmbt_rec_t)); + ep_next = erp_next->er_extbuf; + memset(&ep_next[erp_next->er_extcount], 0, + (XFS_LINEAR_EXTS - + erp_next->er_extcount) * + sizeof(xfs_bmbt_rec_t)); + } } + if (erp->er_extcount == XFS_LINEAR_EXTS) { erp_idx++; if (erp_idx < nlists) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0a999fee4f0..1420c49674d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -87,8 +87,7 @@ typedef struct xfs_ifork { * Flags for xfs_ichgtime(). */ #define XFS_ICHGTIME_MOD 0x1 /* data fork modification timestamp */ -#define XFS_ICHGTIME_ACC 0x2 /* data fork access timestamp */ -#define XFS_ICHGTIME_CHG 0x4 /* inode field change timestamp */ +#define XFS_ICHGTIME_CHG 0x2 /* inode field change timestamp */ /* * Per-fork incore inode flags. @@ -204,7 +203,7 @@ typedef struct xfs_inode { struct xfs_inode *i_mprev; /* ptr to prev inode */ struct xfs_mount *i_mount; /* fs mount struct ptr */ struct list_head i_reclaim; /* reclaim list */ - bhv_vnode_t *i_vnode; /* vnode backpointer */ + struct inode *i_vnode; /* vnode backpointer */ struct xfs_dquot *i_udquot; /* user dquot */ struct xfs_dquot *i_gdquot; /* group dquot */ @@ -223,7 +222,7 @@ typedef struct xfs_inode { struct xfs_inode_log_item *i_itemp; /* logging information */ mrlock_t i_lock; /* inode lock */ mrlock_t i_iolock; /* inode IO lock */ - sema_t i_flock; /* inode flush lock */ + struct completion i_flush; /* inode flush completion q */ atomic_t i_pincount; /* inode pin count */ wait_queue_head_t i_ipin_wait; /* inode pinning wait queue */ spinlock_t i_flags_lock; /* inode i_flags lock */ @@ -263,6 +262,18 @@ typedef struct xfs_inode { #define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \ (ip)->i_size : (ip)->i_d.di_size; +/* Convert from vfs inode to xfs inode */ +static inline struct xfs_inode *XFS_I(struct inode *inode) +{ + return (struct xfs_inode *)inode->i_private; +} + +/* convert from xfs inode to vfs inode */ +static inline struct inode *VFS_I(struct xfs_inode *ip) +{ + return (struct inode *)ip->i_vnode; +} + /* * i_flags helper functions */ @@ -439,9 +450,6 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) #define XFS_ITRUNC_DEFINITE 0x1 #define XFS_ITRUNC_MAYBE 0x2 -#define XFS_ITOV(ip) ((ip)->i_vnode) -#define XFS_ITOV_NULL(ip) ((ip)->i_vnode) - /* * For multiple groups support: if S_ISGID bit is set in the parent * directory, group of new file is set to that of the parent, and @@ -473,11 +481,8 @@ int xfs_ilock_nowait(xfs_inode_t *, uint); void xfs_iunlock(xfs_inode_t *, uint); void xfs_ilock_demote(xfs_inode_t *, uint); int xfs_isilocked(xfs_inode_t *, uint); -void xfs_iflock(xfs_inode_t *); -int xfs_iflock_nowait(xfs_inode_t *); uint xfs_ilock_map_shared(xfs_inode_t *); void xfs_iunlock_map_shared(xfs_inode_t *, uint); -void xfs_ifunlock(xfs_inode_t *); void xfs_ireclaim(xfs_inode_t *); int xfs_finish_reclaim(xfs_inode_t *, int, int); int xfs_finish_reclaim_all(struct xfs_mount *, int); @@ -507,9 +512,6 @@ int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t); int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *, xfs_fsize_t, int, int); int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); -int xfs_igrow_start(xfs_inode_t *, xfs_fsize_t, struct cred *); -void xfs_igrow_finish(struct xfs_trans *, xfs_inode_t *, - xfs_fsize_t, int); void xfs_idestroy_fork(xfs_inode_t *, int); void xfs_idestroy(xfs_inode_t *); @@ -525,6 +527,7 @@ void xfs_iflush_all(struct xfs_mount *); void xfs_ichgtime(xfs_inode_t *, int); xfs_fsize_t xfs_file_last_byte(xfs_inode_t *); void xfs_lock_inodes(xfs_inode_t **, int, uint); +void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); void xfs_synchronize_atime(xfs_inode_t *); void xfs_mark_inode_dirty_sync(xfs_inode_t *); @@ -573,6 +576,26 @@ extern struct kmem_zone *xfs_ifork_zone; extern struct kmem_zone *xfs_inode_zone; extern struct kmem_zone *xfs_ili_zone; +/* + * Manage the i_flush queue embedded in the inode. This completion + * queue synchronizes processes attempting to flush the in-core + * inode back to disk. + */ +static inline void xfs_iflock(xfs_inode_t *ip) +{ + wait_for_completion(&ip->i_flush); +} + +static inline int xfs_iflock_nowait(xfs_inode_t *ip) +{ + return try_wait_for_completion(&ip->i_flush); +} + +static inline void xfs_ifunlock(xfs_inode_t *ip) +{ + complete(&ip->i_flush); +} + #endif /* __KERNEL__ */ #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 167b33f1577..97c7452e262 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -686,7 +686,7 @@ xfs_inode_item_unlock( ASSERT(ip->i_d.di_nextents > 0); ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_DEXT); ASSERT(ip->i_df.if_bytes > 0); - kmem_free(iip->ili_extents_buf, ip->i_df.if_bytes); + kmem_free(iip->ili_extents_buf); iip->ili_extents_buf = NULL; } if (iip->ili_aextents_buf != NULL) { @@ -694,7 +694,7 @@ xfs_inode_item_unlock( ASSERT(ip->i_d.di_anextents > 0); ASSERT(iip->ili_format.ilf_fields & XFS_ILOG_AEXT); ASSERT(ip->i_afp->if_bytes > 0); - kmem_free(iip->ili_aextents_buf, ip->i_afp->if_bytes); + kmem_free(iip->ili_aextents_buf); iip->ili_aextents_buf = NULL; } @@ -779,11 +779,10 @@ xfs_inode_item_pushbuf( ASSERT(iip->ili_push_owner == current_pid()); /* - * If flushlock isn't locked anymore, chances are that the - * inode flush completed and the inode was taken off the AIL. - * So, just get out. + * If a flush is not in progress anymore, chances are that the + * inode was taken off the AIL. So, just get out. */ - if (!issemalocked(&(ip->i_flock)) || + if (completion_done(&ip->i_flush) || ((iip->ili_item.li_flags & XFS_LI_IN_AIL) == 0)) { iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); @@ -805,7 +804,7 @@ xfs_inode_item_pushbuf( * If not, we can flush it async. */ dopush = ((iip->ili_item.li_flags & XFS_LI_IN_AIL) && - issemalocked(&(ip->i_flock))); + !completion_done(&ip->i_flush)); iip->ili_pushbuf_flag = 0; xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_buftrace("INODE ITEM PUSH", bp); @@ -858,7 +857,7 @@ xfs_inode_item_push( ip = iip->ili_inode; ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED)); - ASSERT(issemalocked(&(ip->i_flock))); + ASSERT(!completion_done(&ip->i_flush)); /* * Since we were able to lock the inode's flush lock and * we found it on the AIL, the inode must be dirty. This @@ -957,8 +956,7 @@ xfs_inode_item_destroy( { #ifdef XFS_TRANS_DEBUG if (ip->i_itemp->ili_root_size != 0) { - kmem_free(ip->i_itemp->ili_orig_root, - ip->i_itemp->ili_root_size); + kmem_free(ip->i_itemp->ili_orig_root); } #endif kmem_zone_free(xfs_ili_zone, ip->i_itemp); diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 7edcde691d1..67f22b2b44b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -889,6 +889,16 @@ xfs_iomap_write_unwritten( count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb); + /* + * Reserve enough blocks in this transaction for two complete extent + * btree splits. We may be converting the middle part of an unwritten + * extent and in this case we will insert two new extents in the btree + * each of which could cause a full split. + * + * This reservation amount will be used in the first call to + * xfs_bmbt_split() to select an AG with enough space to satisfy the + * rest of the operation. + */ resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1; do { diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 419de15aeb4..cf6754a3c5b 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -59,7 +59,6 @@ xfs_bulkstat_one_iget( { xfs_icdinode_t *dic; /* dinode core info pointer */ xfs_inode_t *ip; /* incore inode pointer */ - bhv_vnode_t *vp; int error; error = xfs_iget(mp, NULL, ino, @@ -72,7 +71,6 @@ xfs_bulkstat_one_iget( ASSERT(ip != NULL); ASSERT(ip->i_blkno != (xfs_daddr_t)0); - vp = XFS_ITOV(ip); dic = &ip->i_d; /* xfs_iget returns the following without needing @@ -85,7 +83,7 @@ xfs_bulkstat_one_iget( buf->bs_uid = dic->di_uid; buf->bs_gid = dic->di_gid; buf->bs_size = dic->di_size; - vn_atime_to_bstime(vp, &buf->bs_atime); + vn_atime_to_bstime(VFS_I(ip), &buf->bs_atime); buf->bs_mtime.tv_sec = dic->di_mtime.t_sec; buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec; buf->bs_ctime.tv_sec = dic->di_ctime.t_sec; @@ -257,7 +255,7 @@ xfs_bulkstat_one( *ubused = error; out_free: - kmem_free(buf, sizeof(*buf)); + kmem_free(buf); return error; } @@ -708,7 +706,7 @@ xfs_bulkstat( /* * Done, we're either out of filesystem or space to put the data. */ - kmem_free(irbuf, irbsize); + kmem_free(irbuf); *ubcountp = ubelem; /* * Found some inodes, return them now and return the error next time. @@ -914,7 +912,7 @@ xfs_inumbers( } *lastino = XFS_AGINO_TO_INO(mp, agno, agino); } - kmem_free(buffer, bcount * sizeof(*buffer)); + kmem_free(buffer); if (cur) xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR)); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ad3d26ddfe3..ccba14eb9db 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -160,7 +160,7 @@ void xlog_trace_iclog(xlog_in_core_t *iclog, uint state) { if (!iclog->ic_trace) - iclog->ic_trace = ktrace_alloc(256, KM_SLEEP); + iclog->ic_trace = ktrace_alloc(256, KM_NOFS); ktrace_enter(iclog->ic_trace, (void *)((unsigned long)state), (void *)((unsigned long)current_pid()), @@ -226,20 +226,24 @@ xlog_grant_sub_space(struct log *log, int bytes) static void xlog_grant_add_space_write(struct log *log, int bytes) { - log->l_grant_write_bytes += bytes; - if (log->l_grant_write_bytes > log->l_logsize) { - log->l_grant_write_bytes -= log->l_logsize; + int tmp = log->l_logsize - log->l_grant_write_bytes; + if (tmp > bytes) + log->l_grant_write_bytes += bytes; + else { log->l_grant_write_cycle++; + log->l_grant_write_bytes = bytes - tmp; } } static void xlog_grant_add_space_reserve(struct log *log, int bytes) { - log->l_grant_reserve_bytes += bytes; - if (log->l_grant_reserve_bytes > log->l_logsize) { - log->l_grant_reserve_bytes -= log->l_logsize; + int tmp = log->l_logsize - log->l_grant_reserve_bytes; + if (tmp > bytes) + log->l_grant_reserve_bytes += bytes; + else { log->l_grant_reserve_cycle++; + log->l_grant_reserve_bytes = bytes - tmp; } } @@ -332,15 +336,12 @@ xfs_log_done(xfs_mount_t *mp, } else { xlog_trace_loggrant(log, ticket, "xfs_log_done: (permanent)"); xlog_regrant_reserve_log_space(log, ticket); - } - - /* If this ticket was a permanent reservation and we aren't - * trying to release it, reset the inited flags; so next time - * we write, a start record will be written out. - */ - if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) && - (flags & XFS_LOG_REL_PERM_RESERV) == 0) + /* If this ticket was a permanent reservation and we aren't + * trying to release it, reset the inited flags; so next time + * we write, a start record will be written out. + */ ticket->t_flags |= XLOG_TIC_INITED; + } return lsn; } /* xfs_log_done */ @@ -353,11 +354,11 @@ xfs_log_done(xfs_mount_t *mp, * Asynchronous forces are implemented by setting the WANT_SYNC * bit in the appropriate in-core log and then returning. * - * Synchronous forces are implemented with a semaphore. All callers - * to force a given lsn to disk will wait on a semaphore attached to the + * Synchronous forces are implemented with a signal variable. All callers + * to force a given lsn to disk will wait on a the sv attached to the * specific in-core log. When given in-core log finally completes its * write to disk, that thread will wake up all threads waiting on the - * semaphore. + * sv. */ int _xfs_log_force( @@ -584,12 +585,12 @@ error: * mp - ubiquitous xfs mount point structure */ int -xfs_log_mount_finish(xfs_mount_t *mp, int mfsi_flags) +xfs_log_mount_finish(xfs_mount_t *mp) { int error; if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) - error = xlog_recover_finish(mp->m_log, mfsi_flags); + error = xlog_recover_finish(mp->m_log); else { error = 0; ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); @@ -703,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) if (!(iclog->ic_state == XLOG_STATE_ACTIVE || iclog->ic_state == XLOG_STATE_DIRTY)) { if (!XLOG_FORCED_SHUTDOWN(log)) { - sv_wait(&iclog->ic_forcesema, PMEM, + sv_wait(&iclog->ic_force_wait, PMEM, &log->l_icloglock, s); } else { spin_unlock(&log->l_icloglock); @@ -744,7 +745,7 @@ xfs_log_unmount_write(xfs_mount_t *mp) || iclog->ic_state == XLOG_STATE_DIRTY || iclog->ic_state == XLOG_STATE_IOERROR) ) { - sv_wait(&iclog->ic_forcesema, PMEM, + sv_wait(&iclog->ic_force_wait, PMEM, &log->l_icloglock, s); } else { spin_unlock(&log->l_icloglock); @@ -834,7 +835,7 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= tic->t_unit_res; - sv_signal(&tic->t_sema); + sv_signal(&tic->t_wait); tic = tic->t_next; } while (tic != log->l_write_headq); } @@ -855,7 +856,7 @@ xfs_log_move_tail(xfs_mount_t *mp, break; tail_lsn = 0; free_bytes -= need_bytes; - sv_signal(&tic->t_sema); + sv_signal(&tic->t_wait); tic = tic->t_next; } while (tic != log->l_reserve_headq); } @@ -1228,7 +1229,7 @@ xlog_alloc_log(xfs_mount_t *mp, spin_lock_init(&log->l_icloglock); spin_lock_init(&log->l_grant_lock); - initnsema(&log->l_flushsema, 0, "ic-flush"); + sv_init(&log->l_flush_wait, 0, "flush_wait"); /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); @@ -1281,8 +1282,8 @@ xlog_alloc_log(xfs_mount_t *mp, ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp)); ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0); - sv_init(&iclog->ic_forcesema, SV_DEFAULT, "iclog-force"); - sv_init(&iclog->ic_writesema, SV_DEFAULT, "iclog-write"); + sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); + sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); iclogp = &iclog->ic_next; } @@ -1561,8 +1562,8 @@ xlog_dealloc_log(xlog_t *log) iclog = log->l_iclog; for (i=0; i<log->l_iclog_bufs; i++) { - sv_destroy(&iclog->ic_forcesema); - sv_destroy(&iclog->ic_writesema); + sv_destroy(&iclog->ic_force_wait); + sv_destroy(&iclog->ic_write_wait); xfs_buf_free(iclog->ic_bp); #ifdef XFS_LOG_TRACE if (iclog->ic_trace != NULL) { @@ -1570,10 +1571,9 @@ xlog_dealloc_log(xlog_t *log) } #endif next_iclog = iclog->ic_next; - kmem_free(iclog, sizeof(xlog_in_core_t)); + kmem_free(iclog); iclog = next_iclog; } - freesema(&log->l_flushsema); spinlock_destroy(&log->l_icloglock); spinlock_destroy(&log->l_grant_lock); @@ -1587,7 +1587,7 @@ xlog_dealloc_log(xlog_t *log) } #endif log->l_mp->m_log = NULL; - kmem_free(log, sizeof(xlog_t)); + kmem_free(log); } /* xlog_dealloc_log */ /* @@ -1973,7 +1973,7 @@ xlog_write(xfs_mount_t * mp, /* Clean iclogs starting from the head. This ordering must be * maintained, so an iclog doesn't become ACTIVE beyond one that * is SYNCING. This is also required to maintain the notion that we use - * a counting semaphore to hold off would be writers to the log when every + * a ordered wait queue to hold off would be writers to the log when every * iclog is trying to sync to disk. * * State Change: DIRTY -> ACTIVE @@ -2097,6 +2097,7 @@ xlog_state_do_callback( int funcdidcallbacks; /* flag: function did callbacks */ int repeats; /* for issuing console warnings if * looping too many times */ + int wake = 0; spin_lock(&log->l_icloglock); first_iclog = iclog = log->l_iclog; @@ -2236,7 +2237,7 @@ xlog_state_do_callback( xlog_state_clean_log(log); /* wake up threads waiting in xfs_log_force() */ - sv_broadcast(&iclog->ic_forcesema); + sv_broadcast(&iclog->ic_force_wait); iclog = iclog->ic_next; } while (first_iclog != iclog); @@ -2278,15 +2279,13 @@ xlog_state_do_callback( } #endif - flushcnt = 0; - if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) { - flushcnt = log->l_flushcnt; - log->l_flushcnt = 0; - } + if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR)) + wake = 1; spin_unlock(&log->l_icloglock); - while (flushcnt--) - vsema(&log->l_flushsema); -} /* xlog_state_do_callback */ + + if (wake) + sv_broadcast(&log->l_flush_wait); +} /* @@ -2300,8 +2299,7 @@ xlog_state_do_callback( * the second completion goes through. * * Callbacks could take time, so they are done outside the scope of the - * global state machine log lock. Assume that the calls to cvsema won't - * take a long time. At least we know it won't sleep. + * global state machine log lock. */ STATIC void xlog_state_done_syncing( @@ -2337,7 +2335,7 @@ xlog_state_done_syncing( * iclog buffer, we wake them all, one will get to do the * I/O, the others get to wait for the result. */ - sv_broadcast(&iclog->ic_writesema); + sv_broadcast(&iclog->ic_write_wait); spin_unlock(&log->l_icloglock); xlog_state_do_callback(log, aborted, iclog); /* also cleans log */ } /* xlog_state_done_syncing */ @@ -2345,11 +2343,9 @@ xlog_state_done_syncing( /* * If the head of the in-core log ring is not (ACTIVE or DIRTY), then we must - * sleep. The flush semaphore is set to the number of in-core buffers and - * decremented around disk syncing. Therefore, if all buffers are syncing, - * this semaphore will cause new writes to sleep until a sync completes. - * Otherwise, this code just does p() followed by v(). This approximates - * a sleep/wakeup except we can't race. + * sleep. We wait on the flush queue on the head iclog as that should be + * the first iclog to complete flushing. Hence if all iclogs are syncing, + * we will wait here and all new writes will sleep until a sync completes. * * The in-core logs are used in a circular fashion. They are not used * out-of-order even when an iclog past the head is free. @@ -2384,16 +2380,15 @@ restart: } iclog = log->l_iclog; - if (! (iclog->ic_state == XLOG_STATE_ACTIVE)) { - log->l_flushcnt++; - spin_unlock(&log->l_icloglock); + if (iclog->ic_state != XLOG_STATE_ACTIVE) { xlog_trace_iclog(iclog, XLOG_TRACE_SLEEP_FLUSH); XFS_STATS_INC(xs_log_noiclogs); - /* Ensure that log writes happen */ - psema(&log->l_flushsema, PINOD); + + /* Wait for log writes to have flushed */ + sv_wait(&log->l_flush_wait, 0, &log->l_icloglock, 0); goto restart; } - ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); + head = &iclog->ic_header; atomic_inc(&iclog->ic_refcnt); /* prevents sync */ @@ -2507,7 +2502,7 @@ xlog_grant_log_space(xlog_t *log, goto error_return; XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* * If we got an error, and the filesystem is shutting down, * we'll catch it down below. So just continue... @@ -2533,7 +2528,7 @@ redo: xlog_trace_loggrant(log, tic, "xlog_grant_log_space: sleep 2"); XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); if (XLOG_FORCED_SHUTDOWN(log)) { spin_lock(&log->l_grant_lock); @@ -2632,7 +2627,7 @@ xlog_regrant_write_log_space(xlog_t *log, if (free_bytes < ntic->t_unit_res) break; free_bytes -= ntic->t_unit_res; - sv_signal(&ntic->t_sema); + sv_signal(&ntic->t_wait); ntic = ntic->t_next; } while (ntic != log->l_write_headq); @@ -2643,7 +2638,7 @@ xlog_regrant_write_log_space(xlog_t *log, xlog_trace_loggrant(log, tic, "xlog_regrant_write_log_space: sleep 1"); XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_sema, PINOD|PLTWAIT, + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already @@ -2672,7 +2667,7 @@ redo: if ((tic->t_flags & XLOG_TIC_IN_Q) == 0) xlog_ins_ticketq(&log->l_write_headq, tic); XFS_STATS_INC(xs_sleep_logspace); - sv_wait(&tic->t_sema, PINOD|PLTWAIT, &log->l_grant_lock, s); + sv_wait(&tic->t_wait, PINOD|PLTWAIT, &log->l_grant_lock, s); /* If we're shutting down, this tic is already off the queue */ if (XLOG_FORCED_SHUTDOWN(log)) { @@ -2915,7 +2910,7 @@ xlog_state_switch_iclogs(xlog_t *log, * 2. the current iclog is drity, and the previous iclog is in the * active or dirty state. * - * We may sleep (call psema) if: + * We may sleep if: * * 1. the current iclog is not in the active nor dirty state. * 2. the current iclog dirty, and the previous iclog is not in the @@ -3012,7 +3007,7 @@ maybe_sleep: return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_forcesema, PINOD, &log->l_icloglock, s); + sv_wait(&iclog->ic_force_wait, PINOD, &log->l_icloglock, s); /* * No need to grab the log lock here since we're * only deciding whether or not to return EIO @@ -3095,7 +3090,7 @@ try_again: XLOG_STATE_SYNCING))) { ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR)); XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_prev->ic_writesema, PSWP, + sv_wait(&iclog->ic_prev->ic_write_wait, PSWP, &log->l_icloglock, s); *log_flushed = 1; already_slept = 1; @@ -3115,7 +3110,7 @@ try_again: !(iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))) { /* - * Don't wait on the forcesema if we know that we've + * Don't wait on completion if we know that we've * gotten a log write error. */ if (iclog->ic_state & XLOG_STATE_IOERROR) { @@ -3123,7 +3118,7 @@ try_again: return XFS_ERROR(EIO); } XFS_STATS_INC(xs_log_force_sleep); - sv_wait(&iclog->ic_forcesema, PSWP, &log->l_icloglock, s); + sv_wait(&iclog->ic_force_wait, PSWP, &log->l_icloglock, s); /* * No need to grab the log lock here since we're * only deciding whether or not to return EIO @@ -3179,7 +3174,7 @@ STATIC void xlog_ticket_put(xlog_t *log, xlog_ticket_t *ticket) { - sv_destroy(&ticket->t_sema); + sv_destroy(&ticket->t_wait); kmem_zone_free(xfs_log_ticket_zone, ticket); } /* xlog_ticket_put */ @@ -3269,7 +3264,7 @@ xlog_ticket_get(xlog_t *log, tic->t_trans_type = 0; if (xflags & XFS_LOG_PERM_RESERV) tic->t_flags |= XLOG_TIC_PERM_RESERV; - sv_init(&(tic->t_sema), SV_DEFAULT, "logtick"); + sv_init(&(tic->t_wait), SV_DEFAULT, "logtick"); xlog_tic_reset_res(tic); @@ -3556,14 +3551,14 @@ xfs_log_force_umount( */ if ((tic = log->l_reserve_headq)) { do { - sv_signal(&tic->t_sema); + sv_signal(&tic->t_wait); tic = tic->t_next; } while (tic != log->l_reserve_headq); } if ((tic = log->l_write_headq)) { do { - sv_signal(&tic->t_sema); + sv_signal(&tic->t_wait); tic = tic->t_next; } while (tic != log->l_write_headq); } diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index d1d678ecb63..d47b91f1082 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -149,7 +149,7 @@ int xfs_log_mount(struct xfs_mount *mp, struct xfs_buftarg *log_target, xfs_daddr_t start_block, int num_bblocks); -int xfs_log_mount_finish(struct xfs_mount *mp, int); +int xfs_log_mount_finish(struct xfs_mount *mp); void xfs_log_move_tail(struct xfs_mount *mp, xfs_lsn_t tail_lsn); int xfs_log_notify(struct xfs_mount *mp, diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 8952a392b5f..c8a5b22ee3e 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -241,7 +241,7 @@ typedef struct xlog_res { } xlog_res_t; typedef struct xlog_ticket { - sv_t t_sema; /* sleep on this semaphore : 20 */ + sv_t t_wait; /* ticket wait queue : 20 */ struct xlog_ticket *t_next; /* :4|8 */ struct xlog_ticket *t_prev; /* :4|8 */ xlog_tid_t t_tid; /* transaction identifier : 4 */ @@ -314,7 +314,7 @@ typedef struct xlog_rec_ext_header { * xlog_rec_header_t into the reserved space. * - ic_data follows, so a write to disk can start at the beginning of * the iclog. - * - ic_forcesema is used to implement synchronous forcing of the iclog to disk. + * - ic_forcewait is used to implement synchronous forcing of the iclog to disk. * - ic_next is the pointer to the next iclog in the ring. * - ic_bp is a pointer to the buffer used to write this incore log to disk. * - ic_log is a pointer back to the global log structure. @@ -339,8 +339,8 @@ typedef struct xlog_rec_ext_header { * and move everything else out to subsequent cachelines. */ typedef struct xlog_iclog_fields { - sv_t ic_forcesema; - sv_t ic_writesema; + sv_t ic_force_wait; + sv_t ic_write_wait; struct xlog_in_core *ic_next; struct xlog_in_core *ic_prev; struct xfs_buf *ic_bp; @@ -377,8 +377,8 @@ typedef struct xlog_in_core { /* * Defines to save our code from this glop. */ -#define ic_forcesema hic_fields.ic_forcesema -#define ic_writesema hic_fields.ic_writesema +#define ic_force_wait hic_fields.ic_force_wait +#define ic_write_wait hic_fields.ic_write_wait #define ic_next hic_fields.ic_next #define ic_prev hic_fields.ic_prev #define ic_bp hic_fields.ic_bp @@ -423,10 +423,8 @@ typedef struct log { int l_logBBsize; /* size of log in BB chunks */ /* The following block of fields are changed while holding icloglock */ - sema_t l_flushsema ____cacheline_aligned_in_smp; - /* iclog flushing semaphore */ - int l_flushcnt; /* # of procs waiting on this - * sema */ + sv_t l_flush_wait ____cacheline_aligned_in_smp; + /* waiting for iclog flush */ int l_covered_state;/* state of "covering disk * log entries" */ xlog_in_core_t *l_iclog; /* head log queue */ @@ -470,7 +468,7 @@ extern int xlog_find_tail(xlog_t *log, xfs_daddr_t *head_blk, xfs_daddr_t *tail_blk); extern int xlog_recover(xlog_t *log); -extern int xlog_recover_finish(xlog_t *log, int mfsi_flags); +extern int xlog_recover_finish(xlog_t *log); extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); extern void xlog_recover_process_iunlinks(xlog_t *log); diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index e65ab4af095..82d46ce69d5 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1715,8 +1715,7 @@ xlog_check_buffer_cancelled( } else { prevp->bc_next = bcp->bc_next; } - kmem_free(bcp, - sizeof(xfs_buf_cancel_t)); + kmem_free(bcp); } } return 1; @@ -2519,7 +2518,7 @@ write_inode_buffer: error: if (need_free) - kmem_free(in_f, sizeof(*in_f)); + kmem_free(in_f); return XFS_ERROR(error); } @@ -2830,16 +2829,14 @@ xlog_recover_free_trans( item = item->ri_next; /* Free the regions in the item. */ for (i = 0; i < free_item->ri_cnt; i++) { - kmem_free(free_item->ri_buf[i].i_addr, - free_item->ri_buf[i].i_len); + kmem_free(free_item->ri_buf[i].i_addr); } /* Free the item itself */ - kmem_free(free_item->ri_buf, - (free_item->ri_total * sizeof(xfs_log_iovec_t))); - kmem_free(free_item, sizeof(xlog_recover_item_t)); + kmem_free(free_item->ri_buf); + kmem_free(free_item); } while (first_item != item); /* Free the transaction recover structure */ - kmem_free(trans, sizeof(xlog_recover_t)); + kmem_free(trans); } STATIC int @@ -3786,8 +3783,7 @@ xlog_do_log_recovery( error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS1); if (error != 0) { - kmem_free(log->l_buf_cancel_table, - XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*)); + kmem_free(log->l_buf_cancel_table); log->l_buf_cancel_table = NULL; return error; } @@ -3806,8 +3802,7 @@ xlog_do_log_recovery( } #endif /* DEBUG */ - kmem_free(log->l_buf_cancel_table, - XLOG_BC_TABLE_SIZE * sizeof(xfs_buf_cancel_t*)); + kmem_free(log->l_buf_cancel_table); log->l_buf_cancel_table = NULL; return error; @@ -3945,8 +3940,7 @@ xlog_recover( */ int xlog_recover_finish( - xlog_t *log, - int mfsi_flags) + xlog_t *log) { /* * Now we're ready to do the transactions needed for the @@ -3974,9 +3968,7 @@ xlog_recover_finish( xfs_log_force(log->l_mp, (xfs_lsn_t)0, (XFS_LOG_FORCE | XFS_LOG_SYNC)); - if ( (mfsi_flags & XFS_MFSI_NOUNLINK) == 0 ) { - xlog_recover_process_iunlinks(log); - } + xlog_recover_process_iunlinks(log); xlog_recover_check_summary(log); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index da3988453b7..a4503f5e949 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -47,12 +47,10 @@ STATIC int xfs_mount_log_sb(xfs_mount_t *, __int64_t); STATIC int xfs_uuid_mount(xfs_mount_t *); -STATIC void xfs_uuid_unmount(xfs_mount_t *mp); STATIC void xfs_unmountfs_wait(xfs_mount_t *); #ifdef HAVE_PERCPU_SB -STATIC void xfs_icsb_destroy_counters(xfs_mount_t *); STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int); STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, @@ -63,7 +61,6 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); #else -#define xfs_icsb_destroy_counters(mp) do { } while (0) #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) #define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) @@ -126,34 +123,12 @@ static const struct { }; /* - * Return a pointer to an initialized xfs_mount structure. - */ -xfs_mount_t * -xfs_mount_init(void) -{ - xfs_mount_t *mp; - - mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP); - - if (xfs_icsb_init_counters(mp)) { - mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; - } - - spin_lock_init(&mp->m_sb_lock); - mutex_init(&mp->m_ilock); - mutex_init(&mp->m_growlock); - atomic_set(&mp->m_active_trans, 0); - - return mp; -} - -/* * Free up the resources associated with a mount structure. Assume that * the structure was initially zeroed, so we can tell which fields got * initialized. */ -void -xfs_mount_free( +STATIC void +xfs_free_perag( xfs_mount_t *mp) { if (mp->m_perag) { @@ -161,28 +136,9 @@ xfs_mount_free( for (agno = 0; agno < mp->m_maxagi; agno++) if (mp->m_perag[agno].pagb_list) - kmem_free(mp->m_perag[agno].pagb_list, - sizeof(xfs_perag_busy_t) * - XFS_PAGB_NUM_SLOTS); - kmem_free(mp->m_perag, - sizeof(xfs_perag_t) * mp->m_sb.sb_agcount); + kmem_free(mp->m_perag[agno].pagb_list); + kmem_free(mp->m_perag); } - - spinlock_destroy(&mp->m_ail_lock); - spinlock_destroy(&mp->m_sb_lock); - mutex_destroy(&mp->m_ilock); - mutex_destroy(&mp->m_growlock); - if (mp->m_quotainfo) - XFS_QM_DONE(mp); - - if (mp->m_fsname != NULL) - kmem_free(mp->m_fsname, mp->m_fsname_len); - if (mp->m_rtname != NULL) - kmem_free(mp->m_rtname, strlen(mp->m_rtname) + 1); - if (mp->m_logname != NULL) - kmem_free(mp->m_logname, strlen(mp->m_logname) + 1); - - xfs_icsb_destroy_counters(mp); } /* @@ -288,6 +244,19 @@ xfs_mount_validate_sb( return XFS_ERROR(EFSCORRUPTED); } + /* + * Until this is fixed only page-sized or smaller data blocks work. + */ + if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { + xfs_fs_mount_cmn_err(flags, + "file system with blocksize %d bytes", + sbp->sb_blocksize); + xfs_fs_mount_cmn_err(flags, + "only pagesize (%ld) or less will currently work.", + PAGE_SIZE); + return XFS_ERROR(ENOSYS); + } + if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { xfs_fs_mount_cmn_err(flags, @@ -309,19 +278,6 @@ xfs_mount_validate_sb( return XFS_ERROR(ENOSYS); } - /* - * Until this is fixed only page-sized or smaller data blocks work. - */ - if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - xfs_fs_mount_cmn_err(flags, - "file system with blocksize %d bytes", - sbp->sb_blocksize); - xfs_fs_mount_cmn_err(flags, - "only pagesize (%ld) or less will currently work.", - PAGE_SIZE); - return XFS_ERROR(ENOSYS); - } - return 0; } @@ -734,11 +690,11 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount) * Update alignment values based on mount options and sb values */ STATIC int -xfs_update_alignment(xfs_mount_t *mp, int mfsi_flags, __uint64_t *update_flags) +xfs_update_alignment(xfs_mount_t *mp, __uint64_t *update_flags) { xfs_sb_t *sbp = &(mp->m_sb); - if (mp->m_dalign && !(mfsi_flags & XFS_MFSI_SECOND)) { + if (mp->m_dalign) { /* * If stripe unit and stripe width are not multiples * of the fs blocksize turn off alignment. @@ -894,7 +850,7 @@ xfs_set_inoalignment(xfs_mount_t *mp) * Check that the data (and log if separate) are an ok size. */ STATIC int -xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) +xfs_check_sizes(xfs_mount_t *mp) { xfs_buf_t *bp; xfs_daddr_t d; @@ -917,8 +873,7 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) return error; } - if (((mfsi_flags & XFS_MFSI_CLIENT) == 0) && - mp->m_logdev_targp != mp->m_ddev_targp) { + if (mp->m_logdev_targp != mp->m_ddev_targp) { d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { cmn_err(CE_WARN, "XFS: size check 3 failed"); @@ -953,15 +908,13 @@ xfs_check_sizes(xfs_mount_t *mp, int mfsi_flags) */ int xfs_mountfs( - xfs_mount_t *mp, - int mfsi_flags) + xfs_mount_t *mp) { xfs_sb_t *sbp = &(mp->m_sb); xfs_inode_t *rip; __uint64_t resblks; __int64_t update_flags = 0LL; uint quotamount, quotaflags; - int agno; int uuid_mounted = 0; int error = 0; @@ -994,9 +947,19 @@ xfs_mountfs( * Re-check for ATTR2 in case it was found in bad_features2 * slot. */ - if (xfs_sb_version_hasattr2(&mp->m_sb)) + if (xfs_sb_version_hasattr2(&mp->m_sb) && + !(mp->m_flags & XFS_MOUNT_NOATTR2)) mp->m_flags |= XFS_MOUNT_ATTR2; + } + + if (xfs_sb_version_hasattr2(&mp->m_sb) && + (mp->m_flags & XFS_MOUNT_NOATTR2)) { + xfs_sb_version_removeattr2(&mp->m_sb); + update_flags |= XFS_SB_FEATURES2; + /* update sb_versionnum for the clearing of the morebits */ + if (!sbp->sb_features2) + update_flags |= XFS_SB_VERSIONNUM; } /* @@ -1005,7 +968,7 @@ xfs_mountfs( * allocator alignment is within an ag, therefore ag has * to be aligned at stripe boundary. */ - error = xfs_update_alignment(mp, mfsi_flags, &update_flags); + error = xfs_update_alignment(mp, &update_flags); if (error) goto error1; @@ -1024,8 +987,7 @@ xfs_mountfs( * since a single partition filesystem is identical to a single * partition volume/filesystem. */ - if ((mfsi_flags & XFS_MFSI_SECOND) == 0 && - (mp->m_flags & XFS_MOUNT_NOUUID) == 0) { + if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) { if (xfs_uuid_mount(mp)) { error = XFS_ERROR(EINVAL); goto error1; @@ -1053,7 +1015,7 @@ xfs_mountfs( /* * Check that the data (and log if separate) are an ok size. */ - error = xfs_check_sizes(mp, mfsi_flags); + error = xfs_check_sizes(mp); if (error) goto error1; @@ -1067,13 +1029,6 @@ xfs_mountfs( } /* - * For client case we are done now - */ - if (mfsi_flags & XFS_MFSI_CLIENT) { - return 0; - } - - /* * Copies the low order bits of the timestamp and the randomly * set "sequence" number out of a UUID. */ @@ -1097,8 +1052,10 @@ xfs_mountfs( * Allocate and initialize the per-ag data. */ init_rwsem(&mp->m_peraglock); - mp->m_perag = - kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), KM_SLEEP); + mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), + KM_MAYFAIL); + if (!mp->m_perag) + goto error1; mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount); @@ -1210,7 +1167,7 @@ xfs_mountfs( * delayed until after the root and real-time bitmap inodes * were consistently read in. */ - error = xfs_log_mount_finish(mp, mfsi_flags); + error = xfs_log_mount_finish(mp); if (error) { cmn_err(CE_WARN, "XFS: log mount finish failed"); goto error4; @@ -1219,7 +1176,7 @@ xfs_mountfs( /* * Complete the quota initialisation, post-log-replay component. */ - error = XFS_QM_MOUNT(mp, quotamount, quotaflags, mfsi_flags); + error = XFS_QM_MOUNT(mp, quotamount, quotaflags); if (error) goto error4; @@ -1253,31 +1210,25 @@ xfs_mountfs( error3: xfs_log_unmount_dealloc(mp); error2: - for (agno = 0; agno < sbp->sb_agcount; agno++) - if (mp->m_perag[agno].pagb_list) - kmem_free(mp->m_perag[agno].pagb_list, - sizeof(xfs_perag_busy_t) * XFS_PAGB_NUM_SLOTS); - kmem_free(mp->m_perag, sbp->sb_agcount * sizeof(xfs_perag_t)); - mp->m_perag = NULL; - /* FALLTHROUGH */ + xfs_free_perag(mp); error1: if (uuid_mounted) - xfs_uuid_unmount(mp); - xfs_freesb(mp); + uuid_table_remove(&mp->m_sb.sb_uuid); return error; } /* - * xfs_unmountfs - * * This flushes out the inodes,dquots and the superblock, unmounts the * log and makes sure that incore structures are freed. */ -int -xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) +void +xfs_unmountfs( + struct xfs_mount *mp) { - __uint64_t resblks; - int error = 0; + __uint64_t resblks; + int error; + + IRELE(mp->m_rootip); /* * We can potentially deadlock here if we have an inode cluster @@ -1334,32 +1285,20 @@ xfs_unmountfs(xfs_mount_t *mp, struct cred *cr) xfs_unmountfs_wait(mp); /* wait for async bufs */ xfs_log_unmount(mp); /* Done! No more fs ops. */ - xfs_freesb(mp); - /* * All inodes from this mount point should be freed. */ ASSERT(mp->m_inodes == NULL); - xfs_unmountfs_close(mp, cr); if ((mp->m_flags & XFS_MOUNT_NOUUID) == 0) - xfs_uuid_unmount(mp); + uuid_table_remove(&mp->m_sb.sb_uuid); -#if defined(DEBUG) || defined(INDUCE_IO_ERROR) +#if defined(DEBUG) xfs_errortag_clearall(mp, 0); #endif - xfs_mount_free(mp); - return 0; -} - -void -xfs_unmountfs_close(xfs_mount_t *mp, struct cred *cr) -{ - if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) - xfs_free_buftarg(mp->m_logdev_targp, 1); - if (mp->m_rtdev_targp) - xfs_free_buftarg(mp->m_rtdev_targp, 1); - xfs_free_buftarg(mp->m_ddev_targp, 0); + xfs_free_perag(mp); + if (mp->m_quotainfo) + XFS_QM_DONE(mp); } STATIC void @@ -1905,16 +1844,6 @@ xfs_uuid_mount( } /* - * Remove filesystem from the UUID table. - */ -STATIC void -xfs_uuid_unmount( - xfs_mount_t *mp) -{ - uuid_table_remove(&mp->m_sb.sb_uuid); -} - -/* * Used to log changes to the superblock unit and width fields which could * be altered by the mount options, as well as any potential sb_features2 * fixup. Only the first superblock is updated. @@ -1928,7 +1857,8 @@ xfs_mount_log_sb( int error; ASSERT(fields & (XFS_SB_UNIT | XFS_SB_WIDTH | XFS_SB_UUID | - XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2)); + XFS_SB_FEATURES2 | XFS_SB_BAD_FEATURES2 | + XFS_SB_VERSIONNUM)); tp = xfs_trans_alloc(mp, XFS_TRANS_SB_UNIT); error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0, @@ -2109,7 +2039,7 @@ xfs_icsb_reinit_counters( xfs_icsb_unlock(mp); } -STATIC void +void xfs_icsb_destroy_counters( xfs_mount_t *mp) { diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 63e0693a358..f3c1024b124 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -61,6 +61,7 @@ struct xfs_bmap_free; struct xfs_extdelta; struct xfs_swapext; struct xfs_mru_cache; +struct xfs_nameops; /* * Prototypes and functions for the Data Migration subsystem. @@ -113,7 +114,7 @@ struct xfs_dqtrxops; struct xfs_quotainfo; typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *); -typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint, int); +typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint); typedef int (*xfs_qmunmount_t)(struct xfs_mount *); typedef void (*xfs_qmdone_t)(struct xfs_mount *); typedef void (*xfs_dqrele_t)(struct xfs_dquot *); @@ -157,8 +158,8 @@ typedef struct xfs_qmops { #define XFS_QM_INIT(mp, mnt, fl) \ (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl) -#define XFS_QM_MOUNT(mp, mnt, fl, mfsi_flags) \ - (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl, mfsi_flags) +#define XFS_QM_MOUNT(mp, mnt, fl) \ + (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl) #define XFS_QM_UNMOUNT(mp) \ (*(mp)->m_qm_ops->xfs_qmunmount)(mp) #define XFS_QM_DONE(mp) \ @@ -210,12 +211,14 @@ typedef struct xfs_icsb_cnts { extern int xfs_icsb_init_counters(struct xfs_mount *); extern void xfs_icsb_reinit_counters(struct xfs_mount *); +extern void xfs_icsb_destroy_counters(struct xfs_mount *); extern void xfs_icsb_sync_counters(struct xfs_mount *, int); extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); #else -#define xfs_icsb_init_counters(mp) (0) -#define xfs_icsb_reinit_counters(mp) do { } while (0) +#define xfs_icsb_init_counters(mp) (0) +#define xfs_icsb_destroy_counters(mp) do { } while (0) +#define xfs_icsb_reinit_counters(mp) do { } while (0) #define xfs_icsb_sync_counters(mp, flags) do { } while (0) #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) #endif @@ -313,6 +316,7 @@ typedef struct xfs_mount { __uint8_t m_inode_quiesce;/* call quiesce on new inodes. field governed by m_ilock */ __uint8_t m_sectbb_log; /* sectlog - BBSHIFT */ + const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */ int m_dirblksize; /* directory block sz--bytes */ int m_dirblkfsbs; /* directory block sz--fsbs */ xfs_dablk_t m_dirdatablk; /* blockno of dir data v2 */ @@ -378,6 +382,7 @@ typedef struct xfs_mount { counters */ #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams allocator */ +#define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ /* @@ -437,13 +442,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, /* * Flags for xfs_mountfs */ -#define XFS_MFSI_SECOND 0x01 /* Secondary mount -- skip stuff */ -#define XFS_MFSI_CLIENT 0x02 /* Is a client -- skip lots of stuff */ -/* XFS_MFSI_RRINODES */ -#define XFS_MFSI_NOUNLINK 0x08 /* Skip unlinked inode processing in */ - /* log recovery */ -#define XFS_MFSI_NO_QUOTACHECK 0x10 /* Skip quotacheck processing */ -/* XFS_MFSI_CONVERT_SUNIT */ #define XFS_MFSI_QUIET 0x40 /* Be silent if mount errors found */ #define XFS_DADDR_TO_AGNO(mp,d) xfs_daddr_to_agno(mp,d) @@ -510,15 +508,12 @@ typedef struct xfs_mod_sb { #define XFS_MOUNT_ILOCK(mp) mutex_lock(&((mp)->m_ilock)) #define XFS_MOUNT_IUNLOCK(mp) mutex_unlock(&((mp)->m_ilock)) -extern xfs_mount_t *xfs_mount_init(void); extern void xfs_mod_sb(xfs_trans_t *, __int64_t); extern int xfs_log_sbcount(xfs_mount_t *, uint); -extern void xfs_mount_free(xfs_mount_t *mp); -extern int xfs_mountfs(xfs_mount_t *mp, int); +extern int xfs_mountfs(xfs_mount_t *mp); extern void xfs_mountfs_check_barriers(xfs_mount_t *mp); -extern int xfs_unmountfs(xfs_mount_t *, struct cred *); -extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *); +extern void xfs_unmountfs(xfs_mount_t *); extern int xfs_unmountfs_writesb(xfs_mount_t *); extern int xfs_unmount_flush(xfs_mount_t *, int); extern int xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int64_t, int); @@ -544,9 +539,6 @@ extern void xfs_qmops_put(struct xfs_mount *); extern struct xfs_dmops xfs_dmcore_xfs; -extern int xfs_init(void); -extern void xfs_cleanup(void); - #endif /* __KERNEL__ */ #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index a0b2c0a2589..afee7eb2432 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -307,15 +307,18 @@ xfs_mru_cache_init(void) xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), "xfs_mru_cache_elem"); if (!xfs_mru_elem_zone) - return ENOMEM; + goto out; xfs_mru_reap_wq = create_singlethread_workqueue("xfs_mru_cache"); - if (!xfs_mru_reap_wq) { - kmem_zone_destroy(xfs_mru_elem_zone); - return ENOMEM; - } + if (!xfs_mru_reap_wq) + goto out_destroy_mru_elem_zone; return 0; + + out_destroy_mru_elem_zone: + kmem_zone_destroy(xfs_mru_elem_zone); + out: + return -ENOMEM; } void @@ -382,9 +385,9 @@ xfs_mru_cache_create( exit: if (err && mru && mru->lists) - kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); + kmem_free(mru->lists); if (err && mru) - kmem_free(mru, sizeof(*mru)); + kmem_free(mru); return err; } @@ -424,8 +427,8 @@ xfs_mru_cache_destroy( xfs_mru_cache_flush(mru); - kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); - kmem_free(mru, sizeof(*mru)); + kmem_free(mru->lists); + kmem_free(mru); } /* diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index d8063e1ad29..d700dacdb10 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -336,22 +336,18 @@ xfs_rename( ASSERT(error != EEXIST); if (error) goto abort_return; - xfs_ichgtime(src_ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - - } else { - /* - * We always want to hit the ctime on the source inode. - * We do it in the if clause above for the 'new_parent && - * src_is_directory' case, and here we get all the other - * cases. This isn't strictly required by the standards - * since the source inode isn't really being changed, - * but old unix file systems did it and some incremental - * backup programs won't work without it. - */ - xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); } /* + * We always want to hit the ctime on the source inode. + * + * This isn't strictly required by the standards since the source + * inode isn't really being changed, but old unix file systems did + * it and some incremental backup programs won't work without it. + */ + xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); + + /* * Adjust the link count on src_dp. This is necessary when * renaming a directory, either within one parent when * the target existed, or across two parent directories. diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a0dc6e5bc5b..e2f68de1615 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -74,18 +74,6 @@ STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int, */ /* - * xfs_lowbit32: get low bit set out of 32-bit argument, -1 if none set. - */ -STATIC int -xfs_lowbit32( - __uint32_t v) -{ - if (v) - return ffs(v) - 1; - return -1; -} - -/* * Allocate space to the bitmap or summary file, and zero it, for growfs. */ STATIC int /* error */ @@ -450,6 +438,7 @@ xfs_rtallocate_extent_near( } bbno = XFS_BITTOBLOCK(mp, bno); i = 0; + ASSERT(minlen != 0); log2len = xfs_highbit32(minlen); /* * Loop over all bitmap blocks (bbno + i is current block). @@ -618,6 +607,8 @@ xfs_rtallocate_extent_size( xfs_suminfo_t sum; /* summary information for extents */ ASSERT(minlen % prod == 0 && maxlen % prod == 0); + ASSERT(maxlen != 0); + /* * Loop over all the levels starting with maxlen. * At each level, look at all the bitmap blocks, to see if there @@ -675,6 +666,9 @@ xfs_rtallocate_extent_size( *rtblock = NULLRTBLOCK; return 0; } + ASSERT(minlen != 0); + ASSERT(maxlen != 0); + /* * Loop over sizes, from maxlen down to minlen. * This time, when we do the allocations, allow smaller ones @@ -1961,6 +1955,7 @@ xfs_growfs_rt( nsbp->sb_blocksize * nsbp->sb_rextsize); nsbp->sb_rextents = nsbp->sb_rblocks; do_div(nsbp->sb_rextents, nsbp->sb_rextsize); + ASSERT(nsbp->sb_rextents != 0); nsbp->sb_rextslog = xfs_highbit32(nsbp->sb_rextents); nrsumlevels = nmp->m_rsumlevels = nsbp->sb_rextslog + 1; nrsumsize = @@ -2062,7 +2057,7 @@ xfs_growfs_rt( /* * Free the fake mp structure. */ - kmem_free(nmp, sizeof(*nmp)); + kmem_free(nmp); return error; } diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c index b0f31c09a76..3a82576dde9 100644 --- a/fs/xfs/xfs_rw.c +++ b/fs/xfs/xfs_rw.c @@ -314,7 +314,7 @@ xfs_bioerror_relse( * ASYNC buffers. */ XFS_BUF_ERROR(bp, EIO); - XFS_BUF_V_IODONESEMA(bp); + XFS_BUF_FINISH_IOWAIT(bp); } else { xfs_buf_relse(bp); } diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index d904efe7f87..3f8cf1587f4 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -46,10 +46,12 @@ struct xfs_mount; #define XFS_SB_VERSION_SECTORBIT 0x0800 #define XFS_SB_VERSION_EXTFLGBIT 0x1000 #define XFS_SB_VERSION_DIRV2BIT 0x2000 +#define XFS_SB_VERSION_BORGBIT 0x4000 /* ASCII only case-insens. */ #define XFS_SB_VERSION_MOREBITSBIT 0x8000 #define XFS_SB_VERSION_OKSASHFBITS \ (XFS_SB_VERSION_EXTFLGBIT | \ - XFS_SB_VERSION_DIRV2BIT) + XFS_SB_VERSION_DIRV2BIT | \ + XFS_SB_VERSION_BORGBIT) #define XFS_SB_VERSION_OKREALFBITS \ (XFS_SB_VERSION_ATTRBIT | \ XFS_SB_VERSION_NLINKBIT | \ @@ -437,6 +439,12 @@ static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_SECTORBIT); } +static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) +{ + return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ + (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); +} + static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) { return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) && \ @@ -473,6 +481,13 @@ static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) ((sbp)->sb_features2 | XFS_SB_VERSION2_ATTR2BIT))); } +static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) +{ + sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; + if (!sbp->sb_features2) + sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; +} + /* * end of superblock version macros */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 140386434aa..4e1c22a23be 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -43,6 +43,7 @@ #include "xfs_quota.h" #include "xfs_trans_priv.h" #include "xfs_trans_space.h" +#include "xfs_inode_item.h" STATIC void xfs_trans_apply_sb_deltas(xfs_trans_t *); @@ -253,7 +254,7 @@ _xfs_trans_alloc( tp->t_mountp = mp; tp->t_items_free = XFS_LIC_NUM_SLOTS; tp->t_busy_free = XFS_LBC_NUM_SLOTS; - XFS_LIC_INIT(&(tp->t_items)); + xfs_lic_init(&(tp->t_items)); XFS_LBC_INIT(&(tp->t_busy)); return tp; } @@ -282,7 +283,7 @@ xfs_trans_dup( ntp->t_mountp = tp->t_mountp; ntp->t_items_free = XFS_LIC_NUM_SLOTS; ntp->t_busy_free = XFS_LBC_NUM_SLOTS; - XFS_LIC_INIT(&(ntp->t_items)); + xfs_lic_init(&(ntp->t_items)); XFS_LBC_INIT(&(ntp->t_busy)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); @@ -889,7 +890,7 @@ shut_us_down: tp->t_commit_lsn = commit_lsn; if (nvec > XFS_TRANS_LOGVEC_COUNT) { - kmem_free(log_vector, nvec * sizeof(xfs_log_iovec_t)); + kmem_free(log_vector); } /* @@ -1169,7 +1170,7 @@ xfs_trans_cancel( while (licp != NULL) { lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } @@ -1216,6 +1217,68 @@ xfs_trans_free( kmem_zone_free(xfs_trans_zone, tp); } +/* + * Roll from one trans in the sequence of PERMANENT transactions to + * the next: permanent transactions are only flushed out when + * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon + * as possible to let chunks of it go to the log. So we commit the + * chunk we've been working on and get a new transaction to continue. + */ +int +xfs_trans_roll( + struct xfs_trans **tpp, + struct xfs_inode *dp) +{ + struct xfs_trans *trans; + unsigned int logres, count; + int error; + + /* + * Ensure that the inode is always logged. + */ + trans = *tpp; + xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); + + /* + * Copy the critical parameters from one trans to the next. + */ + logres = trans->t_log_res; + count = trans->t_log_count; + *tpp = xfs_trans_dup(trans); + + /* + * Commit the current transaction. + * If this commit failed, then it'd just unlock those items that + * are not marked ihold. That also means that a filesystem shutdown + * is in progress. The caller takes the responsibility to cancel + * the duplicate transaction that gets returned. + */ + error = xfs_trans_commit(trans, 0); + if (error) + return (error); + + trans = *tpp; + + /* + * Reserve space in the log for th next transaction. + * This also pushes items in the "AIL", the list of logged items, + * out to disk if they are taking up space at the tail of the log + * that we want to use. This requires that either nothing be locked + * across this call, or that anything that is locked be logged in + * the prior and the next transactions. + */ + error = xfs_trans_reserve(trans, 0, logres, 0, + XFS_TRANS_PERM_LOG_RES, count); + /* + * Ensure that the inode is in the new transaction and locked. + */ + if (error) + return error; + + xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL); + xfs_trans_ihold(trans, dp); + return 0; +} /* * THIS SHOULD BE REWRITTEN TO USE xfs_trans_next_item(). @@ -1253,7 +1316,7 @@ xfs_trans_committed( * Special case the chunk embedded in the transaction. */ licp = &(tp->t_items); - if (!(XFS_LIC_ARE_ALL_FREE(licp))) { + if (!(xfs_lic_are_all_free(licp))) { xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); } @@ -1262,10 +1325,10 @@ xfs_trans_committed( */ licp = licp->lic_next; while (licp != NULL) { - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); xfs_trans_chunk_committed(licp, tp->t_lsn, abortflag); next_licp = licp->lic_next; - kmem_free(licp, sizeof(xfs_log_item_chunk_t)); + kmem_free(licp); licp = next_licp; } @@ -1325,7 +1388,7 @@ xfs_trans_chunk_committed( lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 0804207c739..74c80bd2b0e 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -210,62 +210,52 @@ typedef struct xfs_log_item_chunk { * lic_unused to the right value (0 matches all free). The * lic_descs.lid_index values are set up as each desc is allocated. */ -#define XFS_LIC_INIT(cp) xfs_lic_init(cp) static inline void xfs_lic_init(xfs_log_item_chunk_t *cp) { cp->lic_free = XFS_LIC_FREEMASK; } -#define XFS_LIC_INIT_SLOT(cp,slot) xfs_lic_init_slot(cp, slot) static inline void xfs_lic_init_slot(xfs_log_item_chunk_t *cp, int slot) { cp->lic_descs[slot].lid_index = (unsigned char)(slot); } -#define XFS_LIC_VACANCY(cp) xfs_lic_vacancy(cp) static inline int xfs_lic_vacancy(xfs_log_item_chunk_t *cp) { return cp->lic_free & XFS_LIC_FREEMASK; } -#define XFS_LIC_ALL_FREE(cp) xfs_lic_all_free(cp) static inline void xfs_lic_all_free(xfs_log_item_chunk_t *cp) { cp->lic_free = XFS_LIC_FREEMASK; } -#define XFS_LIC_ARE_ALL_FREE(cp) xfs_lic_are_all_free(cp) static inline int xfs_lic_are_all_free(xfs_log_item_chunk_t *cp) { return ((cp->lic_free & XFS_LIC_FREEMASK) == XFS_LIC_FREEMASK); } -#define XFS_LIC_ISFREE(cp,slot) xfs_lic_isfree(cp,slot) static inline int xfs_lic_isfree(xfs_log_item_chunk_t *cp, int slot) { return (cp->lic_free & (1 << slot)); } -#define XFS_LIC_CLAIM(cp,slot) xfs_lic_claim(cp,slot) static inline void xfs_lic_claim(xfs_log_item_chunk_t *cp, int slot) { cp->lic_free &= ~(1 << slot); } -#define XFS_LIC_RELSE(cp,slot) xfs_lic_relse(cp,slot) static inline void xfs_lic_relse(xfs_log_item_chunk_t *cp, int slot) { cp->lic_free |= 1 << slot; } -#define XFS_LIC_SLOT(cp,slot) xfs_lic_slot(cp,slot) static inline xfs_log_item_desc_t * xfs_lic_slot(xfs_log_item_chunk_t *cp, int slot) { return &(cp->lic_descs[slot]); } -#define XFS_LIC_DESC_TO_SLOT(dp) xfs_lic_desc_to_slot(dp) static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) { return (uint)dp->lid_index; @@ -278,7 +268,6 @@ static inline int xfs_lic_desc_to_slot(xfs_log_item_desc_t *dp) * All of this yields the address of the chunk, which is * cast to a chunk pointer. */ -#define XFS_LIC_DESC_TO_CHUNK(dp) xfs_lic_desc_to_chunk(dp) static inline xfs_log_item_chunk_t * xfs_lic_desc_to_chunk(xfs_log_item_desc_t *dp) { @@ -986,6 +975,7 @@ int _xfs_trans_commit(xfs_trans_t *, int *); #define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL) void xfs_trans_cancel(xfs_trans_t *, int); +int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); int xfs_trans_ail_init(struct xfs_mount *); void xfs_trans_ail_destroy(struct xfs_mount *); void xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index cb0c5839154..4e855b5ced6 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -1021,16 +1021,16 @@ xfs_trans_buf_item_match( bp = NULL; len = BBTOB(len); licp = &tp->t_items; - if (!XFS_LIC_ARE_ALL_FREE(licp)) { + if (!xfs_lic_are_all_free(licp)) { for (i = 0; i < licp->lic_unused; i++) { /* * Skip unoccupied slots. */ - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - lidp = XFS_LIC_SLOT(licp, i); + lidp = xfs_lic_slot(licp, i); blip = (xfs_buf_log_item_t *)lidp->lid_item; if (blip->bli_item.li_type != XFS_LI_BUF) { continue; @@ -1074,7 +1074,7 @@ xfs_trans_buf_item_match_all( bp = NULL; len = BBTOB(len); for (licp = &tp->t_items; licp != NULL; licp = licp->lic_next) { - if (XFS_LIC_ARE_ALL_FREE(licp)) { + if (xfs_lic_are_all_free(licp)) { ASSERT(licp == &tp->t_items); ASSERT(licp->lic_next == NULL); return NULL; @@ -1083,11 +1083,11 @@ xfs_trans_buf_item_match_all( /* * Skip unoccupied slots. */ - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - lidp = XFS_LIC_SLOT(licp, i); + lidp = xfs_lic_slot(licp, i); blip = (xfs_buf_log_item_t *)lidp->lid_item; if (blip->bli_item.li_type != XFS_LI_BUF) { continue; diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 4c70bf5e998..2a1c0f071f9 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -291,7 +291,7 @@ xfs_trans_inode_broot_debug( iip = ip->i_itemp; if (iip->ili_root_size != 0) { ASSERT(iip->ili_orig_root != NULL); - kmem_free(iip->ili_orig_root, iip->ili_root_size); + kmem_free(iip->ili_orig_root); iip->ili_root_size = 0; iip->ili_orig_root = NULL; } diff --git a/fs/xfs/xfs_trans_item.c b/fs/xfs/xfs_trans_item.c index 66a09f0d894..3c666e8317f 100644 --- a/fs/xfs/xfs_trans_item.c +++ b/fs/xfs/xfs_trans_item.c @@ -53,11 +53,11 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) * Initialize the chunk, and then * claim the first slot in the newly allocated chunk. */ - XFS_LIC_INIT(licp); - XFS_LIC_CLAIM(licp, 0); + xfs_lic_init(licp); + xfs_lic_claim(licp, 0); licp->lic_unused = 1; - XFS_LIC_INIT_SLOT(licp, 0); - lidp = XFS_LIC_SLOT(licp, 0); + xfs_lic_init_slot(licp, 0); + lidp = xfs_lic_slot(licp, 0); /* * Link in the new chunk and update the free count. @@ -88,14 +88,14 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) */ licp = &tp->t_items; while (licp != NULL) { - if (XFS_LIC_VACANCY(licp)) { + if (xfs_lic_vacancy(licp)) { if (licp->lic_unused <= XFS_LIC_MAX_SLOT) { i = licp->lic_unused; - ASSERT(XFS_LIC_ISFREE(licp, i)); + ASSERT(xfs_lic_isfree(licp, i)); break; } for (i = 0; i <= XFS_LIC_MAX_SLOT; i++) { - if (XFS_LIC_ISFREE(licp, i)) + if (xfs_lic_isfree(licp, i)) break; } ASSERT(i <= XFS_LIC_MAX_SLOT); @@ -108,12 +108,12 @@ xfs_trans_add_item(xfs_trans_t *tp, xfs_log_item_t *lip) * If we find a free descriptor, claim it, * initialize it, and return it. */ - XFS_LIC_CLAIM(licp, i); + xfs_lic_claim(licp, i); if (licp->lic_unused <= i) { licp->lic_unused = i + 1; - XFS_LIC_INIT_SLOT(licp, i); + xfs_lic_init_slot(licp, i); } - lidp = XFS_LIC_SLOT(licp, i); + lidp = xfs_lic_slot(licp, i); tp->t_items_free--; lidp->lid_item = lip; lidp->lid_flags = 0; @@ -136,9 +136,9 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) xfs_log_item_chunk_t *licp; xfs_log_item_chunk_t **licpp; - slot = XFS_LIC_DESC_TO_SLOT(lidp); - licp = XFS_LIC_DESC_TO_CHUNK(lidp); - XFS_LIC_RELSE(licp, slot); + slot = xfs_lic_desc_to_slot(lidp); + licp = xfs_lic_desc_to_chunk(lidp); + xfs_lic_relse(licp, slot); lidp->lid_item->li_desc = NULL; tp->t_items_free++; @@ -154,14 +154,14 @@ xfs_trans_free_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) * Also decrement the transaction structure's count of free items * by the number in a chunk since we are freeing an empty chunk. */ - if (XFS_LIC_ARE_ALL_FREE(licp) && (licp != &(tp->t_items))) { + if (xfs_lic_are_all_free(licp) && (licp != &(tp->t_items))) { licpp = &(tp->t_items.lic_next); while (*licpp != licp) { ASSERT(*licpp != NULL); licpp = &((*licpp)->lic_next); } *licpp = licp->lic_next; - kmem_free(licp, sizeof(xfs_log_item_chunk_t)); + kmem_free(licp); tp->t_items_free -= XFS_LIC_NUM_SLOTS; } } @@ -207,20 +207,20 @@ xfs_trans_first_item(xfs_trans_t *tp) /* * If it's not in the first chunk, skip to the second. */ - if (XFS_LIC_ARE_ALL_FREE(licp)) { + if (xfs_lic_are_all_free(licp)) { licp = licp->lic_next; } /* * Return the first non-free descriptor in the chunk. */ - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); for (i = 0; i < licp->lic_unused; i++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - return XFS_LIC_SLOT(licp, i); + return xfs_lic_slot(licp, i); } cmn_err(CE_WARN, "xfs_trans_first_item() -- no first item"); return NULL; @@ -242,18 +242,18 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) xfs_log_item_chunk_t *licp; int i; - licp = XFS_LIC_DESC_TO_CHUNK(lidp); + licp = xfs_lic_desc_to_chunk(lidp); /* * First search the rest of the chunk. The for loop keeps us * from referencing things beyond the end of the chunk. */ - for (i = (int)XFS_LIC_DESC_TO_SLOT(lidp) + 1; i < licp->lic_unused; i++) { - if (XFS_LIC_ISFREE(licp, i)) { + for (i = (int)xfs_lic_desc_to_slot(lidp) + 1; i < licp->lic_unused; i++) { + if (xfs_lic_isfree(licp, i)) { continue; } - return XFS_LIC_SLOT(licp, i); + return xfs_lic_slot(licp, i); } /* @@ -266,13 +266,13 @@ xfs_trans_next_item(xfs_trans_t *tp, xfs_log_item_desc_t *lidp) } licp = licp->lic_next; - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); for (i = 0; i < licp->lic_unused; i++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } - return XFS_LIC_SLOT(licp, i); + return xfs_lic_slot(licp, i); } ASSERT(0); /* NOTREACHED */ @@ -300,9 +300,9 @@ xfs_trans_free_items( /* * Special case the embedded chunk so we don't free it below. */ - if (!XFS_LIC_ARE_ALL_FREE(licp)) { + if (!xfs_lic_are_all_free(licp)) { (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); - XFS_LIC_ALL_FREE(licp); + xfs_lic_all_free(licp); licp->lic_unused = 0; } licp = licp->lic_next; @@ -311,10 +311,10 @@ xfs_trans_free_items( * Unlock each item in each chunk and free the chunks. */ while (licp != NULL) { - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); (void) xfs_trans_unlock_chunk(licp, 1, abort, NULLCOMMITLSN); next_licp = licp->lic_next; - kmem_free(licp, sizeof(xfs_log_item_chunk_t)); + kmem_free(licp); licp = next_licp; } @@ -347,7 +347,7 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn) /* * Special case the embedded chunk so we don't free. */ - if (!XFS_LIC_ARE_ALL_FREE(licp)) { + if (!xfs_lic_are_all_free(licp)) { freed = xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); } licpp = &(tp->t_items.lic_next); @@ -358,12 +358,12 @@ xfs_trans_unlock_items(xfs_trans_t *tp, xfs_lsn_t commit_lsn) * and free empty chunks. */ while (licp != NULL) { - ASSERT(!XFS_LIC_ARE_ALL_FREE(licp)); + ASSERT(!xfs_lic_are_all_free(licp)); freed += xfs_trans_unlock_chunk(licp, 0, 0, commit_lsn); next_licp = licp->lic_next; - if (XFS_LIC_ARE_ALL_FREE(licp)) { + if (xfs_lic_are_all_free(licp)) { *licpp = next_licp; - kmem_free(licp, sizeof(xfs_log_item_chunk_t)); + kmem_free(licp); freed -= XFS_LIC_NUM_SLOTS; } else { licpp = &(licp->lic_next); @@ -402,7 +402,7 @@ xfs_trans_unlock_chunk( freed = 0; lidp = licp->lic_descs; for (i = 0; i < licp->lic_unused; i++, lidp++) { - if (XFS_LIC_ISFREE(licp, i)) { + if (xfs_lic_isfree(licp, i)) { continue; } lip = lidp->lid_item; @@ -421,7 +421,7 @@ xfs_trans_unlock_chunk( */ if (!(freeing_chunk) && (!(lidp->lid_flags & XFS_LID_DIRTY) || abort)) { - XFS_LIC_RELSE(licp, i); + xfs_lic_relse(licp, i); freed++; } } @@ -530,7 +530,7 @@ xfs_trans_free_busy(xfs_trans_t *tp) lbcp = tp->t_busy.lbc_next; while (lbcp != NULL) { lbcq = lbcp->lbc_next; - kmem_free(lbcp, sizeof(xfs_log_busy_chunk_t)); + kmem_free(lbcp); lbcp = lbcq; } diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index 98e5f110ba5..35d4d414bcc 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c @@ -237,7 +237,7 @@ xfs_droplink( ASSERT (ip->i_d.di_nlink > 0); ip->i_d.di_nlink--; - drop_nlink(ip->i_vnode); + drop_nlink(VFS_I(ip)); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = 0; @@ -301,7 +301,7 @@ xfs_bumplink( ASSERT(ip->i_d.di_nlink > 0); ip->i_d.di_nlink++; - inc_nlink(ip->i_vnode); + inc_nlink(VFS_I(ip)); if ((ip->i_d.di_version == XFS_DINODE_VERSION_1) && (ip->i_d.di_nlink > XFS_MAXLINK_1)) { /* diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f316cb85d8e..ef321225d26 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h @@ -18,9 +18,6 @@ #ifndef __XFS_UTILS_H__ #define __XFS_UTILS_H__ -#define IRELE(ip) VN_RELE(XFS_ITOV(ip)) -#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip)) - extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *); extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, xfs_dev_t, cred_t *, prid_t, int, diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index 30bacd8bb0e..439dd3939dd 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -58,586 +58,6 @@ #include "xfs_utils.h" -int __init -xfs_init(void) -{ -#ifdef XFS_DABUF_DEBUG - extern spinlock_t xfs_dabuf_global_lock; - spin_lock_init(&xfs_dabuf_global_lock); -#endif - - /* - * Initialize all of the zone allocators we use. - */ - xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t), - "xfs_log_ticket"); - xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t), - "xfs_bmap_free_item"); - xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), - "xfs_btree_cur"); - xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t), - "xfs_da_state"); - xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf"); - xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork"); - xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans"); - xfs_acl_zone_init(xfs_acl_zone, "xfs_acl"); - xfs_mru_cache_init(); - xfs_filestream_init(); - - /* - * The size of the zone allocated buf log item is the maximum - * size possible under XFS. This wastes a little bit of memory, - * but it is much faster. - */ - xfs_buf_item_zone = - kmem_zone_init((sizeof(xfs_buf_log_item_t) + - (((XFS_MAX_BLOCKSIZE / XFS_BLI_CHUNK) / - NBWORD) * sizeof(int))), - "xfs_buf_item"); - xfs_efd_zone = - kmem_zone_init((sizeof(xfs_efd_log_item_t) + - ((XFS_EFD_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), - "xfs_efd_item"); - xfs_efi_zone = - kmem_zone_init((sizeof(xfs_efi_log_item_t) + - ((XFS_EFI_MAX_FAST_EXTENTS - 1) * - sizeof(xfs_extent_t))), - "xfs_efi_item"); - - /* - * These zones warrant special memory allocator hints - */ - xfs_inode_zone = - kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, NULL); - xfs_ili_zone = - kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili", - KM_ZONE_SPREAD, NULL); - - /* - * Allocate global trace buffers. - */ -#ifdef XFS_ALLOC_TRACE - xfs_alloc_trace_buf = ktrace_alloc(XFS_ALLOC_TRACE_SIZE, KM_SLEEP); -#endif -#ifdef XFS_BMAP_TRACE - xfs_bmap_trace_buf = ktrace_alloc(XFS_BMAP_TRACE_SIZE, KM_SLEEP); -#endif -#ifdef XFS_BMBT_TRACE - xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_SLEEP); -#endif -#ifdef XFS_ATTR_TRACE - xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_SLEEP); -#endif -#ifdef XFS_DIR2_TRACE - xfs_dir2_trace_buf = ktrace_alloc(XFS_DIR2_GTRACE_SIZE, KM_SLEEP); -#endif - - xfs_dir_startup(); - -#if (defined(DEBUG) || defined(INDUCE_IO_ERROR)) - xfs_error_test_init(); -#endif /* DEBUG || INDUCE_IO_ERROR */ - - xfs_init_procfs(); - xfs_sysctl_register(); - return 0; -} - -void __exit -xfs_cleanup(void) -{ - extern kmem_zone_t *xfs_inode_zone; - extern kmem_zone_t *xfs_efd_zone; - extern kmem_zone_t *xfs_efi_zone; - - xfs_cleanup_procfs(); - xfs_sysctl_unregister(); - xfs_filestream_uninit(); - xfs_mru_cache_uninit(); - xfs_acl_zone_destroy(xfs_acl_zone); - -#ifdef XFS_DIR2_TRACE - ktrace_free(xfs_dir2_trace_buf); -#endif -#ifdef XFS_ATTR_TRACE - ktrace_free(xfs_attr_trace_buf); -#endif -#ifdef XFS_BMBT_TRACE - ktrace_free(xfs_bmbt_trace_buf); -#endif -#ifdef XFS_BMAP_TRACE - ktrace_free(xfs_bmap_trace_buf); -#endif -#ifdef XFS_ALLOC_TRACE - ktrace_free(xfs_alloc_trace_buf); -#endif - - kmem_zone_destroy(xfs_bmap_free_item_zone); - kmem_zone_destroy(xfs_btree_cur_zone); - kmem_zone_destroy(xfs_inode_zone); - kmem_zone_destroy(xfs_trans_zone); - kmem_zone_destroy(xfs_da_state_zone); - kmem_zone_destroy(xfs_dabuf_zone); - kmem_zone_destroy(xfs_buf_item_zone); - kmem_zone_destroy(xfs_efd_zone); - kmem_zone_destroy(xfs_efi_zone); - kmem_zone_destroy(xfs_ifork_zone); - kmem_zone_destroy(xfs_ili_zone); - kmem_zone_destroy(xfs_log_ticket_zone); -} - -/* - * xfs_start_flags - * - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock has _not_ yet been read in. - */ -STATIC int -xfs_start_flags( - struct xfs_mount_args *ap, - struct xfs_mount *mp) -{ - /* Values are in BBs */ - if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { - /* - * At this point the superblock has not been read - * in, therefore we do not know the block size. - * Before the mount call ends we will convert - * these to FSBs. - */ - mp->m_dalign = ap->sunit; - mp->m_swidth = ap->swidth; - } - - if (ap->logbufs != -1 && - ap->logbufs != 0 && - (ap->logbufs < XLOG_MIN_ICLOGS || - ap->logbufs > XLOG_MAX_ICLOGS)) { - cmn_err(CE_WARN, - "XFS: invalid logbufs value: %d [not %d-%d]", - ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return XFS_ERROR(EINVAL); - } - mp->m_logbufs = ap->logbufs; - if (ap->logbufsize != -1 && - ap->logbufsize != 0 && - (ap->logbufsize < XLOG_MIN_RECORD_BSIZE || - ap->logbufsize > XLOG_MAX_RECORD_BSIZE || - !is_power_of_2(ap->logbufsize))) { - cmn_err(CE_WARN, - "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", - ap->logbufsize); - return XFS_ERROR(EINVAL); - } - mp->m_logbsize = ap->logbufsize; - mp->m_fsname_len = strlen(ap->fsname) + 1; - mp->m_fsname = kmem_alloc(mp->m_fsname_len, KM_SLEEP); - strcpy(mp->m_fsname, ap->fsname); - if (ap->rtname[0]) { - mp->m_rtname = kmem_alloc(strlen(ap->rtname) + 1, KM_SLEEP); - strcpy(mp->m_rtname, ap->rtname); - } - if (ap->logname[0]) { - mp->m_logname = kmem_alloc(strlen(ap->logname) + 1, KM_SLEEP); - strcpy(mp->m_logname, ap->logname); - } - - if (ap->flags & XFSMNT_WSYNC) - mp->m_flags |= XFS_MOUNT_WSYNC; -#if XFS_BIG_INUMS - if (ap->flags & XFSMNT_INO64) { - mp->m_flags |= XFS_MOUNT_INO64; - mp->m_inoadd = XFS_INO64_OFFSET; - } -#endif - if (ap->flags & XFSMNT_RETERR) - mp->m_flags |= XFS_MOUNT_RETERR; - if (ap->flags & XFSMNT_NOALIGN) - mp->m_flags |= XFS_MOUNT_NOALIGN; - if (ap->flags & XFSMNT_SWALLOC) - mp->m_flags |= XFS_MOUNT_SWALLOC; - if (ap->flags & XFSMNT_OSYNCISOSYNC) - mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; - if (ap->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_32BITINODES; - - if (ap->flags & XFSMNT_IOSIZE) { - if (ap->iosizelog > XFS_MAX_IO_LOG || - ap->iosizelog < XFS_MIN_IO_LOG) { - cmn_err(CE_WARN, - "XFS: invalid log iosize: %d [not %d-%d]", - ap->iosizelog, XFS_MIN_IO_LOG, - XFS_MAX_IO_LOG); - return XFS_ERROR(EINVAL); - } - - mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; - mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; - } - - if (ap->flags & XFSMNT_IKEEP) - mp->m_flags |= XFS_MOUNT_IKEEP; - if (ap->flags & XFSMNT_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (ap->flags & XFSMNT_ATTR2) - mp->m_flags |= XFS_MOUNT_ATTR2; - - if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - - /* - * no recovery flag requires a read-only mount - */ - if (ap->flags & XFSMNT_NORECOVERY) { - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - cmn_err(CE_WARN, - "XFS: tried to mount a FS read-write without recovery!"); - return XFS_ERROR(EINVAL); - } - mp->m_flags |= XFS_MOUNT_NORECOVERY; - } - - if (ap->flags & XFSMNT_NOUUID) - mp->m_flags |= XFS_MOUNT_NOUUID; - if (ap->flags & XFSMNT_BARRIER) - mp->m_flags |= XFS_MOUNT_BARRIER; - else - mp->m_flags &= ~XFS_MOUNT_BARRIER; - - if (ap->flags2 & XFSMNT2_FILESTREAMS) - mp->m_flags |= XFS_MOUNT_FILESTREAMS; - - if (ap->flags & XFSMNT_DMAPI) - mp->m_flags |= XFS_MOUNT_DMAPI; - return 0; -} - -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock _has_ now been read in. - */ -STATIC int -xfs_finish_flags( - struct xfs_mount_args *ap, - struct xfs_mount *mp) -{ - int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); - - /* Fail a mount where the logbuf is smaller then the log stripe */ - if (xfs_sb_version_haslogv2(&mp->m_sb)) { - if ((ap->logbufsize <= 0) && - (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { - mp->m_logbsize = mp->m_sb.sb_logsunit; - } else if (ap->logbufsize > 0 && - ap->logbufsize < mp->m_sb.sb_logsunit) { - cmn_err(CE_WARN, - "XFS: logbuf size must be greater than or equal to log stripe size"); - return XFS_ERROR(EINVAL); - } - } else { - /* Fail a mount if the logbuf is larger than 32K */ - if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) { - cmn_err(CE_WARN, - "XFS: logbuf size for version 1 logs must be 16K or 32K"); - return XFS_ERROR(EINVAL); - } - } - - if (xfs_sb_version_hasattr2(&mp->m_sb)) - mp->m_flags |= XFS_MOUNT_ATTR2; - - /* - * prohibit r/w mounts of read-only filesystems - */ - if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) { - cmn_err(CE_WARN, - "XFS: cannot mount a read-only filesystem as read-write"); - return XFS_ERROR(EROFS); - } - - /* - * check for shared mount. - */ - if (ap->flags & XFSMNT_SHARED) { - if (!xfs_sb_version_hasshared(&mp->m_sb)) - return XFS_ERROR(EINVAL); - - /* - * For IRIX 6.5, shared mounts must have the shared - * version bit set, have the persistent readonly - * field set, must be version 0 and can only be mounted - * read-only. - */ - if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) || - (mp->m_sb.sb_shared_vn != 0)) - return XFS_ERROR(EINVAL); - - mp->m_flags |= XFS_MOUNT_SHARED; - - /* - * Shared XFS V0 can't deal with DMI. Return EINVAL. - */ - if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI)) - return XFS_ERROR(EINVAL); - } - - if (ap->flags & XFSMNT_UQUOTA) { - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - if (ap->flags & XFSMNT_UQUOTAENF) - mp->m_qflags |= XFS_UQUOTA_ENFD; - } - - if (ap->flags & XFSMNT_GQUOTA) { - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - if (ap->flags & XFSMNT_GQUOTAENF) - mp->m_qflags |= XFS_OQUOTA_ENFD; - } else if (ap->flags & XFSMNT_PQUOTA) { - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - if (ap->flags & XFSMNT_PQUOTAENF) - mp->m_qflags |= XFS_OQUOTA_ENFD; - } - - return 0; -} - -/* - * xfs_mount - * - * The file system configurations are: - * (1) device (partition) with data and internal log - * (2) logical volume with data and log subvolumes. - * (3) logical volume with data, log, and realtime subvolumes. - * - * We only have to handle opening the log and realtime volumes here if - * they are present. The data subvolume has already been opened by - * get_sb_bdev() and is stored in vfsp->vfs_super->s_bdev. - */ -int -xfs_mount( - struct xfs_mount *mp, - struct xfs_mount_args *args, - cred_t *credp) -{ - struct block_device *ddev, *logdev, *rtdev; - int flags = 0, error; - - ddev = mp->m_super->s_bdev; - logdev = rtdev = NULL; - - error = xfs_dmops_get(mp, args); - if (error) - return error; - error = xfs_qmops_get(mp, args); - if (error) - return error; - - if (args->flags & XFSMNT_QUIET) - flags |= XFS_MFSI_QUIET; - - /* - * Open real time and log devices - order is important. - */ - if (args->logname[0]) { - error = xfs_blkdev_get(mp, args->logname, &logdev); - if (error) - return error; - } - if (args->rtname[0]) { - error = xfs_blkdev_get(mp, args->rtname, &rtdev); - if (error) { - xfs_blkdev_put(logdev); - return error; - } - - if (rtdev == ddev || rtdev == logdev) { - cmn_err(CE_WARN, - "XFS: Cannot mount filesystem with identical rtdev and ddev/logdev."); - xfs_blkdev_put(logdev); - xfs_blkdev_put(rtdev); - return EINVAL; - } - } - - /* - * Setup xfs_mount buffer target pointers - */ - error = ENOMEM; - mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0); - if (!mp->m_ddev_targp) { - xfs_blkdev_put(logdev); - xfs_blkdev_put(rtdev); - return error; - } - if (rtdev) { - mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1); - if (!mp->m_rtdev_targp) { - xfs_blkdev_put(logdev); - xfs_blkdev_put(rtdev); - goto error0; - } - } - mp->m_logdev_targp = (logdev && logdev != ddev) ? - xfs_alloc_buftarg(logdev, 1) : mp->m_ddev_targp; - if (!mp->m_logdev_targp) { - xfs_blkdev_put(logdev); - xfs_blkdev_put(rtdev); - goto error0; - } - - /* - * Setup flags based on mount(2) options and then the superblock - */ - error = xfs_start_flags(args, mp); - if (error) - goto error1; - error = xfs_readsb(mp, flags); - if (error) - goto error1; - error = xfs_finish_flags(args, mp); - if (error) - goto error2; - - /* - * Setup xfs_mount buffer target pointers based on superblock - */ - error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, - mp->m_sb.sb_sectsize); - if (!error && logdev && logdev != ddev) { - unsigned int log_sector_size = BBSIZE; - - if (xfs_sb_version_hassector(&mp->m_sb)) - log_sector_size = mp->m_sb.sb_logsectsize; - error = xfs_setsize_buftarg(mp->m_logdev_targp, - mp->m_sb.sb_blocksize, - log_sector_size); - } - if (!error && rtdev) - error = xfs_setsize_buftarg(mp->m_rtdev_targp, - mp->m_sb.sb_blocksize, - mp->m_sb.sb_sectsize); - if (error) - goto error2; - - if (mp->m_flags & XFS_MOUNT_BARRIER) - xfs_mountfs_check_barriers(mp); - - if ((error = xfs_filestream_mount(mp))) - goto error2; - - error = xfs_mountfs(mp, flags); - if (error) - goto error2; - - XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname); - - return 0; - -error2: - if (mp->m_sb_bp) - xfs_freesb(mp); -error1: - xfs_binval(mp->m_ddev_targp); - if (logdev && logdev != ddev) - xfs_binval(mp->m_logdev_targp); - if (rtdev) - xfs_binval(mp->m_rtdev_targp); -error0: - xfs_unmountfs_close(mp, credp); - xfs_qmops_put(mp); - xfs_dmops_put(mp); - return error; -} - -int -xfs_unmount( - xfs_mount_t *mp, - int flags, - cred_t *credp) -{ - xfs_inode_t *rip; - bhv_vnode_t *rvp; - int unmount_event_wanted = 0; - int unmount_event_flags = 0; - int xfs_unmountfs_needed = 0; - int error; - - rip = mp->m_rootip; - rvp = XFS_ITOV(rip); - -#ifdef HAVE_DMAPI - if (mp->m_flags & XFS_MOUNT_DMAPI) { - error = XFS_SEND_PREUNMOUNT(mp, - rip, DM_RIGHT_NULL, rip, DM_RIGHT_NULL, - NULL, NULL, 0, 0, - (mp->m_dmevmask & (1<<DM_EVENT_PREUNMOUNT))? - 0:DM_FLAGS_UNWANTED); - if (error) - return XFS_ERROR(error); - unmount_event_wanted = 1; - unmount_event_flags = (mp->m_dmevmask & (1<<DM_EVENT_UNMOUNT))? - 0 : DM_FLAGS_UNWANTED; - } -#endif - - /* - * Blow away any referenced inode in the filestreams cache. - * This can and will cause log traffic as inodes go inactive - * here. - */ - xfs_filestream_unmount(mp); - - XFS_bflush(mp->m_ddev_targp); - error = xfs_unmount_flush(mp, 0); - if (error) - goto out; - - ASSERT(vn_count(rvp) == 1); - - /* - * Drop the reference count - */ - IRELE(rip); - - /* - * If we're forcing a shutdown, typically because of a media error, - * we want to make sure we invalidate dirty pages that belong to - * referenced vnodes as well. - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE); - ASSERT(error != EFSCORRUPTED); - } - xfs_unmountfs_needed = 1; - -out: - /* Send DMAPI event, if required. - * Then do xfs_unmountfs() if needed. - * Then return error (or zero). - */ - if (unmount_event_wanted) { - /* Note: mp structure must still exist for - * XFS_SEND_UNMOUNT() call. - */ - XFS_SEND_UNMOUNT(mp, error == 0 ? rip : NULL, - DM_RIGHT_NULL, 0, error, unmount_event_flags); - } - if (xfs_unmountfs_needed) { - /* - * Call common unmount function to flush to disk - * and free the super block buffer & mount structures. - */ - xfs_unmountfs(mp, credp); - xfs_qmops_put(mp); - xfs_dmops_put(mp); - kmem_free(mp, sizeof(xfs_mount_t)); - } - - return XFS_ERROR(error); -} - STATIC void xfs_quiesce_fs( xfs_mount_t *mp) @@ -694,30 +114,6 @@ xfs_attr_quiesce( xfs_unmountfs_writesb(mp); } -int -xfs_mntupdate( - struct xfs_mount *mp, - int *flags, - struct xfs_mount_args *args) -{ - if (!(*flags & MS_RDONLY)) { /* rw/ro -> rw */ - if (mp->m_flags & XFS_MOUNT_RDONLY) - mp->m_flags &= ~XFS_MOUNT_RDONLY; - if (args->flags & XFSMNT_BARRIER) { - mp->m_flags |= XFS_MOUNT_BARRIER; - xfs_mountfs_check_barriers(mp); - } else { - mp->m_flags &= ~XFS_MOUNT_BARRIER; - } - } else if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { /* rw -> ro */ - xfs_filestream_flush(mp); - xfs_sync(mp, SYNC_DATA_QUIESCE); - xfs_attr_quiesce(mp); - mp->m_flags |= XFS_MOUNT_RDONLY; - } - return 0; -} - /* * xfs_unmount_flush implements a set of flush operation on special * inodes, which are needed as a separate set of operations so that @@ -732,7 +128,6 @@ xfs_unmount_flush( xfs_inode_t *rip = mp->m_rootip; xfs_inode_t *rbmip; xfs_inode_t *rsumip = NULL; - bhv_vnode_t *rvp = XFS_ITOV(rip); int error; xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); @@ -750,7 +145,7 @@ xfs_unmount_flush( if (error == EFSCORRUPTED) goto fscorrupt_out; - ASSERT(vn_count(XFS_ITOV(rbmip)) == 1); + ASSERT(vn_count(VFS_I(rbmip)) == 1); rsumip = mp->m_rsumip; xfs_ilock(rsumip, XFS_ILOCK_EXCL); @@ -761,7 +156,7 @@ xfs_unmount_flush( if (error == EFSCORRUPTED) goto fscorrupt_out; - ASSERT(vn_count(XFS_ITOV(rsumip)) == 1); + ASSERT(vn_count(VFS_I(rsumip)) == 1); } /* @@ -771,7 +166,7 @@ xfs_unmount_flush( if (error == EFSCORRUPTED) goto fscorrupt_out2; - if (vn_count(rvp) != 1 && !relocation) { + if (vn_count(VFS_I(rip)) != 1 && !relocation) { xfs_iunlock(rip, XFS_ILOCK_EXCL); return XFS_ERROR(EBUSY); } @@ -888,7 +283,7 @@ xfs_sync_inodes( int *bypassed) { xfs_inode_t *ip = NULL; - bhv_vnode_t *vp = NULL; + struct inode *vp = NULL; int error; int last_error; uint64_t fflag; @@ -1008,7 +403,7 @@ xfs_sync_inodes( continue; } - vp = XFS_ITOV_NULL(ip); + vp = VFS_I(ip); /* * If the vnode is gone then this is being torn down, @@ -1048,7 +443,7 @@ xfs_sync_inodes( if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { XFS_MOUNT_IUNLOCK(mp); - kmem_free(ipointer, sizeof(xfs_iptr_t)); + kmem_free(ipointer); return 0; } @@ -1083,7 +478,7 @@ xfs_sync_inodes( IPOINTER_INSERT(ip, mp); xfs_ilock(ip, lock_flags); - ASSERT(vp == XFS_ITOV(ip)); + ASSERT(vp == VFS_I(ip)); ASSERT(ip->i_mount == mp); vnode_refed = B_TRUE; @@ -1194,7 +589,7 @@ xfs_sync_inodes( } XFS_MOUNT_IUNLOCK(mp); ASSERT(ipointer_in == B_FALSE); - kmem_free(ipointer, sizeof(xfs_iptr_t)); + kmem_free(ipointer); return XFS_ERROR(error); } @@ -1224,7 +619,7 @@ xfs_sync_inodes( ASSERT(ipointer_in == B_FALSE); - kmem_free(ipointer, sizeof(xfs_iptr_t)); + kmem_free(ipointer); return XFS_ERROR(last_error); } diff --git a/fs/xfs/xfs_vfsops.h b/fs/xfs/xfs_vfsops.h index 1688817c55e..a74b05087da 100644 --- a/fs/xfs/xfs_vfsops.h +++ b/fs/xfs/xfs_vfsops.h @@ -8,11 +8,6 @@ struct kstatfs; struct xfs_mount; struct xfs_mount_args; -int xfs_mount(struct xfs_mount *mp, struct xfs_mount_args *args, - struct cred *credp); -int xfs_unmount(struct xfs_mount *mp, int flags, struct cred *credp); -int xfs_mntupdate(struct xfs_mount *mp, int *flags, - struct xfs_mount_args *args); int xfs_sync(struct xfs_mount *mp, int flags); void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname, int lnnum); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index e475e3717eb..aa238c8fbd7 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -75,26 +75,23 @@ xfs_open( return 0; } -/* - * xfs_setattr - */ int xfs_setattr( - xfs_inode_t *ip, - bhv_vattr_t *vap, + struct xfs_inode *ip, + struct iattr *iattr, int flags, cred_t *credp) { xfs_mount_t *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + int mask = iattr->ia_valid; xfs_trans_t *tp; - int mask; int code; uint lock_flags; uint commit_flags=0; uid_t uid=0, iuid=0; gid_t gid=0, igid=0; int timeflags = 0; - xfs_prid_t projid=0, iprojid=0; struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2; int file_owner; int need_iolock = 1; @@ -104,30 +101,9 @@ xfs_setattr( if (mp->m_flags & XFS_MOUNT_RDONLY) return XFS_ERROR(EROFS); - /* - * Cannot set certain attributes. - */ - mask = vap->va_mask; - if (mask & XFS_AT_NOSET) { - return XFS_ERROR(EINVAL); - } - if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); - /* - * Timestamps do not need to be logged and hence do not - * need to be done within a transaction. - */ - if (mask & XFS_AT_UPDTIMES) { - ASSERT((mask & ~XFS_AT_UPDTIMES) == 0); - timeflags = ((mask & XFS_AT_UPDATIME) ? XFS_ICHGTIME_ACC : 0) | - ((mask & XFS_AT_UPDCTIME) ? XFS_ICHGTIME_CHG : 0) | - ((mask & XFS_AT_UPDMTIME) ? XFS_ICHGTIME_MOD : 0); - xfs_ichgtime(ip, timeflags); - return 0; - } - olddquot1 = olddquot2 = NULL; udqp = gdqp = NULL; @@ -139,28 +115,22 @@ xfs_setattr( * If the IDs do change before we take the ilock, we're covered * because the i_*dquot fields will get updated anyway. */ - if (XFS_IS_QUOTA_ON(mp) && - (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID))) { + if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) { uint qflags = 0; - if ((mask & XFS_AT_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = vap->va_uid; + if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { + uid = iattr->ia_uid; qflags |= XFS_QMOPT_UQUOTA; } else { uid = ip->i_d.di_uid; } - if ((mask & XFS_AT_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = vap->va_gid; + if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { + gid = iattr->ia_gid; qflags |= XFS_QMOPT_GQUOTA; } else { gid = ip->i_d.di_gid; } - if ((mask & XFS_AT_PROJID) && XFS_IS_PQUOTA_ON(mp)) { - projid = vap->va_projid; - qflags |= XFS_QMOPT_PQUOTA; - } else { - projid = ip->i_d.di_projid; - } + /* * We take a reference when we initialize udqp and gdqp, * so it is important that we never blindly double trip on @@ -168,8 +138,8 @@ xfs_setattr( */ ASSERT(udqp == NULL); ASSERT(gdqp == NULL); - code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, projid, qflags, - &udqp, &gdqp); + code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid, + qflags, &udqp, &gdqp); if (code) return code; } @@ -180,10 +150,10 @@ xfs_setattr( */ tp = NULL; lock_flags = XFS_ILOCK_EXCL; - if (flags & ATTR_NOLOCK) + if (flags & XFS_ATTR_NOLOCK) need_iolock = 0; - if (!(mask & XFS_AT_SIZE)) { - if ((mask != (XFS_AT_CTIME|XFS_AT_ATIME|XFS_AT_MTIME)) || + if (!(mask & ATTR_SIZE)) { + if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) || (mp->m_flags & XFS_MOUNT_WSYNC)) { tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE); commit_flags = 0; @@ -196,10 +166,10 @@ xfs_setattr( } } else { if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) && - !(flags & ATTR_DMI)) { + !(flags & XFS_ATTR_DMI)) { int dmflags = AT_DELAY_FLAG(flags) | DM_SEM_FLAG_WR; code = XFS_SEND_DATA(mp, DM_EVENT_TRUNCATE, ip, - vap->va_size, 0, dmflags, NULL); + iattr->ia_size, 0, dmflags, NULL); if (code) { lock_flags = 0; goto error_return; @@ -212,16 +182,14 @@ xfs_setattr( xfs_ilock(ip, lock_flags); /* boolean: are we the file owner? */ - file_owner = (current_fsuid(credp) == ip->i_d.di_uid); + file_owner = (current_fsuid() == ip->i_d.di_uid); /* * Change various properties of a file. * Only the owner or users with CAP_FOWNER * capability may do these things. */ - if (mask & - (XFS_AT_MODE|XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_UID| - XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (ATTR_MODE|ATTR_UID|ATTR_GID)) { /* * CAP_FOWNER overrides the following restrictions: * @@ -245,21 +213,21 @@ xfs_setattr( * IDs of the calling process shall match the group owner of * the file when setting the set-group-ID bit on that file */ - if (mask & XFS_AT_MODE) { + if (mask & ATTR_MODE) { mode_t m = 0; - if ((vap->va_mode & S_ISUID) && !file_owner) + if ((iattr->ia_mode & S_ISUID) && !file_owner) m |= S_ISUID; - if ((vap->va_mode & S_ISGID) && + if ((iattr->ia_mode & S_ISGID) && !in_group_p((gid_t)ip->i_d.di_gid)) m |= S_ISGID; #if 0 /* Linux allows this, Irix doesn't. */ - if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) + if ((iattr->ia_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode)) m |= S_ISVTX; #endif if (m && !capable(CAP_FSETID)) - vap->va_mode &= ~m; + iattr->ia_mode &= ~m; } } @@ -270,7 +238,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (ATTR_UID|ATTR_GID)) { /* * These IDs could have changed since we last looked at them. * But, we're assured that if the ownership did change @@ -278,12 +246,9 @@ xfs_setattr( * would have changed also. */ iuid = ip->i_d.di_uid; - iprojid = ip->i_d.di_projid; igid = ip->i_d.di_gid; - gid = (mask & XFS_AT_GID) ? vap->va_gid : igid; - uid = (mask & XFS_AT_UID) ? vap->va_uid : iuid; - projid = (mask & XFS_AT_PROJID) ? (xfs_prid_t)vap->va_projid : - iprojid; + gid = (mask & ATTR_GID) ? iattr->ia_gid : igid; + uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid; /* * CAP_CHOWN overrides the following restrictions: @@ -303,11 +268,10 @@ xfs_setattr( goto error_return; } /* - * Do a quota reservation only if uid/projid/gid is actually + * Do a quota reservation only if uid/gid is actually * going to change. */ if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) || - (XFS_IS_PQUOTA_ON(mp) && iprojid != projid) || (XFS_IS_GQUOTA_ON(mp) && igid != gid)) { ASSERT(tp); code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp, @@ -321,13 +285,13 @@ xfs_setattr( /* * Truncate file. Must have write permission and not be a directory. */ - if (mask & XFS_AT_SIZE) { + if (mask & ATTR_SIZE) { /* Short circuit the truncate case for zero length files */ - if ((vap->va_size == 0) && - (ip->i_size == 0) && (ip->i_d.di_nextents == 0)) { + if (iattr->ia_size == 0 && + ip->i_size == 0 && ip->i_d.di_nextents == 0) { xfs_iunlock(ip, XFS_ILOCK_EXCL); lock_flags &= ~XFS_ILOCK_EXCL; - if (mask & XFS_AT_CTIME) + if (mask & ATTR_CTIME) xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); code = 0; goto error_return; @@ -350,9 +314,9 @@ xfs_setattr( /* * Change file access or modified times. */ - if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { + if (mask & (ATTR_ATIME|ATTR_MTIME)) { if (!file_owner) { - if ((flags & ATTR_UTIME) && + if ((mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)) && !capable(CAP_FOWNER)) { code = XFS_ERROR(EPERM); goto error_return; @@ -361,90 +325,23 @@ xfs_setattr( } /* - * Change extent size or realtime flag. - */ - if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { - /* - * Can't change extent size if any extents are allocated. - */ - if (ip->i_d.di_nextents && (mask & XFS_AT_EXTSIZE) && - ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != - vap->va_extsize) ) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ - goto error_return; - } - - /* - * Can't change realtime flag if any extents are allocated. - */ - if ((ip->i_d.di_nextents || ip->i_delayed_blks) && - (mask & XFS_AT_XFLAGS) && - (XFS_IS_REALTIME_INODE(ip)) != - (vap->va_xflags & XFS_XFLAG_REALTIME)) { - code = XFS_ERROR(EINVAL); /* EFBIG? */ - goto error_return; - } - /* - * Extent size must be a multiple of the appropriate block - * size, if set at all. - */ - if ((mask & XFS_AT_EXTSIZE) && vap->va_extsize != 0) { - xfs_extlen_t size; - - if (XFS_IS_REALTIME_INODE(ip) || - ((mask & XFS_AT_XFLAGS) && - (vap->va_xflags & XFS_XFLAG_REALTIME))) { - size = mp->m_sb.sb_rextsize << - mp->m_sb.sb_blocklog; - } else { - size = mp->m_sb.sb_blocksize; - } - if (vap->va_extsize % size) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - } - /* - * If realtime flag is set then must have realtime data. - */ - if ((mask & XFS_AT_XFLAGS) && - (vap->va_xflags & XFS_XFLAG_REALTIME)) { - if ((mp->m_sb.sb_rblocks == 0) || - (mp->m_sb.sb_rextsize == 0) || - (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) { - code = XFS_ERROR(EINVAL); - goto error_return; - } - } - - /* - * Can't modify an immutable/append-only file unless - * we have appropriate permission. - */ - if ((mask & XFS_AT_XFLAGS) && - (ip->i_d.di_flags & - (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) || - (vap->va_xflags & - (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) && - !capable(CAP_LINUX_IMMUTABLE)) { - code = XFS_ERROR(EPERM); - goto error_return; - } - } - - /* * Now we can make the changes. Before we join the inode - * to the transaction, if XFS_AT_SIZE is set then take care of + * to the transaction, if ATTR_SIZE is set then take care of * the part of the truncation that must be done without the * inode lock. This needs to be done before joining the inode * to the transaction, because the inode cannot be unlocked * once it is a part of the transaction. */ - if (mask & XFS_AT_SIZE) { + if (mask & ATTR_SIZE) { code = 0; - if ((vap->va_size > ip->i_size) && - (flags & ATTR_NOSIZETOK) == 0) { - code = xfs_igrow_start(ip, vap->va_size, credp); + if (iattr->ia_size > ip->i_size) { + /* + * Do the first part of growing a file: zero any data + * in the last block that is beyond the old EOF. We + * need to do this before the inode is joined to the + * transaction to modify the i_size. + */ + code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size); } xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -461,10 +358,10 @@ xfs_setattr( * not within the range we care about here. */ if (!code && - (ip->i_size != ip->i_d.di_size) && - (vap->va_size > ip->i_d.di_size)) { + ip->i_size != ip->i_d.di_size && + iattr->ia_size > ip->i_d.di_size) { code = xfs_flush_pages(ip, - ip->i_d.di_size, vap->va_size, + ip->i_d.di_size, iattr->ia_size, XFS_B_ASYNC, FI_NONE); } @@ -472,7 +369,7 @@ xfs_setattr( vn_iowait(ip); if (!code) - code = xfs_itruncate_data(ip, vap->va_size); + code = xfs_itruncate_data(ip, iattr->ia_size); if (code) { ASSERT(tp == NULL); lock_flags &= ~XFS_ILOCK_EXCL; @@ -501,28 +398,30 @@ xfs_setattr( /* * Truncate file. Must have write permission and not be a directory. */ - if (mask & XFS_AT_SIZE) { + if (mask & ATTR_SIZE) { /* * Only change the c/mtime if we are changing the size * or we are explicitly asked to change it. This handles * the semantic difference between truncate() and ftruncate() * as implemented in the VFS. */ - if (vap->va_size != ip->i_size || (mask & XFS_AT_CTIME)) + if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME)) timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG; - if (vap->va_size > ip->i_size) { - xfs_igrow_finish(tp, ip, vap->va_size, - !(flags & ATTR_DMI)); - } else if ((vap->va_size <= ip->i_size) || - ((vap->va_size == 0) && ip->i_d.di_nextents)) { + if (iattr->ia_size > ip->i_size) { + ip->i_d.di_size = iattr->ia_size; + ip->i_size = iattr->ia_size; + if (!(flags & XFS_ATTR_DMI)) + xfs_ichgtime(ip, XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } else if (iattr->ia_size <= ip->i_size || + (iattr->ia_size == 0 && ip->i_d.di_nextents)) { /* * signal a sync transaction unless * we're truncating an already unlinked * file on a wsync filesystem */ - code = xfs_itruncate_finish(&tp, ip, - (xfs_fsize_t)vap->va_size, + code = xfs_itruncate_finish(&tp, ip, iattr->ia_size, XFS_DATA_FORK, ((ip->i_d.di_nlink != 0 || !(mp->m_flags & XFS_MOUNT_WSYNC)) @@ -544,9 +443,12 @@ xfs_setattr( /* * Change file access modes. */ - if (mask & XFS_AT_MODE) { + if (mask & ATTR_MODE) { ip->i_d.di_mode &= S_IFMT; - ip->i_d.di_mode |= vap->va_mode & ~S_IFMT; + ip->i_d.di_mode |= iattr->ia_mode & ~S_IFMT; + + inode->i_mode &= S_IFMT; + inode->i_mode |= iattr->ia_mode & ~S_IFMT; xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); timeflags |= XFS_ICHGTIME_CHG; @@ -559,7 +461,7 @@ xfs_setattr( * and can change the group id only to a group of which he * or she is a member. */ - if (mask & (XFS_AT_UID|XFS_AT_GID|XFS_AT_PROJID)) { + if (mask & (ATTR_UID|ATTR_GID)) { /* * CAP_FSETID overrides the following restrictions: * @@ -577,39 +479,24 @@ xfs_setattr( */ if (iuid != uid) { if (XFS_IS_UQUOTA_ON(mp)) { - ASSERT(mask & XFS_AT_UID); + ASSERT(mask & ATTR_UID); ASSERT(udqp); olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip, &ip->i_udquot, udqp); } ip->i_d.di_uid = uid; + inode->i_uid = uid; } if (igid != gid) { if (XFS_IS_GQUOTA_ON(mp)) { ASSERT(!XFS_IS_PQUOTA_ON(mp)); - ASSERT(mask & XFS_AT_GID); + ASSERT(mask & ATTR_GID); ASSERT(gdqp); olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, &ip->i_gdquot, gdqp); } ip->i_d.di_gid = gid; - } - if (iprojid != projid) { - if (XFS_IS_PQUOTA_ON(mp)) { - ASSERT(!XFS_IS_GQUOTA_ON(mp)); - ASSERT(mask & XFS_AT_PROJID); - ASSERT(gdqp); - olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip, - &ip->i_gdquot, gdqp); - } - ip->i_d.di_projid = projid; - /* - * We may have to rev the inode as well as - * the superblock version number since projids didn't - * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. - */ - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) - xfs_bump_ino_vers2(tp, ip); + inode->i_gid = gid; } xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); @@ -620,82 +507,33 @@ xfs_setattr( /* * Change file access or modified times. */ - if (mask & (XFS_AT_ATIME|XFS_AT_MTIME)) { - if (mask & XFS_AT_ATIME) { - ip->i_d.di_atime.t_sec = vap->va_atime.tv_sec; - ip->i_d.di_atime.t_nsec = vap->va_atime.tv_nsec; + if (mask & (ATTR_ATIME|ATTR_MTIME)) { + if (mask & ATTR_ATIME) { + inode->i_atime = iattr->ia_atime; + ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; + ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; ip->i_update_core = 1; - timeflags &= ~XFS_ICHGTIME_ACC; } - if (mask & XFS_AT_MTIME) { - ip->i_d.di_mtime.t_sec = vap->va_mtime.tv_sec; - ip->i_d.di_mtime.t_nsec = vap->va_mtime.tv_nsec; + if (mask & ATTR_MTIME) { + inode->i_mtime = iattr->ia_mtime; + ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; timeflags &= ~XFS_ICHGTIME_MOD; timeflags |= XFS_ICHGTIME_CHG; } - if (tp && (flags & ATTR_UTIME)) + if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET))) xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE); } /* - * Change XFS-added attributes. - */ - if (mask & (XFS_AT_EXTSIZE|XFS_AT_XFLAGS)) { - if (mask & XFS_AT_EXTSIZE) { - /* - * Converting bytes to fs blocks. - */ - ip->i_d.di_extsize = vap->va_extsize >> - mp->m_sb.sb_blocklog; - } - if (mask & XFS_AT_XFLAGS) { - uint di_flags; - - /* can't set PREALLOC this way, just preserve it */ - di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC); - if (vap->va_xflags & XFS_XFLAG_IMMUTABLE) - di_flags |= XFS_DIFLAG_IMMUTABLE; - if (vap->va_xflags & XFS_XFLAG_APPEND) - di_flags |= XFS_DIFLAG_APPEND; - if (vap->va_xflags & XFS_XFLAG_SYNC) - di_flags |= XFS_DIFLAG_SYNC; - if (vap->va_xflags & XFS_XFLAG_NOATIME) - di_flags |= XFS_DIFLAG_NOATIME; - if (vap->va_xflags & XFS_XFLAG_NODUMP) - di_flags |= XFS_DIFLAG_NODUMP; - if (vap->va_xflags & XFS_XFLAG_PROJINHERIT) - di_flags |= XFS_DIFLAG_PROJINHERIT; - if (vap->va_xflags & XFS_XFLAG_NODEFRAG) - di_flags |= XFS_DIFLAG_NODEFRAG; - if (vap->va_xflags & XFS_XFLAG_FILESTREAM) - di_flags |= XFS_DIFLAG_FILESTREAM; - if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) { - if (vap->va_xflags & XFS_XFLAG_RTINHERIT) - di_flags |= XFS_DIFLAG_RTINHERIT; - if (vap->va_xflags & XFS_XFLAG_NOSYMLINKS) - di_flags |= XFS_DIFLAG_NOSYMLINKS; - if (vap->va_xflags & XFS_XFLAG_EXTSZINHERIT) - di_flags |= XFS_DIFLAG_EXTSZINHERIT; - } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) { - if (vap->va_xflags & XFS_XFLAG_REALTIME) - di_flags |= XFS_DIFLAG_REALTIME; - if (vap->va_xflags & XFS_XFLAG_EXTSIZE) - di_flags |= XFS_DIFLAG_EXTSIZE; - } - ip->i_d.di_flags = di_flags; - } - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - timeflags |= XFS_ICHGTIME_CHG; - } - - /* - * Change file inode change time only if XFS_AT_CTIME set + * Change file inode change time only if ATTR_CTIME set * AND we have been called by a DMI function. */ - if ( (flags & ATTR_DMI) && (mask & XFS_AT_CTIME) ) { - ip->i_d.di_ctime.t_sec = vap->va_ctime.tv_sec; - ip->i_d.di_ctime.t_nsec = vap->va_ctime.tv_nsec; + if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) { + inode->i_ctime = iattr->ia_ctime; + ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; ip->i_update_core = 1; timeflags &= ~XFS_ICHGTIME_CHG; } @@ -704,7 +542,7 @@ xfs_setattr( * Send out timestamp changes that need to be set to the * current time. Not done when called by a DMI function. */ - if (timeflags && !(flags & ATTR_DMI)) + if (timeflags && !(flags & XFS_ATTR_DMI)) xfs_ichgtime(ip, timeflags); XFS_STATS_INC(xs_ig_attrchg); @@ -742,7 +580,7 @@ xfs_setattr( } if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) && - !(flags & ATTR_DMI)) { + !(flags & XFS_ATTR_DMI)) { (void) XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL, NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0, AT_DELAY_FLAG(flags)); @@ -875,7 +713,7 @@ xfs_fsync( return XFS_ERROR(EIO); /* capture size updates in I/O completion before writing the inode. */ - error = filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + error = filemap_fdatawait(VFS_I(ip)->i_mapping); if (error) return XFS_ERROR(error); @@ -1321,7 +1159,6 @@ int xfs_release( xfs_inode_t *ip) { - bhv_vnode_t *vp = XFS_ITOV(ip); xfs_mount_t *mp = ip->i_mount; int error; @@ -1356,13 +1193,13 @@ xfs_release( * be exposed to that problem. */ truncated = xfs_iflags_test_and_clear(ip, XFS_ITRUNCATED); - if (truncated && VN_DIRTY(vp) && ip->i_delayed_blks > 0) + if (truncated && VN_DIRTY(VFS_I(ip)) && ip->i_delayed_blks > 0) xfs_flush_pages(ip, 0, -1, XFS_B_ASYNC, FI_NONE); } if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS)) && (!(ip->i_d.di_flags & @@ -1388,7 +1225,6 @@ int xfs_inactive( xfs_inode_t *ip) { - bhv_vnode_t *vp = XFS_ITOV(ip); xfs_bmap_free_t free_list; xfs_fsblock_t first_block; int committed; @@ -1403,7 +1239,7 @@ xfs_inactive( * If the inode is already free, then there can be nothing * to clean up here. */ - if (ip->i_d.di_mode == 0 || VN_BAD(vp)) { + if (ip->i_d.di_mode == 0 || VN_BAD(VFS_I(ip))) { ASSERT(ip->i_df.if_real_bytes == 0); ASSERT(ip->i_df.if_broot_bytes == 0); return VN_INACTIVE_CACHE; @@ -1433,7 +1269,7 @@ xfs_inactive( if (ip->i_d.di_nlink != 0) { if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) && - ((ip->i_size > 0) || (VN_CACHED(vp) > 0 || + ((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 || ip->i_delayed_blks > 0)) && (ip->i_df.if_flags & XFS_IFEXTENTS) && (!(ip->i_d.di_flags & @@ -1601,12 +1437,18 @@ xfs_inactive( return VN_INACTIVE_CACHE; } - +/* + * Lookups up an inode from "name". If ci_name is not NULL, then a CI match + * is allowed, otherwise it has to be an exact match. If a CI match is found, + * ci_name->name will point to a the actual name (caller must free) or + * will be set to NULL if an exact match is found. + */ int xfs_lookup( xfs_inode_t *dp, struct xfs_name *name, - xfs_inode_t **ipp) + xfs_inode_t **ipp, + struct xfs_name *ci_name) { xfs_ino_t inum; int error; @@ -1618,7 +1460,7 @@ xfs_lookup( return XFS_ERROR(EIO); lock_mode = xfs_ilock_map_shared(dp); - error = xfs_dir_lookup(NULL, dp, name, &inum); + error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); xfs_iunlock_map_shared(dp, lock_mode); if (error) @@ -1626,12 +1468,15 @@ xfs_lookup( error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0); if (error) - goto out; + goto out_free_name; xfs_itrace_ref(*ipp); return 0; - out: +out_free_name: + if (ci_name) + kmem_free(ci_name->name); +out: *ipp = NULL; return error; } @@ -1688,7 +1533,7 @@ xfs_create( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), prid, + current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL|XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -1860,111 +1705,6 @@ std_return: } #ifdef DEBUG -/* - * Some counters to see if (and how often) we are hitting some deadlock - * prevention code paths. - */ - -int xfs_rm_locks; -int xfs_rm_lock_delays; -int xfs_rm_attempts; -#endif - -/* - * The following routine will lock the inodes associated with the - * directory and the named entry in the directory. The locks are - * acquired in increasing inode number. - * - * If the entry is "..", then only the directory is locked. The - * vnode ref count will still include that from the .. entry in - * this case. - * - * There is a deadlock we need to worry about. If the locked directory is - * in the AIL, it might be blocking up the log. The next inode we lock - * could be already locked by another thread waiting for log space (e.g - * a permanent log reservation with a long running transaction (see - * xfs_itruncate_finish)). To solve this, we must check if the directory - * is in the ail and use lock_nowait. If we can't lock, we need to - * drop the inode lock on the directory and try again. xfs_iunlock will - * potentially push the tail if we were holding up the log. - */ -STATIC int -xfs_lock_dir_and_entry( - xfs_inode_t *dp, - xfs_inode_t *ip) /* inode of entry 'name' */ -{ - int attempts; - xfs_ino_t e_inum; - xfs_inode_t *ips[2]; - xfs_log_item_t *lp; - -#ifdef DEBUG - xfs_rm_locks++; -#endif - attempts = 0; - -again: - xfs_ilock(dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); - - e_inum = ip->i_ino; - - xfs_itrace_ref(ip); - - /* - * We want to lock in increasing inum. Since we've already - * acquired the lock on the directory, we may need to release - * if if the inum of the entry turns out to be less. - */ - if (e_inum > dp->i_ino) { - /* - * We are already in the right order, so just - * lock on the inode of the entry. - * We need to use nowait if dp is in the AIL. - */ - - lp = (xfs_log_item_t *)dp->i_itemp; - if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { - if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { - attempts++; -#ifdef DEBUG - xfs_rm_attempts++; -#endif - - /* - * Unlock dp and try again. - * xfs_iunlock will try to push the tail - * if the inode is in the AIL. - */ - - xfs_iunlock(dp, XFS_ILOCK_EXCL); - - if ((attempts % 5) == 0) { - delay(1); /* Don't just spin the CPU */ -#ifdef DEBUG - xfs_rm_lock_delays++; -#endif - } - goto again; - } - } else { - xfs_ilock(ip, XFS_ILOCK_EXCL); - } - } else if (e_inum < dp->i_ino) { - xfs_iunlock(dp, XFS_ILOCK_EXCL); - - ips[0] = ip; - ips[1] = dp; - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); - } - /* else e_inum == dp->i_ino */ - /* This can happen if we're asked to lock /x/.. - * the entry is "..", which is also the parent directory. - */ - - return 0; -} - -#ifdef DEBUG int xfs_locked_n; int xfs_small_retries; int xfs_middle_retries; @@ -2098,12 +1838,44 @@ again: #endif } -#ifdef DEBUG -#define REMOVE_DEBUG_TRACE(x) {remove_which_error_return = (x);} -int remove_which_error_return = 0; -#else /* ! DEBUG */ -#define REMOVE_DEBUG_TRACE(x) -#endif /* ! DEBUG */ +void +xfs_lock_two_inodes( + xfs_inode_t *ip0, + xfs_inode_t *ip1, + uint lock_mode) +{ + xfs_inode_t *temp; + int attempts = 0; + xfs_log_item_t *lp; + + ASSERT(ip0->i_ino != ip1->i_ino); + + if (ip0->i_ino > ip1->i_ino) { + temp = ip0; + ip0 = ip1; + ip1 = temp; + } + + again: + xfs_ilock(ip0, xfs_lock_inumorder(lock_mode, 0)); + + /* + * If the first lock we have locked is in the AIL, we must TRY to get + * the second lock. If we can't get it, we must release the first one + * and try again. + */ + lp = (xfs_log_item_t *)ip0->i_itemp; + if (lp && (lp->li_flags & XFS_LI_IN_AIL)) { + if (!xfs_ilock_nowait(ip1, xfs_lock_inumorder(lock_mode, 1))) { + xfs_iunlock(ip0, lock_mode); + if ((++attempts % 5) == 0) + delay(1); /* Don't just spin the CPU */ + goto again; + } + } else { + xfs_ilock(ip1, xfs_lock_inumorder(lock_mode, 1)); + } +} int xfs_remove( @@ -2113,6 +1885,7 @@ xfs_remove( { xfs_mount_t *mp = dp->i_mount; xfs_trans_t *tp = NULL; + int is_dir = S_ISDIR(ip->i_d.di_mode); int error = 0; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; @@ -2120,8 +1893,10 @@ xfs_remove( int committed; int link_zero; uint resblks; + uint log_count; xfs_itrace_entry(dp); + xfs_itrace_entry(ip); if (XFS_FORCED_SHUTDOWN(mp)) return XFS_ERROR(EIO); @@ -2134,19 +1909,23 @@ xfs_remove( return error; } - xfs_itrace_entry(ip); - xfs_itrace_ref(ip); - error = XFS_QM_DQATTACH(mp, dp, 0); - if (!error) - error = XFS_QM_DQATTACH(mp, ip, 0); - if (error) { - REMOVE_DEBUG_TRACE(__LINE__); + if (error) goto std_return; - } - tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); + error = XFS_QM_DQATTACH(mp, ip, 0); + if (error) + goto std_return; + + if (is_dir) { + tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); + log_count = XFS_DEFAULT_LOG_COUNT; + } else { + tp = xfs_trans_alloc(mp, XFS_TRANS_REMOVE); + log_count = XFS_REMOVE_LOG_COUNT; + } cancel_flags = XFS_TRANS_RELEASE_LOG_RES; + /* * We try to get the real space reservation first, * allowing for directory btree deletion(s) implying @@ -2158,25 +1937,19 @@ xfs_remove( */ resblks = XFS_REMOVE_SPACE_RES(mp); error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); + XFS_TRANS_PERM_LOG_RES, log_count); if (error == ENOSPC) { resblks = 0; error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_REMOVE_LOG_COUNT); + XFS_TRANS_PERM_LOG_RES, log_count); } if (error) { ASSERT(error != ENOSPC); - REMOVE_DEBUG_TRACE(__LINE__); - xfs_trans_cancel(tp, 0); - return error; + cancel_flags = 0; + goto out_trans_cancel; } - error = xfs_lock_dir_and_entry(dp, ip); - if (error) { - REMOVE_DEBUG_TRACE(__LINE__); - xfs_trans_cancel(tp, cancel_flags); - goto std_return; - } + xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL); /* * At this point, we've gotten both the directory and the entry @@ -2189,46 +1962,83 @@ xfs_remove( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); /* - * Entry must exist since we did a lookup in xfs_lock_dir_and_entry. + * If we're removing a directory perform some additional validation. */ + if (is_dir) { + ASSERT(ip->i_d.di_nlink >= 2); + if (ip->i_d.di_nlink != 2) { + error = XFS_ERROR(ENOTEMPTY); + goto out_trans_cancel; + } + if (!xfs_dir_isempty(ip)) { + error = XFS_ERROR(ENOTEMPTY); + goto out_trans_cancel; + } + } + XFS_BMAP_INIT(&free_list, &first_block); error = xfs_dir_removename(tp, dp, name, ip->i_ino, &first_block, &free_list, resblks); if (error) { ASSERT(error != ENOENT); - REMOVE_DEBUG_TRACE(__LINE__); - goto error1; + goto out_bmap_cancel; } xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + /* + * Bump the in memory generation count on the parent + * directory so that other can know that it has changed. + */ dp->i_gen++; xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); - error = xfs_droplink(tp, ip); - if (error) { - REMOVE_DEBUG_TRACE(__LINE__); - goto error1; + if (is_dir) { + /* + * Drop the link from ip's "..". + */ + error = xfs_droplink(tp, dp); + if (error) + goto out_bmap_cancel; + + /* + * Drop the link from dp to ip. + */ + error = xfs_droplink(tp, ip); + if (error) + goto out_bmap_cancel; + } else { + /* + * When removing a non-directory we need to log the parent + * inode here for the i_gen update. For a directory this is + * done implicitly by the xfs_droplink call for the ".." entry. + */ + xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); } - /* Determine if this is the last link while + /* + * Drop the "." link from ip to self. + */ + error = xfs_droplink(tp, ip); + if (error) + goto out_bmap_cancel; + + /* + * Determine if this is the last link while * we are in the transaction. */ - link_zero = (ip)->i_d.di_nlink==0; + link_zero = (ip->i_d.di_nlink == 0); /* * If this is a synchronous mount, make sure that the * remove transaction goes to disk before returning to * the user. */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { + if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) xfs_trans_set_sync(tp); - } error = xfs_bmap_finish(&tp, &free_list, &committed); - if (error) { - REMOVE_DEBUG_TRACE(__LINE__); - goto error_rele; - } + if (error) + goto out_bmap_cancel; error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); if (error) @@ -2240,38 +2050,26 @@ xfs_remove( * will get killed on last close in xfs_close() so we don't * have to worry about that. */ - if (link_zero && xfs_inode_is_filestream(ip)) + if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) xfs_filestream_deassociate(ip); xfs_itrace_exit(ip); + xfs_itrace_exit(dp); -/* Fall through to std_return with error = 0 */ std_return: if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { - (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, - dp, DM_RIGHT_NULL, - NULL, DM_RIGHT_NULL, - name->name, NULL, ip->i_d.di_mode, error, 0); + XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, dp, DM_RIGHT_NULL, + NULL, DM_RIGHT_NULL, name->name, NULL, + ip->i_d.di_mode, error, 0); } - return error; - error1: - xfs_bmap_cancel(&free_list); - cancel_flags |= XFS_TRANS_ABORT; - xfs_trans_cancel(tp, cancel_flags); - goto std_return; + return error; - error_rele: - /* - * In this case make sure to not release the inode until after - * the current transaction is aborted. Releasing it beforehand - * can cause us to go to xfs_inactive and start a recursive - * transaction which can easily deadlock with the current one. - */ + out_bmap_cancel: xfs_bmap_cancel(&free_list); cancel_flags |= XFS_TRANS_ABORT; + out_trans_cancel: xfs_trans_cancel(tp, cancel_flags); - goto std_return; } @@ -2283,7 +2081,6 @@ xfs_link( { xfs_mount_t *mp = tdp->i_mount; xfs_trans_t *tp; - xfs_inode_t *ips[2]; int error; xfs_bmap_free_t free_list; xfs_fsblock_t first_block; @@ -2331,15 +2128,7 @@ xfs_link( goto error_return; } - if (sip->i_ino < tdp->i_ino) { - ips[0] = sip; - ips[1] = tdp; - } else { - ips[0] = tdp; - ips[1] = sip; - } - - xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL); + xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL); /* * Increment vnode ref counts since xfs_trans_commit & @@ -2480,7 +2269,7 @@ xfs_mkdir( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), prid, + current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -2638,186 +2427,6 @@ std_return: } int -xfs_rmdir( - xfs_inode_t *dp, - struct xfs_name *name, - xfs_inode_t *cdp) -{ - xfs_mount_t *mp = dp->i_mount; - xfs_trans_t *tp; - int error; - xfs_bmap_free_t free_list; - xfs_fsblock_t first_block; - int cancel_flags; - int committed; - int last_cdp_link; - uint resblks; - - xfs_itrace_entry(dp); - - if (XFS_FORCED_SHUTDOWN(mp)) - return XFS_ERROR(EIO); - - if (DM_EVENT_ENABLED(dp, DM_EVENT_REMOVE)) { - error = XFS_SEND_NAMESP(mp, DM_EVENT_REMOVE, - dp, DM_RIGHT_NULL, - NULL, DM_RIGHT_NULL, name->name, - NULL, cdp->i_d.di_mode, 0, 0); - if (error) - return XFS_ERROR(error); - } - - /* - * Get the dquots for the inodes. - */ - error = XFS_QM_DQATTACH(mp, dp, 0); - if (!error) - error = XFS_QM_DQATTACH(mp, cdp, 0); - if (error) { - REMOVE_DEBUG_TRACE(__LINE__); - goto std_return; - } - - tp = xfs_trans_alloc(mp, XFS_TRANS_RMDIR); - cancel_flags = XFS_TRANS_RELEASE_LOG_RES; - /* - * We try to get the real space reservation first, - * allowing for directory btree deletion(s) implying - * possible bmap insert(s). If we can't get the space - * reservation then we use 0 instead, and avoid the bmap - * btree insert(s) in the directory code by, if the bmap - * insert tries to happen, instead trimming the LAST - * block from the directory. - */ - resblks = XFS_REMOVE_SPACE_RES(mp); - error = xfs_trans_reserve(tp, resblks, XFS_REMOVE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); - if (error == ENOSPC) { - resblks = 0; - error = xfs_trans_reserve(tp, 0, XFS_REMOVE_LOG_RES(mp), 0, - XFS_TRANS_PERM_LOG_RES, XFS_DEFAULT_LOG_COUNT); - } - if (error) { - ASSERT(error != ENOSPC); - cancel_flags = 0; - goto error_return; - } - XFS_BMAP_INIT(&free_list, &first_block); - - /* - * Now lock the child directory inode and the parent directory - * inode in the proper order. This will take care of validating - * that the directory entry for the child directory inode has - * not changed while we were obtaining a log reservation. - */ - error = xfs_lock_dir_and_entry(dp, cdp); - if (error) { - xfs_trans_cancel(tp, cancel_flags); - goto std_return; - } - - IHOLD(dp); - xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL); - - IHOLD(cdp); - xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL); - - ASSERT(cdp->i_d.di_nlink >= 2); - if (cdp->i_d.di_nlink != 2) { - error = XFS_ERROR(ENOTEMPTY); - goto error_return; - } - if (!xfs_dir_isempty(cdp)) { - error = XFS_ERROR(ENOTEMPTY); - goto error_return; - } - - error = xfs_dir_removename(tp, dp, name, cdp->i_ino, - &first_block, &free_list, resblks); - if (error) - goto error1; - - xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - - /* - * Bump the in memory generation count on the parent - * directory so that other can know that it has changed. - */ - dp->i_gen++; - - /* - * Drop the link from cdp's "..". - */ - error = xfs_droplink(tp, dp); - if (error) { - goto error1; - } - - /* - * Drop the link from dp to cdp. - */ - error = xfs_droplink(tp, cdp); - if (error) { - goto error1; - } - - /* - * Drop the "." link from cdp to self. - */ - error = xfs_droplink(tp, cdp); - if (error) { - goto error1; - } - - /* Determine these before committing transaction */ - last_cdp_link = (cdp)->i_d.di_nlink==0; - - /* - * If this is a synchronous mount, make sure that the - * rmdir transaction goes to disk before returning to - * the user. - */ - if (mp->m_flags & (XFS_MOUNT_WSYNC|XFS_MOUNT_DIRSYNC)) { - xfs_trans_set_sync(tp); - } - - error = xfs_bmap_finish (&tp, &free_list, &committed); - if (error) { - xfs_bmap_cancel(&free_list); - xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | - XFS_TRANS_ABORT)); - goto std_return; - } - - error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); - if (error) { - goto std_return; - } - - - /* Fall through to std_return with error = 0 or the errno - * from xfs_trans_commit. */ - std_return: - if (DM_EVENT_ENABLED(dp, DM_EVENT_POSTREMOVE)) { - (void) XFS_SEND_NAMESP(mp, DM_EVENT_POSTREMOVE, - dp, DM_RIGHT_NULL, - NULL, DM_RIGHT_NULL, - name->name, NULL, cdp->i_d.di_mode, - error, 0); - } - return error; - - error1: - xfs_bmap_cancel(&free_list); - cancel_flags |= XFS_TRANS_ABORT; - /* FALLTHROUGH */ - - error_return: - xfs_trans_cancel(tp, cancel_flags); - goto std_return; -} - -int xfs_symlink( xfs_inode_t *dp, struct xfs_name *link_name, @@ -2886,7 +2495,7 @@ xfs_symlink( * Make sure that we have allocated dquot(s) on disk. */ error = XFS_QM_DQVOPALLOC(mp, dp, - current_fsuid(credp), current_fsgid(credp), prid, + current_fsuid(), current_fsgid(), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp); if (error) goto std_return; @@ -3181,14 +2790,13 @@ int xfs_reclaim( xfs_inode_t *ip) { - bhv_vnode_t *vp = XFS_ITOV(ip); xfs_itrace_entry(ip); - ASSERT(!VN_MAPPED(vp)); + ASSERT(!VN_MAPPED(VFS_I(ip))); /* bad inode, get out here ASAP */ - if (VN_BAD(vp)) { + if (VN_BAD(VFS_I(ip))) { xfs_ireclaim(ip); return 0; } @@ -3225,7 +2833,7 @@ xfs_reclaim( XFS_MOUNT_ILOCK(mp); spin_lock(&ip->i_flags_lock); __xfs_iflags_set(ip, XFS_IRECLAIMABLE); - vn_to_inode(vp)->i_private = NULL; + VFS_I(ip)->i_private = NULL; ip->i_vnode = NULL; spin_unlock(&ip->i_flags_lock); list_add_tail(&ip->i_reclaim, &mp->m_del_inodes); @@ -3241,8 +2849,7 @@ xfs_finish_reclaim( int sync_mode) { xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); - bhv_vnode_t *vp = XFS_ITOV_NULL(ip); - int error; + struct inode *vp = VFS_I(ip); if (vp && VN_BAD(vp)) goto reclaim; @@ -3285,29 +2892,16 @@ xfs_finish_reclaim( xfs_iflock(ip); } - if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { - if (ip->i_update_core || - ((ip->i_itemp != NULL) && - (ip->i_itemp->ili_format.ilf_fields != 0))) { - error = xfs_iflush(ip, sync_mode); - /* - * If we hit an error, typically because of filesystem - * shutdown, we don't need to let vn_reclaim to know - * because we're gonna reclaim the inode anyway. - */ - if (error) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - goto reclaim; - } - xfs_iflock(ip); /* synchronize with xfs_iflush_done */ - } - - ASSERT(ip->i_update_core == 0); - ASSERT(ip->i_itemp == NULL || - ip->i_itemp->ili_format.ilf_fields == 0); + /* + * In the case of a forced shutdown we rely on xfs_iflush() to + * wait for the inode to be unpinned before returning an error. + */ + if (xfs_iflush(ip, sync_mode) == 0) { + /* synchronize with xfs_iflush_done */ + xfs_iflock(ip); + xfs_ifunlock(ip); } - xfs_ifunlock(ip); xfs_iunlock(ip, XFS_ILOCK_EXCL); reclaim: @@ -3418,7 +3012,7 @@ xfs_alloc_file_space( /* Generate a DMAPI event if needed. */ if (alloc_type != 0 && offset < ip->i_size && - (attr_flags&ATTR_DMI) == 0 && + (attr_flags & XFS_ATTR_DMI) == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { xfs_off_t end_dmi_offset; @@ -3532,7 +3126,7 @@ retry: allocatesize_fsb -= allocated_fsb; } dmapi_enospc_check: - if (error == ENOSPC && (attr_flags & ATTR_DMI) == 0 && + if (error == ENOSPC && (attr_flags & XFS_ATTR_DMI) == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE)) { error = XFS_SEND_NAMESP(mp, DM_EVENT_NOSPACE, ip, DM_RIGHT_NULL, @@ -3643,7 +3237,6 @@ xfs_free_file_space( xfs_off_t len, int attr_flags) { - bhv_vnode_t *vp; int committed; int done; xfs_off_t end_dmi_offset; @@ -3663,7 +3256,6 @@ xfs_free_file_space( xfs_trans_t *tp; int need_iolock = 1; - vp = XFS_ITOV(ip); mp = ip->i_mount; xfs_itrace_entry(ip); @@ -3679,7 +3271,7 @@ xfs_free_file_space( end_dmi_offset = offset + len; endoffset_fsb = XFS_B_TO_FSBT(mp, end_dmi_offset); - if (offset < ip->i_size && (attr_flags & ATTR_DMI) == 0 && + if (offset < ip->i_size && (attr_flags & XFS_ATTR_DMI) == 0 && DM_EVENT_ENABLED(ip, DM_EVENT_WRITE)) { if (end_dmi_offset > ip->i_size) end_dmi_offset = ip->i_size; @@ -3690,7 +3282,7 @@ xfs_free_file_space( return error; } - if (attr_flags & ATTR_NOLOCK) + if (attr_flags & XFS_ATTR_NOLOCK) need_iolock = 0; if (need_iolock) { xfs_ilock(ip, XFS_IOLOCK_EXCL); @@ -3700,7 +3292,7 @@ xfs_free_file_space( rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE); ioffset = offset & ~(rounding - 1); - if (VN_CACHED(vp) != 0) { + if (VN_CACHED(VFS_I(ip)) != 0) { xfs_inval_cached_trace(ip, ioffset, -1, ioffset, -1); error = xfs_flushinval_pages(ip, ioffset, -1, FI_REMAPF_LOCKED); if (error) @@ -3867,7 +3459,7 @@ xfs_change_file_space( xfs_off_t startoffset; xfs_off_t llen; xfs_trans_t *tp; - bhv_vattr_t va; + struct iattr iattr; xfs_itrace_entry(ip); @@ -3941,10 +3533,10 @@ xfs_change_file_space( break; } - va.va_mask = XFS_AT_SIZE; - va.va_size = startoffset; + iattr.ia_valid = ATTR_SIZE; + iattr.ia_size = startoffset; - error = xfs_setattr(ip, &va, attr_flags, credp); + error = xfs_setattr(ip, &iattr, attr_flags, credp); if (error) return error; @@ -3974,7 +3566,7 @@ xfs_change_file_space( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); - if ((attr_flags & ATTR_DMI) == 0) { + if ((attr_flags & XFS_ATTR_DMI) == 0) { ip->i_d.di_mode &= ~S_ISUID; /* diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 57335ba4ce5..e932a96bec5 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -2,9 +2,9 @@ #define _XFS_VNODEOPS_H 1 struct attrlist_cursor_kern; -struct bhv_vattr; struct cred; struct file; +struct iattr; struct inode; struct iovec; struct kiocb; @@ -15,14 +15,18 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); -int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags, +int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, struct cred *credp); +#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ +#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ +#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ + int xfs_readlink(struct xfs_inode *ip, char *link); int xfs_fsync(struct xfs_inode *ip); int xfs_release(struct xfs_inode *ip); int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, - struct xfs_inode **ipp); + struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, @@ -31,8 +35,6 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, struct xfs_name *target_name); int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, mode_t mode, struct xfs_inode **ipp, struct cred *credp); -int xfs_rmdir(struct xfs_inode *dp, struct xfs_name *name, - struct xfs_inode *cdp); int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, xfs_off_t *offset, filldir_t filldir); int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |