aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig10
-rw-r--r--fs/adfs/adfs.h4
-rw-r--r--fs/adfs/dir.c10
-rw-r--r--fs/adfs/dir_f.c17
-rw-r--r--fs/adfs/dir_fplus.c17
-rw-r--r--fs/adfs/file.c2
-rw-r--r--fs/adfs/inode.c4
-rw-r--r--fs/adfs/map.c2
-rw-r--r--fs/adfs/super.c4
-rw-r--r--fs/affs/affs.h1
-rw-r--r--fs/affs/dir.c2
-rw-r--r--fs/affs/file.c14
-rw-r--r--fs/affs/super.c54
-rw-r--r--fs/afs/mntpt.c2
-rw-r--r--fs/afs/super.c4
-rw-r--r--fs/autofs/dirhash.c5
-rw-r--r--fs/autofs4/autofs_i.h6
-rw-r--r--fs/autofs4/dev-ioctl.c195
-rw-r--r--fs/autofs4/expire.c15
-rw-r--r--fs/autofs4/root.c7
-rw-r--r--fs/befs/linuxvfs.c5
-rw-r--r--fs/bfs/dir.c8
-rw-r--r--fs/bfs/inode.c52
-rw-r--r--fs/bio.c5
-rw-r--r--fs/block_dev.c19
-rw-r--r--fs/btrfs/disk-io.c26
-rw-r--r--fs/btrfs/inode.c7
-rw-r--r--fs/btrfs/super.c11
-rw-r--r--fs/btrfs/transaction.c12
-rw-r--r--fs/cachefiles/interface.c4
-rw-r--r--fs/char_dev.c14
-rw-r--r--fs/cifs/cifs_dfs_ref.c2
-rw-r--r--fs/cifs/cifsfs.c6
-rw-r--r--fs/compat.c4
-rw-r--r--fs/compat_ioctl.c2
-rw-r--r--fs/configfs/configfs_internal.h3
-rw-r--r--fs/configfs/dir.c196
-rw-r--r--fs/configfs/inode.c38
-rw-r--r--fs/dcache.c7
-rw-r--r--fs/dlm/dir.c7
-rw-r--r--fs/dlm/lockspace.c17
-rw-r--r--fs/dlm/lowcomms.c22
-rw-r--r--fs/dlm/lowcomms.h3
-rw-r--r--fs/dlm/member.c19
-rw-r--r--fs/dlm/requestqueue.c2
-rw-r--r--fs/ecryptfs/super.c5
-rw-r--r--fs/eventfd.c3
-rw-r--r--fs/exofs/common.h6
-rw-r--r--fs/exofs/inode.c8
-rw-r--r--fs/exofs/osd.c26
-rw-r--r--fs/exofs/super.c25
-rw-r--r--fs/ext2/Makefile2
-rw-r--r--fs/ext2/dir.c2
-rw-r--r--fs/ext2/ext2.h5
-rw-r--r--fs/ext2/file.c4
-rw-r--r--fs/ext2/fsync.c50
-rw-r--r--fs/ext2/inode.c11
-rw-r--r--fs/ext2/super.c60
-rw-r--r--fs/ext3/balloc.c3
-rw-r--r--fs/ext3/ialloc.c3
-rw-r--r--fs/ext3/inode.c1
-rw-r--r--fs/ext3/resize.c2
-rw-r--r--fs/ext3/super.c34
-rw-r--r--fs/ext3/xattr.c1
-rw-r--r--fs/ext4/super.c16
-rw-r--r--fs/fat/cache.c6
-rw-r--r--fs/fat/dir.c18
-rw-r--r--fs/fat/fat.h13
-rw-r--r--fs/fat/fatent.c17
-rw-r--r--fs/fat/file.c198
-rw-r--r--fs/fat/inode.c59
-rw-r--r--fs/fat/misc.c22
-rw-r--r--fs/fat/namei_msdos.c6
-rw-r--r--fs/fat/namei_vfat.c6
-rw-r--r--fs/file_table.c40
-rw-r--r--fs/freevxfs/vxfs_super.c4
-rw-r--r--fs/fs-writeback.c92
-rw-r--r--fs/fuse/Makefile1
-rw-r--r--fs/fuse/cuse.c610
-rw-r--r--fs/fuse/dev.c15
-rw-r--r--fs/fuse/dir.c33
-rw-r--r--fs/fuse/file.c346
-rw-r--r--fs/fuse/fuse_i.h47
-rw-r--r--fs/fuse/inode.c118
-rw-r--r--fs/gfs2/Makefile1
-rw-r--r--fs/gfs2/bmap.c3
-rw-r--r--fs/gfs2/glock.c12
-rw-r--r--fs/gfs2/log.c11
-rw-r--r--fs/gfs2/lops.c3
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/rgrp.c11
-rw-r--r--fs/gfs2/super.c13
-rw-r--r--fs/gfs2/trace_gfs2.h407
-rw-r--r--fs/hfs/super.c23
-rw-r--r--fs/hfsplus/super.c25
-rw-r--r--fs/hpfs/super.c12
-rw-r--r--fs/inode.c4
-rw-r--r--fs/internal.h17
-rw-r--r--fs/isofs/inode.c5
-rw-r--r--fs/jffs2/fs.c18
-rw-r--r--fs/jffs2/os-linux.h1
-rw-r--r--fs/jffs2/super.c26
-rw-r--r--fs/jfs/super.c27
-rw-r--r--fs/libfs.c25
-rw-r--r--fs/minix/dir.c2
-rw-r--r--fs/minix/file.c20
-rw-r--r--fs/minix/inode.c37
-rw-r--r--fs/minix/minix.h2
-rw-r--r--fs/namei.c129
-rw-r--r--fs/namespace.c327
-rw-r--r--fs/ncpfs/inode.c4
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/super.c2
-rw-r--r--fs/nfsd/export.c78
-rw-r--r--fs/nfsd/vfs.c54
-rw-r--r--fs/nilfs2/bmap.c272
-rw-r--r--fs/nilfs2/bmap.h135
-rw-r--r--fs/nilfs2/btnode.c9
-rw-r--r--fs/nilfs2/btnode.h2
-rw-r--r--fs/nilfs2/btree.c366
-rw-r--r--fs/nilfs2/btree.h31
-rw-r--r--fs/nilfs2/cpfile.c53
-rw-r--r--fs/nilfs2/cpfile.h4
-rw-r--r--fs/nilfs2/dat.c36
-rw-r--r--fs/nilfs2/dat.h2
-rw-r--r--fs/nilfs2/direct.c139
-rw-r--r--fs/nilfs2/direct.h20
-rw-r--r--fs/nilfs2/gcinode.c5
-rw-r--r--fs/nilfs2/inode.c18
-rw-r--r--fs/nilfs2/ioctl.c35
-rw-r--r--fs/nilfs2/mdt.c3
-rw-r--r--fs/nilfs2/nilfs.h1
-rw-r--r--fs/nilfs2/recovery.c37
-rw-r--r--fs/nilfs2/sb.h1
-rw-r--r--fs/nilfs2/segbuf.c3
-rw-r--r--fs/nilfs2/seglist.h85
-rw-r--r--fs/nilfs2/segment.c130
-rw-r--r--fs/nilfs2/segment.h12
-rw-r--r--fs/nilfs2/sufile.c119
-rw-r--r--fs/nilfs2/sufile.h62
-rw-r--r--fs/nilfs2/super.c265
-rw-r--r--fs/nilfs2/the_nilfs.c114
-rw-r--r--fs/nilfs2/the_nilfs.h23
-rw-r--r--fs/ntfs/super.c54
-rw-r--r--fs/ocfs2/alloc.c80
-rw-r--r--fs/ocfs2/blockcheck.c184
-rw-r--r--fs/ocfs2/blockcheck.h29
-rw-r--r--fs/ocfs2/cluster/masklog.h35
-rw-r--r--fs/ocfs2/cluster/tcp.c7
-rw-r--r--fs/ocfs2/dir.c21
-rw-r--r--fs/ocfs2/dlmglue.c51
-rw-r--r--fs/ocfs2/dlmglue.h11
-rw-r--r--fs/ocfs2/file.c56
-rw-r--r--fs/ocfs2/journal.c111
-rw-r--r--fs/ocfs2/journal.h4
-rw-r--r--fs/ocfs2/ocfs2.h16
-rw-r--r--fs/ocfs2/ocfs2_lockid.h5
-rw-r--r--fs/ocfs2/quota_global.c4
-rw-r--r--fs/ocfs2/quota_local.c21
-rw-r--r--fs/ocfs2/super.c88
-rw-r--r--fs/ocfs2/xattr.c5
-rw-r--r--fs/omfs/file.c17
-rw-r--r--fs/open.c4
-rw-r--r--fs/partitions/check.c42
-rw-r--r--fs/proc/internal.h25
-rw-r--r--fs/proc/proc_devtree.c1
-rw-r--r--fs/qnx4/Makefile2
-rw-r--r--fs/qnx4/bitmap.c7
-rw-r--r--fs/qnx4/dir.c9
-rw-r--r--fs/qnx4/file.c5
-rw-r--r--fs/qnx4/fsync.c169
-rw-r--r--fs/qnx4/inode.c58
-rw-r--r--fs/qnx4/namei.c13
-rw-r--r--fs/qnx4/qnx4.h57
-rw-r--r--fs/qnx4/truncate.c6
-rw-r--r--fs/quota/quota.c25
-rw-r--r--fs/ramfs/inode.c9
-rw-r--r--fs/reiserfs/dir.c10
-rw-r--r--fs/reiserfs/super.c33
-rw-r--r--fs/reiserfs/xattr.c3
-rw-r--r--fs/smbfs/inode.c4
-rw-r--r--fs/squashfs/super.c4
-rw-r--r--fs/super.c192
-rw-r--r--fs/sync.c117
-rw-r--r--fs/sysv/dir.c2
-rw-r--r--fs/sysv/file.c17
-rw-r--r--fs/sysv/inode.c75
-rw-r--r--fs/sysv/sysv.h1
-rw-r--r--fs/ubifs/super.c20
-rw-r--r--fs/udf/Makefile2
-rw-r--r--fs/udf/dir.c2
-rw-r--r--fs/udf/file.c2
-rw-r--r--fs/udf/fsync.c52
-rw-r--r--fs/udf/super.c11
-rw-r--r--fs/udf/udfdecl.h3
-rw-r--r--fs/ufs/dir.c2
-rw-r--r--fs/ufs/file.c23
-rw-r--r--fs/ufs/super.c65
-rw-r--r--fs/ufs/ufs.h1
-rw-r--r--fs/xattr.c4
-rw-r--r--fs/xfs/Kconfig1
-rw-r--r--fs/xfs/Makefile5
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c523
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c25
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c53
-rw-r--r--fs/xfs/linux-2.6/xfs_lrw.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c61
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c479
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h19
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c67
-rw-r--r--fs/xfs/quota/xfs_dquot.c5
-rw-r--r--fs/xfs/quota/xfs_dquot.h1
-rw-r--r--fs/xfs/quota/xfs_dquot_item.c1
-rw-r--r--fs/xfs/quota/xfs_qm.c168
-rw-r--r--fs/xfs/quota/xfs_qm.h21
-rw-r--r--fs/xfs/quota/xfs_qm_bhv.c77
-rw-r--r--fs/xfs/quota/xfs_qm_stats.c1
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c113
-rw-r--r--fs/xfs/quota/xfs_trans_dquot.c66
-rw-r--r--fs/xfs/xfs_acl.c874
-rw-r--r--fs/xfs/xfs_acl.h97
-rw-r--r--fs/xfs/xfs_ag.h2
-rw-r--r--fs/xfs/xfs_arch.h32
-rw-r--r--fs/xfs/xfs_attr.c13
-rw-r--r--fs/xfs/xfs_bmap.c34
-rw-r--r--fs/xfs/xfs_bmap_btree.c4
-rw-r--r--fs/xfs/xfs_filestream.c6
-rw-r--r--fs/xfs/xfs_fs.h11
-rw-r--r--fs/xfs/xfs_iget.c8
-rw-r--r--fs/xfs/xfs_inode.c1
-rw-r--r--fs/xfs/xfs_inode.h6
-rw-r--r--fs/xfs/xfs_iomap.c13
-rw-r--r--fs/xfs/xfs_log_recover.c38
-rw-r--r--fs/xfs/xfs_mount.c105
-rw-r--r--fs/xfs/xfs_mount.h84
-rw-r--r--fs/xfs/xfs_qmops.c152
-rw-r--r--fs/xfs/xfs_quota.h129
-rw-r--r--fs/xfs/xfs_rename.c3
-rw-r--r--fs/xfs/xfs_rw.c1
-rw-r--r--fs/xfs/xfs_trans.c17
-rw-r--r--fs/xfs/xfs_utils.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c114
-rw-r--r--fs/xfs/xfs_vnodeops.h1
244 files changed, 6148 insertions, 5235 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 9f7270f36b2..525da2e8f73 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -62,6 +62,16 @@ source "fs/autofs/Kconfig"
source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
+config CUSE
+ tristate "Character device in Userpace support"
+ depends on FUSE_FS
+ help
+ This FUSE extension allows character devices to be
+ implemented in userspace.
+
+ If you want to develop or use userspace character device
+ based on CUSE, answer Y or M.
+
config GENERIC_ACL
bool
select FS_POSIX_ACL
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index e0a85dbeeb8..a6665f37f45 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -53,6 +53,7 @@ struct adfs_dir_ops {
int (*update)(struct adfs_dir *dir, struct object_info *obj);
int (*create)(struct adfs_dir *dir, struct object_info *obj);
int (*remove)(struct adfs_dir *dir, struct object_info *obj);
+ int (*sync)(struct adfs_dir *dir);
void (*free)(struct adfs_dir *dir);
};
@@ -90,7 +91,8 @@ extern const struct dentry_operations adfs_dentry_operations;
extern struct adfs_dir_ops adfs_f_dir_ops;
extern struct adfs_dir_ops adfs_fplus_dir_ops;
-extern int adfs_dir_update(struct super_block *sb, struct object_info *obj);
+extern int adfs_dir_update(struct super_block *sb, struct object_info *obj,
+ int wait);
/* file.c */
extern const struct inode_operations adfs_file_inode_operations;
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c
index e867ccf3724..4d4073447d1 100644
--- a/fs/adfs/dir.c
+++ b/fs/adfs/dir.c
@@ -83,7 +83,7 @@ out:
}
int
-adfs_dir_update(struct super_block *sb, struct object_info *obj)
+adfs_dir_update(struct super_block *sb, struct object_info *obj, int wait)
{
int ret = -EINVAL;
#ifdef CONFIG_ADFS_FS_RW
@@ -106,6 +106,12 @@ adfs_dir_update(struct super_block *sb, struct object_info *obj)
ret = ops->update(&dir, obj);
write_unlock(&adfs_dir_lock);
+ if (wait) {
+ int err = ops->sync(&dir);
+ if (!ret)
+ ret = err;
+ }
+
ops->free(&dir);
out:
#endif
@@ -199,7 +205,7 @@ const struct file_operations adfs_dir_operations = {
.read = generic_read_dir,
.llseek = generic_file_llseek,
.readdir = adfs_readdir,
- .fsync = file_fsync,
+ .fsync = simple_fsync,
};
static int
diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c
index ea7df214692..31df6adf0de 100644
--- a/fs/adfs/dir_f.c
+++ b/fs/adfs/dir_f.c
@@ -437,6 +437,22 @@ bad_dir:
#endif
}
+static int
+adfs_f_sync(struct adfs_dir *dir)
+{
+ int err = 0;
+ int i;
+
+ for (i = dir->nr_buffers - 1; i >= 0; i--) {
+ struct buffer_head *bh = dir->bh[i];
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh))
+ err = -EIO;
+ }
+
+ return err;
+}
+
static void
adfs_f_free(struct adfs_dir *dir)
{
@@ -456,5 +472,6 @@ struct adfs_dir_ops adfs_f_dir_ops = {
.setpos = adfs_f_setpos,
.getnext = adfs_f_getnext,
.update = adfs_f_update,
+ .sync = adfs_f_sync,
.free = adfs_f_free
};
diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c
index 1ec644e32df..139e0f345f1 100644
--- a/fs/adfs/dir_fplus.c
+++ b/fs/adfs/dir_fplus.c
@@ -161,6 +161,22 @@ out:
return ret;
}
+static int
+adfs_fplus_sync(struct adfs_dir *dir)
+{
+ int err = 0;
+ int i;
+
+ for (i = dir->nr_buffers - 1; i >= 0; i--) {
+ struct buffer_head *bh = dir->bh[i];
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh))
+ err = -EIO;
+ }
+
+ return err;
+}
+
static void
adfs_fplus_free(struct adfs_dir *dir)
{
@@ -175,5 +191,6 @@ struct adfs_dir_ops adfs_fplus_dir_ops = {
.read = adfs_fplus_read,
.setpos = adfs_fplus_setpos,
.getnext = adfs_fplus_getnext,
+ .sync = adfs_fplus_sync,
.free = adfs_fplus_free
};
diff --git a/fs/adfs/file.c b/fs/adfs/file.c
index 36e381c6a99..8224d54a2af 100644
--- a/fs/adfs/file.c
+++ b/fs/adfs/file.c
@@ -30,7 +30,7 @@ const struct file_operations adfs_file_operations = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.mmap = generic_file_mmap,
- .fsync = file_fsync,
+ .fsync = simple_fsync,
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.splice_read = generic_file_splice_read,
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index e647200262a..05b3a677201 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -376,7 +376,7 @@ out:
* The adfs-specific inode data has already been updated by
* adfs_notify_change()
*/
-int adfs_write_inode(struct inode *inode, int unused)
+int adfs_write_inode(struct inode *inode, int wait)
{
struct super_block *sb = inode->i_sb;
struct object_info obj;
@@ -391,7 +391,7 @@ int adfs_write_inode(struct inode *inode, int unused)
obj.attr = ADFS_I(inode)->attr;
obj.size = inode->i_size;
- ret = adfs_dir_update(sb, &obj);
+ ret = adfs_dir_update(sb, &obj, wait);
unlock_kernel();
return ret;
}
diff --git a/fs/adfs/map.c b/fs/adfs/map.c
index 92ab4fbc203..568081b93f7 100644
--- a/fs/adfs/map.c
+++ b/fs/adfs/map.c
@@ -62,7 +62,7 @@ static DEFINE_RWLOCK(adfs_map_lock);
#define GET_FRAG_ID(_map,_start,_idmask) \
({ \
unsigned char *_m = _map + (_start >> 3); \
- u32 _frag = get_unaligned((u32 *)_m); \
+ u32 _frag = get_unaligned_le32(_m); \
_frag >>= (_start & 7); \
_frag & _idmask; \
})
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index dd9becca424..0ec5aaf47aa 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -132,11 +132,15 @@ static void adfs_put_super(struct super_block *sb)
int i;
struct adfs_sb_info *asb = ADFS_SB(sb);
+ lock_kernel();
+
for (i = 0; i < asb->s_map_size; i++)
brelse(asb->s_map[i].dm_bh);
kfree(asb->s_map);
kfree(asb);
sb->s_fs_info = NULL;
+
+ unlock_kernel();
}
static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt)
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 1a2d5e3c7f4..e511dc621a2 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -182,6 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent
void affs_free_prealloc(struct inode *inode);
extern void affs_truncate(struct inode *);
+int affs_file_fsync(struct file *, struct dentry *, int);
/* dir.c */
diff --git a/fs/affs/dir.c b/fs/affs/dir.c
index 7b36904dbea..8ca8f3a5559 100644
--- a/fs/affs/dir.c
+++ b/fs/affs/dir.c
@@ -21,7 +21,7 @@ const struct file_operations affs_dir_operations = {
.read = generic_read_dir,
.llseek = generic_file_llseek,
.readdir = affs_readdir,
- .fsync = file_fsync,
+ .fsync = affs_file_fsync,
};
/*
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 9246cb4aa01..184e55c1c9b 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -34,7 +34,7 @@ const struct file_operations affs_file_operations = {
.mmap = generic_file_mmap,
.open = affs_file_open,
.release = affs_file_release,
- .fsync = file_fsync,
+ .fsync = affs_file_fsync,
.splice_read = generic_file_splice_read,
};
@@ -915,3 +915,15 @@ affs_truncate(struct inode *inode)
}
affs_free_prealloc(inode);
}
+
+int affs_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+ struct inode * inode = dentry->d_inode;
+ int ret, err;
+
+ ret = write_inode_now(inode, 0);
+ err = sync_blockdev(inode->i_sb->s_bdev);
+ if (!ret)
+ ret = err;
+ return ret;
+}
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 63f5183f263..104fdcb3a7f 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -16,6 +16,7 @@
#include <linux/parser.h>
#include <linux/magic.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
#include "affs.h"
extern struct timezone sys_tz;
@@ -24,49 +25,67 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int affs_remount (struct super_block *sb, int *flags, char *data);
static void
+affs_commit_super(struct super_block *sb, int clean)
+{
+ struct affs_sb_info *sbi = AFFS_SB(sb);
+ struct buffer_head *bh = sbi->s_root_bh;
+ struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh);
+
+ tail->bm_flag = cpu_to_be32(clean);
+ secs_to_datestamp(get_seconds(), &tail->disk_change);
+ affs_fix_checksum(sb, bh);
+ mark_buffer_dirty(bh);
+}
+
+static void
affs_put_super(struct super_block *sb)
{
struct affs_sb_info *sbi = AFFS_SB(sb);
pr_debug("AFFS: put_super()\n");
- if (!(sb->s_flags & MS_RDONLY)) {
- AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(1);
- secs_to_datestamp(get_seconds(),
- &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
- affs_fix_checksum(sb, sbi->s_root_bh);
- mark_buffer_dirty(sbi->s_root_bh);
- }
+ lock_kernel();
+
+ if (!(sb->s_flags & MS_RDONLY))
+ affs_commit_super(sb, 1);
kfree(sbi->s_prefix);
affs_free_bitmap(sb);
affs_brelse(sbi->s_root_bh);
kfree(sbi);
sb->s_fs_info = NULL;
- return;
+
+ unlock_kernel();
}
static void
affs_write_super(struct super_block *sb)
{
int clean = 2;
- struct affs_sb_info *sbi = AFFS_SB(sb);
+ lock_super(sb);
if (!(sb->s_flags & MS_RDONLY)) {
// if (sbi->s_bitmap[i].bm_bh) {
// if (buffer_dirty(sbi->s_bitmap[i].bm_bh)) {
// clean = 0;
- AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->bm_flag = cpu_to_be32(clean);
- secs_to_datestamp(get_seconds(),
- &AFFS_ROOT_TAIL(sb, sbi->s_root_bh)->disk_change);
- affs_fix_checksum(sb, sbi->s_root_bh);
- mark_buffer_dirty(sbi->s_root_bh);
+ affs_commit_super(sb, clean);
sb->s_dirt = !clean; /* redo until bitmap synced */
} else
sb->s_dirt = 0;
+ unlock_super(sb);
pr_debug("AFFS: write_super() at %lu, clean=%d\n", get_seconds(), clean);
}
+static int
+affs_sync_fs(struct super_block *sb, int wait)
+{
+ lock_super(sb);
+ affs_commit_super(sb, 2);
+ sb->s_dirt = 0;
+ unlock_super(sb);
+ return 0;
+}
+
static struct kmem_cache * affs_inode_cachep;
static struct inode *affs_alloc_inode(struct super_block *sb)
@@ -124,6 +143,7 @@ static const struct super_operations affs_sops = {
.clear_inode = affs_clear_inode,
.put_super = affs_put_super,
.write_super = affs_write_super,
+ .sync_fs = affs_sync_fs,
.statfs = affs_statfs,
.remount_fs = affs_remount,
.show_options = generic_show_options,
@@ -507,6 +527,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
kfree(new_opts);
return -EINVAL;
}
+ lock_kernel();
replace_mount_options(sb, new_opts);
sbi->s_flags = mount_flags;
@@ -514,8 +535,10 @@ affs_remount(struct super_block *sb, int *flags, char *data)
sbi->s_uid = uid;
sbi->s_gid = gid;
- if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+ if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+ unlock_kernel();
return 0;
+ }
if (*flags & MS_RDONLY) {
sb->s_dirt = 1;
while (sb->s_dirt)
@@ -524,6 +547,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
} else
res = affs_init_bitmap(sb, flags);
+ unlock_kernel();
return res;
}
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index 2b9e2d03a39..c52be53f694 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -244,7 +244,7 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd)
case -EBUSY:
/* someone else made a mount here whilst we were busy */
while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path.mnt, &nd->path.dentry))
+ follow_down(&nd->path))
;
err = 0;
default:
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 76828e5f8a3..ad0514d0115 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -440,8 +440,12 @@ static void afs_put_super(struct super_block *sb)
_enter("");
+ lock_kernel();
+
afs_put_volume(as->volume);
+ unlock_kernel();
+
_leave("");
}
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
index 4eb4d8dfb2f..2316e944a10 100644
--- a/fs/autofs/dirhash.c
+++ b/fs/autofs/dirhash.c
@@ -85,13 +85,12 @@ struct autofs_dir_ent *autofs_expire(struct super_block *sb,
}
path.mnt = mnt;
path_get(&path);
- if (!follow_down(&path.mnt, &path.dentry)) {
+ if (!follow_down(&path)) {
path_put(&path);
DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
continue;
}
- while (d_mountpoint(path.dentry) &&
- follow_down(&path.mnt, &path.dentry))
+ while (d_mountpoint(path.dentry) && follow_down(&path));
;
umount_ok = may_umount(path.mnt);
path_put(&path);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index b7ff33c6310..8f7cdde4173 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -223,12 +223,12 @@ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
void autofs4_catatonic_mode(struct autofs_sb_info *);
-static inline int autofs4_follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static inline int autofs4_follow_mount(struct path *path)
{
int res = 0;
- while (d_mountpoint(*dentry)) {
- int followed = follow_down(mnt, dentry);
+ while (d_mountpoint(path->dentry)) {
+ int followed = follow_down(path);
if (!followed)
break;
res = 1;
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 84168c0dcc2..f3da2eb51f5 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -192,77 +192,42 @@ static int autofs_dev_ioctl_protosubver(struct file *fp,
return 0;
}
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested device number (aka. new_encode_dev(sb->s_dev).
- */
-static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno)
+static int find_autofs_mount(const char *pathname,
+ struct path *res,
+ int test(struct path *path, void *data),
+ void *data)
{
- struct dentry *dentry;
- struct inode *inode;
- struct super_block *sb;
- dev_t s_dev;
- unsigned int err;
-
+ struct path path;
+ int err = kern_path(pathname, 0, &path);
+ if (err)
+ return err;
err = -ENOENT;
-
- /* Lookup the dentry name at the base of our mount point */
- dentry = d_lookup(nd->path.dentry, &nd->last);
- if (!dentry)
- goto out;
-
- dput(nd->path.dentry);
- nd->path.dentry = dentry;
-
- /* And follow the mount stack looking for our autofs mount */
- while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
- inode = nd->path.dentry->d_inode;
- if (!inode)
- break;
-
- sb = inode->i_sb;
- s_dev = new_encode_dev(sb->s_dev);
- if (devno == s_dev) {
- if (sb->s_magic == AUTOFS_SUPER_MAGIC) {
+ while (path.dentry == path.mnt->mnt_root) {
+ if (path.mnt->mnt_sb->s_magic == AUTOFS_SUPER_MAGIC) {
+ if (test(&path, data)) {
+ path_get(&path);
+ if (!err) /* already found some */
+ path_put(res);
+ *res = path;
err = 0;
- break;
}
}
+ if (!follow_up(&path))
+ break;
}
-out:
+ path_put(&path);
return err;
}
-/*
- * Walk down the mount stack looking for an autofs mount that
- * has the requested mount type (ie. indirect, direct or offset).
- */
-static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type)
+static int test_by_dev(struct path *path, void *p)
{
- struct dentry *dentry;
- struct autofs_info *ino;
- unsigned int err;
-
- err = -ENOENT;
-
- /* Lookup the dentry name at the base of our mount point */
- dentry = d_lookup(nd->path.dentry, &nd->last);
- if (!dentry)
- goto out;
-
- dput(nd->path.dentry);
- nd->path.dentry = dentry;
+ return path->mnt->mnt_sb->s_dev == *(dev_t *)p;
+}
- /* And follow the mount stack looking for our autofs mount */
- while (follow_down(&nd->path.mnt, &nd->path.dentry)) {
- ino = autofs4_dentry_ino(nd->path.dentry);
- if (ino && ino->sbi->type & type) {
- err = 0;
- break;
- }
- }
-out:
- return err;
+static int test_by_type(struct path *path, void *p)
+{
+ struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
+ return ino && ino->sbi->type & *(unsigned *)p;
}
static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
@@ -283,31 +248,25 @@ static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file)
* Open a file descriptor on the autofs mount point corresponding
* to the given path and device number (aka. new_encode_dev(sb->s_dev)).
*/
-static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid)
+static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid)
{
- struct file *filp;
- struct nameidata nd;
int err, fd;
fd = get_unused_fd();
if (likely(fd >= 0)) {
- /* Get nameidata of the parent directory */
- err = path_lookup(path, LOOKUP_PARENT, &nd);
+ struct file *filp;
+ struct path path;
+
+ err = find_autofs_mount(name, &path, test_by_dev, &devid);
if (err)
goto out;
/*
- * Search down, within the parent, looking for an
- * autofs super block that has the device number
+ * Find autofs super block that has the device number
* corresponding to the autofs fs we want to open.
*/
- err = autofs_dev_ioctl_find_super(&nd, devid);
- if (err) {
- path_put(&nd.path);
- goto out;
- }
- filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY,
+ filp = dentry_open(path.dentry, path.mnt, O_RDONLY,
current_cred());
if (IS_ERR(filp)) {
err = PTR_ERR(filp);
@@ -340,7 +299,7 @@ static int autofs_dev_ioctl_openmount(struct file *fp,
param->ioctlfd = -1;
path = param->path;
- devid = param->openmount.devid;
+ devid = new_decode_dev(param->openmount.devid);
err = 0;
fd = autofs_dev_ioctl_open_mountpoint(path, devid);
@@ -475,8 +434,7 @@ static int autofs_dev_ioctl_requester(struct file *fp,
struct autofs_dev_ioctl *param)
{
struct autofs_info *ino;
- struct nameidata nd;
- const char *path;
+ struct path path;
dev_t devid;
int err = -ENOENT;
@@ -485,32 +443,24 @@ static int autofs_dev_ioctl_requester(struct file *fp,
goto out;
}
- path = param->path;
- devid = new_encode_dev(sbi->sb->s_dev);
+ devid = sbi->sb->s_dev;
param->requester.uid = param->requester.gid = -1;
- /* Get nameidata of the parent directory */
- err = path_lookup(path, LOOKUP_PARENT, &nd);
+ err = find_autofs_mount(param->path, &path, test_by_dev, &devid);
if (err)
goto out;
- err = autofs_dev_ioctl_find_super(&nd, devid);
- if (err)
- goto out_release;
-
- ino = autofs4_dentry_ino(nd.path.dentry);
+ ino = autofs4_dentry_ino(path.dentry);
if (ino) {
err = 0;
- autofs4_expire_wait(nd.path.dentry);
+ autofs4_expire_wait(path.dentry);
spin_lock(&sbi->fs_lock);
param->requester.uid = ino->uid;
param->requester.gid = ino->gid;
spin_unlock(&sbi->fs_lock);
}
-
-out_release:
- path_put(&nd.path);
+ path_put(&path);
out:
return err;
}
@@ -569,8 +519,8 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
struct autofs_sb_info *sbi,
struct autofs_dev_ioctl *param)
{
- struct nameidata nd;
- const char *path;
+ struct path path;
+ const char *name;
unsigned int type;
unsigned int devid, magic;
int err = -ENOENT;
@@ -580,71 +530,46 @@ static int autofs_dev_ioctl_ismountpoint(struct file *fp,
goto out;
}
- path = param->path;
+ name = param->path;
type = param->ismountpoint.in.type;
param->ismountpoint.out.devid = devid = 0;
param->ismountpoint.out.magic = magic = 0;
if (!fp || param->ioctlfd == -1) {
- if (autofs_type_any(type)) {
- struct super_block *sb;
-
- err = path_lookup(path, LOOKUP_FOLLOW, &nd);
- if (err)
- goto out;
-
- sb = nd.path.dentry->d_sb;
- devid = new_encode_dev(sb->s_dev);
- } else {
- struct autofs_info *ino;
-
- err = path_lookup(path, LOOKUP_PARENT, &nd);
- if (err)
- goto out;
-
- err = autofs_dev_ioctl_find_sbi_type(&nd, type);
- if (err)
- goto out_release;
-
- ino = autofs4_dentry_ino(nd.path.dentry);
- devid = autofs4_get_dev(ino->sbi);
- }
-
+ if (autofs_type_any(type))
+ err = kern_path(name, LOOKUP_FOLLOW, &path);
+ else
+ err = find_autofs_mount(name, &path, test_by_type, &type);
+ if (err)
+ goto out;
+ devid = new_encode_dev(path.mnt->mnt_sb->s_dev);
err = 0;
- if (nd.path.dentry->d_inode &&
- nd.path.mnt->mnt_root == nd.path.dentry) {
+ if (path.dentry->d_inode &&
+ path.mnt->mnt_root == path.dentry) {
err = 1;
- magic = nd.path.dentry->d_inode->i_sb->s_magic;
+ magic = path.dentry->d_inode->i_sb->s_magic;
}
} else {
- dev_t dev = autofs4_get_dev(sbi);
+ dev_t dev = sbi->sb->s_dev;
- err = path_lookup(path, LOOKUP_PARENT, &nd);
+ err = find_autofs_mount(name, &path, test_by_dev, &dev);
if (err)
goto out;
- err = autofs_dev_ioctl_find_super(&nd, dev);
- if (err)
- goto out_release;
-
- devid = dev;
+ devid = new_encode_dev(dev);
- err = have_submounts(nd.path.dentry);
+ err = have_submounts(path.dentry);
- if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) {
- if (follow_down(&nd.path.mnt, &nd.path.dentry)) {
- struct inode *inode = nd.path.dentry->d_inode;
- magic = inode->i_sb->s_magic;
- }
+ if (path.mnt->mnt_mountpoint != path.mnt->mnt_root) {
+ if (follow_down(&path))
+ magic = path.mnt->mnt_sb->s_magic;
}
}
param->ismountpoint.out.devid = devid;
param->ismountpoint.out.magic = magic;
-
-out_release:
- path_put(&nd.path);
+ path_put(&path);
out:
return err;
}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3077d8f1652..aa39ae83f01 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -48,19 +48,19 @@ static inline int autofs4_can_expire(struct dentry *dentry,
static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
{
struct dentry *top = dentry;
+ struct path path = {.mnt = mnt, .dentry = dentry};
int status = 1;
DPRINTK("dentry %p %.*s",
dentry, (int)dentry->d_name.len, dentry->d_name.name);
- mntget(mnt);
- dget(dentry);
+ path_get(&path);
- if (!follow_down(&mnt, &dentry))
+ if (!follow_down(&path))
goto done;
- if (is_autofs4_dentry(dentry)) {
- struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ if (is_autofs4_dentry(path.dentry)) {
+ struct autofs_sb_info *sbi = autofs4_sbi(path.dentry->d_sb);
/* This is an autofs submount, we can't expire it */
if (autofs_type_indirect(sbi->type))
@@ -70,7 +70,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
* Otherwise it's an offset mount and we need to check
* if we can umount its mount, if there is one.
*/
- if (!d_mountpoint(dentry)) {
+ if (!d_mountpoint(path.dentry)) {
status = 0;
goto done;
}
@@ -86,8 +86,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
status = 0;
done:
DPRINTK("returning = %d", status);
- dput(dentry);
- mntput(mnt);
+ path_put(&path);
return status;
}
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index e383bf0334f..b96a3c57359 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -181,7 +181,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
nd->flags);
/*
* For an expire of a covered direct or offset mount we need
- * to beeak out of follow_down() at the autofs mount trigger
+ * to break out of follow_down() at the autofs mount trigger
* (d_mounted--), so we can see the expiring flag, and manage
* the blocking and following here until the expire is completed.
*/
@@ -190,7 +190,7 @@ static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
if (ino->flags & AUTOFS_INF_EXPIRING) {
spin_unlock(&sbi->fs_lock);
/* Follow down to our covering mount. */
- if (!follow_down(&nd->path.mnt, &nd->path.dentry))
+ if (!follow_down(&nd->path))
goto done;
goto follow;
}
@@ -230,8 +230,7 @@ follow:
* to follow it.
*/
if (d_mountpoint(dentry)) {
- if (!autofs4_follow_mount(&nd->path.mnt,
- &nd->path.dentry)) {
+ if (!autofs4_follow_mount(&nd->path)) {
status = -ENOENT;
goto out_error;
}
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 76afd0d6b86..9367b6297d8 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -737,6 +737,8 @@ parse_options(char *options, befs_mount_options * opts)
static void
befs_put_super(struct super_block *sb)
{
+ lock_kernel();
+
kfree(BEFS_SB(sb)->mount_opts.iocharset);
BEFS_SB(sb)->mount_opts.iocharset = NULL;
@@ -747,7 +749,8 @@ befs_put_super(struct super_block *sb)
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
- return;
+
+ unlock_kernel();
}
/* Allocate private field of the superblock, fill it.
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 4dd1b623f93..54bd07d44e6 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -79,7 +79,7 @@ static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir)
const struct file_operations bfs_dir_operations = {
.read = generic_read_dir,
.readdir = bfs_readdir,
- .fsync = file_fsync,
+ .fsync = simple_fsync,
.llseek = generic_file_llseek,
};
@@ -205,7 +205,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
inode->i_nlink = 1;
}
de->ino = 0;
- mark_buffer_dirty(bh);
+ mark_buffer_dirty_inode(bh, dir);
dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
mark_inode_dirty(dir);
inode->i_ctime = dir->i_ctime;
@@ -267,7 +267,7 @@ static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
new_inode->i_ctime = CURRENT_TIME_SEC;
inode_dec_link_count(new_inode);
}
- mark_buffer_dirty(old_bh);
+ mark_buffer_dirty_inode(old_bh, old_dir);
error = 0;
end_rename:
@@ -320,7 +320,7 @@ static int bfs_add_entry(struct inode *dir, const unsigned char *name,
for (i = 0; i < BFS_NAMELEN; i++)
de->name[i] =
(i < namelen) ? name[i] : 0;
- mark_buffer_dirty(bh);
+ mark_buffer_dirty_inode(bh, dir);
brelse(bh);
return 0;
}
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index cc4062d12ca..6f60336c662 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -30,6 +30,7 @@ MODULE_LICENSE("GPL");
#define dprintf(x...)
#endif
+static void bfs_write_super(struct super_block *s);
void dump_imap(const char *prefix, struct super_block *s);
struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
@@ -97,14 +98,15 @@ error:
return ERR_PTR(-EIO);
}
-static int bfs_write_inode(struct inode *inode, int unused)
+static int bfs_write_inode(struct inode *inode, int wait)
{
+ struct bfs_sb_info *info = BFS_SB(inode->i_sb);
unsigned int ino = (u16)inode->i_ino;
unsigned long i_sblock;
struct bfs_inode *di;
struct buffer_head *bh;
int block, off;
- struct bfs_sb_info *info = BFS_SB(inode->i_sb);
+ int err = 0;
dprintf("ino=%08x\n", ino);
@@ -145,9 +147,14 @@ static int bfs_write_inode(struct inode *inode, int unused)
di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1);
mark_buffer_dirty(bh);
+ if (wait) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh))
+ err = -EIO;
+ }
brelse(bh);
mutex_unlock(&info->bfs_lock);
- return 0;
+ return err;
}
static void bfs_delete_inode(struct inode *inode)
@@ -209,6 +216,26 @@ static void bfs_delete_inode(struct inode *inode)
clear_inode(inode);
}
+static int bfs_sync_fs(struct super_block *sb, int wait)
+{
+ struct bfs_sb_info *info = BFS_SB(sb);
+
+ mutex_lock(&info->bfs_lock);
+ mark_buffer_dirty(info->si_sbh);
+ sb->s_dirt = 0;
+ mutex_unlock(&info->bfs_lock);
+
+ return 0;
+}
+
+static void bfs_write_super(struct super_block *sb)
+{
+ if (!(sb->s_flags & MS_RDONLY))
+ bfs_sync_fs(sb, 1);
+ else
+ sb->s_dirt = 0;
+}
+
static void bfs_put_super(struct super_block *s)
{
struct bfs_sb_info *info = BFS_SB(s);
@@ -216,11 +243,18 @@ static void bfs_put_super(struct super_block *s)
if (!info)
return;
+ lock_kernel();
+
+ if (s->s_dirt)
+ bfs_write_super(s);
+
brelse(info->si_sbh);
mutex_destroy(&info->bfs_lock);
kfree(info->si_imap);
kfree(info);
s->s_fs_info = NULL;
+
+ unlock_kernel();
}
static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -240,17 +274,6 @@ static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static void bfs_write_super(struct super_block *s)
-{
- struct bfs_sb_info *info = BFS_SB(s);
-
- mutex_lock(&info->bfs_lock);
- if (!(s->s_flags & MS_RDONLY))
- mark_buffer_dirty(info->si_sbh);
- s->s_dirt = 0;
- mutex_unlock(&info->bfs_lock);
-}
-
static struct kmem_cache *bfs_inode_cachep;
static struct inode *bfs_alloc_inode(struct super_block *sb)
@@ -298,6 +321,7 @@ static const struct super_operations bfs_sops = {
.delete_inode = bfs_delete_inode,
.put_super = bfs_put_super,
.write_super = bfs_write_super,
+ .sync_fs = bfs_sync_fs,
.statfs = bfs_statfs,
};
diff --git a/fs/bio.c b/fs/bio.c
index 59000215e59..24c91404353 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -25,7 +25,6 @@
#include <linux/module.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
-#include <linux/blktrace_api.h>
#include <scsi/sg.h> /* for struct sg_iovec */
#include <trace/events/block.h>
@@ -358,9 +357,9 @@ static void bio_kmalloc_destructor(struct bio *bio)
*
* If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
* a bio. This is due to the mempool guarantees. To make this work, callers
- * must never allocate more than 1 bio at the time from this pool. Callers
+ * must never allocate more than 1 bio at a time from this pool. Callers
* that need to allocate more than 1 bio must always submit the previously
- * allocate bio for IO before attempting to allocate a new one. Failure to
+ * allocated bio for IO before attempting to allocate a new one. Failure to
* do so can cause livelocks under memory pressure.
*
**/
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 931f6b8c4b2..3a6d4fb2a32 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -176,17 +176,22 @@ blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
iov, offset, nr_segs, blkdev_get_blocks, NULL);
}
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+ if (!bdev)
+ return 0;
+ if (!wait)
+ return filemap_flush(bdev->bd_inode->i_mapping);
+ return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
/*
* Write out and wait upon all the dirty data associated with a block
* device via its mapping. Does not take the superblock lock.
*/
int sync_blockdev(struct block_device *bdev)
{
- int ret = 0;
-
- if (bdev)
- ret = filemap_write_and_wait(bdev->bd_inode->i_mapping);
- return ret;
+ return __sync_blockdev(bdev, 1);
}
EXPORT_SYMBOL(sync_blockdev);
@@ -199,7 +204,7 @@ int fsync_bdev(struct block_device *bdev)
{
struct super_block *sb = get_super(bdev);
if (sb) {
- int res = fsync_super(sb);
+ int res = sync_filesystem(sb);
drop_super(sb);
return res;
}
@@ -241,7 +246,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
sb->s_frozen = SB_FREEZE_WRITE;
smp_wmb();
- __fsync_super(sb);
+ sync_filesystem(sb);
sb->s_frozen = SB_FREEZE_TRANS;
smp_wmb();
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0d50d49d990..d28d29c95f7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -42,6 +42,8 @@
static struct extent_io_ops btree_extent_io_ops;
static void end_workqueue_fn(struct btrfs_work *work);
+static atomic_t btrfs_bdi_num = ATOMIC_INIT(0);
+
/*
* end_io_wq structs are used to do processing in task context when an IO is
* complete. This is used during reads to verify checksums, and it is used
@@ -1342,12 +1344,25 @@ static void btrfs_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
free_extent_map(em);
}
+/*
+ * If this fails, caller must call bdi_destroy() to get rid of the
+ * bdi again.
+ */
static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
{
- bdi_init(bdi);
+ int err;
+
+ bdi->capabilities = BDI_CAP_MAP_COPY;
+ err = bdi_init(bdi);
+ if (err)
+ return err;
+
+ err = bdi_register(bdi, NULL, "btrfs-%d",
+ atomic_inc_return(&btrfs_bdi_num));
+ if (err)
+ return err;
+
bdi->ra_pages = default_backing_dev_info.ra_pages;
- bdi->state = 0;
- bdi->capabilities = default_backing_dev_info.capabilities;
bdi->unplug_io_fn = btrfs_unplug_io_fn;
bdi->unplug_io_data = info;
bdi->congested_fn = btrfs_congested_fn;
@@ -1569,7 +1584,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->sb = sb;
fs_info->max_extent = (u64)-1;
fs_info->max_inline = 8192 * 1024;
- setup_bdi(fs_info, &fs_info->bdi);
+ if (setup_bdi(fs_info, &fs_info->bdi))
+ goto fail_bdi;
fs_info->btree_inode = new_inode(sb);
fs_info->btree_inode->i_ino = 1;
fs_info->btree_inode->i_nlink = 1;
@@ -1946,8 +1962,8 @@ fail_iput:
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
+fail_bdi:
bdi_destroy(&fs_info->bdi);
-
fail:
kfree(extent_root);
kfree(tree_root);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 5b68330f858..8612b3a0981 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2322,7 +2322,6 @@ err:
btrfs_update_inode(trans, root, dir);
btrfs_drop_nlink(inode);
ret = btrfs_update_inode(trans, root, inode);
- dir->i_sb->s_dirt = 1;
out:
return ret;
}
@@ -2806,7 +2805,6 @@ error:
pending_del_nr);
}
btrfs_free_path(path);
- inode->i_sb->s_dirt = 1;
return ret;
}
@@ -3768,7 +3766,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
init_special_inode(inode, inode->i_mode, rdev);
btrfs_update_inode(trans, root, inode);
}
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
btrfs_update_inode_block_group(trans, dir);
out_unlock:
@@ -3833,7 +3830,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
btrfs_update_inode_block_group(trans, dir);
out_unlock:
@@ -3880,7 +3876,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (err)
drop_inode = 1;
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, dir);
err = btrfs_update_inode(trans, root, inode);
@@ -3962,7 +3957,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
d_instantiate(dentry, inode);
drop_on_err = 0;
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
btrfs_update_inode_block_group(trans, dir);
@@ -4991,7 +4985,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
- dir->i_sb->s_dirt = 1;
btrfs_update_inode_block_group(trans, inode);
btrfs_update_inode_block_group(trans, dir);
if (drop_inode)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 708ac06b953..9f179d4832d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -394,10 +394,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
struct btrfs_root *root = btrfs_sb(sb);
int ret;
- if (sb->s_flags & MS_RDONLY)
- return 0;
-
- sb->s_dirt = 0;
if (!wait) {
filemap_flush(root->fs_info->btree_inode->i_mapping);
return 0;
@@ -408,7 +404,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
- sb->s_dirt = 0;
return ret;
}
@@ -454,11 +449,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
return 0;
}
-static void btrfs_write_super(struct super_block *sb)
-{
- sb->s_dirt = 0;
-}
-
static int btrfs_test_super(struct super_block *s, void *data)
{
struct btrfs_fs_devices *test_fs_devices = data;
@@ -689,7 +679,6 @@ static int btrfs_unfreeze(struct super_block *sb)
static struct super_operations btrfs_super_ops = {
.delete_inode = btrfs_delete_inode,
.put_super = btrfs_put_super,
- .write_super = btrfs_write_super,
.sync_fs = btrfs_sync_fs,
.show_options = btrfs_show_options,
.write_inode = btrfs_write_inode,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2e177d7f4bb..4e83457ea25 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -543,13 +543,13 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
btrfs_free_log(trans, root);
btrfs_update_reloc_root(trans, root);
- if (root->commit_root == root->node)
- continue;
-
- free_extent_buffer(root->commit_root);
- root->commit_root = btrfs_root_node(root);
+ if (root->commit_root != root->node) {
+ free_extent_buffer(root->commit_root);
+ root->commit_root = btrfs_root_node(root);
+ btrfs_set_root_node(&root->root_item,
+ root->node);
+ }
- btrfs_set_root_node(&root->root_item, root->node);
err = btrfs_update_root(trans, fs_info->tree_root,
&root->root_key,
&root->root_item);
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 1e962348d11..431accd475a 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -354,7 +354,9 @@ static void cachefiles_sync_cache(struct fscache_cache *_cache)
/* make sure all pages pinned by operations on behalf of the netfs are
* written to disc */
cachefiles_begin_secure(cache, &saved_cred);
- ret = fsync_super(cache->mnt->mnt_sb);
+ down_read(&cache->mnt->mnt_sb->s_umount);
+ ret = sync_filesystem(cache->mnt->mnt_sb);
+ up_read(&cache->mnt->mnt_sb->s_umount);
cachefiles_end_secure(cache, saved_cred);
if (ret == -EIO)
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 38f71222a55..b7c9d5187a7 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -375,7 +375,6 @@ static int chrdev_open(struct inode *inode, struct file *filp)
p = inode->i_cdev;
if (!p) {
inode->i_cdev = p = new;
- inode->i_cindex = idx;
list_add(&inode->i_devices, &p->list);
new = NULL;
} else if (!cdev_get(p))
@@ -405,6 +404,18 @@ static int chrdev_open(struct inode *inode, struct file *filp)
return ret;
}
+int cdev_index(struct inode *inode)
+{
+ int idx;
+ struct kobject *kobj;
+
+ kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx);
+ if (!kobj)
+ return -1;
+ kobject_put(kobj);
+ return idx;
+}
+
void cd_forget(struct inode *inode)
{
spin_lock(&cdev_lock);
@@ -557,6 +568,7 @@ EXPORT_SYMBOL(cdev_init);
EXPORT_SYMBOL(cdev_alloc);
EXPORT_SYMBOL(cdev_del);
EXPORT_SYMBOL(cdev_add);
+EXPORT_SYMBOL(cdev_index);
EXPORT_SYMBOL(register_chrdev);
EXPORT_SYMBOL(unregister_chrdev);
EXPORT_SYMBOL(directly_mappable_cdev_bdi);
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 83d62759c7c..3bb11be8b6a 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -275,7 +275,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
case -EBUSY:
/* someone else made a mount here whilst we were busy */
while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path.mnt, &nd->path.dentry))
+ follow_down(&nd->path))
;
err = 0;
default:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0a10a59b639..0d92114195a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -204,6 +204,9 @@ cifs_put_super(struct super_block *sb)
cFYI(1, ("Empty cifs superblock info passed to unmount"));
return;
}
+
+ lock_kernel();
+
rc = cifs_umount(sb, cifs_sb);
if (rc)
cERROR(1, ("cifs_umount failed with return code %d", rc));
@@ -216,7 +219,8 @@ cifs_put_super(struct super_block *sb)
unload_nls(cifs_sb->local_nls);
kfree(cifs_sb);
- return;
+
+ unlock_kernel();
}
static int
diff --git a/fs/compat.c b/fs/compat.c
index bb2a9b2e817..cdd51a3a7c5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -471,7 +471,7 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
ret = sys_fcntl(fd, cmd, (unsigned long)&f);
set_fs(old_fs);
if (cmd == F_GETLK && ret == 0) {
- /* GETLK was successfule and we need to return the data...
+ /* GETLK was successful and we need to return the data...
* but it needs to fit in the compat structure.
* l_start shouldn't be too big, unless the original
* start + end is greater than COMPAT_OFF_T_MAX, in which
@@ -812,10 +812,8 @@ asmlinkage long compat_sys_mount(char __user * dev_name, char __user * dir_name,
}
}
- lock_kernel();
retval = do_mount((char*)dev_page, dir_page, (char*)type_page,
flags, (void*)data_page);
- unlock_kernel();
out4:
free_page(data_page);
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index b83f6bcfa51..0aac371bff0 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1765,7 +1765,7 @@ static int do_i2c_smbus_ioctl(unsigned int fd, unsigned int cmd, unsigned long a
/* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
* for some operations; this forces use of the newer bridge-utils that
- * use compatiable ioctls
+ * use compatible ioctls
*/
static int old_bridge_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
index 762d287123c..da6061a6df4 100644
--- a/fs/configfs/configfs_internal.h
+++ b/fs/configfs/configfs_internal.h
@@ -39,6 +39,9 @@ struct configfs_dirent {
umode_t s_mode;
struct dentry * s_dentry;
struct iattr * s_iattr;
+#ifdef CONFIG_LOCKDEP
+ int s_depth;
+#endif
};
#define CONFIGFS_ROOT 0x0001
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 05373db21a4..8e48b52205a 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -78,11 +78,97 @@ static const struct dentry_operations configfs_dentry_ops = {
.d_delete = configfs_d_delete,
};
+#ifdef CONFIG_LOCKDEP
+
+/*
+ * Helpers to make lockdep happy with our recursive locking of default groups'
+ * inodes (see configfs_attach_group() and configfs_detach_group()).
+ * We put default groups i_mutexes in separate classes according to their depth
+ * from the youngest non-default group ancestor.
+ *
+ * For a non-default group A having default groups A/B, A/C, and A/C/D, default
+ * groups A/B and A/C will have their inode's mutex in class
+ * default_group_class[0], and default group A/C/D will be in
+ * default_group_class[1].
+ *
+ * The lock classes are declared and assigned in inode.c, according to the
+ * s_depth value.
+ * The s_depth value is initialized to -1, adjusted to >= 0 when attaching
+ * default groups, and reset to -1 when all default groups are attached. During
+ * attachment, if configfs_create() sees s_depth > 0, the lock class of the new
+ * inode's mutex is set to default_group_class[s_depth - 1].
+ */
+
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+ sd->s_depth = -1;
+}
+
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+ struct configfs_dirent *sd)
+{
+ int parent_depth = parent_sd->s_depth;
+
+ if (parent_depth >= 0)
+ sd->s_depth = parent_depth + 1;
+}
+
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+ /*
+ * item's i_mutex class is already setup, so s_depth is now only
+ * used to set new sub-directories s_depth, which is always done
+ * with item's i_mutex locked.
+ */
+ /*
+ * sd->s_depth == -1 iff we are a non default group.
+ * else (we are a default group) sd->s_depth > 0 (see
+ * create_dir()).
+ */
+ if (sd->s_depth == -1)
+ /*
+ * We are a non default group and we are going to create
+ * default groups.
+ */
+ sd->s_depth = 0;
+}
+
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+ /* We will not create default groups anymore. */
+ sd->s_depth = -1;
+}
+
+#else /* CONFIG_LOCKDEP */
+
+static void configfs_init_dirent_depth(struct configfs_dirent *sd)
+{
+}
+
+static void configfs_set_dir_dirent_depth(struct configfs_dirent *parent_sd,
+ struct configfs_dirent *sd)
+{
+}
+
+static void
+configfs_adjust_dir_dirent_depth_before_populate(struct configfs_dirent *sd)
+{
+}
+
+static void
+configfs_adjust_dir_dirent_depth_after_populate(struct configfs_dirent *sd)
+{
+}
+
+#endif /* CONFIG_LOCKDEP */
+
/*
* Allocates a new configfs_dirent and links it to the parent configfs_dirent
*/
-static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd,
- void * element)
+static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent *parent_sd,
+ void *element, int type)
{
struct configfs_dirent * sd;
@@ -94,6 +180,8 @@ static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * pare
INIT_LIST_HEAD(&sd->s_links);
INIT_LIST_HEAD(&sd->s_children);
sd->s_element = element;
+ sd->s_type = type;
+ configfs_init_dirent_depth(sd);
spin_lock(&configfs_dirent_lock);
if (parent_sd->s_type & CONFIGFS_USET_DROPPING) {
spin_unlock(&configfs_dirent_lock);
@@ -138,12 +226,11 @@ int configfs_make_dirent(struct configfs_dirent * parent_sd,
{
struct configfs_dirent * sd;
- sd = configfs_new_dirent(parent_sd, element);
+ sd = configfs_new_dirent(parent_sd, element, type);
if (IS_ERR(sd))
return PTR_ERR(sd);
sd->s_mode = mode;
- sd->s_type = type;
sd->s_dentry = dentry;
if (dentry) {
dentry->d_fsdata = configfs_get(sd);
@@ -187,6 +274,7 @@ static int create_dir(struct config_item * k, struct dentry * p,
error = configfs_make_dirent(p->d_fsdata, d, k, mode,
CONFIGFS_DIR | CONFIGFS_USET_CREATING);
if (!error) {
+ configfs_set_dir_dirent_depth(p->d_fsdata, d->d_fsdata);
error = configfs_create(d, mode, init_dir);
if (!error) {
inc_nlink(p->d_inode);
@@ -789,11 +877,13 @@ static int configfs_attach_group(struct config_item *parent_item,
* error, as rmdir() would.
*/
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+ configfs_adjust_dir_dirent_depth_before_populate(sd);
ret = populate_groups(to_config_group(item));
if (ret) {
configfs_detach_item(item);
dentry->d_inode->i_flags |= S_DEAD;
}
+ configfs_adjust_dir_dirent_depth_after_populate(sd);
mutex_unlock(&dentry->d_inode->i_mutex);
if (ret)
d_delete(dentry);
@@ -916,11 +1006,11 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
* Note, btw, that this can be called at *any* time, even when a configfs
* subsystem isn't registered, or when configfs is loading or unloading.
* Just like configfs_register_subsystem(). So we take the same
- * precautions. We pin the filesystem. We lock each i_mutex _in_order_
- * on our way down the tree. If we can find the target item in the
+ * precautions. We pin the filesystem. We lock configfs_dirent_lock.
+ * If we can find the target item in the
* configfs tree, it must be part of the subsystem tree as well, so we
- * do not need the subsystem semaphore. Holding the i_mutex chain locks
- * out mkdir() and rmdir(), who might be racing us.
+ * do not need the subsystem semaphore. Holding configfs_dirent_lock helps
+ * locking out mkdir() and rmdir(), who might be racing us.
*/
/*
@@ -933,17 +1023,21 @@ static int configfs_dump(struct configfs_dirent *sd, int level)
* do that so we can unlock it if we find nothing.
*
* Here we do a depth-first search of the dentry hierarchy looking for
- * our object. We take i_mutex on each step of the way down. IT IS
- * ESSENTIAL THAT i_mutex LOCKING IS ORDERED. If we come back up a branch,
- * we'll drop the i_mutex.
+ * our object.
+ * We deliberately ignore items tagged as dropping since they are virtually
+ * dead, as well as items in the middle of attachment since they virtually
+ * do not exist yet. This completes the locking out of racing mkdir() and
+ * rmdir().
+ * Note: subdirectories in the middle of attachment start with s_type =
+ * CONFIGFS_DIR|CONFIGFS_USET_CREATING set by create_dir(). When
+ * CONFIGFS_USET_CREATING is set, we ignore the item. The actual set of
+ * s_type is in configfs_new_dirent(), which has configfs_dirent_lock.
*
- * If the target is not found, -ENOENT is bubbled up and we have released
- * all locks. If the target was found, the locks will be cleared by
- * configfs_depend_rollback().
+ * If the target is not found, -ENOENT is bubbled up.
*
* This adds a requirement that all config_items be unique!
*
- * This is recursive because the locking traversal is tricky. There isn't
+ * This is recursive. There isn't
* much on the stack, though, so folks that need this function - be careful
* about your stack! Patches will be accepted to make it iterative.
*/
@@ -955,13 +1049,13 @@ static int configfs_depend_prep(struct dentry *origin,
BUG_ON(!origin || !sd);
- /* Lock this guy on the way down */
- mutex_lock(&sd->s_dentry->d_inode->i_mutex);
if (sd->s_element == target) /* Boo-yah */
goto out;
list_for_each_entry(child_sd, &sd->s_children, s_sibling) {
- if (child_sd->s_type & CONFIGFS_DIR) {
+ if ((child_sd->s_type & CONFIGFS_DIR) &&
+ !(child_sd->s_type & CONFIGFS_USET_DROPPING) &&
+ !(child_sd->s_type & CONFIGFS_USET_CREATING)) {
ret = configfs_depend_prep(child_sd->s_dentry,
target);
if (!ret)
@@ -970,33 +1064,12 @@ static int configfs_depend_prep(struct dentry *origin,
}
/* We looped all our children and didn't find target */
- mutex_unlock(&sd->s_dentry->d_inode->i_mutex);
ret = -ENOENT;
out:
return ret;
}
-/*
- * This is ONLY called if configfs_depend_prep() did its job. So we can
- * trust the entire path from item back up to origin.
- *
- * We walk backwards from item, unlocking each i_mutex. We finish by
- * unlocking origin.
- */
-static void configfs_depend_rollback(struct dentry *origin,
- struct config_item *item)
-{
- struct dentry *dentry = item->ci_dentry;
-
- while (dentry != origin) {
- mutex_unlock(&dentry->d_inode->i_mutex);
- dentry = dentry->d_parent;
- }
-
- mutex_unlock(&origin->d_inode->i_mutex);
-}
-
int configfs_depend_item(struct configfs_subsystem *subsys,
struct config_item *target)
{
@@ -1037,17 +1110,21 @@ int configfs_depend_item(struct configfs_subsystem *subsys,
/* Ok, now we can trust subsys/s_item */
- /* Scan the tree, locking i_mutex recursively, return 0 if found */
+ spin_lock(&configfs_dirent_lock);
+ /* Scan the tree, return 0 if found */
ret = configfs_depend_prep(subsys_sd->s_dentry, target);
if (ret)
- goto out_unlock_fs;
+ goto out_unlock_dirent_lock;
- /* We hold all i_mutexes from the subsystem down to the target */
+ /*
+ * We are sure that the item is not about to be removed by rmdir(), and
+ * not in the middle of attachment by mkdir().
+ */
p = target->ci_dentry->d_fsdata;
p->s_dependent_count += 1;
- configfs_depend_rollback(subsys_sd->s_dentry, target);
-
+out_unlock_dirent_lock:
+ spin_unlock(&configfs_dirent_lock);
out_unlock_fs:
mutex_unlock(&configfs_sb->s_root->d_inode->i_mutex);
@@ -1072,10 +1149,10 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
struct configfs_dirent *sd;
/*
- * Since we can trust everything is pinned, we just need i_mutex
- * on the item.
+ * Since we can trust everything is pinned, we just need
+ * configfs_dirent_lock.
*/
- mutex_lock(&target->ci_dentry->d_inode->i_mutex);
+ spin_lock(&configfs_dirent_lock);
sd = target->ci_dentry->d_fsdata;
BUG_ON(sd->s_dependent_count < 1);
@@ -1086,7 +1163,7 @@ void configfs_undepend_item(struct configfs_subsystem *subsys,
* After this unlock, we cannot trust the item to stay alive!
* DO NOT REFERENCE item after this unlock.
*/
- mutex_unlock(&target->ci_dentry->d_inode->i_mutex);
+ spin_unlock(&configfs_dirent_lock);
}
EXPORT_SYMBOL(configfs_undepend_item);
@@ -1286,13 +1363,6 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
if (sd->s_type & CONFIGFS_USET_DEFAULT)
return -EPERM;
- /*
- * Here's where we check for dependents. We're protected by
- * i_mutex.
- */
- if (sd->s_dependent_count)
- return -EBUSY;
-
/* Get a working ref until we have the child */
parent_item = configfs_get_config_item(dentry->d_parent);
subsys = to_config_group(parent_item)->cg_subsys;
@@ -1316,9 +1386,17 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
mutex_lock(&configfs_symlink_mutex);
spin_lock(&configfs_dirent_lock);
- ret = configfs_detach_prep(dentry, &wait_mutex);
- if (ret)
- configfs_detach_rollback(dentry);
+ /*
+ * Here's where we check for dependents. We're protected by
+ * configfs_dirent_lock.
+ * If no dependent, atomically tag the item as dropping.
+ */
+ ret = sd->s_dependent_count ? -EBUSY : 0;
+ if (!ret) {
+ ret = configfs_detach_prep(dentry, &wait_mutex);
+ if (ret)
+ configfs_detach_rollback(dentry);
+ }
spin_unlock(&configfs_dirent_lock);
mutex_unlock(&configfs_symlink_mutex);
@@ -1429,7 +1507,7 @@ static int configfs_dir_open(struct inode *inode, struct file *file)
*/
err = -ENOENT;
if (configfs_dirent_is_ready(parent_sd)) {
- file->private_data = configfs_new_dirent(parent_sd, NULL);
+ file->private_data = configfs_new_dirent(parent_sd, NULL, 0);
if (IS_ERR(file->private_data))
err = PTR_ERR(file->private_data);
else
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 5d349d38e05..4921e7426d9 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -33,10 +33,15 @@
#include <linux/backing-dev.h>
#include <linux/capability.h>
#include <linux/sched.h>
+#include <linux/lockdep.h>
#include <linux/configfs.h>
#include "configfs_internal.h"
+#ifdef CONFIG_LOCKDEP
+static struct lock_class_key default_group_class[MAX_LOCK_DEPTH];
+#endif
+
extern struct super_block * configfs_sb;
static const struct address_space_operations configfs_aops = {
@@ -150,6 +155,38 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
return inode;
}
+#ifdef CONFIG_LOCKDEP
+
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+ struct inode *inode)
+{
+ int depth = sd->s_depth;
+
+ if (depth > 0) {
+ if (depth <= ARRAY_SIZE(default_group_class)) {
+ lockdep_set_class(&inode->i_mutex,
+ &default_group_class[depth - 1]);
+ } else {
+ /*
+ * In practice the maximum level of locking depth is
+ * already reached. Just inform about possible reasons.
+ */
+ printk(KERN_INFO "configfs: Too many levels of inodes"
+ " for the locking correctness validator.\n");
+ printk(KERN_INFO "Spurious warnings may appear.\n");
+ }
+ }
+}
+
+#else /* CONFIG_LOCKDEP */
+
+static void configfs_set_inode_lock_class(struct configfs_dirent *sd,
+ struct inode *inode)
+{
+}
+
+#endif /* CONFIG_LOCKDEP */
+
int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
{
int error = 0;
@@ -162,6 +199,7 @@ int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *
struct inode *p_inode = dentry->d_parent->d_inode;
p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
}
+ configfs_set_inode_lock_class(sd, inode);
goto Proceed;
}
else
diff --git a/fs/dcache.c b/fs/dcache.c
index 75659a6fd1f..9e5cd3c3a6b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1910,7 +1910,7 @@ char *__d_path(const struct path *path, struct path *root,
spin_lock(&vfsmount_lock);
prepend(&end, &buflen, "\0", 1);
- if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+ if (d_unlinked(dentry) &&
(prepend(&end, &buflen, " (deleted)", 10) != 0))
goto Elong;
@@ -2035,7 +2035,7 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
spin_lock(&dcache_lock);
prepend(&end, &buflen, "\0", 1);
- if (!IS_ROOT(dentry) && d_unhashed(dentry) &&
+ if (d_unlinked(dentry) &&
(prepend(&end, &buflen, "//deleted", 9) != 0))
goto Elong;
if (buflen < 1)
@@ -2097,9 +2097,8 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
read_unlock(&current->fs->lock);
error = -ENOENT;
- /* Has the current directory has been unlinked? */
spin_lock(&dcache_lock);
- if (IS_ROOT(pwd.dentry) || !d_unhashed(pwd.dentry)) {
+ if (!d_unlinked(pwd.dentry)) {
unsigned long len;
struct path tmp = root;
char * cwd;
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 858fba14aaa..c4dfa1dcc86 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -49,7 +49,8 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
spin_unlock(&ls->ls_recover_list_lock);
if (!found)
- de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL);
+ de = kzalloc(sizeof(struct dlm_direntry) + len,
+ ls->ls_allocation);
return de;
}
@@ -211,7 +212,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
dlm_dir_clear(ls);
- last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL);
+ last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation);
if (!last_name)
goto out;
@@ -322,7 +323,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
if (namelen > DLM_RESNAME_MAXLEN)
return -EINVAL;
- de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL);
+ de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation);
if (!de)
return -ENOMEM;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index cd8e2df3c29..d489fcc8671 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -384,7 +384,7 @@ static void threads_stop(void)
dlm_astd_stop();
}
-static int new_lockspace(char *name, int namelen, void **lockspace,
+static int new_lockspace(const char *name, int namelen, void **lockspace,
uint32_t flags, int lvblen)
{
struct dlm_ls *ls;
@@ -419,16 +419,14 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
break;
}
ls->ls_create_count++;
- module_put(THIS_MODULE);
- error = 1; /* not an error, return 0 */
+ *lockspace = ls;
+ error = 1;
break;
}
spin_unlock(&lslist_lock);
- if (error < 0)
- goto out;
if (error)
- goto ret_zero;
+ goto out;
error = -ENOMEM;
@@ -583,7 +581,6 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
dlm_create_debug_file(ls);
log_debug(ls, "join complete");
- ret_zero:
*lockspace = ls;
return 0;
@@ -614,7 +611,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
return error;
}
-int dlm_new_lockspace(char *name, int namelen, void **lockspace,
+int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
uint32_t flags, int lvblen)
{
int error = 0;
@@ -628,7 +625,9 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
error = new_lockspace(name, namelen, lockspace, flags, lvblen);
if (!error)
ls_count++;
- else if (!ls_count)
+ if (error > 0)
+ error = 0;
+ if (!ls_count)
threads_stop();
out:
mutex_unlock(&ls_lock);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 609108a8326..cdb580a9c7a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -309,6 +309,20 @@ static void lowcomms_state_change(struct sock *sk)
lowcomms_write_space(sk);
}
+int dlm_lowcomms_connect_node(int nodeid)
+{
+ struct connection *con;
+
+ if (nodeid == dlm_our_nodeid())
+ return 0;
+
+ con = nodeid2con(nodeid, GFP_NOFS);
+ if (!con)
+ return -ENOMEM;
+ lowcomms_connect_sock(con);
+ return 0;
+}
+
/* Make a socket active */
static int add_sock(struct socket *sock, struct connection *con)
{
@@ -486,7 +500,7 @@ static void process_sctp_notification(struct connection *con,
return;
}
- new_con = nodeid2con(nodeid, GFP_KERNEL);
+ new_con = nodeid2con(nodeid, GFP_NOFS);
if (!new_con)
return;
@@ -722,7 +736,7 @@ static int tcp_accept_from_sock(struct connection *con)
* the same time and the connections cross on the wire.
* In this case we store the incoming one in "othercon"
*/
- newcon = nodeid2con(nodeid, GFP_KERNEL);
+ newcon = nodeid2con(nodeid, GFP_NOFS);
if (!newcon) {
result = -ENOMEM;
goto accept_err;
@@ -732,7 +746,7 @@ static int tcp_accept_from_sock(struct connection *con)
struct connection *othercon = newcon->othercon;
if (!othercon) {
- othercon = kmem_cache_zalloc(con_cache, GFP_KERNEL);
+ othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
if (!othercon) {
log_print("failed to allocate incoming socket");
mutex_unlock(&newcon->sock_mutex);
@@ -1421,7 +1435,7 @@ static int work_start(void)
static void stop_conn(struct connection *con)
{
con->flags |= 0x0F;
- if (con->sock)
+ if (con->sock && con->sock->sk)
con->sock->sk->sk_user_data = NULL;
}
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index a9a9618c0d3..1311e642628 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -2,7 +2,7 @@
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2004-2009 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -19,6 +19,7 @@ void dlm_lowcomms_stop(void);
int dlm_lowcomms_close(int nodeid);
void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc);
void dlm_lowcomms_commit_buffer(void *mh);
+int dlm_lowcomms_connect_node(int nodeid);
#endif /* __LOWCOMMS_DOT_H__ */
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 26133f05ae3..b128775913b 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
/******************************************************************************
*******************************************************************************
**
-** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2009 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
@@ -17,6 +17,7 @@
#include "recover.h"
#include "rcom.h"
#include "config.h"
+#include "lowcomms.h"
static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
{
@@ -45,9 +46,9 @@ static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
static int dlm_add_member(struct dlm_ls *ls, int nodeid)
{
struct dlm_member *memb;
- int w;
+ int w, error;
- memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+ memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
if (!memb)
return -ENOMEM;
@@ -57,6 +58,12 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
return w;
}
+ error = dlm_lowcomms_connect_node(nodeid);
+ if (error < 0) {
+ kfree(memb);
+ return error;
+ }
+
memb->nodeid = nodeid;
memb->weight = w;
add_ordered_member(ls, memb);
@@ -136,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls)
ls->ls_total_weight = total;
- array = kmalloc(sizeof(int) * total, GFP_KERNEL);
+ array = kmalloc(sizeof(int) * total, ls->ls_allocation);
if (!array)
return;
@@ -219,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
continue;
log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
- memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
+ memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
if (!memb)
return -ENOMEM;
memb->nodeid = rv->new[i];
@@ -334,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls)
int *ids = NULL, *new = NULL;
int error, ids_count = 0, new_count = 0;
- rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
+ rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation);
if (!rv)
return -ENOMEM;
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index daa4183fbb8..7a2307c0891 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
struct rq_entry *e;
int length = ms->m_header.h_length - sizeof(struct dlm_message);
- e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
+ e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation);
if (!e) {
log_print("dlm_add_requestqueue: out of memory len %d", length);
return;
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index fa4c7e7d15d..12d649602d3 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -27,6 +27,7 @@
#include <linux/mount.h>
#include <linux/key.h>
#include <linux/seq_file.h>
+#include <linux/smp_lock.h>
#include <linux/file.h>
#include <linux/crypto.h>
#include "ecryptfs_kernel.h"
@@ -120,9 +121,13 @@ static void ecryptfs_put_super(struct super_block *sb)
{
struct ecryptfs_sb_info *sb_info = ecryptfs_superblock_to_private(sb);
+ lock_kernel();
+
ecryptfs_destroy_mount_crypt_stat(&sb_info->mount_crypt_stat);
kmem_cache_free(ecryptfs_sb_info_cache, sb_info);
ecryptfs_set_superblock_private(sb, NULL);
+
+ unlock_kernel();
}
/**
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 2a701d593d3..3f0e1974abd 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -16,6 +16,7 @@
#include <linux/anon_inodes.h>
#include <linux/eventfd.h>
#include <linux/syscalls.h>
+#include <linux/module.h>
struct eventfd_ctx {
wait_queue_head_t wqh;
@@ -56,6 +57,7 @@ int eventfd_signal(struct file *file, int n)
return n;
}
+EXPORT_SYMBOL_GPL(eventfd_signal);
static int eventfd_release(struct inode *inode, struct file *file)
{
@@ -197,6 +199,7 @@ struct file *eventfd_fget(int fd)
return file;
}
+EXPORT_SYMBOL_GPL(eventfd_fget);
SYSCALL_DEFINE2(eventfd2, unsigned int, count, int, flags)
{
diff --git a/fs/exofs/common.h b/fs/exofs/common.h
index b1512c4bb8c..24667eedc02 100644
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -175,10 +175,4 @@ int exofs_async_op(struct osd_request *or,
int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr);
-int osd_req_read_kern(struct osd_request *or,
- const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
-int osd_req_write_kern(struct osd_request *or,
- const struct osd_obj_id *obj, u64 offset, void *buff, u64 len);
-
#endif /*ifndef __EXOFS_COM_H__*/
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index ba8d9fab469..77d0a295eb1 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -59,10 +59,9 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
struct inode *inode)
{
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
- struct request_queue *req_q = sbi->s_dev->scsi_device->request_queue;
pcol->sbi = sbi;
- pcol->req_q = req_q;
+ pcol->req_q = osd_request_queue(sbi->s_dev);
pcol->inode = inode;
pcol->expected_pages = expected_pages;
@@ -266,7 +265,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync)
goto err;
}
- osd_req_read(or, &obj, pcol->bio, i_start);
+ osd_req_read(or, &obj, i_start, pcol->bio, pcol->length);
if (is_sync) {
exofs_sync_op(or, pcol->sbi->s_timeout, oi->i_cred);
@@ -522,7 +521,8 @@ static int write_exec(struct page_collect *pcol)
*pcol_copy = *pcol;
- osd_req_write(or, &obj, pcol_copy->bio, i_start);
+ pcol_copy->bio->bi_rw |= (1 << BIO_RW); /* FIXME: bio_set_dir() */
+ osd_req_write(or, &obj, i_start, pcol_copy->bio, pcol_copy->length);
ret = exofs_async_op(or, writepages_done, pcol_copy, oi->i_cred);
if (unlikely(ret)) {
EXOFS_ERR("write_exec: exofs_async_op() Faild\n");
diff --git a/fs/exofs/osd.c b/fs/exofs/osd.c
index 06ca92672eb..b3d2ccb87aa 100644
--- a/fs/exofs/osd.c
+++ b/fs/exofs/osd.c
@@ -125,29 +125,3 @@ int extract_attr_from_req(struct osd_request *or, struct osd_attr *attr)
return -EIO;
}
-
-int osd_req_read_kern(struct osd_request *or,
- const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
- struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
- struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
- if (!bio)
- return -ENOMEM;
-
- osd_req_read(or, obj, bio, offset);
- return 0;
-}
-
-int osd_req_write_kern(struct osd_request *or,
- const struct osd_obj_id *obj, u64 offset, void* buff, u64 len)
-{
- struct request_queue *req_q = or->osd_dev->scsi_device->request_queue;
- struct bio *bio = bio_map_kern(req_q, buff, len, GFP_KERNEL);
-
- if (!bio)
- return -ENOMEM;
-
- osd_req_write(or, obj, bio, offset);
- return 0;
-}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 9f1985e857e..8216c5b77b5 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -200,20 +200,21 @@ static const struct export_operations exofs_export_ops;
/*
* Write the superblock to the OSD
*/
-static void exofs_write_super(struct super_block *sb)
+static int exofs_sync_fs(struct super_block *sb, int wait)
{
struct exofs_sb_info *sbi;
struct exofs_fscb *fscb;
struct osd_request *or;
struct osd_obj_id obj;
- int ret;
+ int ret = -ENOMEM;
fscb = kzalloc(sizeof(struct exofs_fscb), GFP_KERNEL);
if (!fscb) {
EXOFS_ERR("exofs_write_super: memory allocation failed.\n");
- return;
+ return -ENOMEM;
}
+ lock_super(sb);
lock_kernel();
sbi = sb->s_fs_info;
fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -246,7 +247,17 @@ out:
if (or)
osd_end_request(or);
unlock_kernel();
+ unlock_super(sb);
kfree(fscb);
+ return ret;
+}
+
+static void exofs_write_super(struct super_block *sb)
+{
+ if (!(sb->s_flags & MS_RDONLY))
+ exofs_sync_fs(sb, 1);
+ else
+ sb->s_dirt = 0;
}
/*
@@ -258,6 +269,11 @@ static void exofs_put_super(struct super_block *sb)
int num_pend;
struct exofs_sb_info *sbi = sb->s_fs_info;
+ lock_kernel();
+
+ if (sb->s_dirt)
+ exofs_write_super(sb);
+
/* make sure there are no pending commands */
for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
num_pend = atomic_read(&sbi->s_curr_pending)) {
@@ -271,6 +287,8 @@ static void exofs_put_super(struct super_block *sb)
osduld_put_device(sbi->s_dev);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
+
+ unlock_kernel();
}
/*
@@ -484,6 +502,7 @@ static const struct super_operations exofs_sops = {
.delete_inode = exofs_delete_inode,
.put_super = exofs_put_super,
.write_super = exofs_write_super,
+ .sync_fs = exofs_sync_fs,
.statfs = exofs_statfs,
};
diff --git a/fs/ext2/Makefile b/fs/ext2/Makefile
index e0b2b43c1fd..f42af45cfd8 100644
--- a/fs/ext2/Makefile
+++ b/fs/ext2/Makefile
@@ -4,7 +4,7 @@
obj-$(CONFIG_EXT2_FS) += ext2.o
-ext2-y := balloc.o dir.o file.o fsync.o ialloc.o inode.o \
+ext2-y := balloc.o dir.o file.o ialloc.o inode.o \
ioctl.o namei.o super.o symlink.o
ext2-$(CONFIG_EXT2_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 2999d72153b..003500498c2 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -720,5 +720,5 @@ const struct file_operations ext2_dir_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ext2_compat_ioctl,
#endif
- .fsync = ext2_sync_file,
+ .fsync = simple_fsync,
};
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 3203042b36e..f2e5811936d 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -27,7 +27,7 @@ struct ext2_inode_info {
/*
* i_block_group is the number of the block group which contains
* this file's inode. Constant across the lifetime of the inode,
- * it is ued for making block allocation decisions - we try to
+ * it is used for making block allocation decisions - we try to
* place a file's data blocks near its inode block, and new inodes
* near to their parent directory's inode.
*/
@@ -113,9 +113,6 @@ extern int ext2_empty_dir (struct inode *);
extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *);
-/* fsync.c */
-extern int ext2_sync_file (struct file *, struct dentry *, int);
-
/* ialloc.c */
extern struct inode * ext2_new_inode (struct inode *, int);
extern void ext2_free_inode (struct inode *);
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 45ed0712218..2b9e47dc922 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -55,7 +55,7 @@ const struct file_operations ext2_file_operations = {
.mmap = generic_file_mmap,
.open = generic_file_open,
.release = ext2_release_file,
- .fsync = ext2_sync_file,
+ .fsync = simple_fsync,
.splice_read = generic_file_splice_read,
.splice_write = generic_file_splice_write,
};
@@ -72,7 +72,7 @@ const struct file_operations ext2_xip_file_operations = {
.mmap = xip_file_mmap,
.open = generic_file_open,
.release = ext2_release_file,
- .fsync = ext2_sync_file,
+ .fsync = simple_fsync,
};
#endif
diff --git a/fs/ext2/fsync.c b/fs/ext2/fsync.c
deleted file mode 100644
index fc66c93fcb5..00000000000
--- a/fs/ext2/fsync.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * linux/fs/ext2/fsync.c
- *
- * Copyright (C) 1993 Stephen Tweedie (sct@dcs.ed.ac.uk)
- * from
- * Copyright (C) 1992 Remy Card (card@masi.ibp.fr)
- * Laboratoire MASI - Institut Blaise Pascal
- * Universite Pierre et Marie Curie (Paris VI)
- * from
- * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds
- *
- * ext2fs fsync primitive
- *
- * Big-endian to little-endian byte-swapping/bitmaps by
- * David S. Miller (davem@caip.rutgers.edu), 1995
- *
- * Removed unnecessary code duplication for little endian machines
- * and excessive __inline__s.
- * Andi Kleen, 1997
- *
- * Major simplications and cleanup - we only need to do the metadata, because
- * we can depend on generic_block_fdatasync() to sync the data blocks.
- */
-
-#include "ext2.h"
-#include <linux/buffer_head.h> /* for sync_mapping_buffers() */
-
-
-/*
- * File may be NULL when we are called. Perhaps we shouldn't
- * even pass file to fsync ?
- */
-
-int ext2_sync_file(struct file *file, struct dentry *dentry, int datasync)
-{
- struct inode *inode = dentry->d_inode;
- int err;
- int ret;
-
- ret = sync_mapping_buffers(inode->i_mapping);
- if (!(inode->i_state & I_DIRTY))
- return ret;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return ret;
-
- err = ext2_sync_inode(inode);
- if (ret == 0)
- ret = err;
- return ret;
-}
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index acf67883110..29ed682061f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -41,8 +41,6 @@ MODULE_AUTHOR("Remy Card and others");
MODULE_DESCRIPTION("Second Extended Filesystem");
MODULE_LICENSE("GPL");
-static int ext2_update_inode(struct inode * inode, int do_sync);
-
/*
* Test whether an inode is a fast symlink.
*/
@@ -66,7 +64,7 @@ void ext2_delete_inode (struct inode * inode)
goto no_delete;
EXT2_I(inode)->i_dtime = get_seconds();
mark_inode_dirty(inode);
- ext2_update_inode(inode, inode_needs_sync(inode));
+ ext2_write_inode(inode, inode_needs_sync(inode));
inode->i_size = 0;
if (inode->i_blocks)
@@ -1337,7 +1335,7 @@ bad_inode:
return ERR_PTR(ret);
}
-static int ext2_update_inode(struct inode * inode, int do_sync)
+int ext2_write_inode(struct inode *inode, int do_sync)
{
struct ext2_inode_info *ei = EXT2_I(inode);
struct super_block *sb = inode->i_sb;
@@ -1442,11 +1440,6 @@ static int ext2_update_inode(struct inode * inode, int do_sync)
return err;
}
-int ext2_write_inode(struct inode *inode, int wait)
-{
- return ext2_update_inode(inode, wait);
-}
-
int ext2_sync_inode(struct inode *inode)
{
struct writeback_control wbc = {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index e3c748faf2d..458999638c3 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -42,6 +42,7 @@ static void ext2_sync_super(struct super_block *sb,
struct ext2_super_block *es);
static int ext2_remount (struct super_block * sb, int * flags, char * data);
static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
+static int ext2_sync_fs(struct super_block *sb, int wait);
void ext2_error (struct super_block * sb, const char * function,
const char * fmt, ...)
@@ -114,6 +115,11 @@ static void ext2_put_super (struct super_block * sb)
int i;
struct ext2_sb_info *sbi = EXT2_SB(sb);
+ lock_kernel();
+
+ if (sb->s_dirt)
+ ext2_write_super(sb);
+
ext2_xattr_put_super(sb);
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
@@ -135,7 +141,7 @@ static void ext2_put_super (struct super_block * sb)
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
- return;
+ unlock_kernel();
}
static struct kmem_cache * ext2_inode_cachep;
@@ -304,6 +310,7 @@ static const struct super_operations ext2_sops = {
.delete_inode = ext2_delete_inode,
.put_super = ext2_put_super,
.write_super = ext2_write_super,
+ .sync_fs = ext2_sync_fs,
.statfs = ext2_statfs,
.remount_fs = ext2_remount,
.clear_inode = ext2_clear_inode,
@@ -1127,25 +1134,36 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es)
* set s_state to EXT2_VALID_FS after some corrections.
*/
-void ext2_write_super (struct super_block * sb)
+static int ext2_sync_fs(struct super_block *sb, int wait)
{
- struct ext2_super_block * es;
+ struct ext2_super_block *es = EXT2_SB(sb)->s_es;
+
lock_kernel();
- if (!(sb->s_flags & MS_RDONLY)) {
- es = EXT2_SB(sb)->s_es;
-
- if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
- ext2_debug ("setting valid to 0\n");
- es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
- es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
- es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
- es->s_mtime = cpu_to_le32(get_seconds());
- ext2_sync_super(sb, es);
- } else
- ext2_commit_super (sb, es);
+ if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
+ ext2_debug("setting valid to 0\n");
+ es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
+ es->s_free_blocks_count =
+ cpu_to_le32(ext2_count_free_blocks(sb));
+ es->s_free_inodes_count =
+ cpu_to_le32(ext2_count_free_inodes(sb));
+ es->s_mtime = cpu_to_le32(get_seconds());
+ ext2_sync_super(sb, es);
+ } else {
+ ext2_commit_super(sb, es);
}
sb->s_dirt = 0;
unlock_kernel();
+
+ return 0;
+}
+
+
+void ext2_write_super(struct super_block *sb)
+{
+ if (!(sb->s_flags & MS_RDONLY))
+ ext2_sync_fs(sb, 1);
+ else
+ sb->s_dirt = 0;
}
static int ext2_remount (struct super_block * sb, int * flags, char * data)
@@ -1157,6 +1175,8 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
unsigned long old_sb_flags;
int err;
+ lock_kernel();
+
/* Store the old options */
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
@@ -1192,12 +1212,16 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
}
- if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+ if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+ unlock_kernel();
return 0;
+ }
if (*flags & MS_RDONLY) {
if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
- !(sbi->s_mount_state & EXT2_VALID_FS))
+ !(sbi->s_mount_state & EXT2_VALID_FS)) {
+ unlock_kernel();
return 0;
+ }
/*
* OK, we are remounting a valid rw partition rdonly, so set
* the rdonly flag and then mark the partition as valid again.
@@ -1224,12 +1248,14 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
sb->s_flags &= ~MS_RDONLY;
}
ext2_sync_super(sb, es);
+ unlock_kernel();
return 0;
restore_opts:
sbi->s_mount_opt = old_opts.s_mount_opt;
sbi->s_resuid = old_opts.s_resuid;
sbi->s_resgid = old_opts.s_resgid;
sb->s_flags = old_sb_flags;
+ unlock_kernel();
return err;
}
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 225202db897..27967f92e82 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -649,7 +649,7 @@ do_more:
count = overflow;
goto do_more;
}
- sb->s_dirt = 1;
+
error_return:
brelse(bitmap_bh);
ext3_std_error(sb, err);
@@ -1708,7 +1708,6 @@ allocated:
if (!fatal)
fatal = err;
- sb->s_dirt = 1;
if (fatal)
goto out;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index dd13d60d524..b3999128513 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -181,7 +181,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
err = ext3_journal_dirty_metadata(handle, bitmap_bh);
if (!fatal)
fatal = err;
- sb->s_dirt = 1;
+
error_return:
brelse(bitmap_bh);
ext3_std_error(sb, fatal);
@@ -537,7 +537,6 @@ got:
percpu_counter_dec(&sbi->s_freeinodes_counter);
if (S_ISDIR(mode))
percpu_counter_inc(&sbi->s_dirs_counter);
- sb->s_dirt = 1;
inode->i_uid = current_fsuid();
if (test_opt (sb, GRPID))
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index fcfa2436185..b0248c6d5d4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -2960,7 +2960,6 @@ static int ext3_do_update_inode(handle_t *handle,
ext3_update_dynamic_rev(sb);
EXT3_SET_RO_COMPAT_FEATURE(sb,
EXT3_FEATURE_RO_COMPAT_LARGE_FILE);
- sb->s_dirt = 1;
handle->h_sync = 1;
err = ext3_journal_dirty_metadata(handle,
EXT3_SB(sb)->s_sbh);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 78fdf383637..8a0b26340b5 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -934,7 +934,6 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
EXT3_INODES_PER_GROUP(sb));
ext3_journal_dirty_metadata(handle, sbi->s_sbh);
- sb->s_dirt = 1;
exit_journal:
unlock_super(sb);
@@ -1066,7 +1065,6 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
}
es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
- sb->s_dirt = 1;
unlock_super(sb);
ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
o_blocks_count + add);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 3c70d52afb1..26aa64dee6a 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -67,7 +67,6 @@ static const char *ext3_decode_error(struct super_block * sb, int errno,
static int ext3_remount (struct super_block * sb, int * flags, char * data);
static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
static int ext3_unfreeze(struct super_block *sb);
-static void ext3_write_super (struct super_block * sb);
static int ext3_freeze(struct super_block *sb);
/*
@@ -399,6 +398,8 @@ static void ext3_put_super (struct super_block * sb)
struct ext3_super_block *es = sbi->s_es;
int i, err;
+ lock_kernel();
+
ext3_xattr_put_super(sb);
err = journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@@ -447,7 +448,8 @@ static void ext3_put_super (struct super_block * sb)
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
- return;
+
+ unlock_kernel();
}
static struct kmem_cache *ext3_inode_cachep;
@@ -761,7 +763,6 @@ static const struct super_operations ext3_sops = {
.dirty_inode = ext3_dirty_inode,
.delete_inode = ext3_delete_inode,
.put_super = ext3_put_super,
- .write_super = ext3_write_super,
.sync_fs = ext3_sync_fs,
.freeze_fs = ext3_freeze,
.unfreeze_fs = ext3_unfreeze,
@@ -1785,7 +1786,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
#else
es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
- sb->s_dirt = 1;
}
if (sbi->s_blocks_per_group > blocksize * 8) {
@@ -2265,7 +2265,6 @@ static int ext3_load_journal(struct super_block *sb,
if (journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
- sb->s_dirt = 1;
/* Make sure we flush the recovery flag to disk. */
ext3_commit_super(sb, es, 1);
@@ -2308,7 +2307,6 @@ static int ext3_create_journal(struct super_block * sb,
EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
es->s_journal_inum = cpu_to_le32(journal_inum);
- sb->s_dirt = 1;
/* Make sure we flush the recovery flag to disk. */
ext3_commit_super(sb, es, 1);
@@ -2354,7 +2352,6 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
sb->s_flags & MS_RDONLY) {
EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
- sb->s_dirt = 0;
ext3_commit_super(sb, es, 1);
}
unlock_super(sb);
@@ -2413,29 +2410,14 @@ int ext3_force_commit(struct super_block *sb)
return 0;
journal = EXT3_SB(sb)->s_journal;
- sb->s_dirt = 0;
ret = ext3_journal_force_commit(journal);
return ret;
}
-/*
- * Ext3 always journals updates to the superblock itself, so we don't
- * have to propagate any other updates to the superblock on disk at this
- * point. (We can probably nuke this function altogether, and remove
- * any mention to sb->s_dirt in all of fs/ext3; eventual cleanup...)
- */
-static void ext3_write_super (struct super_block * sb)
-{
- if (mutex_trylock(&sb->s_lock) != 0)
- BUG();
- sb->s_dirt = 0;
-}
-
static int ext3_sync_fs(struct super_block *sb, int wait)
{
tid_t target;
- sb->s_dirt = 0;
if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
@@ -2451,7 +2433,6 @@ static int ext3_freeze(struct super_block *sb)
{
int error = 0;
journal_t *journal;
- sb->s_dirt = 0;
if (!(sb->s_flags & MS_RDONLY)) {
journal = EXT3_SB(sb)->s_journal;
@@ -2509,7 +2490,10 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
int i;
#endif
+ lock_kernel();
+
/* Store the original options */
+ lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
old_opts.s_resuid = sbi->s_resuid;
@@ -2617,6 +2601,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
old_opts.s_qf_names[i] != sbi->s_qf_names[i])
kfree(old_opts.s_qf_names[i]);
#endif
+ unlock_super(sb);
+ unlock_kernel();
return 0;
restore_opts:
sb->s_flags = old_sb_flags;
@@ -2633,6 +2619,8 @@ restore_opts:
sbi->s_qf_names[i] = old_opts.s_qf_names[i];
}
#endif
+ unlock_super(sb);
+ unlock_kernel();
return err;
}
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 83b7be849bd..545e37c4b91 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -463,7 +463,6 @@ static void ext3_xattr_update_super_block(handle_t *handle,
if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) {
EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR);
- sb->s_dirt = 1;
ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
}
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f016707597a..012c4251397 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -576,6 +576,11 @@ static void ext4_put_super(struct super_block *sb)
struct ext4_super_block *es = sbi->s_es;
int i, err;
+ lock_super(sb);
+ lock_kernel();
+ if (sb->s_dirt)
+ ext4_commit_super(sb, 1);
+
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
@@ -642,8 +647,6 @@ static void ext4_put_super(struct super_block *sb)
unlock_super(sb);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
- lock_super(sb);
- lock_kernel();
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
}
@@ -3333,7 +3336,9 @@ int ext4_force_commit(struct super_block *sb)
static void ext4_write_super(struct super_block *sb)
{
+ lock_super(sb);
ext4_commit_super(sb, 1);
+ unlock_super(sb);
}
static int ext4_sync_fs(struct super_block *sb, int wait)
@@ -3417,7 +3422,10 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
int i;
#endif
+ lock_kernel();
+
/* Store the original options */
+ lock_super(sb);
old_sb_flags = sb->s_flags;
old_opts.s_mount_opt = sbi->s_mount_opt;
old_opts.s_resuid = sbi->s_resuid;
@@ -3551,6 +3559,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
old_opts.s_qf_names[i] != sbi->s_qf_names[i])
kfree(old_opts.s_qf_names[i]);
#endif
+ unlock_super(sb);
+ unlock_kernel();
return 0;
restore_opts:
@@ -3570,6 +3580,8 @@ restore_opts:
sbi->s_qf_names[i] = old_opts.s_qf_names[i];
}
#endif
+ unlock_super(sb);
+ unlock_kernel();
return err;
}
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index b4260229808..923990e4f16 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -241,7 +241,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
while (*fclus < cluster) {
/* prevent the infinite loop of cluster chain */
if (*fclus > limit) {
- fat_fs_panic(sb, "%s: detected the cluster chain loop"
+ fat_fs_error(sb, "%s: detected the cluster chain loop"
" (i_pos %lld)", __func__,
MSDOS_I(inode)->i_pos);
nr = -EIO;
@@ -252,7 +252,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
if (nr < 0)
goto out;
else if (nr == FAT_ENT_FREE) {
- fat_fs_panic(sb, "%s: invalid cluster chain"
+ fat_fs_error(sb, "%s: invalid cluster chain"
" (i_pos %lld)", __func__,
MSDOS_I(inode)->i_pos);
nr = -EIO;
@@ -285,7 +285,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
if (ret < 0)
return ret;
else if (ret == FAT_ENT_EOF) {
- fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)",
+ fat_fs_error(sb, "%s: request beyond EOF (i_pos %lld)",
__func__, MSDOS_I(inode)->i_pos);
return -EIO;
}
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 3a7f603b698..3b8e71b412f 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -840,7 +840,7 @@ const struct file_operations fat_dir_operations = {
#ifdef CONFIG_COMPAT
.compat_ioctl = fat_compat_dir_ioctl,
#endif
- .fsync = file_fsync,
+ .fsync = fat_file_fsync,
};
static int fat_get_short_entry(struct inode *dir, loff_t *pos,
@@ -967,7 +967,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
de++;
nr_slots--;
}
- mark_buffer_dirty(bh);
+ mark_buffer_dirty_inode(bh, dir);
if (IS_DIRSYNC(dir))
err = sync_dirty_buffer(bh);
brelse(bh);
@@ -1001,7 +1001,7 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo)
de--;
nr_slots--;
}
- mark_buffer_dirty(bh);
+ mark_buffer_dirty_inode(bh, dir);
if (IS_DIRSYNC(dir))
err = sync_dirty_buffer(bh);
brelse(bh);
@@ -1051,7 +1051,7 @@ static int fat_zeroed_cluster(struct inode *dir, sector_t blknr, int nr_used,
}
memset(bhs[n]->b_data, 0, sb->s_blocksize);
set_buffer_uptodate(bhs[n]);
- mark_buffer_dirty(bhs[n]);
+ mark_buffer_dirty_inode(bhs[n], dir);
n++;
blknr++;
@@ -1131,7 +1131,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts)
de[0].size = de[1].size = 0;
memset(de + 2, 0, sb->s_blocksize - 2 * sizeof(*de));
set_buffer_uptodate(bhs[0]);
- mark_buffer_dirty(bhs[0]);
+ mark_buffer_dirty_inode(bhs[0], dir);
err = fat_zeroed_cluster(dir, blknr, 1, bhs, MAX_BUF_PER_PAGE);
if (err)
@@ -1193,7 +1193,7 @@ static int fat_add_new_entries(struct inode *dir, void *slots, int nr_slots,
slots += copy;
size -= copy;
set_buffer_uptodate(bhs[n]);
- mark_buffer_dirty(bhs[n]);
+ mark_buffer_dirty_inode(bhs[n], dir);
if (!size)
break;
n++;
@@ -1293,7 +1293,7 @@ found:
for (i = 0; i < long_bhs; i++) {
int copy = min_t(int, sb->s_blocksize - offset, size);
memcpy(bhs[i]->b_data + offset, slots, copy);
- mark_buffer_dirty(bhs[i]);
+ mark_buffer_dirty_inode(bhs[i], dir);
offset = 0;
slots += copy;
size -= copy;
@@ -1304,7 +1304,7 @@ found:
/* Fill the short name slot. */
int copy = min_t(int, sb->s_blocksize - offset, size);
memcpy(bhs[i]->b_data + offset, slots, copy);
- mark_buffer_dirty(bhs[i]);
+ mark_buffer_dirty_inode(bhs[i], dir);
if (IS_DIRSYNC(dir))
err = sync_dirty_buffer(bhs[i]);
}
@@ -1334,7 +1334,7 @@ found:
goto error_remove;
}
if (dir->i_size & (sbi->cluster_size - 1)) {
- fat_fs_panic(sb, "Odd directory size");
+ fat_fs_error(sb, "Odd directory size");
dir->i_size = (dir->i_size + sbi->cluster_size - 1)
& ~((loff_t)sbi->cluster_size - 1);
}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index ea440d65819..adb0e72a176 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -17,6 +17,10 @@
#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */
#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */
+#define FAT_ERRORS_CONT 1 /* ignore error and continue */
+#define FAT_ERRORS_PANIC 2 /* panic on error */
+#define FAT_ERRORS_RO 3 /* remount r/o on error */
+
struct fat_mount_options {
uid_t fs_uid;
gid_t fs_gid;
@@ -26,6 +30,7 @@ struct fat_mount_options {
char *iocharset; /* Charset used for filename input/display */
unsigned short shortname; /* flags for shortname display/create rule */
unsigned char name_check; /* r = relaxed, n = normal, s = strict */
+ unsigned char errors; /* On error: continue, panic, remount-ro */
unsigned short allow_utime;/* permission for setting the [am]time */
unsigned quiet:1, /* set = fake successful chmods and chowns */
showexec:1, /* set = only set x bit for com/exe/bat */
@@ -74,6 +79,7 @@ struct msdos_sb_info {
int fatent_shift;
struct fatent_operations *fatent_ops;
+ struct inode *fat_inode;
spinlock_t inode_hash_lock;
struct hlist_head inode_hashtable[FAT_HASH_SIZE];
@@ -251,6 +257,7 @@ struct fat_entry {
} u;
int nr_bhs;
struct buffer_head *bhs[2];
+ struct inode *fat_inode;
};
static inline void fatent_init(struct fat_entry *fatent)
@@ -259,6 +266,7 @@ static inline void fatent_init(struct fat_entry *fatent)
fatent->entry = 0;
fatent->u.ent32_p = NULL;
fatent->bhs[0] = fatent->bhs[1] = NULL;
+ fatent->fat_inode = NULL;
}
static inline void fatent_set_entry(struct fat_entry *fatent, int entry)
@@ -275,6 +283,7 @@ static inline void fatent_brelse(struct fat_entry *fatent)
brelse(fatent->bhs[i]);
fatent->nr_bhs = 0;
fatent->bhs[0] = fatent->bhs[1] = NULL;
+ fatent->fat_inode = NULL;
}
extern void fat_ent_access_init(struct super_block *sb);
@@ -296,6 +305,8 @@ extern int fat_setattr(struct dentry * dentry, struct iattr * attr);
extern void fat_truncate(struct inode *inode);
extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
+extern int fat_file_fsync(struct file *file, struct dentry *dentry,
+ int datasync);
/* fat/inode.c */
extern void fat_attach(struct inode *inode, loff_t i_pos);
@@ -310,7 +321,7 @@ extern int fat_fill_super(struct super_block *sb, void *data, int silent,
extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
/* fat/misc.c */
-extern void fat_fs_panic(struct super_block *s, const char *fmt, ...)
+extern void fat_fs_error(struct super_block *s, const char *fmt, ...)
__attribute__ ((format (printf, 2, 3))) __cold;
extern void fat_clusters_flush(struct super_block *sb);
extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster);
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index da6eea47872..a81037721a6 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -73,6 +73,8 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent,
struct buffer_head **bhs = fatent->bhs;
WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
+ fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
+
bhs[0] = sb_bread(sb, blocknr);
if (!bhs[0])
goto err;
@@ -103,6 +105,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent,
struct fatent_operations *ops = MSDOS_SB(sb)->fatent_ops;
WARN_ON(blocknr < MSDOS_SB(sb)->fat_start);
+ fatent->fat_inode = MSDOS_SB(sb)->fat_inode;
fatent->bhs[0] = sb_bread(sb, blocknr);
if (!fatent->bhs[0]) {
printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n",
@@ -167,9 +170,9 @@ static void fat12_ent_put(struct fat_entry *fatent, int new)
}
spin_unlock(&fat12_entry_lock);
- mark_buffer_dirty(fatent->bhs[0]);
+ mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
if (fatent->nr_bhs == 2)
- mark_buffer_dirty(fatent->bhs[1]);
+ mark_buffer_dirty_inode(fatent->bhs[1], fatent->fat_inode);
}
static void fat16_ent_put(struct fat_entry *fatent, int new)
@@ -178,7 +181,7 @@ static void fat16_ent_put(struct fat_entry *fatent, int new)
new = EOF_FAT16;
*fatent->u.ent16_p = cpu_to_le16(new);
- mark_buffer_dirty(fatent->bhs[0]);
+ mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
}
static void fat32_ent_put(struct fat_entry *fatent, int new)
@@ -189,7 +192,7 @@ static void fat32_ent_put(struct fat_entry *fatent, int new)
WARN_ON(new & 0xf0000000);
new |= le32_to_cpu(*fatent->u.ent32_p) & ~0x0fffffff;
*fatent->u.ent32_p = cpu_to_le32(new);
- mark_buffer_dirty(fatent->bhs[0]);
+ mark_buffer_dirty_inode(fatent->bhs[0], fatent->fat_inode);
}
static int fat12_ent_next(struct fat_entry *fatent)
@@ -345,7 +348,7 @@ int fat_ent_read(struct inode *inode, struct fat_entry *fatent, int entry)
if (entry < FAT_START_ENT || sbi->max_cluster <= entry) {
fatent_brelse(fatent);
- fat_fs_panic(sb, "invalid access to FAT (entry 0x%08x)", entry);
+ fat_fs_error(sb, "invalid access to FAT (entry 0x%08x)", entry);
return -EIO;
}
@@ -381,7 +384,7 @@ static int fat_mirror_bhs(struct super_block *sb, struct buffer_head **bhs,
}
memcpy(c_bh->b_data, bhs[n]->b_data, sb->s_blocksize);
set_buffer_uptodate(c_bh);
- mark_buffer_dirty(c_bh);
+ mark_buffer_dirty_inode(c_bh, sbi->fat_inode);
if (sb->s_flags & MS_SYNCHRONOUS)
err = sync_dirty_buffer(c_bh);
brelse(c_bh);
@@ -557,7 +560,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
err = cluster;
goto error;
} else if (cluster == FAT_ENT_FREE) {
- fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF",
+ fat_fs_error(sb, "%s: deleting FAT entry beyond EOF",
__func__);
err = -EIO;
goto error;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 0a7f4a9918b..b28ea646ff6 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -18,106 +18,112 @@
#include <linux/security.h>
#include "fat.h"
-int fat_generic_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
+{
+ u32 attr;
+
+ mutex_lock(&inode->i_mutex);
+ attr = fat_make_attrs(inode);
+ mutex_unlock(&inode->i_mutex);
+
+ return put_user(attr, user_attr);
+}
+
+static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
{
+ struct inode *inode = file->f_path.dentry->d_inode;
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
- u32 __user *user_attr = (u32 __user *)arg;
+ int is_dir = S_ISDIR(inode->i_mode);
+ u32 attr, oldattr;
+ struct iattr ia;
+ int err;
- switch (cmd) {
- case FAT_IOCTL_GET_ATTRIBUTES:
- {
- u32 attr;
+ err = get_user(attr, user_attr);
+ if (err)
+ goto out;
- mutex_lock(&inode->i_mutex);
- attr = fat_make_attrs(inode);
- mutex_unlock(&inode->i_mutex);
+ mutex_lock(&inode->i_mutex);
+ err = mnt_want_write(file->f_path.mnt);
+ if (err)
+ goto out_unlock_inode;
- return put_user(attr, user_attr);
+ /*
+ * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
+ * prevents the user from turning us into a VFAT
+ * longname entry. Also, we obviously can't set
+ * any of the NTFS attributes in the high 24 bits.
+ */
+ attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR);
+ /* Merge in ATTR_VOLUME and ATTR_DIR */
+ attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
+ (is_dir ? ATTR_DIR : 0);
+ oldattr = fat_make_attrs(inode);
+
+ /* Equivalent to a chmod() */
+ ia.ia_valid = ATTR_MODE | ATTR_CTIME;
+ ia.ia_ctime = current_fs_time(inode->i_sb);
+ if (is_dir)
+ ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
+ else {
+ ia.ia_mode = fat_make_mode(sbi, attr,
+ S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
}
- case FAT_IOCTL_SET_ATTRIBUTES:
- {
- u32 attr, oldattr;
- int err, is_dir = S_ISDIR(inode->i_mode);
- struct iattr ia;
- err = get_user(attr, user_attr);
- if (err)
- return err;
+ /* The root directory has no attributes */
+ if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) {
+ err = -EINVAL;
+ goto out_drop_write;
+ }
- mutex_lock(&inode->i_mutex);
-
- err = mnt_want_write(filp->f_path.mnt);
- if (err)
- goto up_no_drop_write;
-
- /*
- * ATTR_VOLUME and ATTR_DIR cannot be changed; this also
- * prevents the user from turning us into a VFAT
- * longname entry. Also, we obviously can't set
- * any of the NTFS attributes in the high 24 bits.
- */
- attr &= 0xff & ~(ATTR_VOLUME | ATTR_DIR);
- /* Merge in ATTR_VOLUME and ATTR_DIR */
- attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) |
- (is_dir ? ATTR_DIR : 0);
- oldattr = fat_make_attrs(inode);
-
- /* Equivalent to a chmod() */
- ia.ia_valid = ATTR_MODE | ATTR_CTIME;
- ia.ia_ctime = current_fs_time(inode->i_sb);
- if (is_dir)
- ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO);
- else {
- ia.ia_mode = fat_make_mode(sbi, attr,
- S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO));
- }
+ if (sbi->options.sys_immutable &&
+ ((attr | oldattr) & ATTR_SYS) &&
+ !capable(CAP_LINUX_IMMUTABLE)) {
+ err = -EPERM;
+ goto out_drop_write;
+ }
- /* The root directory has no attributes */
- if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) {
- err = -EINVAL;
- goto up;
- }
+ /*
+ * The security check is questionable... We single
+ * out the RO attribute for checking by the security
+ * module, just because it maps to a file mode.
+ */
+ err = security_inode_setattr(file->f_path.dentry, &ia);
+ if (err)
+ goto out_drop_write;
- if (sbi->options.sys_immutable) {
- if ((attr | oldattr) & ATTR_SYS) {
- if (!capable(CAP_LINUX_IMMUTABLE)) {
- err = -EPERM;
- goto up;
- }
- }
- }
+ /* This MUST be done before doing anything irreversible... */
+ err = fat_setattr(file->f_path.dentry, &ia);
+ if (err)
+ goto out_drop_write;
+
+ fsnotify_change(file->f_path.dentry, ia.ia_valid);
+ if (sbi->options.sys_immutable) {
+ if (attr & ATTR_SYS)
+ inode->i_flags |= S_IMMUTABLE;
+ else
+ inode->i_flags &= S_IMMUTABLE;
+ }
- /*
- * The security check is questionable... We single
- * out the RO attribute for checking by the security
- * module, just because it maps to a file mode.
- */
- err = security_inode_setattr(filp->f_path.dentry, &ia);
- if (err)
- goto up;
-
- /* This MUST be done before doing anything irreversible... */
- err = fat_setattr(filp->f_path.dentry, &ia);
- if (err)
- goto up;
-
- fsnotify_change(filp->f_path.dentry, ia.ia_valid);
- if (sbi->options.sys_immutable) {
- if (attr & ATTR_SYS)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= S_IMMUTABLE;
- }
+ fat_save_attrs(inode, attr);
+ mark_inode_dirty(inode);
+out_drop_write:
+ mnt_drop_write(file->f_path.mnt);
+out_unlock_inode:
+ mutex_unlock(&inode->i_mutex);
+out:
+ return err;
+}
- fat_save_attrs(inode, attr);
- mark_inode_dirty(inode);
-up:
- mnt_drop_write(filp->f_path.mnt);
-up_no_drop_write:
- mutex_unlock(&inode->i_mutex);
- return err;
- }
+int fat_generic_ioctl(struct inode *inode, struct file *filp,
+ unsigned int cmd, unsigned long arg)
+{
+ u32 __user *user_attr = (u32 __user *)arg;
+
+ switch (cmd) {
+ case FAT_IOCTL_GET_ATTRIBUTES:
+ return fat_ioctl_get_attributes(inode, user_attr);
+ case FAT_IOCTL_SET_ATTRIBUTES:
+ return fat_ioctl_set_attributes(filp, user_attr);
default:
return -ENOTTY; /* Inappropriate ioctl for device */
}
@@ -133,6 +139,18 @@ static int fat_file_release(struct inode *inode, struct file *filp)
return 0;
}
+int fat_file_fsync(struct file *filp, struct dentry *dentry, int datasync)
+{
+ struct inode *inode = dentry->d_inode;
+ int res, err;
+
+ res = simple_fsync(filp, dentry, datasync);
+ err = sync_mapping_buffers(MSDOS_SB(inode->i_sb)->fat_inode->i_mapping);
+
+ return res ? res : err;
+}
+
+
const struct file_operations fat_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -142,7 +160,7 @@ const struct file_operations fat_file_operations = {
.mmap = generic_file_mmap,
.release = fat_file_release,
.ioctl = fat_generic_ioctl,
- .fsync = file_fsync,
+ .fsync = fat_file_fsync,
.splice_read = generic_file_splice_read,
};
@@ -213,7 +231,7 @@ static int fat_free(struct inode *inode, int skip)
fatent_brelse(&fatent);
return 0;
} else if (ret == FAT_ENT_FREE) {
- fat_fs_panic(sb,
+ fat_fs_error(sb,
"%s: invalid cluster chain (i_pos %lld)",
__func__, MSDOS_I(inode)->i_pos);
ret = -EIO;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 296785a0dec..304b411cb8b 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -76,7 +76,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
return 0;
if (iblock != MSDOS_I(inode)->mmu_private >> sb->s_blocksize_bits) {
- fat_fs_panic(sb, "corrupted file size (i_pos %lld, %lld)",
+ fat_fs_error(sb, "corrupted file size (i_pos %lld, %lld)",
MSDOS_I(inode)->i_pos, MSDOS_I(inode)->mmu_private);
return -EIO;
}
@@ -441,16 +441,35 @@ static void fat_clear_inode(struct inode *inode)
static void fat_write_super(struct super_block *sb)
{
+ lock_super(sb);
sb->s_dirt = 0;
if (!(sb->s_flags & MS_RDONLY))
fat_clusters_flush(sb);
+ unlock_super(sb);
+}
+
+static int fat_sync_fs(struct super_block *sb, int wait)
+{
+ lock_super(sb);
+ fat_clusters_flush(sb);
+ sb->s_dirt = 0;
+ unlock_super(sb);
+
+ return 0;
}
static void fat_put_super(struct super_block *sb)
{
struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ lock_kernel();
+
+ if (sb->s_dirt)
+ fat_write_super(sb);
+
+ iput(sbi->fat_inode);
+
if (sbi->nls_disk) {
unload_nls(sbi->nls_disk);
sbi->nls_disk = NULL;
@@ -467,6 +486,8 @@ static void fat_put_super(struct super_block *sb)
sb->s_fs_info = NULL;
kfree(sbi);
+
+ unlock_kernel();
}
static struct kmem_cache *fat_inode_cachep;
@@ -632,6 +653,7 @@ static const struct super_operations fat_sops = {
.delete_inode = fat_delete_inode,
.put_super = fat_put_super,
.write_super = fat_write_super,
+ .sync_fs = fat_sync_fs,
.statfs = fat_statfs,
.clear_inode = fat_clear_inode,
.remount_fs = fat_remount,
@@ -834,6 +856,12 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
seq_puts(m, ",flush");
if (opts->tz_utc)
seq_puts(m, ",tz=UTC");
+ if (opts->errors == FAT_ERRORS_CONT)
+ seq_puts(m, ",errors=continue");
+ else if (opts->errors == FAT_ERRORS_PANIC)
+ seq_puts(m, ",errors=panic");
+ else
+ seq_puts(m, ",errors=remount-ro");
return 0;
}
@@ -846,7 +874,8 @@ enum {
Opt_charset, Opt_shortname_lower, Opt_shortname_win95,
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
- Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err,
+ Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
+ Opt_err_panic, Opt_err_ro, Opt_err,
};
static const match_table_t fat_tokens = {
@@ -869,6 +898,11 @@ static const match_table_t fat_tokens = {
{Opt_showexec, "showexec"},
{Opt_debug, "debug"},
{Opt_immutable, "sys_immutable"},
+ {Opt_flush, "flush"},
+ {Opt_tz_utc, "tz=UTC"},
+ {Opt_err_cont, "errors=continue"},
+ {Opt_err_panic, "errors=panic"},
+ {Opt_err_ro, "errors=remount-ro"},
{Opt_obsolate, "conv=binary"},
{Opt_obsolate, "conv=text"},
{Opt_obsolate, "conv=auto"},
@@ -880,8 +914,6 @@ static const match_table_t fat_tokens = {
{Opt_obsolate, "cvf_format=%20s"},
{Opt_obsolate, "cvf_options=%100s"},
{Opt_obsolate, "posix"},
- {Opt_flush, "flush"},
- {Opt_tz_utc, "tz=UTC"},
{Opt_err, NULL},
};
static const match_table_t msdos_tokens = {
@@ -951,6 +983,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
opts->numtail = 1;
opts->usefree = opts->nocase = 0;
opts->tz_utc = 0;
+ opts->errors = FAT_ERRORS_RO;
*debug = 0;
if (!options)
@@ -1043,6 +1076,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
case Opt_tz_utc:
opts->tz_utc = 1;
break;
+ case Opt_err_cont:
+ opts->errors = FAT_ERRORS_CONT;
+ break;
+ case Opt_err_panic:
+ opts->errors = FAT_ERRORS_PANIC;
+ break;
+ case Opt_err_ro:
+ opts->errors = FAT_ERRORS_RO;
+ break;
/* msdos specific */
case Opt_dots:
@@ -1174,7 +1216,7 @@ static int fat_read_root(struct inode *inode)
int fat_fill_super(struct super_block *sb, void *data, int silent,
const struct inode_operations *fs_dir_inode_ops, int isvfat)
{
- struct inode *root_inode = NULL;
+ struct inode *root_inode = NULL, *fat_inode = NULL;
struct buffer_head *bh;
struct fat_boot_sector *b;
struct msdos_sb_info *sbi;
@@ -1414,6 +1456,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent,
}
error = -ENOMEM;
+ fat_inode = new_inode(sb);
+ if (!fat_inode)
+ goto out_fail;
+ MSDOS_I(fat_inode)->i_pos = 0;
+ sbi->fat_inode = fat_inode;
root_inode = new_inode(sb);
if (!root_inode)
goto out_fail;
@@ -1439,6 +1486,8 @@ out_invalid:
" on dev %s.\n", sb->s_id);
out_fail:
+ if (fat_inode)
+ iput(fat_inode);
if (root_inode)
iput(root_inode);
if (sbi->nls_io)
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index ac39ebcc149..a6c20473dfd 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -12,14 +12,19 @@
#include "fat.h"
/*
- * fat_fs_panic reports a severe file system problem and sets the file system
- * read-only. The file system can be made writable again by remounting it.
+ * fat_fs_error reports a file system problem that might indicate fa data
+ * corruption/inconsistency. Depending on 'errors' mount option the
+ * panic() is called, or error message is printed FAT and nothing is done,
+ * or filesystem is remounted read-only (default behavior).
+ * In case the file system is remounted read-only, it can be made writable
+ * again by remounting it.
*/
-void fat_fs_panic(struct super_block *s, const char *fmt, ...)
+void fat_fs_error(struct super_block *s, const char *fmt, ...)
{
+ struct fat_mount_options *opts = &MSDOS_SB(s)->options;
va_list args;
- printk(KERN_ERR "FAT: Filesystem panic (dev %s)\n", s->s_id);
+ printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id);
printk(KERN_ERR " ");
va_start(args, fmt);
@@ -27,13 +32,14 @@ void fat_fs_panic(struct super_block *s, const char *fmt, ...)
va_end(args);
printk("\n");
- if (!(s->s_flags & MS_RDONLY)) {
+ if (opts->errors == FAT_ERRORS_PANIC)
+ panic(" FAT fs panic from previous error\n");
+ else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) {
s->s_flags |= MS_RDONLY;
printk(KERN_ERR " File system has been set read-only\n");
}
}
-
-EXPORT_SYMBOL_GPL(fat_fs_panic);
+EXPORT_SYMBOL_GPL(fat_fs_error);
/* Flushes the number of free clusters on FAT32 */
/* XXX: Need to write one per FSINFO block. Currently only writes 1 */
@@ -124,7 +130,7 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster)
mark_inode_dirty(inode);
}
if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) {
- fat_fs_panic(sb, "clusters badly computed (%d != %llu)",
+ fat_fs_error(sb, "clusters badly computed (%d != %llu)",
new_fclus,
(llu)(inode->i_blocks >> (sbi->cluster_bits - 9)));
fat_cache_inval_inode(inode);
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index da3f361a37d..82f88733b68 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -544,7 +544,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
int start = MSDOS_I(new_dir)->i_logstart;
dotdot_de->start = cpu_to_le16(start);
dotdot_de->starthi = cpu_to_le16(start >> 16);
- mark_buffer_dirty(dotdot_bh);
+ mark_buffer_dirty_inode(dotdot_bh, old_inode);
if (IS_DIRSYNC(new_dir)) {
err = sync_dirty_buffer(dotdot_bh);
if (err)
@@ -586,7 +586,7 @@ error_dotdot:
int start = MSDOS_I(old_dir)->i_logstart;
dotdot_de->start = cpu_to_le16(start);
dotdot_de->starthi = cpu_to_le16(start >> 16);
- mark_buffer_dirty(dotdot_bh);
+ mark_buffer_dirty_inode(dotdot_bh, old_inode);
corrupt |= sync_dirty_buffer(dotdot_bh);
}
error_inode:
@@ -608,7 +608,7 @@ error_inode:
sinfo.bh = NULL;
}
if (corrupt < 0) {
- fat_fs_panic(new_dir->i_sb,
+ fat_fs_error(new_dir->i_sb,
"%s: Filesystem corrupted (i_pos %lld)",
__func__, sinfo.i_pos);
}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a0e00e3a46e..8d6fdcfd41d 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -965,7 +965,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
int start = MSDOS_I(new_dir)->i_logstart;
dotdot_de->start = cpu_to_le16(start);
dotdot_de->starthi = cpu_to_le16(start >> 16);
- mark_buffer_dirty(dotdot_bh);
+ mark_buffer_dirty_inode(dotdot_bh, old_inode);
if (IS_DIRSYNC(new_dir)) {
err = sync_dirty_buffer(dotdot_bh);
if (err)
@@ -1009,7 +1009,7 @@ error_dotdot:
int start = MSDOS_I(old_dir)->i_logstart;
dotdot_de->start = cpu_to_le16(start);
dotdot_de->starthi = cpu_to_le16(start >> 16);
- mark_buffer_dirty(dotdot_bh);
+ mark_buffer_dirty_inode(dotdot_bh, old_inode);
corrupt |= sync_dirty_buffer(dotdot_bh);
}
error_inode:
@@ -1030,7 +1030,7 @@ error_inode:
sinfo.bh = NULL;
}
if (corrupt < 0) {
- fat_fs_panic(new_dir->i_sb,
+ fat_fs_error(new_dir->i_sb,
"%s: Filesystem corrupted (i_pos %lld)",
__func__, sinfo.i_pos);
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 54018fe4884..334ce39881f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -214,7 +214,7 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
*/
if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
file_take_write(file);
- error = mnt_want_write(mnt);
+ error = mnt_clone_write(mnt);
WARN_ON(error);
}
return error;
@@ -399,6 +399,44 @@ too_bad:
return 0;
}
+/**
+ * mark_files_ro - mark all files read-only
+ * @sb: superblock in question
+ *
+ * All files are marked read-only. We don't care about pending
+ * delete files so this should be used in 'force' mode only.
+ */
+void mark_files_ro(struct super_block *sb)
+{
+ struct file *f;
+
+retry:
+ file_list_lock();
+ list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+ struct vfsmount *mnt;
+ if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
+ continue;
+ if (!file_count(f))
+ continue;
+ if (!(f->f_mode & FMODE_WRITE))
+ continue;
+ f->f_mode &= ~FMODE_WRITE;
+ if (file_check_writeable(f) != 0)
+ continue;
+ file_release_write(f);
+ mnt = mntget(f->f_path.mnt);
+ file_list_unlock();
+ /*
+ * This can sleep, so we can't hold
+ * the file_list_lock() spinlock.
+ */
+ mnt_drop_write(mnt);
+ mntput(mnt);
+ goto retry;
+ }
+ file_list_unlock();
+}
+
void __init files_init(unsigned long mempages)
{
int n;
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 1dacda83157..cdbd1654e4c 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -80,12 +80,16 @@ vxfs_put_super(struct super_block *sbp)
{
struct vxfs_sb_info *infp = VXFS_SBI(sbp);
+ lock_kernel();
+
vxfs_put_fake_inode(infp->vsi_fship);
vxfs_put_fake_inode(infp->vsi_ilist);
vxfs_put_fake_inode(infp->vsi_stilist);
brelse(infp->vsi_bp);
kfree(infp);
+
+ unlock_kernel();
}
/**
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 91013ff7dd5..40308e98c6a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -64,6 +64,28 @@ static void writeback_release(struct backing_dev_info *bdi)
clear_bit(BDI_pdflush, &bdi->state);
}
+static noinline void block_dump___mark_inode_dirty(struct inode *inode)
+{
+ if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
+ struct dentry *dentry;
+ const char *name = "?";
+
+ dentry = d_find_alias(inode);
+ if (dentry) {
+ spin_lock(&dentry->d_lock);
+ name = (const char *) dentry->d_name.name;
+ }
+ printk(KERN_DEBUG
+ "%s(%d): dirtied inode %lu (%s) on %s\n",
+ current->comm, task_pid_nr(current), inode->i_ino,
+ name, inode->i_sb->s_id);
+ if (dentry) {
+ spin_unlock(&dentry->d_lock);
+ dput(dentry);
+ }
+ }
+}
+
/**
* __mark_inode_dirty - internal function
* @inode: inode to mark
@@ -114,23 +136,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
if ((inode->i_state & flags) == flags)
return;
- if (unlikely(block_dump)) {
- struct dentry *dentry = NULL;
- const char *name = "?";
-
- if (!list_empty(&inode->i_dentry)) {
- dentry = list_entry(inode->i_dentry.next,
- struct dentry, d_alias);
- if (dentry && dentry->d_name.name)
- name = (const char *) dentry->d_name.name;
- }
-
- if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev"))
- printk(KERN_DEBUG
- "%s(%d): dirtied inode %lu (%s) on %s\n",
- current->comm, task_pid_nr(current), inode->i_ino,
- name, inode->i_sb->s_id);
- }
+ if (unlikely(block_dump))
+ block_dump___mark_inode_dirty(inode);
spin_lock(&inode_lock);
if ((inode->i_state & flags) != flags) {
@@ -289,7 +296,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
int ret;
BUG_ON(inode->i_state & I_SYNC);
- WARN_ON(inode->i_state & I_NEW);
/* Set I_SYNC, reset I_DIRTY */
dirty = inode->i_state & I_DIRTY;
@@ -314,7 +320,6 @@ __sync_single_inode(struct inode *inode, struct writeback_control *wbc)
}
spin_lock(&inode_lock);
- WARN_ON(inode->i_state & I_NEW);
inode->i_state &= ~I_SYNC;
if (!(inode->i_state & I_FREEING)) {
if (!(inode->i_state & I_DIRTY) &&
@@ -679,55 +684,6 @@ void sync_inodes_sb(struct super_block *sb, int wait)
}
/**
- * sync_inodes - writes all inodes to disk
- * @wait: wait for completion
- *
- * sync_inodes() goes through each super block's dirty inode list, writes the
- * inodes out, waits on the writeout and puts the inodes back on the normal
- * list.
- *
- * This is for sys_sync(). fsync_dev() uses the same algorithm. The subtle
- * part of the sync functions is that the blockdev "superblock" is processed
- * last. This is because the write_inode() function of a typical fs will
- * perform no I/O, but will mark buffers in the blockdev mapping as dirty.
- * What we want to do is to perform all that dirtying first, and then write
- * back all those inode blocks via the blockdev mapping in one sweep. So the
- * additional (somewhat redundant) sync_blockdev() calls here are to make
- * sure that really happens. Because if we call sync_inodes_sb(wait=1) with
- * outstanding dirty inodes, the writeback goes block-at-a-time within the
- * filesystem's write_inode(). This is extremely slow.
- */
-static void __sync_inodes(int wait)
-{
- struct super_block *sb;
-
- spin_lock(&sb_lock);
-restart:
- list_for_each_entry(sb, &super_blocks, s_list) {
- sb->s_count++;
- spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
- if (sb->s_root) {
- sync_inodes_sb(sb, wait);
- sync_blockdev(sb->s_bdev);
- }
- up_read(&sb->s_umount);
- spin_lock(&sb_lock);
- if (__put_super_and_need_restart(sb))
- goto restart;
- }
- spin_unlock(&sb_lock);
-}
-
-void sync_inodes(int wait)
-{
- __sync_inodes(0);
-
- if (wait)
- __sync_inodes(1);
-}
-
-/**
* write_inode_now - write an inode to disk
* @inode: inode to write to disk
* @sync: whether the write should be synchronous or not
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile
index 72437065f6a..e95eeb445e5 100644
--- a/fs/fuse/Makefile
+++ b/fs/fuse/Makefile
@@ -3,5 +3,6 @@
#
obj-$(CONFIG_FUSE_FS) += fuse.o
+obj-$(CONFIG_CUSE) += cuse.o
fuse-objs := dev.o dir.o file.o inode.o control.o
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
new file mode 100644
index 00000000000..de792dcf327
--- /dev/null
+++ b/fs/fuse/cuse.c
@@ -0,0 +1,610 @@
+/*
+ * CUSE: Character device in Userspace
+ *
+ * Copyright (C) 2008-2009 SUSE Linux Products GmbH
+ * Copyright (C) 2008-2009 Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * CUSE enables character devices to be implemented from userland much
+ * like FUSE allows filesystems. On initialization /dev/cuse is
+ * created. By opening the file and replying to the CUSE_INIT request
+ * userland CUSE server can create a character device. After that the
+ * operation is very similar to FUSE.
+ *
+ * A CUSE instance involves the following objects.
+ *
+ * cuse_conn : contains fuse_conn and serves as bonding structure
+ * channel : file handle connected to the userland CUSE server
+ * cdev : the implemented character device
+ * dev : generic device for cdev
+ *
+ * Note that 'channel' is what 'dev' is in FUSE. As CUSE deals with
+ * devices, it's called 'channel' to reduce confusion.
+ *
+ * channel determines when the character device dies. When channel is
+ * closed, everything begins to destruct. The cuse_conn is taken off
+ * the lookup table preventing further access from cdev, cdev and
+ * generic device are removed and the base reference of cuse_conn is
+ * put.
+ *
+ * On each open, the matching cuse_conn is looked up and if found an
+ * additional reference is taken which is released when the file is
+ * closed.
+ */
+
+#include <linux/fuse.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/kdev_t.h>
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/magic.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/spinlock.h>
+#include <linux/stat.h>
+
+#include "fuse_i.h"
+
+#define CUSE_CONNTBL_LEN 64
+
+struct cuse_conn {
+ struct list_head list; /* linked on cuse_conntbl */
+ struct fuse_conn fc; /* fuse connection */
+ struct cdev *cdev; /* associated character device */
+ struct device *dev; /* device representing @cdev */
+
+ /* init parameters, set once during initialization */
+ bool unrestricted_ioctl;
+};
+
+static DEFINE_SPINLOCK(cuse_lock); /* protects cuse_conntbl */
+static struct list_head cuse_conntbl[CUSE_CONNTBL_LEN];
+static struct class *cuse_class;
+
+static struct cuse_conn *fc_to_cc(struct fuse_conn *fc)
+{
+ return container_of(fc, struct cuse_conn, fc);
+}
+
+static struct list_head *cuse_conntbl_head(dev_t devt)
+{
+ return &cuse_conntbl[(MAJOR(devt) + MINOR(devt)) % CUSE_CONNTBL_LEN];
+}
+
+
+/**************************************************************************
+ * CUSE frontend operations
+ *
+ * These are file operations for the character device.
+ *
+ * On open, CUSE opens a file from the FUSE mnt and stores it to
+ * private_data of the open file. All other ops call FUSE ops on the
+ * FUSE file.
+ */
+
+static ssize_t cuse_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ loff_t pos = 0;
+
+ return fuse_direct_io(file, buf, count, &pos, 0);
+}
+
+static ssize_t cuse_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ loff_t pos = 0;
+ /*
+ * No locking or generic_write_checks(), the server is
+ * responsible for locking and sanity checks.
+ */
+ return fuse_direct_io(file, buf, count, &pos, 1);
+}
+
+static int cuse_open(struct inode *inode, struct file *file)
+{
+ dev_t devt = inode->i_cdev->dev;
+ struct cuse_conn *cc = NULL, *pos;
+ int rc;
+
+ /* look up and get the connection */
+ spin_lock(&cuse_lock);
+ list_for_each_entry(pos, cuse_conntbl_head(devt), list)
+ if (pos->dev->devt == devt) {
+ fuse_conn_get(&pos->fc);
+ cc = pos;
+ break;
+ }
+ spin_unlock(&cuse_lock);
+
+ /* dead? */
+ if (!cc)
+ return -ENODEV;
+
+ /*
+ * Generic permission check is already done against the chrdev
+ * file, proceed to open.
+ */
+ rc = fuse_do_open(&cc->fc, 0, file, 0);
+ if (rc)
+ fuse_conn_put(&cc->fc);
+ return rc;
+}
+
+static int cuse_release(struct inode *inode, struct file *file)
+{
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+
+ fuse_sync_release(ff, file->f_flags);
+ fuse_conn_put(fc);
+
+ return 0;
+}
+
+static long cuse_file_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fuse_file *ff = file->private_data;
+ struct cuse_conn *cc = fc_to_cc(ff->fc);
+ unsigned int flags = 0;
+
+ if (cc->unrestricted_ioctl)
+ flags |= FUSE_IOCTL_UNRESTRICTED;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct fuse_file *ff = file->private_data;
+ struct cuse_conn *cc = fc_to_cc(ff->fc);
+ unsigned int flags = FUSE_IOCTL_COMPAT;
+
+ if (cc->unrestricted_ioctl)
+ flags |= FUSE_IOCTL_UNRESTRICTED;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
+
+static const struct file_operations cuse_frontend_fops = {
+ .owner = THIS_MODULE,
+ .read = cuse_read,
+ .write = cuse_write,
+ .open = cuse_open,
+ .release = cuse_release,
+ .unlocked_ioctl = cuse_file_ioctl,
+ .compat_ioctl = cuse_file_compat_ioctl,
+ .poll = fuse_file_poll,
+};
+
+
+/**************************************************************************
+ * CUSE channel initialization and destruction
+ */
+
+struct cuse_devinfo {
+ const char *name;
+};
+
+/**
+ * cuse_parse_one - parse one key=value pair
+ * @pp: i/o parameter for the current position
+ * @end: points to one past the end of the packed string
+ * @keyp: out parameter for key
+ * @valp: out parameter for value
+ *
+ * *@pp points to packed strings - "key0=val0\0key1=val1\0" which ends
+ * at @end - 1. This function parses one pair and set *@keyp to the
+ * start of the key and *@valp to the start of the value. Note that
+ * the original string is modified such that the key string is
+ * terminated with '\0'. *@pp is updated to point to the next string.
+ *
+ * RETURNS:
+ * 1 on successful parse, 0 on EOF, -errno on failure.
+ */
+static int cuse_parse_one(char **pp, char *end, char **keyp, char **valp)
+{
+ char *p = *pp;
+ char *key, *val;
+
+ while (p < end && *p == '\0')
+ p++;
+ if (p == end)
+ return 0;
+
+ if (end[-1] != '\0') {
+ printk(KERN_ERR "CUSE: info not properly terminated\n");
+ return -EINVAL;
+ }
+
+ key = val = p;
+ p += strlen(p);
+
+ if (valp) {
+ strsep(&val, "=");
+ if (!val)
+ val = key + strlen(key);
+ key = strstrip(key);
+ val = strstrip(val);
+ } else
+ key = strstrip(key);
+
+ if (!strlen(key)) {
+ printk(KERN_ERR "CUSE: zero length info key specified\n");
+ return -EINVAL;
+ }
+
+ *pp = p;
+ *keyp = key;
+ if (valp)
+ *valp = val;
+
+ return 1;
+}
+
+/**
+ * cuse_parse_dev_info - parse device info
+ * @p: device info string
+ * @len: length of device info string
+ * @devinfo: out parameter for parsed device info
+ *
+ * Parse @p to extract device info and store it into @devinfo. String
+ * pointed to by @p is modified by parsing and @devinfo points into
+ * them, so @p shouldn't be freed while @devinfo is in use.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_parse_devinfo(char *p, size_t len, struct cuse_devinfo *devinfo)
+{
+ char *end = p + len;
+ char *key, *val;
+ int rc;
+
+ while (true) {
+ rc = cuse_parse_one(&p, end, &key, &val);
+ if (rc < 0)
+ return rc;
+ if (!rc)
+ break;
+ if (strcmp(key, "DEVNAME") == 0)
+ devinfo->name = val;
+ else
+ printk(KERN_WARNING "CUSE: unknown device info \"%s\"\n",
+ key);
+ }
+
+ if (!devinfo->name || !strlen(devinfo->name)) {
+ printk(KERN_ERR "CUSE: DEVNAME unspecified\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static void cuse_gendev_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+/**
+ * cuse_process_init_reply - finish initializing CUSE channel
+ *
+ * This function creates the character device and sets up all the
+ * required data structures for it. Please read the comment at the
+ * top of this file for high level overview.
+ */
+static void cuse_process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct cuse_conn *cc = fc_to_cc(fc);
+ struct cuse_init_out *arg = &req->misc.cuse_init_out;
+ struct page *page = req->pages[0];
+ struct cuse_devinfo devinfo = { };
+ struct device *dev;
+ struct cdev *cdev;
+ dev_t devt;
+ int rc;
+
+ if (req->out.h.error ||
+ arg->major != FUSE_KERNEL_VERSION || arg->minor < 11) {
+ goto err;
+ }
+
+ fc->minor = arg->minor;
+ fc->max_read = max_t(unsigned, arg->max_read, 4096);
+ fc->max_write = max_t(unsigned, arg->max_write, 4096);
+
+ /* parse init reply */
+ cc->unrestricted_ioctl = arg->flags & CUSE_UNRESTRICTED_IOCTL;
+
+ rc = cuse_parse_devinfo(page_address(page), req->out.args[1].size,
+ &devinfo);
+ if (rc)
+ goto err;
+
+ /* determine and reserve devt */
+ devt = MKDEV(arg->dev_major, arg->dev_minor);
+ if (!MAJOR(devt))
+ rc = alloc_chrdev_region(&devt, MINOR(devt), 1, devinfo.name);
+ else
+ rc = register_chrdev_region(devt, 1, devinfo.name);
+ if (rc) {
+ printk(KERN_ERR "CUSE: failed to register chrdev region\n");
+ goto err;
+ }
+
+ /* devt determined, create device */
+ rc = -ENOMEM;
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ goto err_region;
+
+ device_initialize(dev);
+ dev_set_uevent_suppress(dev, 1);
+ dev->class = cuse_class;
+ dev->devt = devt;
+ dev->release = cuse_gendev_release;
+ dev_set_drvdata(dev, cc);
+ dev_set_name(dev, "%s", devinfo.name);
+
+ rc = device_add(dev);
+ if (rc)
+ goto err_device;
+
+ /* register cdev */
+ rc = -ENOMEM;
+ cdev = cdev_alloc();
+ if (!cdev)
+ goto err_device;
+
+ cdev->owner = THIS_MODULE;
+ cdev->ops = &cuse_frontend_fops;
+
+ rc = cdev_add(cdev, devt, 1);
+ if (rc)
+ goto err_cdev;
+
+ cc->dev = dev;
+ cc->cdev = cdev;
+
+ /* make the device available */
+ spin_lock(&cuse_lock);
+ list_add(&cc->list, cuse_conntbl_head(devt));
+ spin_unlock(&cuse_lock);
+
+ /* announce device availability */
+ dev_set_uevent_suppress(dev, 0);
+ kobject_uevent(&dev->kobj, KOBJ_ADD);
+out:
+ __free_page(page);
+ return;
+
+err_cdev:
+ cdev_del(cdev);
+err_device:
+ put_device(dev);
+err_region:
+ unregister_chrdev_region(devt, 1);
+err:
+ fc->conn_error = 1;
+ goto out;
+}
+
+static int cuse_send_init(struct cuse_conn *cc)
+{
+ int rc;
+ struct fuse_req *req;
+ struct page *page;
+ struct fuse_conn *fc = &cc->fc;
+ struct cuse_init_in *arg;
+
+ BUILD_BUG_ON(CUSE_INIT_INFO_MAX > PAGE_SIZE);
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req)) {
+ rc = PTR_ERR(req);
+ goto err;
+ }
+
+ rc = -ENOMEM;
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ goto err_put_req;
+
+ arg = &req->misc.cuse_init_in;
+ arg->major = FUSE_KERNEL_VERSION;
+ arg->minor = FUSE_KERNEL_MINOR_VERSION;
+ arg->flags |= CUSE_UNRESTRICTED_IOCTL;
+ req->in.h.opcode = CUSE_INIT;
+ req->in.numargs = 1;
+ req->in.args[0].size = sizeof(struct cuse_init_in);
+ req->in.args[0].value = arg;
+ req->out.numargs = 2;
+ req->out.args[0].size = sizeof(struct cuse_init_out);
+ req->out.args[0].value = &req->misc.cuse_init_out;
+ req->out.args[1].size = CUSE_INIT_INFO_MAX;
+ req->out.argvar = 1;
+ req->out.argpages = 1;
+ req->pages[0] = page;
+ req->num_pages = 1;
+ req->end = cuse_process_init_reply;
+ fuse_request_send_background(fc, req);
+
+ return 0;
+
+err_put_req:
+ fuse_put_request(fc, req);
+err:
+ return rc;
+}
+
+static void cuse_fc_release(struct fuse_conn *fc)
+{
+ struct cuse_conn *cc = fc_to_cc(fc);
+ kfree(cc);
+}
+
+/**
+ * cuse_channel_open - open method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being opened
+ *
+ * Userland CUSE server can create a CUSE device by opening /dev/cuse
+ * and replying to the initilaization request kernel sends. This
+ * function is responsible for handling CUSE device initialization.
+ * Because the fd opened by this function is used during
+ * initialization, this function only creates cuse_conn and sends
+ * init. The rest is delegated to a kthread.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_open(struct inode *inode, struct file *file)
+{
+ struct cuse_conn *cc;
+ int rc;
+
+ /* set up cuse_conn */
+ cc = kzalloc(sizeof(*cc), GFP_KERNEL);
+ if (!cc)
+ return -ENOMEM;
+
+ fuse_conn_init(&cc->fc);
+
+ INIT_LIST_HEAD(&cc->list);
+ cc->fc.release = cuse_fc_release;
+
+ cc->fc.connected = 1;
+ cc->fc.blocked = 0;
+ rc = cuse_send_init(cc);
+ if (rc) {
+ fuse_conn_put(&cc->fc);
+ return rc;
+ }
+ file->private_data = &cc->fc; /* channel owns base reference to cc */
+
+ return 0;
+}
+
+/**
+ * cuse_channel_release - release method for /dev/cuse
+ * @inode: inode for /dev/cuse
+ * @file: file struct being closed
+ *
+ * Disconnect the channel, deregister CUSE device and initiate
+ * destruction by putting the default reference.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int cuse_channel_release(struct inode *inode, struct file *file)
+{
+ struct cuse_conn *cc = fc_to_cc(file->private_data);
+ int rc;
+
+ /* remove from the conntbl, no more access from this point on */
+ spin_lock(&cuse_lock);
+ list_del_init(&cc->list);
+ spin_unlock(&cuse_lock);
+
+ /* remove device */
+ if (cc->dev)
+ device_unregister(cc->dev);
+ if (cc->cdev) {
+ unregister_chrdev_region(cc->cdev->dev, 1);
+ cdev_del(cc->cdev);
+ }
+
+ /* kill connection and shutdown channel */
+ fuse_conn_kill(&cc->fc);
+ rc = fuse_dev_release(inode, file); /* puts the base reference */
+
+ return rc;
+}
+
+static struct file_operations cuse_channel_fops; /* initialized during init */
+
+
+/**************************************************************************
+ * Misc stuff and module initializatiion
+ *
+ * CUSE exports the same set of attributes to sysfs as fusectl.
+ */
+
+static ssize_t cuse_class_waiting_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cuse_conn *cc = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting));
+}
+
+static ssize_t cuse_class_abort_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct cuse_conn *cc = dev_get_drvdata(dev);
+
+ fuse_abort_conn(&cc->fc);
+ return count;
+}
+
+static struct device_attribute cuse_class_dev_attrs[] = {
+ __ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL),
+ __ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store),
+ { }
+};
+
+static struct miscdevice cuse_miscdev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "cuse",
+ .fops = &cuse_channel_fops,
+};
+
+static int __init cuse_init(void)
+{
+ int i, rc;
+
+ /* init conntbl */
+ for (i = 0; i < CUSE_CONNTBL_LEN; i++)
+ INIT_LIST_HEAD(&cuse_conntbl[i]);
+
+ /* inherit and extend fuse_dev_operations */
+ cuse_channel_fops = fuse_dev_operations;
+ cuse_channel_fops.owner = THIS_MODULE;
+ cuse_channel_fops.open = cuse_channel_open;
+ cuse_channel_fops.release = cuse_channel_release;
+
+ cuse_class = class_create(THIS_MODULE, "cuse");
+ if (IS_ERR(cuse_class))
+ return PTR_ERR(cuse_class);
+
+ cuse_class->dev_attrs = cuse_class_dev_attrs;
+
+ rc = misc_register(&cuse_miscdev);
+ if (rc) {
+ class_destroy(cuse_class);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void __exit cuse_exit(void)
+{
+ misc_deregister(&cuse_miscdev);
+ class_destroy(cuse_class);
+}
+
+module_init(cuse_init);
+module_exit(cuse_exit);
+
+MODULE_AUTHOR("Tejun Heo <tj@kernel.org>");
+MODULE_DESCRIPTION("Character device in Userspace");
+MODULE_LICENSE("GPL");
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ba76b68c52f..8fed2ed12f3 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -46,6 +46,7 @@ struct fuse_req *fuse_request_alloc(void)
fuse_request_init(req);
return req;
}
+EXPORT_SYMBOL_GPL(fuse_request_alloc);
struct fuse_req *fuse_request_alloc_nofs(void)
{
@@ -124,6 +125,7 @@ struct fuse_req *fuse_get_req(struct fuse_conn *fc)
atomic_dec(&fc->num_waiting);
return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(fuse_get_req);
/*
* Return request in fuse_file->reserved_req. However that may
@@ -208,6 +210,7 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
fuse_request_free(req);
}
}
+EXPORT_SYMBOL_GPL(fuse_put_request);
static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
@@ -282,7 +285,7 @@ __releases(&fc->lock)
wake_up_all(&fc->blocked_waitq);
}
if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
- fc->connected) {
+ fc->connected && fc->bdi_initialized) {
clear_bdi_congested(&fc->bdi, READ);
clear_bdi_congested(&fc->bdi, WRITE);
}
@@ -400,6 +403,7 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
}
spin_unlock(&fc->lock);
}
+EXPORT_SYMBOL_GPL(fuse_request_send);
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
struct fuse_req *req)
@@ -408,7 +412,8 @@ static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
fc->num_background++;
if (fc->num_background == FUSE_MAX_BACKGROUND)
fc->blocked = 1;
- if (fc->num_background == FUSE_CONGESTION_THRESHOLD) {
+ if (fc->num_background == FUSE_CONGESTION_THRESHOLD &&
+ fc->bdi_initialized) {
set_bdi_congested(&fc->bdi, READ);
set_bdi_congested(&fc->bdi, WRITE);
}
@@ -439,6 +444,7 @@ void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
req->isreply = 1;
fuse_request_send_nowait(fc, req);
}
+EXPORT_SYMBOL_GPL(fuse_request_send_background);
/*
* Called under fc->lock
@@ -1105,8 +1111,9 @@ void fuse_abort_conn(struct fuse_conn *fc)
}
spin_unlock(&fc->lock);
}
+EXPORT_SYMBOL_GPL(fuse_abort_conn);
-static int fuse_dev_release(struct inode *inode, struct file *file)
+int fuse_dev_release(struct inode *inode, struct file *file)
{
struct fuse_conn *fc = fuse_get_conn(file);
if (fc) {
@@ -1120,6 +1127,7 @@ static int fuse_dev_release(struct inode *inode, struct file *file)
return 0;
}
+EXPORT_SYMBOL_GPL(fuse_dev_release);
static int fuse_dev_fasync(int fd, struct file *file, int on)
{
@@ -1142,6 +1150,7 @@ const struct file_operations fuse_dev_operations = {
.release = fuse_dev_release,
.fasync = fuse_dev_fasync,
};
+EXPORT_SYMBOL_GPL(fuse_dev_operations);
static struct miscdevice fuse_miscdevice = {
.minor = FUSE_MINOR,
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8b8eebc5614..b3089a083d3 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -362,19 +362,6 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
}
/*
- * Synchronous release for the case when something goes wrong in CREATE_OPEN
- */
-static void fuse_sync_release(struct fuse_conn *fc, struct fuse_file *ff,
- u64 nodeid, int flags)
-{
- fuse_release_fill(ff, nodeid, flags, FUSE_RELEASE);
- ff->reserved_req->force = 1;
- fuse_request_send(fc, ff->reserved_req);
- fuse_put_request(fc, ff->reserved_req);
- kfree(ff);
-}
-
-/*
* Atomic create+open operation
*
* If the filesystem doesn't support this, then fall back to separate
@@ -445,12 +432,14 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
goto out_free_ff;
fuse_put_request(fc, req);
+ ff->fh = outopen.fh;
+ ff->nodeid = outentry.nodeid;
+ ff->open_flags = outopen.open_flags;
inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
&outentry.attr, entry_attr_timeout(&outentry), 0);
if (!inode) {
flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
- ff->fh = outopen.fh;
- fuse_sync_release(fc, ff, outentry.nodeid, flags);
+ fuse_sync_release(ff, flags);
fuse_send_forget(fc, forget_req, outentry.nodeid, 1);
return -ENOMEM;
}
@@ -460,11 +449,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
fuse_invalidate_attr(dir);
file = lookup_instantiate_filp(nd, entry, generic_file_open);
if (IS_ERR(file)) {
- ff->fh = outopen.fh;
- fuse_sync_release(fc, ff, outentry.nodeid, flags);
+ fuse_sync_release(ff, flags);
return PTR_ERR(file);
}
- fuse_finish_open(inode, file, ff, &outopen);
+ file->private_data = fuse_file_get(ff);
+ fuse_finish_open(inode, file);
return 0;
out_free_ff:
@@ -1035,7 +1024,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir)
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
- fuse_read_fill(req, file, inode, file->f_pos, PAGE_SIZE, FUSE_READDIR);
+ fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR);
fuse_request_send(fc, req);
nbytes = req->out.args[0].size;
err = req->out.h.error;
@@ -1101,12 +1090,14 @@ static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
static int fuse_dir_open(struct inode *inode, struct file *file)
{
- return fuse_open_common(inode, file, 1);
+ return fuse_open_common(inode, file, true);
}
static int fuse_dir_release(struct inode *inode, struct file *file)
{
- return fuse_release_common(inode, file, 1);
+ fuse_release_common(file, FUSE_RELEASEDIR);
+
+ return 0;
}
static int fuse_dir_fsync(struct file *file, struct dentry *de, int datasync)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 06f30e96567..fce6ce694fd 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -12,13 +12,13 @@
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/module.h>
static const struct file_operations fuse_direct_io_file_operations;
-static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
- struct fuse_open_out *outargp)
+static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ int opcode, struct fuse_open_out *outargp)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_open_in inarg;
struct fuse_req *req;
int err;
@@ -31,8 +31,8 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
if (!fc->atomic_o_trunc)
inarg.flags &= ~O_TRUNC;
- req->in.h.opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.opcode = opcode;
+ req->in.h.nodeid = nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -49,22 +49,27 @@ static int fuse_send_open(struct inode *inode, struct file *file, int isdir,
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
struct fuse_file *ff;
+
ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
- if (ff) {
- ff->reserved_req = fuse_request_alloc();
- if (!ff->reserved_req) {
- kfree(ff);
- return NULL;
- } else {
- INIT_LIST_HEAD(&ff->write_entry);
- atomic_set(&ff->count, 0);
- spin_lock(&fc->lock);
- ff->kh = ++fc->khctr;
- spin_unlock(&fc->lock);
- }
- RB_CLEAR_NODE(&ff->polled_node);
- init_waitqueue_head(&ff->poll_wait);
+ if (unlikely(!ff))
+ return NULL;
+
+ ff->fc = fc;
+ ff->reserved_req = fuse_request_alloc();
+ if (unlikely(!ff->reserved_req)) {
+ kfree(ff);
+ return NULL;
}
+
+ INIT_LIST_HEAD(&ff->write_entry);
+ atomic_set(&ff->count, 0);
+ RB_CLEAR_NODE(&ff->polled_node);
+ init_waitqueue_head(&ff->poll_wait);
+
+ spin_lock(&fc->lock);
+ ff->kh = ++fc->khctr;
+ spin_unlock(&fc->lock);
+
return ff;
}
@@ -74,7 +79,7 @@ void fuse_file_free(struct fuse_file *ff)
kfree(ff);
}
-static struct fuse_file *fuse_file_get(struct fuse_file *ff)
+struct fuse_file *fuse_file_get(struct fuse_file *ff)
{
atomic_inc(&ff->count);
return ff;
@@ -82,40 +87,65 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff)
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- dput(req->misc.release.dentry);
- mntput(req->misc.release.vfsmount);
+ path_put(&req->misc.release.path);
}
static void fuse_file_put(struct fuse_file *ff)
{
if (atomic_dec_and_test(&ff->count)) {
struct fuse_req *req = ff->reserved_req;
- struct inode *inode = req->misc.release.dentry->d_inode;
- struct fuse_conn *fc = get_fuse_conn(inode);
+
req->end = fuse_release_end;
- fuse_request_send_background(fc, req);
+ fuse_request_send_background(ff->fc, req);
kfree(ff);
}
}
-void fuse_finish_open(struct inode *inode, struct file *file,
- struct fuse_file *ff, struct fuse_open_out *outarg)
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ bool isdir)
{
- if (outarg->open_flags & FOPEN_DIRECT_IO)
+ struct fuse_open_out outarg;
+ struct fuse_file *ff;
+ int err;
+ int opcode = isdir ? FUSE_OPENDIR : FUSE_OPEN;
+
+ ff = fuse_file_alloc(fc);
+ if (!ff)
+ return -ENOMEM;
+
+ err = fuse_send_open(fc, nodeid, file, opcode, &outarg);
+ if (err) {
+ fuse_file_free(ff);
+ return err;
+ }
+
+ if (isdir)
+ outarg.open_flags &= ~FOPEN_DIRECT_IO;
+
+ ff->fh = outarg.fh;
+ ff->nodeid = nodeid;
+ ff->open_flags = outarg.open_flags;
+ file->private_data = fuse_file_get(ff);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fuse_do_open);
+
+void fuse_finish_open(struct inode *inode, struct file *file)
+{
+ struct fuse_file *ff = file->private_data;
+
+ if (ff->open_flags & FOPEN_DIRECT_IO)
file->f_op = &fuse_direct_io_file_operations;
- if (!(outarg->open_flags & FOPEN_KEEP_CACHE))
+ if (!(ff->open_flags & FOPEN_KEEP_CACHE))
invalidate_inode_pages2(inode->i_mapping);
- if (outarg->open_flags & FOPEN_NONSEEKABLE)
+ if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
- ff->fh = outarg->fh;
- file->private_data = fuse_file_get(ff);
}
-int fuse_open_common(struct inode *inode, struct file *file, int isdir)
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_open_out outarg;
- struct fuse_file *ff;
int err;
/* VFS checks this, but only _after_ ->open() */
@@ -126,78 +156,85 @@ int fuse_open_common(struct inode *inode, struct file *file, int isdir)
if (err)
return err;
- ff = fuse_file_alloc(fc);
- if (!ff)
- return -ENOMEM;
-
- err = fuse_send_open(inode, file, isdir, &outarg);
+ err = fuse_do_open(fc, get_node_id(inode), file, isdir);
if (err)
- fuse_file_free(ff);
- else {
- if (isdir)
- outarg.open_flags &= ~FOPEN_DIRECT_IO;
- fuse_finish_open(inode, file, ff, &outarg);
- }
+ return err;
- return err;
+ fuse_finish_open(inode, file);
+
+ return 0;
}
-void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode)
+static void fuse_prepare_release(struct fuse_file *ff, int flags, int opcode)
{
+ struct fuse_conn *fc = ff->fc;
struct fuse_req *req = ff->reserved_req;
struct fuse_release_in *inarg = &req->misc.release.in;
+ spin_lock(&fc->lock);
+ list_del(&ff->write_entry);
+ if (!RB_EMPTY_NODE(&ff->polled_node))
+ rb_erase(&ff->polled_node, &fc->polled_files);
+ spin_unlock(&fc->lock);
+
+ wake_up_interruptible_sync(&ff->poll_wait);
+
inarg->fh = ff->fh;
inarg->flags = flags;
req->in.h.opcode = opcode;
- req->in.h.nodeid = nodeid;
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_release_in);
req->in.args[0].value = inarg;
}
-int fuse_release_common(struct inode *inode, struct file *file, int isdir)
+void fuse_release_common(struct file *file, int opcode)
{
- struct fuse_file *ff = file->private_data;
- if (ff) {
- struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = ff->reserved_req;
-
- fuse_release_fill(ff, get_node_id(inode), file->f_flags,
- isdir ? FUSE_RELEASEDIR : FUSE_RELEASE);
+ struct fuse_file *ff;
+ struct fuse_req *req;
- /* Hold vfsmount and dentry until release is finished */
- req->misc.release.vfsmount = mntget(file->f_path.mnt);
- req->misc.release.dentry = dget(file->f_path.dentry);
+ ff = file->private_data;
+ if (unlikely(!ff))
+ return;
- spin_lock(&fc->lock);
- list_del(&ff->write_entry);
- if (!RB_EMPTY_NODE(&ff->polled_node))
- rb_erase(&ff->polled_node, &fc->polled_files);
- spin_unlock(&fc->lock);
+ req = ff->reserved_req;
+ fuse_prepare_release(ff, file->f_flags, opcode);
- wake_up_interruptible_sync(&ff->poll_wait);
- /*
- * Normally this will send the RELEASE request,
- * however if some asynchronous READ or WRITE requests
- * are outstanding, the sending will be delayed
- */
- fuse_file_put(ff);
- }
+ /* Hold vfsmount and dentry until release is finished */
+ path_get(&file->f_path);
+ req->misc.release.path = file->f_path;
- /* Return value is ignored by VFS */
- return 0;
+ /*
+ * Normally this will send the RELEASE request, however if
+ * some asynchronous READ or WRITE requests are outstanding,
+ * the sending will be delayed.
+ */
+ fuse_file_put(ff);
}
static int fuse_open(struct inode *inode, struct file *file)
{
- return fuse_open_common(inode, file, 0);
+ return fuse_open_common(inode, file, false);
}
static int fuse_release(struct inode *inode, struct file *file)
{
- return fuse_release_common(inode, file, 0);
+ fuse_release_common(file, FUSE_RELEASE);
+
+ /* return value is ignored by VFS */
+ return 0;
+}
+
+void fuse_sync_release(struct fuse_file *ff, int flags)
+{
+ WARN_ON(atomic_read(&ff->count) > 1);
+ fuse_prepare_release(ff, flags, FUSE_RELEASE);
+ ff->reserved_req->force = 1;
+ fuse_request_send(ff->fc, ff->reserved_req);
+ fuse_put_request(ff->fc, ff->reserved_req);
+ kfree(ff);
}
+EXPORT_SYMBOL_GPL(fuse_sync_release);
/*
* Scramble the ID space with XTEA, so that the value of the files_struct
@@ -371,8 +408,8 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
return fuse_fsync_common(file, de, datasync, 0);
}
-void fuse_read_fill(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count, int opcode)
+void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
+ size_t count, int opcode)
{
struct fuse_read_in *inarg = &req->misc.read.in;
struct fuse_file *ff = file->private_data;
@@ -382,7 +419,7 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
inarg->size = count;
inarg->flags = file->f_flags;
req->in.h.opcode = opcode;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(struct fuse_read_in);
req->in.args[0].value = inarg;
@@ -392,12 +429,12 @@ void fuse_read_fill(struct fuse_req *req, struct file *file,
}
static size_t fuse_send_read(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count,
- fl_owner_t owner)
+ loff_t pos, size_t count, fl_owner_t owner)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
- fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+ fuse_read_fill(req, file, pos, count, FUSE_READ);
if (owner != NULL) {
struct fuse_read_in *inarg = &req->misc.read.in;
@@ -455,7 +492,7 @@ static int fuse_readpage(struct file *file, struct page *page)
req->out.argpages = 1;
req->num_pages = 1;
req->pages[0] = page;
- num_read = fuse_send_read(req, file, inode, pos, count, NULL);
+ num_read = fuse_send_read(req, file, pos, count, NULL);
err = req->out.h.error;
fuse_put_request(fc, req);
@@ -504,19 +541,18 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
fuse_file_put(req->ff);
}
-static void fuse_send_readpages(struct fuse_req *req, struct file *file,
- struct inode *inode)
+static void fuse_send_readpages(struct fuse_req *req, struct file *file)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
loff_t pos = page_offset(req->pages[0]);
size_t count = req->num_pages << PAGE_CACHE_SHIFT;
req->out.argpages = 1;
req->out.page_zeroing = 1;
- fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+ fuse_read_fill(req, file, pos, count, FUSE_READ);
req->misc.read.attr_ver = fuse_get_attr_version(fc);
if (fc->async_read) {
- struct fuse_file *ff = file->private_data;
req->ff = fuse_file_get(ff);
req->end = fuse_readpages_end;
fuse_request_send_background(fc, req);
@@ -546,7 +582,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
(req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
(req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
- fuse_send_readpages(req, data->file, inode);
+ fuse_send_readpages(req, data->file);
data->req = req = fuse_get_req(fc);
if (IS_ERR(req)) {
unlock_page(page);
@@ -580,7 +616,7 @@ static int fuse_readpages(struct file *file, struct address_space *mapping,
err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data);
if (!err) {
if (data.req->num_pages)
- fuse_send_readpages(data.req, file, inode);
+ fuse_send_readpages(data.req, file);
else
fuse_put_request(fc, data.req);
}
@@ -607,24 +643,19 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
return generic_file_aio_read(iocb, iov, nr_segs, pos);
}
-static void fuse_write_fill(struct fuse_req *req, struct file *file,
- struct fuse_file *ff, struct inode *inode,
- loff_t pos, size_t count, int writepage)
+static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff,
+ loff_t pos, size_t count)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_write_in *inarg = &req->misc.write.in;
struct fuse_write_out *outarg = &req->misc.write.out;
- memset(inarg, 0, sizeof(struct fuse_write_in));
inarg->fh = ff->fh;
inarg->offset = pos;
inarg->size = count;
- inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
- inarg->flags = file ? file->f_flags : 0;
req->in.h.opcode = FUSE_WRITE;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 2;
- if (fc->minor < 9)
+ if (ff->fc->minor < 9)
req->in.args[0].size = FUSE_COMPAT_WRITE_IN_SIZE;
else
req->in.args[0].size = sizeof(struct fuse_write_in);
@@ -636,13 +667,15 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
}
static size_t fuse_send_write(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count,
- fl_owner_t owner)
+ loff_t pos, size_t count, fl_owner_t owner)
{
- struct fuse_conn *fc = get_fuse_conn(inode);
- fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
+ struct fuse_write_in *inarg = &req->misc.write.in;
+
+ fuse_write_fill(req, ff, pos, count);
+ inarg->flags = file->f_flags;
if (owner != NULL) {
- struct fuse_write_in *inarg = &req->misc.write.in;
inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
inarg->lock_owner = fuse_lock_owner_id(fc, owner);
}
@@ -700,7 +733,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
req->num_pages = 1;
req->pages[0] = page;
req->page_offset = offset;
- nres = fuse_send_write(req, file, inode, pos, count, NULL);
+ nres = fuse_send_write(req, file, pos, count, NULL);
err = req->out.h.error;
fuse_put_request(fc, req);
if (!err && !nres)
@@ -741,7 +774,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
for (i = 0; i < req->num_pages; i++)
fuse_wait_on_page_writeback(inode, req->pages[i]->index);
- res = fuse_send_write(req, file, inode, pos, count, NULL);
+ res = fuse_send_write(req, file, pos, count, NULL);
offset = req->page_offset;
count = res;
@@ -979,25 +1012,23 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
return 0;
}
-static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos, int write)
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, int write)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_file *ff = file->private_data;
+ struct fuse_conn *fc = ff->fc;
size_t nmax = write ? fc->max_write : fc->max_read;
loff_t pos = *ppos;
ssize_t res = 0;
struct fuse_req *req;
- if (is_bad_inode(inode))
- return -EIO;
-
req = fuse_get_req(fc);
if (IS_ERR(req))
return PTR_ERR(req);
while (count) {
size_t nres;
+ fl_owner_t owner = current->files;
size_t nbytes = min(count, nmax);
int err = fuse_get_user_pages(req, buf, &nbytes, write);
if (err) {
@@ -1006,11 +1037,10 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
}
if (write)
- nres = fuse_send_write(req, file, inode, pos, nbytes,
- current->files);
+ nres = fuse_send_write(req, file, pos, nbytes, owner);
else
- nres = fuse_send_read(req, file, inode, pos, nbytes,
- current->files);
+ nres = fuse_send_read(req, file, pos, nbytes, owner);
+
fuse_release_user_pages(req, !write);
if (req->out.h.error) {
if (!res)
@@ -1034,20 +1064,27 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
}
}
fuse_put_request(fc, req);
- if (res > 0) {
- if (write)
- fuse_write_update_size(inode, pos);
+ if (res > 0)
*ppos = pos;
- }
- fuse_invalidate_attr(inode);
return res;
}
+EXPORT_SYMBOL_GPL(fuse_direct_io);
static ssize_t fuse_direct_read(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
- return fuse_direct_io(file, buf, count, ppos, 0);
+ ssize_t res;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ res = fuse_direct_io(file, buf, count, ppos, 0);
+
+ fuse_invalidate_attr(inode);
+
+ return res;
}
static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
@@ -1055,12 +1092,22 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
{
struct inode *inode = file->f_path.dentry->d_inode;
ssize_t res;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
/* Don't allow parallel writes to the same file */
mutex_lock(&inode->i_mutex);
res = generic_write_checks(file, ppos, &count, 0);
- if (!res)
+ if (!res) {
res = fuse_direct_io(file, buf, count, ppos, 1);
+ if (res > 0)
+ fuse_write_update_size(inode, *ppos);
+ }
mutex_unlock(&inode->i_mutex);
+
+ fuse_invalidate_attr(inode);
+
return res;
}
@@ -1177,9 +1224,10 @@ static int fuse_writepage_locked(struct page *page)
req->ff = fuse_file_get(ff);
spin_unlock(&fc->lock);
- fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
+ fuse_write_fill(req, ff, page_offset(page), 0);
copy_highpage(tmp_page, page);
+ req->misc.write.in.write_flags |= FUSE_WRITE_CACHE;
req->in.argpages = 1;
req->num_pages = 1;
req->pages[0] = tmp_page;
@@ -1603,12 +1651,11 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
* limits ioctl data transfers to well-formed ioctls and is the forced
* behavior for all FUSE servers.
*/
-static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
- unsigned long arg, unsigned int flags)
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ unsigned int flags)
{
- struct inode *inode = file->f_dentry->d_inode;
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_conn *fc = ff->fc;
struct fuse_ioctl_in inarg = {
.fh = ff->fh,
.cmd = cmd,
@@ -1627,13 +1674,6 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
/* assume all the iovs returned by client always fits in a page */
BUILD_BUG_ON(sizeof(struct iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE);
- if (!fuse_allow_task(fc, current))
- return -EACCES;
-
- err = -EIO;
- if (is_bad_inode(inode))
- goto out;
-
err = -ENOMEM;
pages = kzalloc(sizeof(pages[0]) * FUSE_MAX_PAGES_PER_REQ, GFP_KERNEL);
iov_page = alloc_page(GFP_KERNEL);
@@ -1694,7 +1734,7 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
/* okay, let's send it to the client */
req->in.h.opcode = FUSE_IOCTL;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -1777,17 +1817,33 @@ static long fuse_file_do_ioctl(struct file *file, unsigned int cmd,
return err ? err : outarg.result;
}
+EXPORT_SYMBOL_GPL(fuse_do_ioctl);
+
+static long fuse_file_ioctl_common(struct file *file, unsigned int cmd,
+ unsigned long arg, unsigned int flags)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ if (!fuse_allow_task(fc, current))
+ return -EACCES;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ return fuse_do_ioctl(file, cmd, arg, flags);
+}
static long fuse_file_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- return fuse_file_do_ioctl(file, cmd, arg, 0);
+ return fuse_file_ioctl_common(file, cmd, arg, 0);
}
static long fuse_file_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
- return fuse_file_do_ioctl(file, cmd, arg, FUSE_IOCTL_COMPAT);
+ return fuse_file_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT);
}
/*
@@ -1841,11 +1897,10 @@ static void fuse_register_polled_file(struct fuse_conn *fc,
spin_unlock(&fc->lock);
}
-static unsigned fuse_file_poll(struct file *file, poll_table *wait)
+unsigned fuse_file_poll(struct file *file, poll_table *wait)
{
- struct inode *inode = file->f_dentry->d_inode;
struct fuse_file *ff = file->private_data;
- struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_conn *fc = ff->fc;
struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
struct fuse_poll_out outarg;
struct fuse_req *req;
@@ -1870,7 +1925,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait)
return PTR_ERR(req);
req->in.h.opcode = FUSE_POLL;
- req->in.h.nodeid = get_node_id(inode);
+ req->in.h.nodeid = ff->nodeid;
req->in.numargs = 1;
req->in.args[0].size = sizeof(inarg);
req->in.args[0].value = &inarg;
@@ -1889,6 +1944,7 @@ static unsigned fuse_file_poll(struct file *file, poll_table *wait)
}
return POLLERR;
}
+EXPORT_SYMBOL_GPL(fuse_file_poll);
/*
* This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 6fc5aedaa0d..aaf2f9ff970 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -97,8 +97,13 @@ struct fuse_inode {
struct list_head writepages;
};
+struct fuse_conn;
+
/** FUSE specific file data */
struct fuse_file {
+ /** Fuse connection for this file */
+ struct fuse_conn *fc;
+
/** Request reserved for flush and release */
struct fuse_req *reserved_req;
@@ -108,9 +113,15 @@ struct fuse_file {
/** File handle used by userspace */
u64 fh;
+ /** Node id of this file */
+ u64 nodeid;
+
/** Refcount */
atomic_t count;
+ /** FOPEN_* flags returned by open */
+ u32 open_flags;
+
/** Entry on inode's write_files list */
struct list_head write_entry;
@@ -185,8 +196,6 @@ enum fuse_req_state {
FUSE_REQ_FINISHED
};
-struct fuse_conn;
-
/**
* A request to the client
*/
@@ -248,11 +257,12 @@ struct fuse_req {
struct fuse_forget_in forget_in;
struct {
struct fuse_release_in in;
- struct vfsmount *vfsmount;
- struct dentry *dentry;
+ struct path path;
} release;
struct fuse_init_in init_in;
struct fuse_init_out init_out;
+ struct cuse_init_in cuse_init_in;
+ struct cuse_init_out cuse_init_out;
struct {
struct fuse_read_in in;
u64 attr_ver;
@@ -386,6 +396,9 @@ struct fuse_conn {
/** Filesystem supports NFS exporting. Only set in INIT */
unsigned export_support:1;
+ /** Set if bdi is valid */
+ unsigned bdi_initialized:1;
+
/*
* The following bitfields are only for optimization purposes
* and hence races in setting them will not cause malfunction
@@ -515,25 +528,24 @@ void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
* Initialize READ or READDIR request
*/
void fuse_read_fill(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count, int opcode);
+ loff_t pos, size_t count, int opcode);
/**
* Send OPEN or OPENDIR request
*/
-int fuse_open_common(struct inode *inode, struct file *file, int isdir);
+int fuse_open_common(struct inode *inode, struct file *file, bool isdir);
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc);
+struct fuse_file *fuse_file_get(struct fuse_file *ff);
void fuse_file_free(struct fuse_file *ff);
-void fuse_finish_open(struct inode *inode, struct file *file,
- struct fuse_file *ff, struct fuse_open_out *outarg);
+void fuse_finish_open(struct inode *inode, struct file *file);
-/** Fill in ff->reserved_req with a RELEASE request */
-void fuse_release_fill(struct fuse_file *ff, u64 nodeid, int flags, int opcode);
+void fuse_sync_release(struct fuse_file *ff, int flags);
/**
* Send RELEASE or RELEASEDIR request
*/
-int fuse_release_common(struct inode *inode, struct file *file, int isdir);
+void fuse_release_common(struct file *file, int opcode);
/**
* Send FSYNC or FSYNCDIR request
@@ -652,10 +664,12 @@ void fuse_invalidate_entry_cache(struct dentry *entry);
*/
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
+void fuse_conn_kill(struct fuse_conn *fc);
+
/**
* Initialize fuse_conn
*/
-int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb);
+void fuse_conn_init(struct fuse_conn *fc);
/**
* Release reference to fuse_conn
@@ -694,4 +708,13 @@ void fuse_release_nowrite(struct inode *inode);
u64 fuse_get_attr_version(struct fuse_conn *fc);
+int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
+ bool isdir);
+ssize_t fuse_direct_io(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos, int write);
+long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ unsigned int flags);
+unsigned fuse_file_poll(struct file *file, poll_table *wait);
+int fuse_dev_release(struct inode *inode, struct file *file);
+
#endif /* _FS_FUSE_I_H */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 91f7c85f1ff..f0df55a5292 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -277,11 +277,14 @@ static void fuse_send_destroy(struct fuse_conn *fc)
}
}
-static void fuse_put_super(struct super_block *sb)
+static void fuse_bdi_destroy(struct fuse_conn *fc)
{
- struct fuse_conn *fc = get_fuse_conn_super(sb);
+ if (fc->bdi_initialized)
+ bdi_destroy(&fc->bdi);
+}
- fuse_send_destroy(fc);
+void fuse_conn_kill(struct fuse_conn *fc)
+{
spin_lock(&fc->lock);
fc->connected = 0;
fc->blocked = 0;
@@ -295,7 +298,16 @@ static void fuse_put_super(struct super_block *sb)
list_del(&fc->entry);
fuse_ctl_remove_conn(fc);
mutex_unlock(&fuse_mutex);
- bdi_destroy(&fc->bdi);
+ fuse_bdi_destroy(fc);
+}
+EXPORT_SYMBOL_GPL(fuse_conn_kill);
+
+static void fuse_put_super(struct super_block *sb)
+{
+ struct fuse_conn *fc = get_fuse_conn_super(sb);
+
+ fuse_send_destroy(fc);
+ fuse_conn_kill(fc);
fuse_conn_put(fc);
}
@@ -466,10 +478,8 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
-int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
+void fuse_conn_init(struct fuse_conn *fc)
{
- int err;
-
memset(fc, 0, sizeof(*fc));
spin_lock_init(&fc->lock);
mutex_init(&fc->inst_mutex);
@@ -484,49 +494,12 @@ int fuse_conn_init(struct fuse_conn *fc, struct super_block *sb)
INIT_LIST_HEAD(&fc->bg_queue);
INIT_LIST_HEAD(&fc->entry);
atomic_set(&fc->num_waiting, 0);
- fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
- fc->bdi.unplug_io_fn = default_unplug_io_fn;
- /* fuse does it's own writeback accounting */
- fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
fc->khctr = 0;
fc->polled_files = RB_ROOT;
- fc->dev = sb->s_dev;
- err = bdi_init(&fc->bdi);
- if (err)
- goto error_mutex_destroy;
- if (sb->s_bdev) {
- err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
- MAJOR(fc->dev), MINOR(fc->dev));
- } else {
- err = bdi_register_dev(&fc->bdi, fc->dev);
- }
- if (err)
- goto error_bdi_destroy;
- /*
- * For a single fuse filesystem use max 1% of dirty +
- * writeback threshold.
- *
- * This gives about 1M of write buffer for memory maps on a
- * machine with 1G and 10% dirty_ratio, which should be more
- * than enough.
- *
- * Privileged users can raise it by writing to
- *
- * /sys/class/bdi/<bdi>/max_ratio
- */
- bdi_set_max_ratio(&fc->bdi, 1);
fc->reqctr = 0;
fc->blocked = 1;
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
-
- return 0;
-
- error_bdi_destroy:
- bdi_destroy(&fc->bdi);
- error_mutex_destroy:
- mutex_destroy(&fc->inst_mutex);
- return err;
}
EXPORT_SYMBOL_GPL(fuse_conn_init);
@@ -539,12 +512,14 @@ void fuse_conn_put(struct fuse_conn *fc)
fc->release(fc);
}
}
+EXPORT_SYMBOL_GPL(fuse_conn_put);
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
atomic_inc(&fc->count);
return fc;
}
+EXPORT_SYMBOL_GPL(fuse_conn_get);
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
{
@@ -797,6 +772,48 @@ static void fuse_free_conn(struct fuse_conn *fc)
kfree(fc);
}
+static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
+{
+ int err;
+
+ fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ fc->bdi.unplug_io_fn = default_unplug_io_fn;
+ /* fuse does it's own writeback accounting */
+ fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+
+ err = bdi_init(&fc->bdi);
+ if (err)
+ return err;
+
+ fc->bdi_initialized = 1;
+
+ if (sb->s_bdev) {
+ err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
+ MAJOR(fc->dev), MINOR(fc->dev));
+ } else {
+ err = bdi_register_dev(&fc->bdi, fc->dev);
+ }
+
+ if (err)
+ return err;
+
+ /*
+ * For a single fuse filesystem use max 1% of dirty +
+ * writeback threshold.
+ *
+ * This gives about 1M of write buffer for memory maps on a
+ * machine with 1G and 10% dirty_ratio, which should be more
+ * than enough.
+ *
+ * Privileged users can raise it by writing to
+ *
+ * /sys/class/bdi/<bdi>/max_ratio
+ */
+ bdi_set_max_ratio(&fc->bdi, 1);
+
+ return 0;
+}
+
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
struct fuse_conn *fc;
@@ -843,11 +860,12 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (!fc)
goto err_fput;
- err = fuse_conn_init(fc, sb);
- if (err) {
- kfree(fc);
- goto err_fput;
- }
+ fuse_conn_init(fc);
+
+ fc->dev = sb->s_dev;
+ err = fuse_bdi_init(fc, sb);
+ if (err)
+ goto err_put_conn;
fc->release = fuse_free_conn;
fc->flags = d.flags;
@@ -911,7 +929,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
err_put_root:
dput(root_dentry);
err_put_conn:
- bdi_destroy(&fc->bdi);
+ fuse_bdi_destroy(fc);
fuse_conn_put(fc);
err_fput:
fput(file);
diff --git a/fs/gfs2/Makefile b/fs/gfs2/Makefile
index d53a9bea1c2..3da2f1f4f73 100644
--- a/fs/gfs2/Makefile
+++ b/fs/gfs2/Makefile
@@ -1,3 +1,4 @@
+EXTRA_CFLAGS := -I$(src)
obj-$(CONFIG_GFS2_FS) += gfs2.o
gfs2-y := acl.o bmap.o dir.o eaops.o eattr.o glock.o \
glops.o inode.o log.o lops.o main.o meta_io.o \
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 329763530dc..6d47379e794 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -25,6 +25,7 @@
#include "trans.h"
#include "dir.h"
#include "util.h"
+#include "trace_gfs2.h"
/* This doesn't need to be that large as max 64 bit pointers in a 4k
* block is 512, so __u16 is fine for that. It saves stack space to
@@ -589,6 +590,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
clear_buffer_mapped(bh_map);
clear_buffer_new(bh_map);
clear_buffer_boundary(bh_map);
+ trace_gfs2_bmap(ip, bh_map, lblock, create, 1);
if (gfs2_is_dir(ip)) {
bsize = sdp->sd_jbsize;
arr = sdp->sd_jheightsize;
@@ -623,6 +625,7 @@ int gfs2_block_map(struct inode *inode, sector_t lblock,
ret = 0;
out:
release_metapath(&mp);
+ trace_gfs2_bmap(ip, bh_map, lblock, create, ret);
bmap_unlock(ip, create);
return ret;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2bf62bcc518..297421c0427 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -39,6 +39,8 @@
#include "super.h"
#include "util.h"
#include "bmap.h"
+#define CREATE_TRACE_POINTS
+#include "trace_gfs2.h"
struct gfs2_gl_hash_bucket {
struct hlist_head hb_list;
@@ -155,7 +157,7 @@ static void glock_free(struct gfs2_glock *gl)
if (aspace)
gfs2_aspace_put(aspace);
-
+ trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gfs2_glock_cachep, gl);
}
@@ -317,14 +319,17 @@ restart:
return 2;
gh->gh_error = ret;
list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
gfs2_holder_wake(gh);
goto restart;
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
+ trace_gfs2_promote(gh, 1);
gfs2_holder_wake(gh);
goto restart;
}
set_bit(HIF_HOLDER, &gh->gh_iflags);
+ trace_gfs2_promote(gh, 0);
gfs2_holder_wake(gh);
continue;
}
@@ -354,6 +359,7 @@ static inline void do_error(struct gfs2_glock *gl, const int ret)
else
continue;
list_del_init(&gh->gh_list);
+ trace_gfs2_glock_queue(gh, 0);
gfs2_holder_wake(gh);
}
}
@@ -422,6 +428,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
int rv;
spin_lock(&gl->gl_spin);
+ trace_gfs2_glock_state_change(gl, state);
state_change(gl, state);
gh = find_first_waiter(gl);
@@ -851,6 +858,7 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state,
gl->gl_demote_state != state) {
gl->gl_demote_state = LM_ST_UNLOCKED;
}
+ trace_gfs2_demote_rq(gl);
}
/**
@@ -936,6 +944,7 @@ fail:
goto do_cancel;
return;
}
+ trace_gfs2_glock_queue(gh, 1);
list_add_tail(&gh->gh_list, insert_pt);
do_cancel:
gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
@@ -1032,6 +1041,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
!test_bit(GLF_DEMOTE, &gl->gl_flags))
fast_path = 1;
}
+ trace_gfs2_glock_queue(gh, 0);
spin_unlock(&gl->gl_spin);
if (likely(fast_path))
return;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index aa62cf5976e..13c6237c5f6 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -28,6 +28,7 @@
#include "meta_io.h"
#include "util.h"
#include "dir.h"
+#include "trace_gfs2.h"
#define PULL 1
@@ -313,6 +314,7 @@ int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
gfs2_log_lock(sdp);
}
atomic_sub(blks, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, -blks);
gfs2_log_unlock(sdp);
mutex_unlock(&sdp->sd_log_reserve_mutex);
@@ -333,6 +335,7 @@ void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
gfs2_log_lock(sdp);
atomic_add(blks, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, blks);
gfs2_assert_withdraw(sdp,
atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
gfs2_log_unlock(sdp);
@@ -558,6 +561,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
gfs2_log_lock(sdp);
atomic_add(dist, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, dist);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= sdp->sd_jdesc->jd_blocks);
gfs2_log_unlock(sdp);
@@ -715,6 +719,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
up_write(&sdp->sd_log_flush_lock);
return;
}
+ trace_gfs2_log_flush(sdp, 1);
ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL);
INIT_LIST_HEAD(&ai->ai_ail1_list);
@@ -746,6 +751,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
gfs2_log_lock(sdp);
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
+ trace_gfs2_log_blocks(sdp, -1);
gfs2_log_unlock(sdp);
log_write_header(sdp, 0, PULL);
}
@@ -763,8 +769,7 @@ void __gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)
ai = NULL;
}
gfs2_log_unlock(sdp);
-
- sdp->sd_vfs->s_dirt = 0;
+ trace_gfs2_log_flush(sdp, 0);
up_write(&sdp->sd_log_flush_lock);
kfree(ai);
@@ -788,6 +793,7 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved);
unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved;
atomic_add(unused, &sdp->sd_log_blks_free);
+ trace_gfs2_log_blocks(sdp, unused);
gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
sdp->sd_jdesc->jd_blocks);
sdp->sd_log_blks_reserved = reserved;
@@ -823,7 +829,6 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
log_refund(sdp, tr);
buf_lo_incore_commit(sdp, tr);
- sdp->sd_vfs->s_dirt = 1;
up_read(&sdp->sd_log_flush_lock);
gfs2_log_lock(sdp);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index 00315f50fa4..9969ff062c5 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -27,6 +27,7 @@
#include "rgrp.h"
#include "trans.h"
#include "util.h"
+#include "trace_gfs2.h"
/**
* gfs2_pin - Pin a buffer in memory
@@ -53,6 +54,7 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
if (bd->bd_ail)
list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
get_bh(bh);
+ trace_gfs2_pin(bd, 1);
}
/**
@@ -89,6 +91,7 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
bd->bd_ail = ai;
list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
+ trace_gfs2_pin(bd, 0);
gfs2_log_unlock(sdp);
unlock_buffer(bh);
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index cc34f271b3e..7bc3c45cd67 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -33,6 +33,7 @@
#include "log.h"
#include "quota.h"
#include "dir.h"
+#include "trace_gfs2.h"
#define DO 0
#define UNDO 1
@@ -775,6 +776,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
/* Map the extents for this journal's blocks */
map_journal_extents(sdp);
}
+ trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free));
if (sdp->sd_lockstruct.ls_first) {
unsigned int x;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index de3239731db..daa4ae341a2 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -29,6 +29,7 @@
#include "util.h"
#include "log.h"
#include "inode.h"
+#include "trace_gfs2.h"
#define BFITNOENT ((u32)~0)
#define NO_BLOCK ((u64)~0)
@@ -1519,7 +1520,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone -= *n;
spin_unlock(&sdp->sd_rindex_spin);
-
+ trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
*bn = block;
return 0;
@@ -1571,7 +1572,7 @@ u64 gfs2_alloc_di(struct gfs2_inode *dip, u64 *generation)
spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone--;
spin_unlock(&sdp->sd_rindex_spin);
-
+ trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
return block;
}
@@ -1591,7 +1592,7 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
-
+ trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1619,7 +1620,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
if (!rgd)
return;
-
+ trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
rgd->rd_free += blen;
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
@@ -1642,6 +1643,7 @@ void gfs2_unlink_di(struct inode *inode)
rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);
if (!rgd)
return;
+ trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_trans_add_rg(rgd);
@@ -1673,6 +1675,7 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
{
gfs2_free_uninit_di(rgd, ip->i_no_addr);
+ trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE);
gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);
gfs2_meta_wipe(ip, ip->i_no_addr, 1);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 40bcc37e5a7..0a680133647 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -788,17 +788,6 @@ restart:
}
/**
- * gfs2_write_super
- * @sb: the superblock
- *
- */
-
-static void gfs2_write_super(struct super_block *sb)
-{
- sb->s_dirt = 0;
-}
-
-/**
* gfs2_sync_fs - sync the filesystem
* @sb: the superblock
*
@@ -807,7 +796,6 @@ static void gfs2_write_super(struct super_block *sb)
static int gfs2_sync_fs(struct super_block *sb, int wait)
{
- sb->s_dirt = 0;
if (wait && sb->s_fs_info)
gfs2_log_flush(sb->s_fs_info, NULL);
return 0;
@@ -1324,7 +1312,6 @@ const struct super_operations gfs2_super_ops = {
.write_inode = gfs2_write_inode,
.delete_inode = gfs2_delete_inode,
.put_super = gfs2_put_super,
- .write_super = gfs2_write_super,
.sync_fs = gfs2_sync_fs,
.freeze_fs = gfs2_freeze,
.unfreeze_fs = gfs2_unfreeze,
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h
new file mode 100644
index 00000000000..98d6ef1c1dc
--- /dev/null
+++ b/fs/gfs2/trace_gfs2.h
@@ -0,0 +1,407 @@
+#if !defined(_TRACE_GFS2_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_GFS2_H
+
+#include <linux/tracepoint.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM gfs2
+#define TRACE_INCLUDE_FILE trace_gfs2
+
+#include <linux/fs.h>
+#include <linux/buffer_head.h>
+#include <linux/dlmconstants.h>
+#include <linux/gfs2_ondisk.h>
+#include "incore.h"
+#include "glock.h"
+
+#define dlm_state_name(nn) { DLM_LOCK_##nn, #nn }
+#define glock_trace_name(x) __print_symbolic(x, \
+ dlm_state_name(IV), \
+ dlm_state_name(NL), \
+ dlm_state_name(CR), \
+ dlm_state_name(CW), \
+ dlm_state_name(PR), \
+ dlm_state_name(PW), \
+ dlm_state_name(EX))
+
+#define block_state_name(x) __print_symbolic(x, \
+ { GFS2_BLKST_FREE, "free" }, \
+ { GFS2_BLKST_USED, "used" }, \
+ { GFS2_BLKST_DINODE, "dinode" }, \
+ { GFS2_BLKST_UNLINKED, "unlinked" })
+
+#define show_glock_flags(flags) __print_flags(flags, "", \
+ {(1UL << GLF_LOCK), "l" }, \
+ {(1UL << GLF_DEMOTE), "D" }, \
+ {(1UL << GLF_PENDING_DEMOTE), "d" }, \
+ {(1UL << GLF_DEMOTE_IN_PROGRESS), "p" }, \
+ {(1UL << GLF_DIRTY), "y" }, \
+ {(1UL << GLF_LFLUSH), "f" }, \
+ {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \
+ {(1UL << GLF_REPLY_PENDING), "r" }, \
+ {(1UL << GLF_INITIAL), "I" }, \
+ {(1UL << GLF_FROZEN), "F" })
+
+#ifndef NUMPTY
+#define NUMPTY
+static inline u8 glock_trace_state(unsigned int state)
+{
+ switch(state) {
+ case LM_ST_SHARED:
+ return DLM_LOCK_PR;
+ case LM_ST_DEFERRED:
+ return DLM_LOCK_CW;
+ case LM_ST_EXCLUSIVE:
+ return DLM_LOCK_EX;
+ }
+ return DLM_LOCK_NL;
+}
+#endif
+
+/* Section 1 - Locking
+ *
+ * Objectives:
+ * Latency: Remote demote request to state change
+ * Latency: Local lock request to state change
+ * Latency: State change to lock grant
+ * Correctness: Ordering of local lock state vs. I/O requests
+ * Correctness: Responses to remote demote requests
+ */
+
+/* General glock state change (DLM lock request completes) */
+TRACE_EVENT(gfs2_glock_state_change,
+
+ TP_PROTO(const struct gfs2_glock *gl, unsigned int new_state),
+
+ TP_ARGS(gl, new_state),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ __field( u8, new_state )
+ __field( u8, dmt_state )
+ __field( u8, tgt_state )
+ __field( unsigned long, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ __entry->new_state = glock_trace_state(new_state);
+ __entry->tgt_state = glock_trace_state(gl->gl_target);
+ __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
+ __entry->flags = gl->gl_flags;
+ ),
+
+ TP_printk("%u,%u glock %d:%lld state %s to %s tgt:%s dmt:%s flags:%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(__entry->new_state),
+ glock_trace_name(__entry->tgt_state),
+ glock_trace_name(__entry->dmt_state),
+ show_glock_flags(__entry->flags))
+);
+
+/* State change -> unlocked, glock is being deallocated */
+TRACE_EVENT(gfs2_glock_put,
+
+ TP_PROTO(const struct gfs2_glock *gl),
+
+ TP_ARGS(gl),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ __field( unsigned long, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ __entry->flags = gl->gl_flags;
+ ),
+
+ TP_printk("%u,%u glock %d:%lld state %s => %s flags:%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->gltype, (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(DLM_LOCK_IV),
+ show_glock_flags(__entry->flags))
+
+);
+
+/* Callback (local or remote) requesting lock demotion */
+TRACE_EVENT(gfs2_demote_rq,
+
+ TP_PROTO(const struct gfs2_glock *gl),
+
+ TP_ARGS(gl),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( u8, cur_state )
+ __field( u8, dmt_state )
+ __field( unsigned long, flags )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gl->gl_sbd->sd_vfs->s_dev;
+ __entry->gltype = gl->gl_name.ln_type;
+ __entry->glnum = gl->gl_name.ln_number;
+ __entry->cur_state = glock_trace_state(gl->gl_state);
+ __entry->dmt_state = glock_trace_state(gl->gl_demote_state);
+ __entry->flags = gl->gl_flags;
+ ),
+
+ TP_printk("%u,%u glock %d:%lld demote %s to %s flags:%s",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ glock_trace_name(__entry->cur_state),
+ glock_trace_name(__entry->dmt_state),
+ show_glock_flags(__entry->flags))
+
+);
+
+/* Promotion/grant of a glock */
+TRACE_EVENT(gfs2_promote,
+
+ TP_PROTO(const struct gfs2_holder *gh, int first),
+
+ TP_ARGS(gh, first),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( int, first )
+ __field( u8, state )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->glnum = gh->gh_gl->gl_name.ln_number;
+ __entry->gltype = gh->gh_gl->gl_name.ln_type;
+ __entry->first = first;
+ __entry->state = glock_trace_state(gh->gh_state);
+ ),
+
+ TP_printk("%u,%u glock %u:%llu promote %s %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ __entry->first ? "first": "other",
+ glock_trace_name(__entry->state))
+);
+
+/* Queue/dequeue a lock request */
+TRACE_EVENT(gfs2_glock_queue,
+
+ TP_PROTO(const struct gfs2_holder *gh, int queue),
+
+ TP_ARGS(gh, queue),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, glnum )
+ __field( u32, gltype )
+ __field( int, queue )
+ __field( u8, state )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = gh->gh_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->glnum = gh->gh_gl->gl_name.ln_number;
+ __entry->gltype = gh->gh_gl->gl_name.ln_type;
+ __entry->queue = queue;
+ __entry->state = glock_trace_state(gh->gh_state);
+ ),
+
+ TP_printk("%u,%u glock %u:%llu %squeue %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev), __entry->gltype,
+ (unsigned long long)__entry->glnum,
+ __entry->queue ? "" : "de",
+ glock_trace_name(__entry->state))
+);
+
+/* Section 2 - Log/journal
+ *
+ * Objectives:
+ * Latency: Log flush time
+ * Correctness: pin/unpin vs. disk I/O ordering
+ * Performance: Log usage stats
+ */
+
+/* Pin/unpin a block in the log */
+TRACE_EVENT(gfs2_pin,
+
+ TP_PROTO(const struct gfs2_bufdata *bd, int pin),
+
+ TP_ARGS(bd, pin),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, pin )
+ __field( u32, len )
+ __field( sector_t, block )
+ __field( u64, ino )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = bd->bd_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->pin = pin;
+ __entry->len = bd->bd_bh->b_size;
+ __entry->block = bd->bd_bh->b_blocknr;
+ __entry->ino = bd->bd_gl->gl_name.ln_number;
+ ),
+
+ TP_printk("%u,%u log %s %llu/%lu inode %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->pin ? "pin" : "unpin",
+ (unsigned long long)__entry->block,
+ (unsigned long)__entry->len,
+ (unsigned long long)__entry->ino)
+);
+
+/* Flushing the log */
+TRACE_EVENT(gfs2_log_flush,
+
+ TP_PROTO(const struct gfs2_sbd *sdp, int start),
+
+ TP_ARGS(sdp, start),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, start )
+ __field( u64, log_seq )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sdp->sd_vfs->s_dev;
+ __entry->start = start;
+ __entry->log_seq = sdp->sd_log_sequence;
+ ),
+
+ TP_printk("%u,%u log flush %s %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->start ? "start" : "end",
+ (unsigned long long)__entry->log_seq)
+);
+
+/* Reserving/releasing blocks in the log */
+TRACE_EVENT(gfs2_log_blocks,
+
+ TP_PROTO(const struct gfs2_sbd *sdp, int blocks),
+
+ TP_ARGS(sdp, blocks),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( int, blocks )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = sdp->sd_vfs->s_dev;
+ __entry->blocks = blocks;
+ ),
+
+ TP_printk("%u,%u log reserve %d", MAJOR(__entry->dev),
+ MINOR(__entry->dev), __entry->blocks)
+);
+
+/* Section 3 - bmap
+ *
+ * Objectives:
+ * Latency: Bmap request time
+ * Performance: Block allocator tracing
+ * Correctness: Test of disard generation vs. blocks allocated
+ */
+
+/* Map an extent of blocks, possibly a new allocation */
+TRACE_EVENT(gfs2_bmap,
+
+ TP_PROTO(const struct gfs2_inode *ip, const struct buffer_head *bh,
+ sector_t lblock, int create, int errno),
+
+ TP_ARGS(ip, bh, lblock, create, errno),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( sector_t, lblock )
+ __field( sector_t, pblock )
+ __field( u64, inum )
+ __field( unsigned long, state )
+ __field( u32, len )
+ __field( int, create )
+ __field( int, errno )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->lblock = lblock;
+ __entry->pblock = buffer_mapped(bh) ? bh->b_blocknr : 0;
+ __entry->inum = ip->i_no_addr;
+ __entry->state = bh->b_state;
+ __entry->len = bh->b_size;
+ __entry->create = create;
+ __entry->errno = errno;
+ ),
+
+ TP_printk("%u,%u bmap %llu map %llu/%lu to %llu flags:%08lx %s %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->inum,
+ (unsigned long long)__entry->lblock,
+ (unsigned long)__entry->len,
+ (unsigned long long)__entry->pblock,
+ __entry->state, __entry->create ? "create " : "nocreate",
+ __entry->errno)
+);
+
+/* Keep track of blocks as they are allocated/freed */
+TRACE_EVENT(gfs2_block_alloc,
+
+ TP_PROTO(const struct gfs2_inode *ip, u64 block, unsigned len,
+ u8 block_state),
+
+ TP_ARGS(ip, block, len, block_state),
+
+ TP_STRUCT__entry(
+ __field( dev_t, dev )
+ __field( u64, start )
+ __field( u64, inum )
+ __field( u32, len )
+ __field( u8, block_state )
+ ),
+
+ TP_fast_assign(
+ __entry->dev = ip->i_gl->gl_sbd->sd_vfs->s_dev;
+ __entry->start = block;
+ __entry->inum = ip->i_no_addr;
+ __entry->len = len;
+ __entry->block_state = block_state;
+ ),
+
+ TP_printk("%u,%u bmap %llu alloc %llu/%lu %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->inum,
+ (unsigned long long)__entry->start,
+ (unsigned long)__entry->len,
+ block_state_name(__entry->block_state))
+);
+
+#endif /* _TRACE_GFS2_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#include <trace/define_trace.h>
+
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index a36bb749926..6f833dc8e91 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -49,11 +49,23 @@ MODULE_LICENSE("GPL");
*/
static void hfs_write_super(struct super_block *sb)
{
+ lock_super(sb);
sb->s_dirt = 0;
- if (sb->s_flags & MS_RDONLY)
- return;
+
/* sync everything to the buffers */
+ if (!(sb->s_flags & MS_RDONLY))
+ hfs_mdb_commit(sb);
+ unlock_super(sb);
+}
+
+static int hfs_sync_fs(struct super_block *sb, int wait)
+{
+ lock_super(sb);
hfs_mdb_commit(sb);
+ sb->s_dirt = 0;
+ unlock_super(sb);
+
+ return 0;
}
/*
@@ -65,9 +77,15 @@ static void hfs_write_super(struct super_block *sb)
*/
static void hfs_put_super(struct super_block *sb)
{
+ lock_kernel();
+
+ if (sb->s_dirt)
+ hfs_write_super(sb);
hfs_mdb_close(sb);
/* release the MDB's resources */
hfs_mdb_put(sb);
+
+ unlock_kernel();
}
/*
@@ -164,6 +182,7 @@ static const struct super_operations hfs_super_operations = {
.clear_inode = hfs_clear_inode,
.put_super = hfs_put_super,
.write_super = hfs_write_super,
+ .sync_fs = hfs_sync_fs,
.statfs = hfs_statfs,
.remount_fs = hfs_remount,
.show_options = hfs_show_options,
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index f2a64020f42..9fc3af0c0da 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -152,15 +152,14 @@ static void hfsplus_clear_inode(struct inode *inode)
}
}
-static void hfsplus_write_super(struct super_block *sb)
+static int hfsplus_sync_fs(struct super_block *sb, int wait)
{
struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
dprint(DBG_SUPER, "hfsplus_write_super\n");
+
+ lock_super(sb);
sb->s_dirt = 0;
- if (sb->s_flags & MS_RDONLY)
- /* warn? */
- return;
vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks);
vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc);
@@ -192,6 +191,16 @@ static void hfsplus_write_super(struct super_block *sb)
}
HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP;
}
+ unlock_super(sb);
+ return 0;
+}
+
+static void hfsplus_write_super(struct super_block *sb)
+{
+ if (!(sb->s_flags & MS_RDONLY))
+ hfsplus_sync_fs(sb, 1);
+ else
+ sb->s_dirt = 0;
}
static void hfsplus_put_super(struct super_block *sb)
@@ -199,6 +208,11 @@ static void hfsplus_put_super(struct super_block *sb)
dprint(DBG_SUPER, "hfsplus_put_super\n");
if (!sb->s_fs_info)
return;
+
+ lock_kernel();
+
+ if (sb->s_dirt)
+ hfsplus_write_super(sb);
if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
@@ -218,6 +232,8 @@ static void hfsplus_put_super(struct super_block *sb)
unload_nls(HFSPLUS_SB(sb).nls);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
+
+ unlock_kernel();
}
static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -279,6 +295,7 @@ static const struct super_operations hfsplus_sops = {
.clear_inode = hfsplus_clear_inode,
.put_super = hfsplus_put_super,
.write_super = hfsplus_write_super,
+ .sync_fs = hfsplus_sync_fs,
.statfs = hfsplus_statfs,
.remount_fs = hfsplus_remount,
.show_options = hfsplus_show_options,
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index fc77965be84..f2feaa06bf2 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -13,6 +13,7 @@
#include <linux/statfs.h>
#include <linux/magic.h>
#include <linux/sched.h>
+#include <linux/smp_lock.h>
/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
@@ -99,11 +100,16 @@ int hpfs_stop_cycles(struct super_block *s, int key, int *c1, int *c2,
static void hpfs_put_super(struct super_block *s)
{
struct hpfs_sb_info *sbi = hpfs_sb(s);
+
+ lock_kernel();
+
kfree(sbi->sb_cp_table);
kfree(sbi->sb_bmp_dir);
unmark_dirty(s);
s->s_fs_info = NULL;
kfree(sbi);
+
+ unlock_kernel();
}
unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno)
@@ -393,6 +399,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
*flags |= MS_NOATIME;
+ lock_kernel();
+ lock_super(s);
uid = sbi->sb_uid; gid = sbi->sb_gid;
umask = 0777 & ~sbi->sb_mode;
lowercase = sbi->sb_lowercase; conv = sbi->sb_conv;
@@ -425,9 +433,13 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
replace_mount_options(s, new_opts);
+ unlock_super(s);
+ unlock_kernel();
return 0;
out_err:
+ unlock_super(s);
+ unlock_kernel();
kfree(new_opts);
return -EINVAL;
}
diff --git a/fs/inode.c b/fs/inode.c
index ca337014ae2..f643be565df 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1408,7 +1408,7 @@ EXPORT_SYMBOL(touch_atime);
* for writeback. Note that this function is meant exclusively for
* usage in the file write path of filesystems, and filesystems may
* choose to explicitly ignore update via this function with the
- * S_NOCTIME inode flag, e.g. for network filesystem where these
+ * S_NOCMTIME inode flag, e.g. for network filesystem where these
* timestamps are handled by the server.
*/
@@ -1422,7 +1422,7 @@ void file_update_time(struct file *file)
if (IS_NOCMTIME(inode))
return;
- err = mnt_want_write(file->f_path.mnt);
+ err = mnt_want_write_file(file);
if (err)
return;
diff --git a/fs/internal.h b/fs/internal.h
index b4dac4fb6b6..d55ef562f0b 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -25,6 +25,8 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
return sb == blockdev_superblock;
}
+extern int __sync_blockdev(struct block_device *bdev, int wait);
+
#else
static inline void bdev_cache_init(void)
{
@@ -34,6 +36,11 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
{
return 0;
}
+
+static inline int __sync_blockdev(struct block_device *bdev, int wait)
+{
+ return 0;
+}
#endif
/*
@@ -66,3 +73,13 @@ extern void __init mnt_init(void);
* fs_struct.c
*/
extern void chroot_fs_refs(struct path *, struct path *);
+
+/*
+ * file_table.c
+ */
+extern void mark_files_ro(struct super_block *);
+
+/*
+ * super.c
+ */
+extern int do_remount_sb(struct super_block *, int, void *, int);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index b4cbe9603c7..068b34b5a10 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -42,11 +42,16 @@ static int isofs_dentry_cmp_ms(struct dentry *dentry, struct qstr *a, struct qst
static void isofs_put_super(struct super_block *sb)
{
struct isofs_sb_info *sbi = ISOFS_SB(sb);
+
#ifdef CONFIG_JOLIET
+ lock_kernel();
+
if (sbi->s_nls_iocharset) {
unload_nls(sbi->s_nls_iocharset);
sbi->s_nls_iocharset = NULL;
}
+
+ unlock_kernel();
#endif
kfree(sbi);
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 249305d65d5..3451a81b214 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -20,6 +20,7 @@
#include <linux/vmalloc.h>
#include <linux/vfs.h>
#include <linux/crc32.h>
+#include <linux/smp_lock.h>
#include "nodelist.h"
static int jffs2_flash_setup(struct jffs2_sb_info *c);
@@ -387,6 +388,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
This also catches the case where it was stopped and this
is just a remount to restart it.
Flush the writebuffer, if neccecary, else we loose it */
+ lock_kernel();
if (!(sb->s_flags & MS_RDONLY)) {
jffs2_stop_garbage_collect_thread(c);
mutex_lock(&c->alloc_sem);
@@ -399,24 +401,10 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
*flags |= MS_NOATIME;
+ unlock_kernel();
return 0;
}
-void jffs2_write_super (struct super_block *sb)
-{
- struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
- sb->s_dirt = 0;
-
- if (sb->s_flags & MS_RDONLY)
- return;
-
- D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
- jffs2_garbage_collect_trigger(c);
- jffs2_erase_pending_blocks(c, 0);
- jffs2_flush_wbuf_gc(c, 0);
-}
-
-
/* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
fill in the raw_inode while you're at it. */
struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri)
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 5e194a5c8e2..2228380c47b 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -181,7 +181,6 @@ void jffs2_dirty_inode(struct inode *inode);
struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
struct jffs2_raw_inode *ri);
int jffs2_statfs (struct dentry *, struct kstatfs *);
-void jffs2_write_super (struct super_block *);
int jffs2_remount_fs (struct super_block *, int *, char *);
int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
void jffs2_gc_release_inode(struct jffs2_sb_info *c,
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 4c4e18c54a5..07a22caf268 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -53,10 +53,29 @@ static void jffs2_i_init_once(void *foo)
inode_init_once(&f->vfs_inode);
}
+static void jffs2_write_super(struct super_block *sb)
+{
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+
+ lock_super(sb);
+ sb->s_dirt = 0;
+
+ if (!(sb->s_flags & MS_RDONLY)) {
+ D1(printk(KERN_DEBUG "jffs2_write_super()\n"));
+ jffs2_garbage_collect_trigger(c);
+ jffs2_erase_pending_blocks(c, 0);
+ jffs2_flush_wbuf_gc(c, 0);
+ }
+
+ unlock_super(sb);
+}
+
static int jffs2_sync_fs(struct super_block *sb, int wait)
{
struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+ jffs2_write_super(sb);
+
mutex_lock(&c->alloc_sem);
jffs2_flush_wbuf_pad(c);
mutex_unlock(&c->alloc_sem);
@@ -174,6 +193,11 @@ static void jffs2_put_super (struct super_block *sb)
D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n"));
+ lock_kernel();
+
+ if (sb->s_dirt)
+ jffs2_write_super(sb);
+
mutex_lock(&c->alloc_sem);
jffs2_flush_wbuf_pad(c);
mutex_unlock(&c->alloc_sem);
@@ -192,6 +216,8 @@ static void jffs2_put_super (struct super_block *sb)
if (c->mtd->sync)
c->mtd->sync(c->mtd);
+ unlock_kernel();
+
D1(printk(KERN_DEBUG "jffs2_put_super returning\n"));
}
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index d9b0e92b360..09b1b6ee218 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -32,6 +32,7 @@
#include <linux/crc32.h>
#include <asm/uaccess.h>
#include <linux/seq_file.h>
+#include <linux/smp_lock.h>
#include "jfs_incore.h"
#include "jfs_filsys.h"
@@ -183,6 +184,9 @@ static void jfs_put_super(struct super_block *sb)
int rc;
jfs_info("In jfs_put_super");
+
+ lock_kernel();
+
rc = jfs_umount(sb);
if (rc)
jfs_err("jfs_umount failed with return code %d", rc);
@@ -195,6 +199,8 @@ static void jfs_put_super(struct super_block *sb)
sbi->direct_inode = NULL;
kfree(sbi);
+
+ unlock_kernel();
}
enum {
@@ -370,19 +376,24 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
s64 newLVSize = 0;
int rc = 0;
int flag = JFS_SBI(sb)->flag;
+ int ret;
if (!parse_options(data, sb, &newLVSize, &flag)) {
return -EINVAL;
}
+ lock_kernel();
if (newLVSize) {
if (sb->s_flags & MS_RDONLY) {
printk(KERN_ERR
"JFS: resize requires volume to be mounted read-write\n");
+ unlock_kernel();
return -EROFS;
}
rc = jfs_extendfs(sb, newLVSize, 0);
- if (rc)
+ if (rc) {
+ unlock_kernel();
return rc;
+ }
}
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
@@ -393,23 +404,31 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
JFS_SBI(sb)->flag = flag;
- return jfs_mount_rw(sb, 1);
+ ret = jfs_mount_rw(sb, 1);
+ unlock_kernel();
+ return ret;
}
if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) {
rc = jfs_umount_rw(sb);
JFS_SBI(sb)->flag = flag;
+ unlock_kernel();
return rc;
}
if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY))
if (!(sb->s_flags & MS_RDONLY)) {
rc = jfs_umount_rw(sb);
- if (rc)
+ if (rc) {
+ unlock_kernel();
return rc;
+ }
JFS_SBI(sb)->flag = flag;
- return jfs_mount_rw(sb, 1);
+ ret = jfs_mount_rw(sb, 1);
+ unlock_kernel();
+ return ret;
}
JFS_SBI(sb)->flag = flag;
+ unlock_kernel();
return 0;
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 80046ddf506..ddfa89948c3 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -9,6 +9,8 @@
#include <linux/vfs.h>
#include <linux/mutex.h>
#include <linux/exportfs.h>
+#include <linux/writeback.h>
+#include <linux/buffer_head.h>
#include <asm/uaccess.h>
@@ -807,6 +809,29 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
}
EXPORT_SYMBOL_GPL(generic_fh_to_parent);
+int simple_fsync(struct file *file, struct dentry *dentry, int datasync)
+{
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = 0, /* metadata-only; caller takes care of data */
+ };
+ struct inode *inode = dentry->d_inode;
+ int err;
+ int ret;
+
+ ret = sync_mapping_buffers(inode->i_mapping);
+ if (!(inode->i_state & I_DIRTY))
+ return ret;
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ return ret;
+
+ err = sync_inode(inode, &wbc);
+ if (ret == 0)
+ ret = err;
+ return ret;
+}
+EXPORT_SYMBOL(simple_fsync);
+
EXPORT_SYMBOL(dcache_dir_close);
EXPORT_SYMBOL(dcache_dir_lseek);
EXPORT_SYMBOL(dcache_dir_open);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index d4946c4c90e..e5f206467e4 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -22,7 +22,7 @@ static int minix_readdir(struct file *, void *, filldir_t);
const struct file_operations minix_dir_operations = {
.read = generic_read_dir,
.readdir = minix_readdir,
- .fsync = minix_sync_file,
+ .fsync = simple_fsync,
};
static inline void dir_put_page(struct page *page)
diff --git a/fs/minix/file.c b/fs/minix/file.c
index 17765f697e5..3eec3e607a8 100644
--- a/fs/minix/file.c
+++ b/fs/minix/file.c
@@ -6,15 +6,12 @@
* minix regular file handling primitives
*/
-#include <linux/buffer_head.h> /* for fsync_inode_buffers() */
#include "minix.h"
/*
* We have mostly NULLs here: the current defaults are OK for
* the minix filesystem.
*/
-int minix_sync_file(struct file *, struct dentry *, int);
-
const struct file_operations minix_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -22,7 +19,7 @@ const struct file_operations minix_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = minix_sync_file,
+ .fsync = simple_fsync,
.splice_read = generic_file_splice_read,
};
@@ -30,18 +27,3 @@ const struct inode_operations minix_file_inode_operations = {
.truncate = minix_truncate,
.getattr = minix_getattr,
};
-
-int minix_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
- struct inode *inode = dentry->d_inode;
- int err;
-
- err = sync_mapping_buffers(inode->i_mapping);
- if (!(inode->i_state & I_DIRTY))
- return err;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return err;
-
- err |= minix_sync_inode(inode);
- return err ? -EIO : 0;
-}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index daad3c2740d..f91a2369359 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -35,6 +35,8 @@ static void minix_put_super(struct super_block *sb)
int i;
struct minix_sb_info *sbi = minix_sb(sb);
+ lock_kernel();
+
if (!(sb->s_flags & MS_RDONLY)) {
if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */
sbi->s_ms->s_state = sbi->s_mount_state;
@@ -49,7 +51,7 @@ static void minix_put_super(struct super_block *sb)
sb->s_fs_info = NULL;
kfree(sbi);
- return;
+ unlock_kernel();
}
static struct kmem_cache * minix_inode_cachep;
@@ -554,38 +556,25 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
return bh;
}
-static struct buffer_head *minix_update_inode(struct inode *inode)
-{
- if (INODE_VERSION(inode) == MINIX_V1)
- return V1_minix_update_inode(inode);
- else
- return V2_minix_update_inode(inode);
-}
-
-static int minix_write_inode(struct inode * inode, int wait)
-{
- brelse(minix_update_inode(inode));
- return 0;
-}
-
-int minix_sync_inode(struct inode * inode)
+static int minix_write_inode(struct inode *inode, int wait)
{
int err = 0;
struct buffer_head *bh;
- bh = minix_update_inode(inode);
- if (bh && buffer_dirty(bh))
- {
+ if (INODE_VERSION(inode) == MINIX_V1)
+ bh = V1_minix_update_inode(inode);
+ else
+ bh = V2_minix_update_inode(inode);
+ if (!bh)
+ return -EIO;
+ if (wait && buffer_dirty(bh)) {
sync_dirty_buffer(bh);
- if (buffer_req(bh) && !buffer_uptodate(bh))
- {
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
printk("IO error syncing minix inode [%s:%08lx]\n",
inode->i_sb->s_id, inode->i_ino);
- err = -1;
+ err = -EIO;
}
}
- else if (!bh)
- err = -1;
brelse (bh);
return err;
}
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index e6a0b193bea..cb7fdd11f9a 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -57,7 +57,6 @@ extern int __minix_write_begin(struct file *file, struct address_space *mapping,
extern void V1_minix_truncate(struct inode *);
extern void V2_minix_truncate(struct inode *);
extern void minix_truncate(struct inode *);
-extern int minix_sync_inode(struct inode *);
extern void minix_set_inode(struct inode *, dev_t);
extern int V1_minix_get_block(struct inode *, long, struct buffer_head *, int);
extern int V2_minix_get_block(struct inode *, long, struct buffer_head *, int);
@@ -72,7 +71,6 @@ extern int minix_empty_dir(struct inode*);
extern void minix_set_link(struct minix_dir_entry*, struct page*, struct inode*);
extern struct minix_dir_entry *minix_dotdot(struct inode*, struct page**);
extern ino_t minix_inode_by_name(struct dentry*);
-extern int minix_sync_file(struct file *, struct dentry *, int);
extern const struct inode_operations minix_file_inode_operations;
extern const struct inode_operations minix_dir_inode_operations;
diff --git a/fs/namei.c b/fs/namei.c
index c82805d088e..527119afb6a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -552,6 +552,17 @@ static __always_inline int link_path_walk(const char *name, struct nameidata *nd
return result;
}
+static __always_inline void set_root(struct nameidata *nd)
+{
+ if (!nd->root.mnt) {
+ struct fs_struct *fs = current->fs;
+ read_lock(&fs->lock);
+ nd->root = fs->root;
+ path_get(&nd->root);
+ read_unlock(&fs->lock);
+ }
+}
+
static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
int res = 0;
@@ -560,14 +571,10 @@ static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *l
goto fail;
if (*link == '/') {
- struct fs_struct *fs = current->fs;
-
+ set_root(nd);
path_put(&nd->path);
-
- read_lock(&fs->lock);
- nd->path = fs->root;
- path_get(&fs->root);
- read_unlock(&fs->lock);
+ nd->path = nd->root;
+ path_get(&nd->root);
}
res = link_path_walk(link, nd);
@@ -668,23 +675,23 @@ loop:
return err;
}
-int follow_up(struct vfsmount **mnt, struct dentry **dentry)
+int follow_up(struct path *path)
{
struct vfsmount *parent;
struct dentry *mountpoint;
spin_lock(&vfsmount_lock);
- parent=(*mnt)->mnt_parent;
- if (parent == *mnt) {
+ parent = path->mnt->mnt_parent;
+ if (parent == path->mnt) {
spin_unlock(&vfsmount_lock);
return 0;
}
mntget(parent);
- mountpoint=dget((*mnt)->mnt_mountpoint);
+ mountpoint = dget(path->mnt->mnt_mountpoint);
spin_unlock(&vfsmount_lock);
- dput(*dentry);
- *dentry = mountpoint;
- mntput(*mnt);
- *mnt = parent;
+ dput(path->dentry);
+ path->dentry = mountpoint;
+ mntput(path->mnt);
+ path->mnt = parent;
return 1;
}
@@ -695,7 +702,7 @@ static int __follow_mount(struct path *path)
{
int res = 0;
while (d_mountpoint(path->dentry)) {
- struct vfsmount *mounted = lookup_mnt(path->mnt, path->dentry);
+ struct vfsmount *mounted = lookup_mnt(path);
if (!mounted)
break;
dput(path->dentry);
@@ -708,32 +715,32 @@ static int __follow_mount(struct path *path)
return res;
}
-static void follow_mount(struct vfsmount **mnt, struct dentry **dentry)
+static void follow_mount(struct path *path)
{
- while (d_mountpoint(*dentry)) {
- struct vfsmount *mounted = lookup_mnt(*mnt, *dentry);
+ while (d_mountpoint(path->dentry)) {
+ struct vfsmount *mounted = lookup_mnt(path);
if (!mounted)
break;
- dput(*dentry);
- mntput(*mnt);
- *mnt = mounted;
- *dentry = dget(mounted->mnt_root);
+ dput(path->dentry);
+ mntput(path->mnt);
+ path->mnt = mounted;
+ path->dentry = dget(mounted->mnt_root);
}
}
/* no need for dcache_lock, as serialization is taken care in
* namespace.c
*/
-int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+int follow_down(struct path *path)
{
struct vfsmount *mounted;
- mounted = lookup_mnt(*mnt, *dentry);
+ mounted = lookup_mnt(path);
if (mounted) {
- dput(*dentry);
- mntput(*mnt);
- *mnt = mounted;
- *dentry = dget(mounted->mnt_root);
+ dput(path->dentry);
+ mntput(path->mnt);
+ path->mnt = mounted;
+ path->dentry = dget(mounted->mnt_root);
return 1;
}
return 0;
@@ -741,19 +748,16 @@ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
static __always_inline void follow_dotdot(struct nameidata *nd)
{
- struct fs_struct *fs = current->fs;
+ set_root(nd);
while(1) {
struct vfsmount *parent;
struct dentry *old = nd->path.dentry;
- read_lock(&fs->lock);
- if (nd->path.dentry == fs->root.dentry &&
- nd->path.mnt == fs->root.mnt) {
- read_unlock(&fs->lock);
+ if (nd->path.dentry == nd->root.dentry &&
+ nd->path.mnt == nd->root.mnt) {
break;
}
- read_unlock(&fs->lock);
spin_lock(&dcache_lock);
if (nd->path.dentry != nd->path.mnt->mnt_root) {
nd->path.dentry = dget(nd->path.dentry->d_parent);
@@ -775,7 +779,7 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
mntput(nd->path.mnt);
nd->path.mnt = parent;
}
- follow_mount(&nd->path.mnt, &nd->path.dentry);
+ follow_mount(&nd->path);
}
/*
@@ -1017,25 +1021,23 @@ static int path_walk(const char *name, struct nameidata *nd)
return link_path_walk(name, nd);
}
-/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
-static int do_path_lookup(int dfd, const char *name,
- unsigned int flags, struct nameidata *nd)
+static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
{
int retval = 0;
int fput_needed;
struct file *file;
- struct fs_struct *fs = current->fs;
nd->last_type = LAST_ROOT; /* if there are only slashes... */
nd->flags = flags;
nd->depth = 0;
+ nd->root.mnt = NULL;
if (*name=='/') {
- read_lock(&fs->lock);
- nd->path = fs->root;
- path_get(&fs->root);
- read_unlock(&fs->lock);
+ set_root(nd);
+ nd->path = nd->root;
+ path_get(&nd->root);
} else if (dfd == AT_FDCWD) {
+ struct fs_struct *fs = current->fs;
read_lock(&fs->lock);
nd->path = fs->pwd;
path_get(&fs->pwd);
@@ -1063,17 +1065,29 @@ static int do_path_lookup(int dfd, const char *name,
fput_light(file, fput_needed);
}
+ return 0;
- retval = path_walk(name, nd);
+fput_fail:
+ fput_light(file, fput_needed);
+out_fail:
+ return retval;
+}
+
+/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */
+static int do_path_lookup(int dfd, const char *name,
+ unsigned int flags, struct nameidata *nd)
+{
+ int retval = path_init(dfd, name, flags, nd);
+ if (!retval)
+ retval = path_walk(name, nd);
if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
nd->path.dentry->d_inode))
audit_inode(name, nd->path.dentry);
-out_fail:
+ if (nd->root.mnt) {
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ }
return retval;
-
-fput_fail:
- fput_light(file, fput_needed);
- goto out_fail;
}
int path_lookup(const char *name, unsigned int flags,
@@ -1113,14 +1127,18 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
nd->path.dentry = dentry;
nd->path.mnt = mnt;
path_get(&nd->path);
+ nd->root = nd->path;
+ path_get(&nd->root);
retval = path_walk(name, nd);
if (unlikely(!retval && !audit_dummy_context() && nd->path.dentry &&
nd->path.dentry->d_inode))
audit_inode(name, nd->path.dentry);
- return retval;
+ path_put(&nd->root);
+ nd->root.mnt = NULL;
+ return retval;
}
/**
@@ -1676,9 +1694,14 @@ struct file *do_filp_open(int dfd, const char *pathname,
/*
* Create - we need to know the parent.
*/
- error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
+ error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
if (error)
return ERR_PTR(error);
+ error = path_walk(pathname, &nd);
+ if (error)
+ return ERR_PTR(error);
+ if (unlikely(!audit_dummy_context()))
+ audit_inode(pathname, nd.path.dentry);
/*
* We have the parent and last component. First of all, check
@@ -1806,6 +1829,8 @@ exit:
if (!IS_ERR(nd.intent.open.file))
release_open_intent(&nd);
exit_parent:
+ if (nd.root.mnt)
+ path_put(&nd.root);
path_put(&nd.path);
return ERR_PTR(error);
diff --git a/fs/namespace.c b/fs/namespace.c
index 134d494158d..2dd333b0fe7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -131,10 +131,20 @@ struct vfsmount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_share);
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
- atomic_set(&mnt->__mnt_writers, 0);
+#ifdef CONFIG_SMP
+ mnt->mnt_writers = alloc_percpu(int);
+ if (!mnt->mnt_writers)
+ goto out_free_devname;
+#else
+ mnt->mnt_writers = 0;
+#endif
}
return mnt;
+#ifdef CONFIG_SMP
+out_free_devname:
+ kfree(mnt->mnt_devname);
+#endif
out_free_id:
mnt_free_id(mnt);
out_free_cache:
@@ -171,65 +181,38 @@ int __mnt_is_readonly(struct vfsmount *mnt)
}
EXPORT_SYMBOL_GPL(__mnt_is_readonly);
-struct mnt_writer {
- /*
- * If holding multiple instances of this lock, they
- * must be ordered by cpu number.
- */
- spinlock_t lock;
- struct lock_class_key lock_class; /* compiles out with !lockdep */
- unsigned long count;
- struct vfsmount *mnt;
-} ____cacheline_aligned_in_smp;
-static DEFINE_PER_CPU(struct mnt_writer, mnt_writers);
+static inline void inc_mnt_writers(struct vfsmount *mnt)
+{
+#ifdef CONFIG_SMP
+ (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))++;
+#else
+ mnt->mnt_writers++;
+#endif
+}
-static int __init init_mnt_writers(void)
+static inline void dec_mnt_writers(struct vfsmount *mnt)
{
- int cpu;
- for_each_possible_cpu(cpu) {
- struct mnt_writer *writer = &per_cpu(mnt_writers, cpu);
- spin_lock_init(&writer->lock);
- lockdep_set_class(&writer->lock, &writer->lock_class);
- writer->count = 0;
- }
- return 0;
+#ifdef CONFIG_SMP
+ (*per_cpu_ptr(mnt->mnt_writers, smp_processor_id()))--;
+#else
+ mnt->mnt_writers--;
+#endif
}
-fs_initcall(init_mnt_writers);
-static void unlock_mnt_writers(void)
+static unsigned int count_mnt_writers(struct vfsmount *mnt)
{
+#ifdef CONFIG_SMP
+ unsigned int count = 0;
int cpu;
- struct mnt_writer *cpu_writer;
for_each_possible_cpu(cpu) {
- cpu_writer = &per_cpu(mnt_writers, cpu);
- spin_unlock(&cpu_writer->lock);
+ count += *per_cpu_ptr(mnt->mnt_writers, cpu);
}
-}
-static inline void __clear_mnt_count(struct mnt_writer *cpu_writer)
-{
- if (!cpu_writer->mnt)
- return;
- /*
- * This is in case anyone ever leaves an invalid,
- * old ->mnt and a count of 0.
- */
- if (!cpu_writer->count)
- return;
- atomic_add(cpu_writer->count, &cpu_writer->mnt->__mnt_writers);
- cpu_writer->count = 0;
-}
- /*
- * must hold cpu_writer->lock
- */
-static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
- struct vfsmount *mnt)
-{
- if (cpu_writer->mnt == mnt)
- return;
- __clear_mnt_count(cpu_writer);
- cpu_writer->mnt = mnt;
+ return count;
+#else
+ return mnt->mnt_writers;
+#endif
}
/*
@@ -253,74 +236,73 @@ static inline void use_cpu_writer_for_mount(struct mnt_writer *cpu_writer,
int mnt_want_write(struct vfsmount *mnt)
{
int ret = 0;
- struct mnt_writer *cpu_writer;
- cpu_writer = &get_cpu_var(mnt_writers);
- spin_lock(&cpu_writer->lock);
+ preempt_disable();
+ inc_mnt_writers(mnt);
+ /*
+ * The store to inc_mnt_writers must be visible before we pass
+ * MNT_WRITE_HOLD loop below, so that the slowpath can see our
+ * incremented count after it has set MNT_WRITE_HOLD.
+ */
+ smp_mb();
+ while (mnt->mnt_flags & MNT_WRITE_HOLD)
+ cpu_relax();
+ /*
+ * After the slowpath clears MNT_WRITE_HOLD, mnt_is_readonly will
+ * be set to match its requirements. So we must not load that until
+ * MNT_WRITE_HOLD is cleared.
+ */
+ smp_rmb();
if (__mnt_is_readonly(mnt)) {
+ dec_mnt_writers(mnt);
ret = -EROFS;
goto out;
}
- use_cpu_writer_for_mount(cpu_writer, mnt);
- cpu_writer->count++;
out:
- spin_unlock(&cpu_writer->lock);
- put_cpu_var(mnt_writers);
+ preempt_enable();
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
-static void lock_mnt_writers(void)
-{
- int cpu;
- struct mnt_writer *cpu_writer;
-
- for_each_possible_cpu(cpu) {
- cpu_writer = &per_cpu(mnt_writers, cpu);
- spin_lock(&cpu_writer->lock);
- __clear_mnt_count(cpu_writer);
- cpu_writer->mnt = NULL;
- }
+/**
+ * mnt_clone_write - get write access to a mount
+ * @mnt: the mount on which to take a write
+ *
+ * This is effectively like mnt_want_write, except
+ * it must only be used to take an extra write reference
+ * on a mountpoint that we already know has a write reference
+ * on it. This allows some optimisation.
+ *
+ * After finished, mnt_drop_write must be called as usual to
+ * drop the reference.
+ */
+int mnt_clone_write(struct vfsmount *mnt)
+{
+ /* superblock may be r/o */
+ if (__mnt_is_readonly(mnt))
+ return -EROFS;
+ preempt_disable();
+ inc_mnt_writers(mnt);
+ preempt_enable();
+ return 0;
}
+EXPORT_SYMBOL_GPL(mnt_clone_write);
-/*
- * These per-cpu write counts are not guaranteed to have
- * matched increments and decrements on any given cpu.
- * A file open()ed for write on one cpu and close()d on
- * another cpu will imbalance this count. Make sure it
- * does not get too far out of whack.
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
*/
-static void handle_write_count_underflow(struct vfsmount *mnt)
+int mnt_want_write_file(struct file *file)
{
- if (atomic_read(&mnt->__mnt_writers) >=
- MNT_WRITER_UNDERFLOW_LIMIT)
- return;
- /*
- * It isn't necessary to hold all of the locks
- * at the same time, but doing it this way makes
- * us share a lot more code.
- */
- lock_mnt_writers();
- /*
- * vfsmount_lock is for mnt_flags.
- */
- spin_lock(&vfsmount_lock);
- /*
- * If coalescing the per-cpu writer counts did not
- * get us back to a positive writer count, we have
- * a bug.
- */
- if ((atomic_read(&mnt->__mnt_writers) < 0) &&
- !(mnt->mnt_flags & MNT_IMBALANCED_WRITE_COUNT)) {
- WARN(1, KERN_DEBUG "leak detected on mount(%p) writers "
- "count: %d\n",
- mnt, atomic_read(&mnt->__mnt_writers));
- /* use the flag to keep the dmesg spam down */
- mnt->mnt_flags |= MNT_IMBALANCED_WRITE_COUNT;
- }
- spin_unlock(&vfsmount_lock);
- unlock_mnt_writers();
+ if (!(file->f_mode & FMODE_WRITE))
+ return mnt_want_write(file->f_path.mnt);
+ else
+ return mnt_clone_write(file->f_path.mnt);
}
+EXPORT_SYMBOL_GPL(mnt_want_write_file);
/**
* mnt_drop_write - give up write access to a mount
@@ -332,37 +314,9 @@ static void handle_write_count_underflow(struct vfsmount *mnt)
*/
void mnt_drop_write(struct vfsmount *mnt)
{
- int must_check_underflow = 0;
- struct mnt_writer *cpu_writer;
-
- cpu_writer = &get_cpu_var(mnt_writers);
- spin_lock(&cpu_writer->lock);
-
- use_cpu_writer_for_mount(cpu_writer, mnt);
- if (cpu_writer->count > 0) {
- cpu_writer->count--;
- } else {
- must_check_underflow = 1;
- atomic_dec(&mnt->__mnt_writers);
- }
-
- spin_unlock(&cpu_writer->lock);
- /*
- * Logically, we could call this each time,
- * but the __mnt_writers cacheline tends to
- * be cold, and makes this expensive.
- */
- if (must_check_underflow)
- handle_write_count_underflow(mnt);
- /*
- * This could be done right after the spinlock
- * is taken because the spinlock keeps us on
- * the cpu, and disables preemption. However,
- * putting it here bounds the amount that
- * __mnt_writers can underflow. Without it,
- * we could theoretically wrap __mnt_writers.
- */
- put_cpu_var(mnt_writers);
+ preempt_disable();
+ dec_mnt_writers(mnt);
+ preempt_enable();
}
EXPORT_SYMBOL_GPL(mnt_drop_write);
@@ -370,24 +324,41 @@ static int mnt_make_readonly(struct vfsmount *mnt)
{
int ret = 0;
- lock_mnt_writers();
+ spin_lock(&vfsmount_lock);
+ mnt->mnt_flags |= MNT_WRITE_HOLD;
/*
- * With all the locks held, this value is stable
+ * After storing MNT_WRITE_HOLD, we'll read the counters. This store
+ * should be visible before we do.
*/
- if (atomic_read(&mnt->__mnt_writers) > 0) {
- ret = -EBUSY;
- goto out;
- }
+ smp_mb();
+
/*
- * nobody can do a successful mnt_want_write() with all
- * of the counts in MNT_DENIED_WRITE and the locks held.
+ * With writers on hold, if this value is zero, then there are
+ * definitely no active writers (although held writers may subsequently
+ * increment the count, they'll have to wait, and decrement it after
+ * seeing MNT_READONLY).
+ *
+ * It is OK to have counter incremented on one CPU and decremented on
+ * another: the sum will add up correctly. The danger would be when we
+ * sum up each counter, if we read a counter before it is incremented,
+ * but then read another CPU's count which it has been subsequently
+ * decremented from -- we would see more decrements than we should.
+ * MNT_WRITE_HOLD protects against this scenario, because
+ * mnt_want_write first increments count, then smp_mb, then spins on
+ * MNT_WRITE_HOLD, so it can't be decremented by another CPU while
+ * we're counting up here.
*/
- spin_lock(&vfsmount_lock);
- if (!ret)
+ if (count_mnt_writers(mnt) > 0)
+ ret = -EBUSY;
+ else
mnt->mnt_flags |= MNT_READONLY;
+ /*
+ * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
+ * that become unheld will see MNT_READONLY.
+ */
+ smp_wmb();
+ mnt->mnt_flags &= ~MNT_WRITE_HOLD;
spin_unlock(&vfsmount_lock);
-out:
- unlock_mnt_writers();
return ret;
}
@@ -410,6 +381,9 @@ void free_vfsmnt(struct vfsmount *mnt)
{
kfree(mnt->mnt_devname);
mnt_free_id(mnt);
+#ifdef CONFIG_SMP
+ free_percpu(mnt->mnt_writers);
+#endif
kmem_cache_free(mnt_cache, mnt);
}
@@ -442,11 +416,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
* lookup_mnt increments the ref count before returning
* the vfsmount struct.
*/
-struct vfsmount *lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *lookup_mnt(struct path *path)
{
struct vfsmount *child_mnt;
spin_lock(&vfsmount_lock);
- if ((child_mnt = __lookup_mnt(mnt, dentry, 1)))
+ if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
mntget(child_mnt);
spin_unlock(&vfsmount_lock);
return child_mnt;
@@ -604,38 +578,18 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
static inline void __mntput(struct vfsmount *mnt)
{
- int cpu;
struct super_block *sb = mnt->mnt_sb;
/*
- * We don't have to hold all of the locks at the
- * same time here because we know that we're the
- * last reference to mnt and that no new writers
- * can come in.
- */
- for_each_possible_cpu(cpu) {
- struct mnt_writer *cpu_writer = &per_cpu(mnt_writers, cpu);
- spin_lock(&cpu_writer->lock);
- if (cpu_writer->mnt != mnt) {
- spin_unlock(&cpu_writer->lock);
- continue;
- }
- atomic_add(cpu_writer->count, &mnt->__mnt_writers);
- cpu_writer->count = 0;
- /*
- * Might as well do this so that no one
- * ever sees the pointer and expects
- * it to be valid.
- */
- cpu_writer->mnt = NULL;
- spin_unlock(&cpu_writer->lock);
- }
- /*
* This probably indicates that somebody messed
* up a mnt_want/drop_write() pair. If this
* happens, the filesystem was probably unable
* to make r/w->r/o transitions.
*/
- WARN_ON(atomic_read(&mnt->__mnt_writers));
+ /*
+ * atomic_dec_and_lock() used to deal with ->mnt_count decrements
+ * provides barriers, so count_mnt_writers() below is safe. AV
+ */
+ WARN_ON(count_mnt_writers(mnt));
dput(mnt->mnt_root);
free_vfsmnt(mnt);
deactivate_super(sb);
@@ -1106,11 +1060,8 @@ static int do_umount(struct vfsmount *mnt, int flags)
* we just try to remount it readonly.
*/
down_write(&sb->s_umount);
- if (!(sb->s_flags & MS_RDONLY)) {
- lock_kernel();
+ if (!(sb->s_flags & MS_RDONLY))
retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
- unlock_kernel();
- }
up_write(&sb->s_umount);
return retval;
}
@@ -1253,11 +1204,11 @@ Enomem:
return NULL;
}
-struct vfsmount *collect_mounts(struct vfsmount *mnt, struct dentry *dentry)
+struct vfsmount *collect_mounts(struct path *path)
{
struct vfsmount *tree;
down_write(&namespace_sem);
- tree = copy_tree(mnt, dentry, CL_COPY_ALL | CL_PRIVATE);
+ tree = copy_tree(path->mnt, path->dentry, CL_COPY_ALL | CL_PRIVATE);
up_write(&namespace_sem);
return tree;
}
@@ -1430,7 +1381,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
goto out_unlock;
err = -ENOENT;
- if (IS_ROOT(path->dentry) || !d_unhashed(path->dentry))
+ if (!d_unlinked(path->dentry))
err = attach_recursive_mnt(mnt, path, NULL);
out_unlock:
mutex_unlock(&path->dentry->d_inode->i_mutex);
@@ -1601,7 +1552,7 @@ static int do_move_mount(struct path *path, char *old_name)
down_write(&namespace_sem);
while (d_mountpoint(path->dentry) &&
- follow_down(&path->mnt, &path->dentry))
+ follow_down(path))
;
err = -EINVAL;
if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt))
@@ -1612,7 +1563,7 @@ static int do_move_mount(struct path *path, char *old_name)
if (IS_DEADDIR(path->dentry->d_inode))
goto out1;
- if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry))
+ if (d_unlinked(path->dentry))
goto out1;
err = -EINVAL;
@@ -1676,7 +1627,9 @@ static int do_new_mount(struct path *path, char *type, int flags,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ lock_kernel();
mnt = do_kern_mount(type, flags, name, data);
+ unlock_kernel();
if (IS_ERR(mnt))
return PTR_ERR(mnt);
@@ -1695,10 +1648,10 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
down_write(&namespace_sem);
/* Something was mounted here while we slept */
while (d_mountpoint(path->dentry) &&
- follow_down(&path->mnt, &path->dentry))
+ follow_down(path))
;
err = -EINVAL;
- if (!check_mnt(path->mnt))
+ if (!(mnt_flags & MNT_SHRINKABLE) && !check_mnt(path->mnt))
goto unlock;
/* Refuse the same filesystem on the same mount point */
@@ -2092,10 +2045,8 @@ SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
if (retval < 0)
goto out3;
- lock_kernel();
retval = do_mount((char *)dev_page, dir_page, (char *)type_page,
flags, (void *)data_page);
- unlock_kernel();
free_page(data_page);
out3:
@@ -2175,9 +2126,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
error = -ENOENT;
if (IS_DEADDIR(new.dentry->d_inode))
goto out2;
- if (d_unhashed(new.dentry) && !IS_ROOT(new.dentry))
+ if (d_unlinked(new.dentry))
goto out2;
- if (d_unhashed(old.dentry) && !IS_ROOT(old.dentry))
+ if (d_unlinked(old.dentry))
goto out2;
error = -EBUSY;
if (new.mnt == root.mnt ||
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index d642f0e5b36..b99ce205b1b 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -736,6 +736,8 @@ static void ncp_put_super(struct super_block *sb)
{
struct ncp_server *server = NCP_SBP(sb);
+ lock_kernel();
+
ncp_lock_server(server);
ncp_disconnect(server);
ncp_unlock_server(server);
@@ -769,6 +771,8 @@ static void ncp_put_super(struct super_block *sb)
vfree(server->packet);
sb->s_fs_info = NULL;
kfree(server);
+
+ unlock_kernel();
}
static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf)
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 64a288ee046..f01caec8446 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -154,7 +154,7 @@ out_err:
goto out;
out_follow:
while (d_mountpoint(nd->path.dentry) &&
- follow_down(&nd->path.mnt, &nd->path.dentry))
+ follow_down(&nd->path))
;
err = 0;
goto out;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d2d67781c57..26127b69a27 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1813,6 +1813,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
if (data == NULL)
return -ENOMEM;
+ lock_kernel();
/* fill out struct with values from existing mount */
data->flags = nfss->flags;
data->rsize = nfss->rsize;
@@ -1837,6 +1838,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
error = nfs_compare_remount_data(nfss, data);
out:
kfree(data);
+ unlock_kernel();
return error;
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5839b229cd0..8b1f8efb469 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -847,9 +847,8 @@ exp_get_fsid_key(svc_client *clp, int fsid)
return exp_find_key(clp, FSID_NUM, fsidv, NULL);
}
-static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
- struct dentry *dentry,
- struct cache_req *reqp)
+static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
+ struct cache_req *reqp)
{
struct svc_export *exp, key;
int err;
@@ -858,8 +857,7 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
return ERR_PTR(-ENOENT);
key.ex_client = clp;
- key.ex_path.mnt = mnt;
- key.ex_path.dentry = dentry;
+ key.ex_path = *path;
exp = svc_export_lookup(&key);
if (exp == NULL)
@@ -873,24 +871,19 @@ static svc_export *exp_get_by_name(svc_client *clp, struct vfsmount *mnt,
/*
* Find the export entry for a given dentry.
*/
-static struct svc_export *exp_parent(svc_client *clp, struct vfsmount *mnt,
- struct dentry *dentry,
- struct cache_req *reqp)
+static struct svc_export *exp_parent(svc_client *clp, struct path *path)
{
- svc_export *exp;
-
- dget(dentry);
- exp = exp_get_by_name(clp, mnt, dentry, reqp);
-
- while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
- struct dentry *parent;
-
- parent = dget_parent(dentry);
- dput(dentry);
- dentry = parent;
- exp = exp_get_by_name(clp, mnt, dentry, reqp);
+ struct dentry *saved = dget(path->dentry);
+ svc_export *exp = exp_get_by_name(clp, path, NULL);
+
+ while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+ struct dentry *parent = dget_parent(path->dentry);
+ dput(path->dentry);
+ path->dentry = parent;
+ exp = exp_get_by_name(clp, path, NULL);
}
- dput(dentry);
+ dput(path->dentry);
+ path->dentry = saved;
return exp;
}
@@ -1018,7 +1011,7 @@ exp_export(struct nfsctl_export *nxp)
goto out_put_clp;
err = -EINVAL;
- exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL);
+ exp = exp_get_by_name(clp, &path, NULL);
memset(&new, 0, sizeof(new));
@@ -1135,7 +1128,7 @@ exp_unexport(struct nfsctl_export *nxp)
goto out_domain;
err = -EINVAL;
- exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL);
+ exp = exp_get_by_name(dom, &path, NULL);
path_put(&path);
if (IS_ERR(exp))
goto out_domain;
@@ -1177,7 +1170,7 @@ exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize)
dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n",
name, path.dentry, clp->name,
inode->i_sb->s_id, inode->i_ino);
- exp = exp_parent(clp, path.mnt, path.dentry, NULL);
+ exp = exp_parent(clp, &path);
if (IS_ERR(exp)) {
err = PTR_ERR(exp);
goto out;
@@ -1207,7 +1200,7 @@ static struct svc_export *exp_find(struct auth_domain *clp, int fsid_type,
if (IS_ERR(ek))
return ERR_CAST(ek);
- exp = exp_get_by_name(clp, ek->ek_path.mnt, ek->ek_path.dentry, reqp);
+ exp = exp_get_by_name(clp, &ek->ek_path, reqp);
cache_put(&ek->h, &svc_expkey_cache);
if (IS_ERR(exp))
@@ -1247,8 +1240,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
* use exp_get_by_name() or exp_find().
*/
struct svc_export *
-rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
- struct dentry *dentry)
+rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
{
struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
@@ -1256,8 +1248,7 @@ rqst_exp_get_by_name(struct svc_rqst *rqstp, struct vfsmount *mnt,
goto gss;
/* First try the auth_unix client: */
- exp = exp_get_by_name(rqstp->rq_client, mnt, dentry,
- &rqstp->rq_chandle);
+ exp = exp_get_by_name(rqstp->rq_client, path, &rqstp->rq_chandle);
if (PTR_ERR(exp) == -ENOENT)
goto gss;
if (IS_ERR(exp))
@@ -1269,8 +1260,7 @@ gss:
/* Otherwise, try falling back on gss client */
if (rqstp->rq_gssclient == NULL)
return exp;
- gssexp = exp_get_by_name(rqstp->rq_gssclient, mnt, dentry,
- &rqstp->rq_chandle);
+ gssexp = exp_get_by_name(rqstp->rq_gssclient, path, &rqstp->rq_chandle);
if (PTR_ERR(gssexp) == -ENOENT)
return exp;
if (!IS_ERR(exp))
@@ -1309,23 +1299,19 @@ gss:
}
struct svc_export *
-rqst_exp_parent(struct svc_rqst *rqstp, struct vfsmount *mnt,
- struct dentry *dentry)
+rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
{
- struct svc_export *exp;
-
- dget(dentry);
- exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
-
- while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(dentry)) {
- struct dentry *parent;
-
- parent = dget_parent(dentry);
- dput(dentry);
- dentry = parent;
- exp = rqst_exp_get_by_name(rqstp, mnt, dentry);
+ struct dentry *saved = dget(path->dentry);
+ struct svc_export *exp = rqst_exp_get_by_name(rqstp, path);
+
+ while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
+ struct dentry *parent = dget_parent(path->dentry);
+ dput(path->dentry);
+ path->dentry = parent;
+ exp = rqst_exp_get_by_name(rqstp, path);
}
- dput(dentry);
+ dput(path->dentry);
+ path->dentry = saved;
return exp;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index bd584bcf1d9..99f83575359 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -101,36 +101,35 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
{
struct svc_export *exp = *expp, *exp2 = NULL;
struct dentry *dentry = *dpp;
- struct vfsmount *mnt = mntget(exp->ex_path.mnt);
- struct dentry *mounts = dget(dentry);
+ struct path path = {.mnt = mntget(exp->ex_path.mnt),
+ .dentry = dget(dentry)};
int err = 0;
- while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts));
+ while (d_mountpoint(path.dentry) && follow_down(&path))
+ ;
- exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts);
+ exp2 = rqst_exp_get_by_name(rqstp, &path);
if (IS_ERR(exp2)) {
if (PTR_ERR(exp2) != -ENOENT)
err = PTR_ERR(exp2);
- dput(mounts);
- mntput(mnt);
+ path_put(&path);
goto out;
}
if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
/* successfully crossed mount point */
/*
- * This is subtle: dentry is *not* under mnt at this point.
- * The only reason we are safe is that original mnt is pinned
- * down by exp, so we should dput before putting exp.
+ * This is subtle: path.dentry is *not* on path.mnt
+ * at this point. The only reason we are safe is that
+ * original mnt is pinned down by exp, so we should
+ * put path *before* putting exp
*/
- dput(dentry);
- *dpp = mounts;
- exp_put(exp);
+ *dpp = path.dentry;
+ path.dentry = dentry;
*expp = exp2;
- } else {
- exp_put(exp2);
- dput(mounts);
+ exp2 = exp;
}
- mntput(mnt);
+ path_put(&path);
+ exp_put(exp2);
out:
return err;
}
@@ -169,28 +168,29 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
/* checking mountpoint crossing is very different when stepping up */
struct svc_export *exp2 = NULL;
struct dentry *dp;
- struct vfsmount *mnt = mntget(exp->ex_path.mnt);
- dentry = dget(dparent);
- while(dentry == mnt->mnt_root && follow_up(&mnt, &dentry))
+ struct path path = {.mnt = mntget(exp->ex_path.mnt),
+ .dentry = dget(dparent)};
+
+ while (path.dentry == path.mnt->mnt_root &&
+ follow_up(&path))
;
- dp = dget_parent(dentry);
- dput(dentry);
- dentry = dp;
+ dp = dget_parent(path.dentry);
+ dput(path.dentry);
+ path.dentry = dp;
- exp2 = rqst_exp_parent(rqstp, mnt, dentry);
+ exp2 = rqst_exp_parent(rqstp, &path);
if (PTR_ERR(exp2) == -ENOENT) {
- dput(dentry);
dentry = dget(dparent);
} else if (IS_ERR(exp2)) {
host_err = PTR_ERR(exp2);
- dput(dentry);
- mntput(mnt);
+ path_put(&path);
goto out_nfserr;
} else {
+ dentry = dget(path.dentry);
exp_put(exp);
exp = exp2;
}
- mntput(mnt);
+ path_put(&path);
}
} else {
fh_lock(fhp);
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 064279e33bb..36df60b6d8a 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -31,21 +31,26 @@
#include "dat.h"
#include "alloc.h"
+struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
+{
+ return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
+}
+
int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
__u64 *ptrp)
{
- __u64 ptr;
+ sector_t blocknr;
int ret;
down_read(&bmap->b_sem);
ret = bmap->b_ops->bop_lookup(bmap, key, level, ptrp);
if (ret < 0)
goto out;
- if (bmap->b_pops->bpop_translate != NULL) {
- ret = bmap->b_pops->bpop_translate(bmap, *ptrp, &ptr);
- if (ret < 0)
- goto out;
- *ptrp = ptr;
+ if (NILFS_BMAP_USE_VBN(bmap)) {
+ ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), *ptrp,
+ &blocknr);
+ if (!ret)
+ *ptrp = blocknr;
}
out:
@@ -53,6 +58,16 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *bmap, __u64 key, int level,
return ret;
}
+int nilfs_bmap_lookup_contig(struct nilfs_bmap *bmap, __u64 key, __u64 *ptrp,
+ unsigned maxblocks)
+{
+ int ret;
+
+ down_read(&bmap->b_sem);
+ ret = bmap->b_ops->bop_lookup_contig(bmap, key, ptrp, maxblocks);
+ up_read(&bmap->b_sem);
+ return ret;
+}
/**
* nilfs_bmap_lookup - find a record
@@ -101,8 +116,7 @@ static int nilfs_bmap_do_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
if (n < 0)
return n;
ret = nilfs_btree_convert_and_insert(
- bmap, key, ptr, keys, ptrs, n,
- NILFS_BMAP_LARGE_LOW, NILFS_BMAP_LARGE_HIGH);
+ bmap, key, ptr, keys, ptrs, n);
if (ret == 0)
bmap->b_u.u_flags |= NILFS_BMAP_LARGE;
@@ -158,8 +172,7 @@ static int nilfs_bmap_do_delete(struct nilfs_bmap *bmap, __u64 key)
if (n < 0)
return n;
ret = nilfs_direct_delete_and_convert(
- bmap, key, keys, ptrs, n,
- NILFS_BMAP_SMALL_LOW, NILFS_BMAP_SMALL_HIGH);
+ bmap, key, keys, ptrs, n);
if (ret == 0)
bmap->b_u.u_flags &= ~NILFS_BMAP_LARGE;
@@ -417,38 +430,6 @@ void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n)
mark_inode_dirty(bmap->b_inode);
}
-int nilfs_bmap_get_block(const struct nilfs_bmap *bmap, __u64 ptr,
- struct buffer_head **bhp)
-{
- return nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache,
- ptr, 0, bhp, 0);
-}
-
-void nilfs_bmap_put_block(const struct nilfs_bmap *bmap,
- struct buffer_head *bh)
-{
- brelse(bh);
-}
-
-int nilfs_bmap_get_new_block(const struct nilfs_bmap *bmap, __u64 ptr,
- struct buffer_head **bhp)
-{
- int ret;
-
- ret = nilfs_btnode_get(&NILFS_BMAP_I(bmap)->i_btnode_cache,
- ptr, 0, bhp, 1);
- if (ret < 0)
- return ret;
- set_buffer_nilfs_volatile(*bhp);
- return 0;
-}
-
-void nilfs_bmap_delete_block(const struct nilfs_bmap *bmap,
- struct buffer_head *bh)
-{
- nilfs_btnode_delete(bh);
-}
-
__u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
const struct buffer_head *bh)
{
@@ -476,11 +457,6 @@ __u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *bmap, __u64 key)
return NILFS_BMAP_INVALID_PTR;
}
-static struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *bmap)
-{
- return nilfs_dat_inode(NILFS_I_NILFS(bmap->b_inode));
-}
-
#define NILFS_BMAP_GROUP_DIV 8
__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
{
@@ -493,64 +469,51 @@ __u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *bmap)
(entries_per_group / NILFS_BMAP_GROUP_DIV);
}
-static int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
{
return nilfs_dat_prepare_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}
-static void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
{
nilfs_dat_commit_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}
-static void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
{
nilfs_dat_abort_alloc(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}
-static int nilfs_bmap_prepare_start_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+int nilfs_bmap_start_v(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *req,
+ sector_t blocknr)
{
- return nilfs_dat_prepare_start(nilfs_bmap_get_dat(bmap), &req->bpr_req);
-}
-
-static void nilfs_bmap_commit_start_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req,
- sector_t blocknr)
-{
- nilfs_dat_commit_start(nilfs_bmap_get_dat(bmap), &req->bpr_req,
- blocknr);
-}
+ struct inode *dat = nilfs_bmap_get_dat(bmap);
+ int ret;
-static void nilfs_bmap_abort_start_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- nilfs_dat_abort_start(nilfs_bmap_get_dat(bmap), &req->bpr_req);
+ ret = nilfs_dat_prepare_start(dat, &req->bpr_req);
+ if (likely(!ret))
+ nilfs_dat_commit_start(dat, &req->bpr_req, blocknr);
+ return ret;
}
-static int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+int nilfs_bmap_prepare_end_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
{
return nilfs_dat_prepare_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}
-static void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 0);
-}
-
-static void nilfs_bmap_commit_end_vmdt(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+void nilfs_bmap_commit_end_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
{
- nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req, 1);
+ nilfs_dat_commit_end(nilfs_bmap_get_dat(bmap), &req->bpr_req,
+ bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
}
-static void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
+void nilfs_bmap_abort_end_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
{
nilfs_dat_abort_end(nilfs_bmap_get_dat(bmap), &req->bpr_req);
}
@@ -566,128 +529,44 @@ int nilfs_bmap_mark_dirty(const struct nilfs_bmap *bmap, __u64 vblocknr)
return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), vblocknr);
}
-int nilfs_bmap_prepare_update(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *oldreq,
- union nilfs_bmap_ptr_req *newreq)
+int nilfs_bmap_prepare_update_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *oldreq,
+ union nilfs_bmap_ptr_req *newreq)
{
+ struct inode *dat = nilfs_bmap_get_dat(bmap);
int ret;
- ret = bmap->b_pops->bpop_prepare_end_ptr(bmap, oldreq);
+ ret = nilfs_dat_prepare_end(dat, &oldreq->bpr_req);
if (ret < 0)
return ret;
- ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, newreq);
+ ret = nilfs_dat_prepare_alloc(dat, &newreq->bpr_req);
if (ret < 0)
- bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq);
+ nilfs_dat_abort_end(dat, &oldreq->bpr_req);
return ret;
}
-void nilfs_bmap_commit_update(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *oldreq,
- union nilfs_bmap_ptr_req *newreq)
+void nilfs_bmap_commit_update_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *oldreq,
+ union nilfs_bmap_ptr_req *newreq)
{
- bmap->b_pops->bpop_commit_end_ptr(bmap, oldreq);
- bmap->b_pops->bpop_commit_alloc_ptr(bmap, newreq);
-}
+ struct inode *dat = nilfs_bmap_get_dat(bmap);
-void nilfs_bmap_abort_update(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *oldreq,
- union nilfs_bmap_ptr_req *newreq)
-{
- bmap->b_pops->bpop_abort_end_ptr(bmap, oldreq);
- bmap->b_pops->bpop_abort_alloc_ptr(bmap, newreq);
+ nilfs_dat_commit_end(dat, &oldreq->bpr_req,
+ bmap->b_ptr_type == NILFS_BMAP_PTR_VS);
+ nilfs_dat_commit_alloc(dat, &newreq->bpr_req);
}
-static int nilfs_bmap_translate_v(const struct nilfs_bmap *bmap, __u64 ptr,
- __u64 *ptrp)
+void nilfs_bmap_abort_update_v(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *oldreq,
+ union nilfs_bmap_ptr_req *newreq)
{
- sector_t blocknr;
- int ret;
-
- ret = nilfs_dat_translate(nilfs_bmap_get_dat(bmap), ptr, &blocknr);
- if (ret < 0)
- return ret;
- if (ptrp != NULL)
- *ptrp = blocknr;
- return 0;
-}
+ struct inode *dat = nilfs_bmap_get_dat(bmap);
-static int nilfs_bmap_prepare_alloc_p(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- /* ignore target ptr */
- req->bpr_ptr = bmap->b_last_allocated_ptr++;
- return 0;
+ nilfs_dat_abort_end(dat, &oldreq->bpr_req);
+ nilfs_dat_abort_alloc(dat, &newreq->bpr_req);
}
-static void nilfs_bmap_commit_alloc_p(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- /* do nothing */
-}
-
-static void nilfs_bmap_abort_alloc_p(struct nilfs_bmap *bmap,
- union nilfs_bmap_ptr_req *req)
-{
- bmap->b_last_allocated_ptr--;
-}
-
-static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_v = {
- .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v,
- .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v,
- .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v,
- .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v,
- .bpop_commit_start_ptr = nilfs_bmap_commit_start_v,
- .bpop_abort_start_ptr = nilfs_bmap_abort_start_v,
- .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v,
- .bpop_commit_end_ptr = nilfs_bmap_commit_end_v,
- .bpop_abort_end_ptr = nilfs_bmap_abort_end_v,
-
- .bpop_translate = nilfs_bmap_translate_v,
-};
-
-static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_vmdt = {
- .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_v,
- .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_v,
- .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_v,
- .bpop_prepare_start_ptr = nilfs_bmap_prepare_start_v,
- .bpop_commit_start_ptr = nilfs_bmap_commit_start_v,
- .bpop_abort_start_ptr = nilfs_bmap_abort_start_v,
- .bpop_prepare_end_ptr = nilfs_bmap_prepare_end_v,
- .bpop_commit_end_ptr = nilfs_bmap_commit_end_vmdt,
- .bpop_abort_end_ptr = nilfs_bmap_abort_end_v,
-
- .bpop_translate = nilfs_bmap_translate_v,
-};
-
-static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_p = {
- .bpop_prepare_alloc_ptr = nilfs_bmap_prepare_alloc_p,
- .bpop_commit_alloc_ptr = nilfs_bmap_commit_alloc_p,
- .bpop_abort_alloc_ptr = nilfs_bmap_abort_alloc_p,
- .bpop_prepare_start_ptr = NULL,
- .bpop_commit_start_ptr = NULL,
- .bpop_abort_start_ptr = NULL,
- .bpop_prepare_end_ptr = NULL,
- .bpop_commit_end_ptr = NULL,
- .bpop_abort_end_ptr = NULL,
-
- .bpop_translate = NULL,
-};
-
-static const struct nilfs_bmap_ptr_operations nilfs_bmap_ptr_ops_gc = {
- .bpop_prepare_alloc_ptr = NULL,
- .bpop_commit_alloc_ptr = NULL,
- .bpop_abort_alloc_ptr = NULL,
- .bpop_prepare_start_ptr = NULL,
- .bpop_commit_start_ptr = NULL,
- .bpop_abort_start_ptr = NULL,
- .bpop_prepare_end_ptr = NULL,
- .bpop_commit_end_ptr = NULL,
- .bpop_abort_end_ptr = NULL,
-
- .bpop_translate = NULL,
-};
-
static struct lock_class_key nilfs_bmap_dat_lock_key;
/**
@@ -714,31 +593,26 @@ int nilfs_bmap_read(struct nilfs_bmap *bmap, struct nilfs_inode *raw_inode)
bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
switch (bmap->b_inode->i_ino) {
case NILFS_DAT_INO:
- bmap->b_pops = &nilfs_bmap_ptr_ops_p;
- bmap->b_last_allocated_key = 0; /* XXX: use macro */
+ bmap->b_ptr_type = NILFS_BMAP_PTR_P;
+ bmap->b_last_allocated_key = 0;
bmap->b_last_allocated_ptr = NILFS_BMAP_NEW_PTR_INIT;
lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key);
break;
case NILFS_CPFILE_INO:
case NILFS_SUFILE_INO:
- bmap->b_pops = &nilfs_bmap_ptr_ops_vmdt;
- bmap->b_last_allocated_key = 0; /* XXX: use macro */
+ bmap->b_ptr_type = NILFS_BMAP_PTR_VS;
+ bmap->b_last_allocated_key = 0;
bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
break;
default:
- bmap->b_pops = &nilfs_bmap_ptr_ops_v;
- bmap->b_last_allocated_key = 0; /* XXX: use macro */
+ bmap->b_ptr_type = NILFS_BMAP_PTR_VM;
+ bmap->b_last_allocated_key = 0;
bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
break;
}
return (bmap->b_u.u_flags & NILFS_BMAP_LARGE) ?
- nilfs_btree_init(bmap,
- NILFS_BMAP_LARGE_LOW,
- NILFS_BMAP_LARGE_HIGH) :
- nilfs_direct_init(bmap,
- NILFS_BMAP_SMALL_LOW,
- NILFS_BMAP_SMALL_HIGH);
+ nilfs_btree_init(bmap) : nilfs_direct_init(bmap);
}
/**
@@ -764,7 +638,7 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap)
memset(&bmap->b_u, 0, NILFS_BMAP_SIZE);
init_rwsem(&bmap->b_sem);
bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode;
- bmap->b_pops = &nilfs_bmap_ptr_ops_gc;
+ bmap->b_ptr_type = NILFS_BMAP_PTR_U;
bmap->b_last_allocated_key = 0;
bmap->b_last_allocated_ptr = NILFS_BMAP_INVALID_PTR;
bmap->b_state = 0;
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index 4f2708abb1b..b2890cdcef1 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -64,6 +64,8 @@ struct nilfs_bmap_stats {
*/
struct nilfs_bmap_operations {
int (*bop_lookup)(const struct nilfs_bmap *, __u64, int, __u64 *);
+ int (*bop_lookup_contig)(const struct nilfs_bmap *, __u64, __u64 *,
+ unsigned);
int (*bop_insert)(struct nilfs_bmap *, __u64, __u64);
int (*bop_delete)(struct nilfs_bmap *, __u64);
void (*bop_clear)(struct nilfs_bmap *);
@@ -86,34 +88,6 @@ struct nilfs_bmap_operations {
};
-/**
- * struct nilfs_bmap_ptr_operations - bmap ptr operation table
- */
-struct nilfs_bmap_ptr_operations {
- int (*bpop_prepare_alloc_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
- void (*bpop_commit_alloc_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
- void (*bpop_abort_alloc_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
- int (*bpop_prepare_start_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
- void (*bpop_commit_start_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *,
- sector_t);
- void (*bpop_abort_start_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
- int (*bpop_prepare_end_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
- void (*bpop_commit_end_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
- void (*bpop_abort_end_ptr)(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *);
-
- int (*bpop_translate)(const struct nilfs_bmap *, __u64, __u64 *);
-};
-
-
#define NILFS_BMAP_SIZE (NILFS_INODE_BMAP_SIZE * sizeof(__le64))
#define NILFS_BMAP_KEY_BIT (sizeof(unsigned long) * 8 /* CHAR_BIT */)
#define NILFS_BMAP_NEW_PTR_INIT \
@@ -131,11 +105,9 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr)
* @b_sem: semaphore
* @b_inode: owner of bmap
* @b_ops: bmap operation table
- * @b_pops: bmap ptr operation table
- * @b_low: low watermark of conversion
- * @b_high: high watermark of conversion
* @b_last_allocated_key: last allocated key for data block
* @b_last_allocated_ptr: last allocated ptr for data block
+ * @b_ptr_type: pointer type
* @b_state: state
*/
struct nilfs_bmap {
@@ -146,14 +118,22 @@ struct nilfs_bmap {
struct rw_semaphore b_sem;
struct inode *b_inode;
const struct nilfs_bmap_operations *b_ops;
- const struct nilfs_bmap_ptr_operations *b_pops;
- __u64 b_low;
- __u64 b_high;
__u64 b_last_allocated_key;
__u64 b_last_allocated_ptr;
+ int b_ptr_type;
int b_state;
};
+/* pointer type */
+#define NILFS_BMAP_PTR_P 0 /* physical block number (i.e. LBN) */
+#define NILFS_BMAP_PTR_VS 1 /* virtual block number (single
+ version) */
+#define NILFS_BMAP_PTR_VM 2 /* virtual block number (has multiple
+ versions) */
+#define NILFS_BMAP_PTR_U (-1) /* never perform pointer operations */
+
+#define NILFS_BMAP_USE_VBN(bmap) ((bmap)->b_ptr_type > 0)
+
/* state */
#define NILFS_BMAP_DIRTY 0x00000001
@@ -162,6 +142,7 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *);
int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *);
void nilfs_bmap_write(struct nilfs_bmap *, struct nilfs_inode *);
int nilfs_bmap_lookup(struct nilfs_bmap *, unsigned long, unsigned long *);
+int nilfs_bmap_lookup_contig(struct nilfs_bmap *, __u64, __u64 *, unsigned);
int nilfs_bmap_insert(struct nilfs_bmap *, unsigned long, unsigned long);
int nilfs_bmap_delete(struct nilfs_bmap *, unsigned long);
int nilfs_bmap_last_key(struct nilfs_bmap *, unsigned long *);
@@ -182,7 +163,67 @@ void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *);
/*
* Internal use only
*/
+struct inode *nilfs_bmap_get_dat(const struct nilfs_bmap *);
+int nilfs_bmap_prepare_alloc_v(struct nilfs_bmap *,
+ union nilfs_bmap_ptr_req *);
+void nilfs_bmap_commit_alloc_v(struct nilfs_bmap *,
+ union nilfs_bmap_ptr_req *);
+void nilfs_bmap_abort_alloc_v(struct nilfs_bmap *,
+ union nilfs_bmap_ptr_req *);
+static inline int nilfs_bmap_prepare_alloc_ptr(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
+{
+ if (NILFS_BMAP_USE_VBN(bmap))
+ return nilfs_bmap_prepare_alloc_v(bmap, req);
+ /* ignore target ptr */
+ req->bpr_ptr = bmap->b_last_allocated_ptr++;
+ return 0;
+}
+
+static inline void nilfs_bmap_commit_alloc_ptr(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
+{
+ if (NILFS_BMAP_USE_VBN(bmap))
+ nilfs_bmap_commit_alloc_v(bmap, req);
+}
+
+static inline void nilfs_bmap_abort_alloc_ptr(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
+{
+ if (NILFS_BMAP_USE_VBN(bmap))
+ nilfs_bmap_abort_alloc_v(bmap, req);
+ else
+ bmap->b_last_allocated_ptr--;
+}
+
+int nilfs_bmap_prepare_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
+void nilfs_bmap_commit_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
+void nilfs_bmap_abort_end_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *);
+
+static inline int nilfs_bmap_prepare_end_ptr(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
+{
+ return NILFS_BMAP_USE_VBN(bmap) ?
+ nilfs_bmap_prepare_end_v(bmap, req) : 0;
+}
+
+static inline void nilfs_bmap_commit_end_ptr(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
+{
+ if (NILFS_BMAP_USE_VBN(bmap))
+ nilfs_bmap_commit_end_v(bmap, req);
+}
+
+static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap,
+ union nilfs_bmap_ptr_req *req)
+{
+ if (NILFS_BMAP_USE_VBN(bmap))
+ nilfs_bmap_abort_end_v(bmap, req);
+}
+
+int nilfs_bmap_start_v(struct nilfs_bmap *, union nilfs_bmap_ptr_req *,
+ sector_t);
int nilfs_bmap_move_v(const struct nilfs_bmap *, __u64, sector_t);
int nilfs_bmap_mark_dirty(const struct nilfs_bmap *, __u64);
@@ -193,28 +234,20 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *,
__u64 nilfs_bmap_find_target_seq(const struct nilfs_bmap *, __u64);
__u64 nilfs_bmap_find_target_in_group(const struct nilfs_bmap *);
-int nilfs_bmap_prepare_update(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *,
- union nilfs_bmap_ptr_req *);
-void nilfs_bmap_commit_update(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *,
- union nilfs_bmap_ptr_req *);
-void nilfs_bmap_abort_update(struct nilfs_bmap *,
- union nilfs_bmap_ptr_req *,
- union nilfs_bmap_ptr_req *);
+int nilfs_bmap_prepare_update_v(struct nilfs_bmap *,
+ union nilfs_bmap_ptr_req *,
+ union nilfs_bmap_ptr_req *);
+void nilfs_bmap_commit_update_v(struct nilfs_bmap *,
+ union nilfs_bmap_ptr_req *,
+ union nilfs_bmap_ptr_req *);
+void nilfs_bmap_abort_update_v(struct nilfs_bmap *,
+ union nilfs_bmap_ptr_req *,
+ union nilfs_bmap_ptr_req *);
void nilfs_bmap_add_blocks(const struct nilfs_bmap *, int);
void nilfs_bmap_sub_blocks(const struct nilfs_bmap *, int);
-int nilfs_bmap_get_block(const struct nilfs_bmap *, __u64,
- struct buffer_head **);
-void nilfs_bmap_put_block(const struct nilfs_bmap *, struct buffer_head *);
-int nilfs_bmap_get_new_block(const struct nilfs_bmap *, __u64,
- struct buffer_head **);
-void nilfs_bmap_delete_block(const struct nilfs_bmap *, struct buffer_head *);
-
-
/* Assume that bmap semaphore is locked. */
static inline int nilfs_bmap_dirty(const struct nilfs_bmap *bmap)
{
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index 4cc07b2c30e..7e0b61be212 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -46,15 +46,18 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc)
INIT_LIST_HEAD(&btnc->i_mmap_nonlinear);
}
-static struct address_space_operations def_btnode_aops;
+static struct address_space_operations def_btnode_aops = {
+ .sync_page = block_sync_page,
+};
-void nilfs_btnode_cache_init(struct address_space *btnc)
+void nilfs_btnode_cache_init(struct address_space *btnc,
+ struct backing_dev_info *bdi)
{
btnc->host = NULL; /* can safely set to host inode ? */
btnc->flags = 0;
mapping_set_gfp_mask(btnc, GFP_NOFS);
btnc->assoc_mapping = NULL;
- btnc->backing_dev_info = &default_backing_dev_info;
+ btnc->backing_dev_info = bdi;
btnc->a_ops = &def_btnode_aops;
}
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 35faa86444a..3e2275172ed 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -38,7 +38,7 @@ struct nilfs_btnode_chkey_ctxt {
};
void nilfs_btnode_cache_init_once(struct address_space *);
-void nilfs_btnode_cache_init(struct address_space *);
+void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *);
void nilfs_btnode_cache_clear(struct address_space *);
int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t,
struct buffer_head **, int);
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 6b37a276729..aa412724b64 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -29,6 +29,7 @@
#include "btnode.h"
#include "btree.h"
#include "alloc.h"
+#include "dat.h"
/**
* struct nilfs_btree_path - A path on which B-tree operations are executed
@@ -109,8 +110,7 @@ static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
level < NILFS_BTREE_LEVEL_MAX;
level++) {
if (path[level].bp_bh != NULL) {
- nilfs_bmap_put_block(&btree->bt_bmap,
- path[level].bp_bh);
+ brelse(path[level].bp_bh);
path[level].bp_bh = NULL;
}
/* sib_bh is released or deleted by prepare or commit
@@ -123,10 +123,29 @@ static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
}
}
-
/*
* B-tree node operations
*/
+static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr,
+ struct buffer_head **bhp)
+{
+ struct address_space *btnc =
+ &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
+ return nilfs_btnode_get(btnc, ptr, 0, bhp, 0);
+}
+
+static int nilfs_btree_get_new_block(const struct nilfs_btree *btree,
+ __u64 ptr, struct buffer_head **bhp)
+{
+ struct address_space *btnc =
+ &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache;
+ int ret;
+
+ ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1);
+ if (!ret)
+ set_buffer_nilfs_volatile(*bhp);
+ return ret;
+}
static inline int
nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
@@ -488,8 +507,7 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
path[level].bp_index = index;
for (level--; level >= minlevel; level--) {
- ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
- &path[level].bp_bh);
+ ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
if (ret < 0)
return ret;
node = nilfs_btree_get_nonroot_node(btree, path, level);
@@ -535,8 +553,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
path[level].bp_index = index;
for (level--; level > 0; level--) {
- ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
- &path[level].bp_bh);
+ ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh);
if (ret < 0)
return ret;
node = nilfs_btree_get_nonroot_node(btree, path, level);
@@ -579,6 +596,87 @@ static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
return ret;
}
+static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap,
+ __u64 key, __u64 *ptrp, unsigned maxblocks)
+{
+ struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
+ struct nilfs_btree_path *path;
+ struct nilfs_btree_node *node;
+ struct inode *dat = NULL;
+ __u64 ptr, ptr2;
+ sector_t blocknr;
+ int level = NILFS_BTREE_LEVEL_NODE_MIN;
+ int ret, cnt, index, maxlevel;
+
+ path = nilfs_btree_alloc_path(btree);
+ if (path == NULL)
+ return -ENOMEM;
+ nilfs_btree_init_path(btree, path);
+ ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
+ if (ret < 0)
+ goto out;
+
+ if (NILFS_BMAP_USE_VBN(bmap)) {
+ dat = nilfs_bmap_get_dat(bmap);
+ ret = nilfs_dat_translate(dat, ptr, &blocknr);
+ if (ret < 0)
+ goto out;
+ ptr = blocknr;
+ }
+ cnt = 1;
+ if (cnt == maxblocks)
+ goto end;
+
+ maxlevel = nilfs_btree_height(btree) - 1;
+ node = nilfs_btree_get_node(btree, path, level);
+ index = path[level].bp_index + 1;
+ for (;;) {
+ while (index < nilfs_btree_node_get_nchildren(btree, node)) {
+ if (nilfs_btree_node_get_key(btree, node, index) !=
+ key + cnt)
+ goto end;
+ ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
+ if (dat) {
+ ret = nilfs_dat_translate(dat, ptr2, &blocknr);
+ if (ret < 0)
+ goto out;
+ ptr2 = blocknr;
+ }
+ if (ptr2 != ptr + cnt || ++cnt == maxblocks)
+ goto end;
+ index++;
+ continue;
+ }
+ if (level == maxlevel)
+ break;
+
+ /* look-up right sibling node */
+ node = nilfs_btree_get_node(btree, path, level + 1);
+ index = path[level + 1].bp_index + 1;
+ if (index >= nilfs_btree_node_get_nchildren(btree, node) ||
+ nilfs_btree_node_get_key(btree, node, index) != key + cnt)
+ break;
+ ptr2 = nilfs_btree_node_get_ptr(btree, node, index);
+ path[level + 1].bp_index = index;
+
+ brelse(path[level].bp_bh);
+ path[level].bp_bh = NULL;
+ ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh);
+ if (ret < 0)
+ goto out;
+ node = nilfs_btree_get_nonroot_node(btree, path, level);
+ index = 0;
+ path[level].bp_index = index;
+ }
+ end:
+ *ptrp = ptr;
+ ret = cnt;
+ out:
+ nilfs_btree_clear_path(btree, path);
+ nilfs_btree_free_path(btree, path);
+ return ret;
+}
+
static void nilfs_btree_promote_key(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
int level, __u64 key)
@@ -669,13 +767,13 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree,
nilfs_btree_node_get_key(btree, node, 0));
if (move) {
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
+ brelse(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
path[level].bp_index += lnchildren;
path[level + 1].bp_index--;
} else {
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
+ brelse(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
path[level].bp_index -= n;
}
@@ -722,14 +820,14 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree,
path[level + 1].bp_index--;
if (move) {
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
+ brelse(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
path[level].bp_index -=
nilfs_btree_node_get_nchildren(btree, node);
path[level + 1].bp_index++;
} else {
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
+ brelse(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
}
@@ -781,7 +879,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
*keyp = nilfs_btree_node_get_key(btree, right, 0);
*ptrp = path[level].bp_newreq.bpr_ptr;
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
+ brelse(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
} else {
@@ -790,7 +888,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree,
*keyp = nilfs_btree_node_get_key(btree, right, 0);
*ptrp = path[level].bp_newreq.bpr_ptr;
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
+ brelse(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
}
@@ -897,12 +995,12 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
level = NILFS_BTREE_LEVEL_DATA;
/* allocate a new ptr for data block */
- if (btree->bt_ops->btop_find_target != NULL)
+ if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
path[level].bp_newreq.bpr_ptr =
- btree->bt_ops->btop_find_target(btree, path, key);
+ nilfs_btree_find_target_v(btree, path, key);
- ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
- &btree->bt_bmap, &path[level].bp_newreq);
+ ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
+ &path[level].bp_newreq);
if (ret < 0)
goto err_out_data;
@@ -924,8 +1022,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
if (pindex > 0) {
sibptr = nilfs_btree_node_get_ptr(btree, parent,
pindex - 1);
- ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
- &bh);
+ ret = nilfs_btree_get_block(btree, sibptr, &bh);
if (ret < 0)
goto err_out_child_node;
sib = (struct nilfs_btree_node *)bh->b_data;
@@ -936,7 +1033,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
stats->bs_nblocks++;
goto out;
} else
- nilfs_bmap_put_block(&btree->bt_bmap, bh);
+ brelse(bh);
}
/* right sibling */
@@ -944,8 +1041,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
nilfs_btree_node_get_nchildren(btree, parent) - 1) {
sibptr = nilfs_btree_node_get_ptr(btree, parent,
pindex + 1);
- ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
- &bh);
+ ret = nilfs_btree_get_block(btree, sibptr, &bh);
if (ret < 0)
goto err_out_child_node;
sib = (struct nilfs_btree_node *)bh->b_data;
@@ -956,19 +1052,19 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
stats->bs_nblocks++;
goto out;
} else
- nilfs_bmap_put_block(&btree->bt_bmap, bh);
+ brelse(bh);
}
/* split */
path[level].bp_newreq.bpr_ptr =
path[level - 1].bp_newreq.bpr_ptr + 1;
- ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
- &btree->bt_bmap, &path[level].bp_newreq);
+ ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
+ &path[level].bp_newreq);
if (ret < 0)
goto err_out_child_node;
- ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
- path[level].bp_newreq.bpr_ptr,
- &bh);
+ ret = nilfs_btree_get_new_block(btree,
+ path[level].bp_newreq.bpr_ptr,
+ &bh);
if (ret < 0)
goto err_out_curr_node;
@@ -994,12 +1090,12 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
/* grow */
path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
- ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
- &btree->bt_bmap, &path[level].bp_newreq);
+ ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap,
+ &path[level].bp_newreq);
if (ret < 0)
goto err_out_child_node;
- ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
- path[level].bp_newreq.bpr_ptr, &bh);
+ ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr,
+ &bh);
if (ret < 0)
goto err_out_curr_node;
@@ -1023,18 +1119,16 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
/* error */
err_out_curr_node:
- btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
- &path[level].bp_newreq);
+ nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
err_out_child_node:
for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
- nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
- btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
- &btree->bt_bmap, &path[level].bp_newreq);
+ nilfs_btnode_delete(path[level].bp_sib_bh);
+ nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap,
+ &path[level].bp_newreq);
}
- btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
- &path[level].bp_newreq);
+ nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq);
err_out_data:
*levelp = level;
stats->bs_nblocks = 0;
@@ -1049,14 +1143,12 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
- if (btree->bt_ops->btop_set_target != NULL)
- btree->bt_ops->btop_set_target(btree, key, ptr);
+ if (NILFS_BMAP_USE_VBN(&btree->bt_bmap))
+ nilfs_btree_set_target_v(btree, key, ptr);
for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
- if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
- btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
- &btree->bt_bmap, &path[level - 1].bp_newreq);
- }
+ nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap,
+ &path[level - 1].bp_newreq);
path[level].bp_op(btree, path, level, &key, &ptr);
}
@@ -1153,7 +1245,7 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
nilfs_btree_promote_key(btree, path, level + 1,
nilfs_btree_node_get_key(btree, node, 0));
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
+ brelse(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
path[level].bp_index += n;
}
@@ -1192,7 +1284,7 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
nilfs_btree_node_get_key(btree, right, 0));
path[level + 1].bp_index--;
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
+ brelse(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
}
@@ -1221,7 +1313,7 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree,
unlock_buffer(path[level].bp_bh);
unlock_buffer(path[level].bp_sib_bh);
- nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
+ nilfs_btnode_delete(path[level].bp_bh);
path[level].bp_bh = path[level].bp_sib_bh;
path[level].bp_sib_bh = NULL;
path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
@@ -1252,7 +1344,7 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree,
unlock_buffer(path[level].bp_bh);
unlock_buffer(path[level].bp_sib_bh);
- nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
+ nilfs_btnode_delete(path[level].bp_sib_bh);
path[level].bp_sib_bh = NULL;
path[level + 1].bp_index++;
}
@@ -1276,7 +1368,7 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree,
nilfs_btree_node_move_left(btree, root, child, n);
unlock_buffer(path[level].bp_bh);
- nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
+ nilfs_btnode_delete(path[level].bp_bh);
path[level].bp_bh = NULL;
}
@@ -1300,12 +1392,10 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
path[level].bp_oldreq.bpr_ptr =
nilfs_btree_node_get_ptr(btree, node,
path[level].bp_index);
- if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
- ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
- &btree->bt_bmap, &path[level].bp_oldreq);
- if (ret < 0)
- goto err_out_child_node;
- }
+ ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
+ &path[level].bp_oldreq);
+ if (ret < 0)
+ goto err_out_child_node;
if (nilfs_btree_node_get_nchildren(btree, node) >
nilfs_btree_node_nchildren_min(btree, node)) {
@@ -1321,8 +1411,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
/* left sibling */
sibptr = nilfs_btree_node_get_ptr(btree, parent,
pindex - 1);
- ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
- &bh);
+ ret = nilfs_btree_get_block(btree, sibptr, &bh);
if (ret < 0)
goto err_out_curr_node;
sib = (struct nilfs_btree_node *)bh->b_data;
@@ -1343,8 +1432,7 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
/* right sibling */
sibptr = nilfs_btree_node_get_ptr(btree, parent,
pindex + 1);
- ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
- &bh);
+ ret = nilfs_btree_get_block(btree, sibptr, &bh);
if (ret < 0)
goto err_out_curr_node;
sib = (struct nilfs_btree_node *)bh->b_data;
@@ -1381,12 +1469,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
node = nilfs_btree_get_root(btree);
path[level].bp_oldreq.bpr_ptr =
nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
- if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
- ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
- &btree->bt_bmap, &path[level].bp_oldreq);
- if (ret < 0)
- goto err_out_child_node;
- }
+
+ ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap,
+ &path[level].bp_oldreq);
+ if (ret < 0)
+ goto err_out_child_node;
+
/* child of the root node is deleted */
path[level].bp_op = nilfs_btree_do_delete;
stats->bs_nblocks++;
@@ -1398,15 +1486,12 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
/* error */
err_out_curr_node:
- if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
- btree->bt_bmap.b_pops->bpop_abort_end_ptr(
- &btree->bt_bmap, &path[level].bp_oldreq);
+ nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq);
err_out_child_node:
for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
- nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
- if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
- btree->bt_bmap.b_pops->bpop_abort_end_ptr(
- &btree->bt_bmap, &path[level].bp_oldreq);
+ brelse(path[level].bp_sib_bh);
+ nilfs_bmap_abort_end_ptr(&btree->bt_bmap,
+ &path[level].bp_oldreq);
}
*levelp = level;
stats->bs_nblocks = 0;
@@ -1420,9 +1505,8 @@ static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
int level;
for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
- if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
- btree->bt_bmap.b_pops->bpop_commit_end_ptr(
- &btree->bt_bmap, &path[level].bp_oldreq);
+ nilfs_bmap_commit_end_ptr(&btree->bt_bmap,
+ &path[level].bp_oldreq);
path[level].bp_op(btree, path, level, NULL, NULL);
}
@@ -1501,7 +1585,7 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
if (nchildren > 1)
return 0;
ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
- ret = nilfs_bmap_get_block(bmap, ptr, &bh);
+ ret = nilfs_btree_get_block(btree, ptr, &bh);
if (ret < 0)
return ret;
node = (struct nilfs_btree_node *)bh->b_data;
@@ -1515,9 +1599,9 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
nextmaxkey = (nchildren > 1) ?
nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
if (bh != NULL)
- nilfs_bmap_put_block(bmap, bh);
+ brelse(bh);
- return (maxkey == key) && (nextmaxkey < bmap->b_low);
+ return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW);
}
static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
@@ -1542,7 +1626,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
nchildren = nilfs_btree_node_get_nchildren(btree, root);
WARN_ON(nchildren > 1);
ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
- ret = nilfs_bmap_get_block(bmap, ptr, &bh);
+ ret = nilfs_btree_get_block(btree, ptr, &bh);
if (ret < 0)
return ret;
node = (struct nilfs_btree_node *)bh->b_data;
@@ -1563,7 +1647,7 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
}
if (bh != NULL)
- nilfs_bmap_put_block(bmap, bh);
+ brelse(bh);
return nitems;
}
@@ -1584,10 +1668,10 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
/* for data */
/* cannot find near ptr */
- if (btree->bt_ops->btop_find_target != NULL)
- dreq->bpr_ptr
- = btree->bt_ops->btop_find_target(btree, NULL, key);
- ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
+ if (NILFS_BMAP_USE_VBN(bmap))
+ dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key);
+
+ ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq);
if (ret < 0)
return ret;
@@ -1595,11 +1679,11 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
stats->bs_nblocks++;
if (nreq != NULL) {
nreq->bpr_ptr = dreq->bpr_ptr + 1;
- ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
+ ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq);
if (ret < 0)
goto err_out_dreq;
- ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh);
+ ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh);
if (ret < 0)
goto err_out_nreq;
@@ -1612,9 +1696,9 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
/* error */
err_out_nreq:
- bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
+ nilfs_bmap_abort_alloc_ptr(bmap, nreq);
err_out_dreq:
- bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
+ nilfs_bmap_abort_alloc_ptr(bmap, dreq);
stats->bs_nblocks = 0;
return ret;
@@ -1624,7 +1708,7 @@ static void
nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
__u64 key, __u64 ptr,
const __u64 *keys, const __u64 *ptrs,
- int n, __u64 low, __u64 high,
+ int n,
union nilfs_bmap_ptr_req *dreq,
union nilfs_bmap_ptr_req *nreq,
struct buffer_head *bh)
@@ -1642,12 +1726,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
/* convert and insert */
btree = (struct nilfs_btree *)bmap;
- nilfs_btree_init(bmap, low, high);
+ nilfs_btree_init(bmap);
if (nreq != NULL) {
- if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
- bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
- bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
- }
+ nilfs_bmap_commit_alloc_ptr(bmap, dreq);
+ nilfs_bmap_commit_alloc_ptr(bmap, nreq);
/* create child node at level 1 */
lock_buffer(bh);
@@ -1661,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
nilfs_bmap_set_dirty(bmap);
unlock_buffer(bh);
- nilfs_bmap_put_block(bmap, bh);
+ brelse(bh);
/* create root node at level 2 */
node = nilfs_btree_get_root(btree);
@@ -1669,8 +1751,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
2, 1, &keys[0], &tmpptr);
} else {
- if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
- bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
+ nilfs_bmap_commit_alloc_ptr(bmap, dreq);
/* create root node at level 1 */
node = nilfs_btree_get_root(btree);
@@ -1682,8 +1763,8 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
nilfs_bmap_set_dirty(bmap);
}
- if (btree->bt_ops->btop_set_target != NULL)
- btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
+ if (NILFS_BMAP_USE_VBN(bmap))
+ nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr);
}
/**
@@ -1694,13 +1775,10 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
* @keys:
* @ptrs:
* @n:
- * @low:
- * @high:
*/
int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
__u64 key, __u64 ptr,
- const __u64 *keys, const __u64 *ptrs,
- int n, __u64 low, __u64 high)
+ const __u64 *keys, const __u64 *ptrs, int n)
{
struct buffer_head *bh;
union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
@@ -1725,7 +1803,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
if (ret < 0)
return ret;
nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
- low, high, di, ni, bh);
+ di, ni, bh);
nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
return 0;
}
@@ -1754,9 +1832,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
nilfs_btree_node_get_ptr(btree, parent,
path[level + 1].bp_index);
path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
- ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
- &path[level].bp_oldreq,
- &path[level].bp_newreq);
+ ret = nilfs_bmap_prepare_update_v(&btree->bt_bmap,
+ &path[level].bp_oldreq,
+ &path[level].bp_newreq);
if (ret < 0)
return ret;
@@ -1768,9 +1846,9 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
&path[level].bp_ctxt);
if (ret < 0) {
- nilfs_bmap_abort_update(&btree->bt_bmap,
- &path[level].bp_oldreq,
- &path[level].bp_newreq);
+ nilfs_bmap_abort_update_v(&btree->bt_bmap,
+ &path[level].bp_oldreq,
+ &path[level].bp_newreq);
return ret;
}
}
@@ -1784,9 +1862,9 @@ static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
{
struct nilfs_btree_node *parent;
- nilfs_bmap_commit_update(&btree->bt_bmap,
- &path[level].bp_oldreq,
- &path[level].bp_newreq);
+ nilfs_bmap_commit_update_v(&btree->bt_bmap,
+ &path[level].bp_oldreq,
+ &path[level].bp_newreq);
if (buffer_nilfs_node(path[level].bp_bh)) {
nilfs_btnode_commit_change_key(
@@ -1805,9 +1883,9 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
struct nilfs_btree_path *path,
int level)
{
- nilfs_bmap_abort_update(&btree->bt_bmap,
- &path[level].bp_oldreq,
- &path[level].bp_newreq);
+ nilfs_bmap_abort_update_v(&btree->bt_bmap,
+ &path[level].bp_oldreq,
+ &path[level].bp_newreq);
if (buffer_nilfs_node(path[level].bp_bh))
nilfs_btnode_abort_change_key(
&NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
@@ -1930,7 +2008,9 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
goto out;
}
- ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
+ ret = NILFS_BMAP_USE_VBN(bmap) ?
+ nilfs_btree_propagate_v(btree, path, level, bh) :
+ nilfs_btree_propagate_p(btree, path, level, bh);
out:
nilfs_btree_clear_path(btree, path);
@@ -2066,12 +2146,9 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree,
ptr = nilfs_btree_node_get_ptr(btree, parent,
path[level + 1].bp_index);
req.bpr_ptr = ptr;
- ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap,
- &req);
- if (ret < 0)
+ ret = nilfs_bmap_start_v(&btree->bt_bmap, &req, blocknr);
+ if (unlikely(ret < 0))
return ret;
- btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap,
- &req, blocknr);
key = nilfs_btree_node_get_key(btree, parent,
path[level + 1].bp_index);
@@ -2114,8 +2191,9 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap,
goto out;
}
- ret = btree->bt_ops->btop_assign(btree, path, level, bh,
- blocknr, binfo);
+ ret = NILFS_BMAP_USE_VBN(bmap) ?
+ nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) :
+ nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo);
out:
nilfs_btree_clear_path(btree, path);
@@ -2171,7 +2249,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
WARN_ON(ret == -ENOENT);
goto out;
}
- ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh);
+ ret = nilfs_btree_get_block(btree, ptr, &bh);
if (ret < 0) {
WARN_ON(ret == -ENOENT);
goto out;
@@ -2179,7 +2257,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
if (!buffer_dirty(bh))
nilfs_btnode_mark_dirty(bh);
- nilfs_bmap_put_block(&btree->bt_bmap, bh);
+ brelse(bh);
if (!nilfs_bmap_dirty(&btree->bt_bmap))
nilfs_bmap_set_dirty(&btree->bt_bmap);
@@ -2191,6 +2269,7 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
static const struct nilfs_bmap_operations nilfs_btree_ops = {
.bop_lookup = nilfs_btree_lookup,
+ .bop_lookup_contig = nilfs_btree_lookup_contig,
.bop_insert = nilfs_btree_insert,
.bop_delete = nilfs_btree_delete,
.bop_clear = NULL,
@@ -2210,6 +2289,7 @@ static const struct nilfs_bmap_operations nilfs_btree_ops = {
static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
.bop_lookup = NULL,
+ .bop_lookup_contig = NULL,
.bop_insert = NULL,
.bop_delete = NULL,
.bop_clear = NULL,
@@ -2227,43 +2307,13 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
.bop_gather_data = NULL,
};
-static const struct nilfs_btree_operations nilfs_btree_ops_v = {
- .btop_find_target = nilfs_btree_find_target_v,
- .btop_set_target = nilfs_btree_set_target_v,
- .btop_propagate = nilfs_btree_propagate_v,
- .btop_assign = nilfs_btree_assign_v,
-};
-
-static const struct nilfs_btree_operations nilfs_btree_ops_p = {
- .btop_find_target = NULL,
- .btop_set_target = NULL,
- .btop_propagate = nilfs_btree_propagate_p,
- .btop_assign = nilfs_btree_assign_p,
-};
-
-int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
+int nilfs_btree_init(struct nilfs_bmap *bmap)
{
- struct nilfs_btree *btree;
-
- btree = (struct nilfs_btree *)bmap;
bmap->b_ops = &nilfs_btree_ops;
- bmap->b_low = low;
- bmap->b_high = high;
- switch (bmap->b_inode->i_ino) {
- case NILFS_DAT_INO:
- btree->bt_ops = &nilfs_btree_ops_p;
- break;
- default:
- btree->bt_ops = &nilfs_btree_ops_v;
- break;
- }
-
return 0;
}
void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
{
- bmap->b_low = NILFS_BMAP_LARGE_LOW;
- bmap->b_high = NILFS_BMAP_LARGE_HIGH;
bmap->b_ops = &nilfs_btree_ops_gc;
}
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 4766deb52fb..0e72bbbc6b6 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -34,28 +34,6 @@ struct nilfs_btree;
struct nilfs_btree_path;
/**
- * struct nilfs_btree_operations - B-tree operation table
- */
-struct nilfs_btree_operations {
- __u64 (*btop_find_target)(const struct nilfs_btree *,
- const struct nilfs_btree_path *, __u64);
- void (*btop_set_target)(struct nilfs_btree *, __u64, __u64);
-
- struct the_nilfs *(*btop_get_nilfs)(struct nilfs_btree *);
-
- int (*btop_propagate)(struct nilfs_btree *,
- struct nilfs_btree_path *,
- int,
- struct buffer_head *);
- int (*btop_assign)(struct nilfs_btree *,
- struct nilfs_btree_path *,
- int,
- struct buffer_head **,
- sector_t,
- union nilfs_binfo *);
-};
-
-/**
* struct nilfs_btree_node - B-tree node
* @bn_flags: flags
* @bn_level: level
@@ -80,13 +58,9 @@ struct nilfs_btree_node {
/**
* struct nilfs_btree - B-tree structure
* @bt_bmap: bmap base structure
- * @bt_ops: B-tree operation table
*/
struct nilfs_btree {
struct nilfs_bmap bt_bmap;
-
- /* B-tree-specific members */
- const struct nilfs_btree_operations *bt_ops;
};
@@ -108,10 +82,9 @@ struct nilfs_btree {
int nilfs_btree_path_cache_init(void);
void nilfs_btree_path_cache_destroy(void);
-int nilfs_btree_init(struct nilfs_bmap *, __u64, __u64);
+int nilfs_btree_init(struct nilfs_bmap *);
int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64,
- const __u64 *, const __u64 *,
- int, __u64, __u64);
+ const __u64 *, const __u64 *, int);
void nilfs_btree_init_gc(struct nilfs_bmap *);
#endif /* _NILFS_BTREE_H */
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index 300f1cdfa86..7d49813f66d 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -295,10 +295,6 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
return -EINVAL;
}
- /* cannot delete the latest checkpoint */
- if (start == nilfs_mdt_cno(cpfile) - 1)
- return -EPERM;
-
down_write(&NILFS_MDT(cpfile)->mi_sem);
ret = nilfs_cpfile_get_header_block(cpfile, &header_bh);
@@ -384,9 +380,10 @@ static void nilfs_cpfile_checkpoint_to_cpinfo(struct inode *cpfile,
}
static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
- struct nilfs_cpinfo *ci, size_t nci)
+ void *buf, unsigned cisz, size_t nci)
{
struct nilfs_checkpoint *cp;
+ struct nilfs_cpinfo *ci = buf;
struct buffer_head *bh;
size_t cpsz = NILFS_MDT(cpfile)->mi_entry_size;
__u64 cur_cno = nilfs_mdt_cno(cpfile), cno = *cnop;
@@ -410,17 +407,22 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
kaddr = kmap_atomic(bh->b_page, KM_USER0);
cp = nilfs_cpfile_block_get_checkpoint(cpfile, cno, bh, kaddr);
for (i = 0; i < ncps && n < nci; i++, cp = (void *)cp + cpsz) {
- if (!nilfs_checkpoint_invalid(cp))
- nilfs_cpfile_checkpoint_to_cpinfo(
- cpfile, cp, &ci[n++]);
+ if (!nilfs_checkpoint_invalid(cp)) {
+ nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp,
+ ci);
+ ci = (void *)ci + cisz;
+ n++;
+ }
}
kunmap_atomic(kaddr, KM_USER0);
brelse(bh);
}
ret = n;
- if (n > 0)
- *cnop = ci[n - 1].ci_cno + 1;
+ if (n > 0) {
+ ci = (void *)ci - cisz;
+ *cnop = ci->ci_cno + 1;
+ }
out:
up_read(&NILFS_MDT(cpfile)->mi_sem);
@@ -428,11 +430,12 @@ static ssize_t nilfs_cpfile_do_get_cpinfo(struct inode *cpfile, __u64 *cnop,
}
static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
- struct nilfs_cpinfo *ci, size_t nci)
+ void *buf, unsigned cisz, size_t nci)
{
struct buffer_head *bh;
struct nilfs_cpfile_header *header;
struct nilfs_checkpoint *cp;
+ struct nilfs_cpinfo *ci = buf;
__u64 curr = *cnop, next;
unsigned long curr_blkoff, next_blkoff;
void *kaddr;
@@ -472,7 +475,9 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
if (unlikely(nilfs_checkpoint_invalid(cp) ||
!nilfs_checkpoint_snapshot(cp)))
break;
- nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, &ci[n++]);
+ nilfs_cpfile_checkpoint_to_cpinfo(cpfile, cp, ci);
+ ci = (void *)ci + cisz;
+ n++;
next = le64_to_cpu(cp->cp_snapshot_list.ssl_next);
if (next == 0)
break; /* reach end of the snapshot list */
@@ -511,13 +516,13 @@ static ssize_t nilfs_cpfile_do_get_ssinfo(struct inode *cpfile, __u64 *cnop,
*/
ssize_t nilfs_cpfile_get_cpinfo(struct inode *cpfile, __u64 *cnop, int mode,
- struct nilfs_cpinfo *ci, size_t nci)
+ void *buf, unsigned cisz, size_t nci)
{
switch (mode) {
case NILFS_CHECKPOINT:
- return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, ci, nci);
+ return nilfs_cpfile_do_get_cpinfo(cpfile, cnop, buf, cisz, nci);
case NILFS_SNAPSHOT:
- return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, ci, nci);
+ return nilfs_cpfile_do_get_ssinfo(cpfile, cnop, buf, cisz, nci);
default:
return -EINVAL;
}
@@ -533,20 +538,14 @@ int nilfs_cpfile_delete_checkpoint(struct inode *cpfile, __u64 cno)
struct nilfs_cpinfo ci;
__u64 tcno = cno;
ssize_t nci;
- int ret;
- nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, 1);
+ nci = nilfs_cpfile_do_get_cpinfo(cpfile, &tcno, &ci, sizeof(ci), 1);
if (nci < 0)
return nci;
else if (nci == 0 || ci.ci_cno != cno)
return -ENOENT;
-
- /* cannot delete the latest checkpoint nor snapshots */
- ret = nilfs_cpinfo_snapshot(&ci);
- if (ret < 0)
- return ret;
- else if (ret > 0 || cno == nilfs_mdt_cno(cpfile) - 1)
- return -EPERM;
+ else if (nilfs_cpinfo_snapshot(&ci))
+ return -EBUSY;
return nilfs_cpfile_delete_checkpoints(cpfile, cno, cno + 1);
}
@@ -864,11 +863,11 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
case NILFS_CHECKPOINT:
/*
* Check for protecting existing snapshot mounts:
- * bd_mount_sem is used to make this operation atomic and
+ * ns_mount_mutex is used to make this operation atomic and
* exclusive with a new mount job. Though it doesn't cover
* umount, it's enough for the purpose.
*/
- down(&nilfs->ns_bdev->bd_mount_sem);
+ mutex_lock(&nilfs->ns_mount_mutex);
if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) {
/* Current implementation does not have to protect
plain read-only mounts since they are exclusive
@@ -877,7 +876,7 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
ret = -EBUSY;
} else
ret = nilfs_cpfile_clear_snapshot(cpfile, cno);
- up(&nilfs->ns_bdev->bd_mount_sem);
+ mutex_unlock(&nilfs->ns_mount_mutex);
return ret;
case NILFS_SNAPSHOT:
return nilfs_cpfile_set_snapshot(cpfile, cno);
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index 1a8a1008c34..788a4595019 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -39,7 +39,7 @@ int nilfs_cpfile_delete_checkpoint(struct inode *, __u64);
int nilfs_cpfile_change_cpmode(struct inode *, __u64, int);
int nilfs_cpfile_is_snapshot(struct inode *, __u64);
int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *);
-ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int,
- struct nilfs_cpinfo *, size_t);
+ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned,
+ size_t);
#endif /* _NILFS_CPFILE_H */
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index bb8a5818e7f..0b2710e2d56 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -92,21 +92,6 @@ void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req)
nilfs_palloc_abort_alloc_entry(dat, req);
}
-int nilfs_dat_prepare_free(struct inode *dat, struct nilfs_palloc_req *req)
-{
- int ret;
-
- ret = nilfs_palloc_prepare_free_entry(dat, req);
- if (ret < 0)
- return ret;
- ret = nilfs_dat_prepare_entry(dat, req, 0);
- if (ret < 0) {
- nilfs_palloc_abort_free_entry(dat, req);
- return ret;
- }
- return 0;
-}
-
void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req)
{
struct nilfs_dat_entry *entry;
@@ -391,36 +376,37 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp)
return ret;
}
-ssize_t nilfs_dat_get_vinfo(struct inode *dat, struct nilfs_vinfo *vinfo,
+ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz,
size_t nvi)
{
struct buffer_head *entry_bh;
struct nilfs_dat_entry *entry;
+ struct nilfs_vinfo *vinfo = buf;
__u64 first, last;
void *kaddr;
unsigned long entries_per_block = NILFS_MDT(dat)->mi_entries_per_block;
int i, j, n, ret;
for (i = 0; i < nvi; i += n) {
- ret = nilfs_palloc_get_entry_block(dat, vinfo[i].vi_vblocknr,
+ ret = nilfs_palloc_get_entry_block(dat, vinfo->vi_vblocknr,
0, &entry_bh);
if (ret < 0)
return ret;
kaddr = kmap_atomic(entry_bh->b_page, KM_USER0);
/* last virtual block number in this block */
- first = vinfo[i].vi_vblocknr;
+ first = vinfo->vi_vblocknr;
do_div(first, entries_per_block);
first *= entries_per_block;
last = first + entries_per_block - 1;
for (j = i, n = 0;
- j < nvi && vinfo[j].vi_vblocknr >= first &&
- vinfo[j].vi_vblocknr <= last;
- j++, n++) {
+ j < nvi && vinfo->vi_vblocknr >= first &&
+ vinfo->vi_vblocknr <= last;
+ j++, n++, vinfo = (void *)vinfo + visz) {
entry = nilfs_palloc_block_get_entry(
- dat, vinfo[j].vi_vblocknr, entry_bh, kaddr);
- vinfo[j].vi_start = le64_to_cpu(entry->de_start);
- vinfo[j].vi_end = le64_to_cpu(entry->de_end);
- vinfo[j].vi_blocknr = le64_to_cpu(entry->de_blocknr);
+ dat, vinfo->vi_vblocknr, entry_bh, kaddr);
+ vinfo->vi_start = le64_to_cpu(entry->de_start);
+ vinfo->vi_end = le64_to_cpu(entry->de_end);
+ vinfo->vi_blocknr = le64_to_cpu(entry->de_blocknr);
}
kunmap_atomic(kaddr, KM_USER0);
brelse(entry_bh);
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index d9560654a4b..d328b81eead 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -47,6 +47,6 @@ void nilfs_dat_abort_end(struct inode *, struct nilfs_palloc_req *);
int nilfs_dat_mark_dirty(struct inode *, __u64);
int nilfs_dat_freev(struct inode *, __u64 *, size_t);
int nilfs_dat_move(struct inode *, __u64, sector_t);
-ssize_t nilfs_dat_get_vinfo(struct inode *, struct nilfs_vinfo *, size_t);
+ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t);
#endif /* _NILFS_DAT_H */
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index c6379e48278..342d9765df8 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -25,6 +25,7 @@
#include "page.h"
#include "direct.h"
#include "alloc.h"
+#include "dat.h"
static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct)
{
@@ -62,6 +63,47 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
return 0;
}
+static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
+ __u64 key, __u64 *ptrp,
+ unsigned maxblocks)
+{
+ struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
+ struct inode *dat = NULL;
+ __u64 ptr, ptr2;
+ sector_t blocknr;
+ int ret, cnt;
+
+ if (key > NILFS_DIRECT_KEY_MAX ||
+ (ptr = nilfs_direct_get_ptr(direct, key)) ==
+ NILFS_BMAP_INVALID_PTR)
+ return -ENOENT;
+
+ if (NILFS_BMAP_USE_VBN(bmap)) {
+ dat = nilfs_bmap_get_dat(bmap);
+ ret = nilfs_dat_translate(dat, ptr, &blocknr);
+ if (ret < 0)
+ return ret;
+ ptr = blocknr;
+ }
+
+ maxblocks = min_t(unsigned, maxblocks, NILFS_DIRECT_KEY_MAX - key + 1);
+ for (cnt = 1; cnt < maxblocks &&
+ (ptr2 = nilfs_direct_get_ptr(direct, key + cnt)) !=
+ NILFS_BMAP_INVALID_PTR;
+ cnt++) {
+ if (dat) {
+ ret = nilfs_dat_translate(dat, ptr2, &blocknr);
+ if (ret < 0)
+ return ret;
+ ptr2 = blocknr;
+ }
+ if (ptr2 != ptr + cnt)
+ break;
+ }
+ *ptrp = ptr;
+ return cnt;
+}
+
static __u64
nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key)
{
@@ -90,10 +132,9 @@ static int nilfs_direct_prepare_insert(struct nilfs_direct *direct,
{
int ret;
- if (direct->d_ops->dop_find_target != NULL)
- req->bpr_ptr = direct->d_ops->dop_find_target(direct, key);
- ret = direct->d_bmap.b_pops->bpop_prepare_alloc_ptr(&direct->d_bmap,
- req);
+ if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
+ req->bpr_ptr = nilfs_direct_find_target_v(direct, key);
+ ret = nilfs_bmap_prepare_alloc_ptr(&direct->d_bmap, req);
if (ret < 0)
return ret;
@@ -111,16 +152,14 @@ static void nilfs_direct_commit_insert(struct nilfs_direct *direct,
bh = (struct buffer_head *)((unsigned long)ptr);
set_buffer_nilfs_volatile(bh);
- if (direct->d_bmap.b_pops->bpop_commit_alloc_ptr != NULL)
- direct->d_bmap.b_pops->bpop_commit_alloc_ptr(
- &direct->d_bmap, req);
+ nilfs_bmap_commit_alloc_ptr(&direct->d_bmap, req);
nilfs_direct_set_ptr(direct, key, req->bpr_ptr);
if (!nilfs_bmap_dirty(&direct->d_bmap))
nilfs_bmap_set_dirty(&direct->d_bmap);
- if (direct->d_ops->dop_set_target != NULL)
- direct->d_ops->dop_set_target(direct, key, req->bpr_ptr);
+ if (NILFS_BMAP_USE_VBN(&direct->d_bmap))
+ nilfs_direct_set_target_v(direct, key, req->bpr_ptr);
}
static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
@@ -152,25 +191,18 @@ static int nilfs_direct_prepare_delete(struct nilfs_direct *direct,
{
int ret;
- if (direct->d_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
- req->bpr_ptr = nilfs_direct_get_ptr(direct, key);
- ret = direct->d_bmap.b_pops->bpop_prepare_end_ptr(
- &direct->d_bmap, req);
- if (ret < 0)
- return ret;
- }
-
- stats->bs_nblocks = 1;
- return 0;
+ req->bpr_ptr = nilfs_direct_get_ptr(direct, key);
+ ret = nilfs_bmap_prepare_end_ptr(&direct->d_bmap, req);
+ if (!ret)
+ stats->bs_nblocks = 1;
+ return ret;
}
static void nilfs_direct_commit_delete(struct nilfs_direct *direct,
union nilfs_bmap_ptr_req *req,
__u64 key)
{
- if (direct->d_bmap.b_pops->bpop_commit_end_ptr != NULL)
- direct->d_bmap.b_pops->bpop_commit_end_ptr(
- &direct->d_bmap, req);
+ nilfs_bmap_commit_end_ptr(&direct->d_bmap, req);
nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR);
}
@@ -244,8 +276,7 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap,
}
int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
- __u64 key, __u64 *keys, __u64 *ptrs,
- int n, __u64 low, __u64 high)
+ __u64 key, __u64 *keys, __u64 *ptrs, int n)
{
struct nilfs_direct *direct;
__le64 *dptrs;
@@ -275,8 +306,7 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap,
dptrs[i] = NILFS_BMAP_INVALID_PTR;
}
- nilfs_direct_init(bmap, low, high);
-
+ nilfs_direct_init(bmap);
return 0;
}
@@ -293,11 +323,11 @@ static int nilfs_direct_propagate_v(struct nilfs_direct *direct,
if (!buffer_nilfs_volatile(bh)) {
oldreq.bpr_ptr = ptr;
newreq.bpr_ptr = ptr;
- ret = nilfs_bmap_prepare_update(&direct->d_bmap, &oldreq,
- &newreq);
+ ret = nilfs_bmap_prepare_update_v(&direct->d_bmap, &oldreq,
+ &newreq);
if (ret < 0)
return ret;
- nilfs_bmap_commit_update(&direct->d_bmap, &oldreq, &newreq);
+ nilfs_bmap_commit_update_v(&direct->d_bmap, &oldreq, &newreq);
set_buffer_nilfs_volatile(bh);
nilfs_direct_set_ptr(direct, key, newreq.bpr_ptr);
} else
@@ -309,12 +339,10 @@ static int nilfs_direct_propagate_v(struct nilfs_direct *direct,
static int nilfs_direct_propagate(const struct nilfs_bmap *bmap,
struct buffer_head *bh)
{
- struct nilfs_direct *direct;
+ struct nilfs_direct *direct = (struct nilfs_direct *)bmap;
- direct = (struct nilfs_direct *)bmap;
- return (direct->d_ops->dop_propagate != NULL) ?
- direct->d_ops->dop_propagate(direct, bh) :
- 0;
+ return NILFS_BMAP_USE_VBN(bmap) ?
+ nilfs_direct_propagate_v(direct, bh) : 0;
}
static int nilfs_direct_assign_v(struct nilfs_direct *direct,
@@ -327,12 +355,9 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct,
int ret;
req.bpr_ptr = ptr;
- ret = direct->d_bmap.b_pops->bpop_prepare_start_ptr(
- &direct->d_bmap, &req);
- if (ret < 0)
+ ret = nilfs_bmap_start_v(&direct->d_bmap, &req, blocknr);
+ if (unlikely(ret < 0))
return ret;
- direct->d_bmap.b_pops->bpop_commit_start_ptr(&direct->d_bmap,
- &req, blocknr);
binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
@@ -377,12 +402,14 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,
return -EINVAL;
}
- return direct->d_ops->dop_assign(direct, key, ptr, bh,
- blocknr, binfo);
+ return NILFS_BMAP_USE_VBN(bmap) ?
+ nilfs_direct_assign_v(direct, key, ptr, bh, blocknr, binfo) :
+ nilfs_direct_assign_p(direct, key, ptr, bh, blocknr, binfo);
}
static const struct nilfs_bmap_operations nilfs_direct_ops = {
.bop_lookup = nilfs_direct_lookup,
+ .bop_lookup_contig = nilfs_direct_lookup_contig,
.bop_insert = nilfs_direct_insert,
.bop_delete = nilfs_direct_delete,
.bop_clear = NULL,
@@ -401,36 +428,8 @@ static const struct nilfs_bmap_operations nilfs_direct_ops = {
};
-static const struct nilfs_direct_operations nilfs_direct_ops_v = {
- .dop_find_target = nilfs_direct_find_target_v,
- .dop_set_target = nilfs_direct_set_target_v,
- .dop_propagate = nilfs_direct_propagate_v,
- .dop_assign = nilfs_direct_assign_v,
-};
-
-static const struct nilfs_direct_operations nilfs_direct_ops_p = {
- .dop_find_target = NULL,
- .dop_set_target = NULL,
- .dop_propagate = NULL,
- .dop_assign = nilfs_direct_assign_p,
-};
-
-int nilfs_direct_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
+int nilfs_direct_init(struct nilfs_bmap *bmap)
{
- struct nilfs_direct *direct;
-
- direct = (struct nilfs_direct *)bmap;
bmap->b_ops = &nilfs_direct_ops;
- bmap->b_low = low;
- bmap->b_high = high;
- switch (bmap->b_inode->i_ino) {
- case NILFS_DAT_INO:
- direct->d_ops = &nilfs_direct_ops_p;
- break;
- default:
- direct->d_ops = &nilfs_direct_ops_v;
- break;
- }
-
return 0;
}
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h
index 45d2c5cda81..a5ffd66e25d 100644
--- a/fs/nilfs2/direct.h
+++ b/fs/nilfs2/direct.h
@@ -31,18 +31,6 @@
struct nilfs_direct;
/**
- * struct nilfs_direct_operations - direct mapping operation table
- */
-struct nilfs_direct_operations {
- __u64 (*dop_find_target)(const struct nilfs_direct *, __u64);
- void (*dop_set_target)(struct nilfs_direct *, __u64, __u64);
- int (*dop_propagate)(struct nilfs_direct *, struct buffer_head *);
- int (*dop_assign)(struct nilfs_direct *, __u64, __u64,
- struct buffer_head **, sector_t,
- union nilfs_binfo *);
-};
-
-/**
* struct nilfs_direct_node - direct node
* @dn_flags: flags
* @dn_pad: padding
@@ -55,13 +43,9 @@ struct nilfs_direct_node {
/**
* struct nilfs_direct - direct mapping
* @d_bmap: bmap structure
- * @d_ops: direct mapping operation table
*/
struct nilfs_direct {
struct nilfs_bmap d_bmap;
-
- /* direct-mapping-specific members */
- const struct nilfs_direct_operations *d_ops;
};
@@ -70,9 +54,9 @@ struct nilfs_direct {
#define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1)
-int nilfs_direct_init(struct nilfs_bmap *, __u64, __u64);
+int nilfs_direct_init(struct nilfs_bmap *);
int nilfs_direct_delete_and_convert(struct nilfs_bmap *, __u64, __u64 *,
- __u64 *, int, __u64, __u64);
+ __u64 *, int);
#endif /* _NILFS_DIRECT_H */
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 19d2102b6a6..1b3c2bb20da 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -52,8 +52,9 @@
#include "dat.h"
#include "ifile.h"
-static struct address_space_operations def_gcinode_aops = {};
-/* XXX need def_gcinode_iops/fops? */
+static struct address_space_operations def_gcinode_aops = {
+ .sync_page = block_sync_page,
+};
/*
* nilfs_gccache_submit_read_data() - add data buffer and submit read request
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 49ab4a49bb4..2696d6b513b 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -43,22 +43,23 @@
*
* This function does not issue actual read request of the specified data
* block. It is done by VFS.
- * Bulk read for direct-io is not supported yet. (should be supported)
*/
int nilfs_get_block(struct inode *inode, sector_t blkoff,
struct buffer_head *bh_result, int create)
{
struct nilfs_inode_info *ii = NILFS_I(inode);
- unsigned long blknum = 0;
+ __u64 blknum = 0;
int err = 0, ret;
struct inode *dat = nilfs_dat_inode(NILFS_I_NILFS(inode));
+ unsigned maxblocks = bh_result->b_size >> inode->i_blkbits;
- /* This exclusion control is a workaround; should be revised */
- down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
- ret = nilfs_bmap_lookup(ii->i_bmap, (unsigned long)blkoff, &blknum);
- up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */
- if (ret == 0) { /* found */
+ down_read(&NILFS_MDT(dat)->mi_sem);
+ ret = nilfs_bmap_lookup_contig(ii->i_bmap, blkoff, &blknum, maxblocks);
+ up_read(&NILFS_MDT(dat)->mi_sem);
+ if (ret >= 0) { /* found */
map_bh(bh_result, inode->i_sb, blknum);
+ if (ret > 0)
+ bh_result->b_size = (ret << inode->i_blkbits);
goto out;
}
/* data block was not found */
@@ -240,7 +241,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
struct address_space_operations nilfs_aops = {
.writepage = nilfs_writepage,
.readpage = nilfs_readpage,
- /* .sync_page = nilfs_sync_page, */
+ .sync_page = block_sync_page,
.writepages = nilfs_writepages,
.set_page_dirty = nilfs_set_page_dirty,
.readpages = nilfs_readpages,
@@ -249,6 +250,7 @@ struct address_space_operations nilfs_aops = {
/* .releasepage = nilfs_releasepage, */
.invalidatepage = block_invalidatepage,
.direct_IO = nilfs_direct_IO,
+ .is_partially_uptodate = block_is_partially_uptodate,
};
struct inode *nilfs_new_inode(struct inode *dir, int mode)
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index d6759b92006..6ea5f872e2d 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -152,7 +152,7 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
down_read(&nilfs->ns_segctor_sem);
ret = nilfs_cpfile_get_cpinfo(nilfs->ns_cpfile, posp, flags, buf,
- nmembs);
+ size, nmembs);
up_read(&nilfs->ns_segctor_sem);
return ret;
}
@@ -182,7 +182,8 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
int ret;
down_read(&nilfs->ns_segctor_sem);
- ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, nmembs);
+ ret = nilfs_sufile_get_suinfo(nilfs->ns_sufile, *posp, buf, size,
+ nmembs);
up_read(&nilfs->ns_segctor_sem);
return ret;
}
@@ -212,7 +213,7 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
int ret;
down_read(&nilfs->ns_segctor_sem);
- ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, nmembs);
+ ret = nilfs_dat_get_vinfo(nilfs_dat_inode(nilfs), buf, size, nmembs);
up_read(&nilfs->ns_segctor_sem);
return ret;
}
@@ -435,24 +436,6 @@ static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
return nmembs;
}
-static int nilfs_ioctl_free_segments(struct the_nilfs *nilfs,
- struct nilfs_argv *argv, void *buf)
-{
- size_t nmembs = argv->v_nmembs;
- struct nilfs_sb_info *sbi = nilfs->ns_writer;
- int ret;
-
- if (unlikely(!sbi)) {
- /* never happens because called for a writable mount */
- WARN_ON(1);
- return -EROFS;
- }
- ret = nilfs_segctor_add_segments_to_be_freed(
- NILFS_SC(sbi), buf, nmembs);
-
- return (ret < 0) ? ret : nmembs;
-}
-
int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void **kbufs)
{
@@ -491,14 +474,6 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
msg = "cannot mark copying blocks dirty";
goto failed;
}
- ret = nilfs_ioctl_free_segments(nilfs, &argv[4], kbufs[4]);
- if (ret < 0) {
- /*
- * can safely abort because this operation is atomic.
- */
- msg = "cannot set segments to be freed";
- goto failed;
- }
return 0;
failed:
@@ -615,7 +590,7 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
if (copy_from_user(&argv, argp, sizeof(argv)))
return -EFAULT;
- if (argv.v_size != membsz)
+ if (argv.v_size < membsz)
return -EINVAL;
ret = nilfs_ioctl_wrap_copy(nilfs, &argv, _IOC_DIR(cmd), dofunc);
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index bb78745a0e3..3d3ddb3f517 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -430,6 +430,7 @@ nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
static struct address_space_operations def_mdt_aops = {
.writepage = nilfs_mdt_write_page,
+ .sync_page = block_sync_page,
};
static struct inode_operations def_mdt_iops;
@@ -449,7 +450,7 @@ struct inode *
nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb,
ino_t ino, gfp_t gfp_mask)
{
- struct inode *inode = nilfs_alloc_inode(sb);
+ struct inode *inode = nilfs_alloc_inode_common(nilfs);
if (!inode)
return NULL;
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index da6fc0bba2e..edf6a59d9f2 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -263,6 +263,7 @@ extern void nilfs_dirty_inode(struct inode *);
extern struct dentry *nilfs_get_parent(struct dentry *);
/* super.c */
+extern struct inode *nilfs_alloc_inode_common(struct the_nilfs *);
extern struct inode *nilfs_alloc_inode(struct super_block *);
extern void nilfs_destroy_inode(struct inode *);
extern void nilfs_error(struct super_block *, const char *, const char *, ...)
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 57afa9d2406..d80cc71be74 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -28,7 +28,6 @@
#include "segment.h"
#include "sufile.h"
#include "page.h"
-#include "seglist.h"
#include "segbuf.h"
/*
@@ -395,6 +394,24 @@ static void dispose_recovery_list(struct list_head *head)
}
}
+struct nilfs_segment_entry {
+ struct list_head list;
+ __u64 segnum;
+};
+
+static int nilfs_segment_list_add(struct list_head *head, __u64 segnum)
+{
+ struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);
+
+ if (unlikely(!ent))
+ return -ENOMEM;
+
+ ent->segnum = segnum;
+ INIT_LIST_HEAD(&ent->list);
+ list_add_tail(&ent->list, head);
+ return 0;
+}
+
void nilfs_dispose_segment_list(struct list_head *head)
{
while (!list_empty(head)) {
@@ -402,7 +419,7 @@ void nilfs_dispose_segment_list(struct list_head *head)
= list_entry(head->next,
struct nilfs_segment_entry, list);
list_del(&ent->list);
- nilfs_free_segment_entry(ent);
+ kfree(ent);
}
}
@@ -431,12 +448,10 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
if (unlikely(err))
goto failed;
- err = -ENOMEM;
for (i = 1; i < 4; i++) {
- ent = nilfs_alloc_segment_entry(segnum[i]);
- if (unlikely(!ent))
+ err = nilfs_segment_list_add(head, segnum[i]);
+ if (unlikely(err))
goto failed;
- list_add_tail(&ent->list, head);
}
/*
@@ -450,7 +465,7 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs,
goto failed;
}
list_del(&ent->list);
- nilfs_free_segment_entry(ent);
+ kfree(ent);
}
/* Allocate new segments for recovery */
@@ -791,7 +806,6 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
u64 seg_seq;
__u64 segnum, nextnum = 0;
__u64 cno;
- struct nilfs_segment_entry *ent;
LIST_HEAD(segments);
int empty_seg = 0, scan_newer = 0;
int ret;
@@ -892,12 +906,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi,
if (empty_seg++)
goto super_root_found; /* found a valid super root */
- ent = nilfs_alloc_segment_entry(segnum);
- if (unlikely(!ent)) {
- ret = -ENOMEM;
+ ret = nilfs_segment_list_add(&segments, segnum);
+ if (unlikely(ret))
goto failed;
- }
- list_add_tail(&ent->list, &segments);
seg_seq++;
segnum = nextnum;
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h
index adccd4fc654..0776ccc2504 100644
--- a/fs/nilfs2/sb.h
+++ b/fs/nilfs2/sb.h
@@ -60,6 +60,7 @@ struct nilfs_sb_info {
struct super_block *s_super; /* reverse pointer to super_block */
struct the_nilfs *s_nilfs;
struct list_head s_list; /* list head for nilfs->ns_supers */
+ atomic_t s_count; /* reference count */
/* Segment constructor */
struct list_head s_dirty_files; /* dirty files list */
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 1e68821b4a9..9e3fe17bb96 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -26,7 +26,6 @@
#include <linux/crc32.h>
#include "page.h"
#include "segbuf.h"
-#include "seglist.h"
static struct kmem_cache *nilfs_segbuf_cachep;
@@ -394,7 +393,7 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf,
* Last BIO is always sent through the following
* submission.
*/
- rw |= (1 << BIO_RW_SYNCIO);
+ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG);
res = nilfs_submit_seg_bio(wi, rw);
if (unlikely(res))
goto failed_bio;
diff --git a/fs/nilfs2/seglist.h b/fs/nilfs2/seglist.h
deleted file mode 100644
index d39df9144e9..00000000000
--- a/fs/nilfs2/seglist.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * seglist.h - expediential structure and routines to handle list of segments
- * (would be removed in a future release)
- *
- * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * Written by Ryusuke Konishi <ryusuke@osrg.net>
- *
- */
-#ifndef _NILFS_SEGLIST_H
-#define _NILFS_SEGLIST_H
-
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/nilfs2_fs.h>
-#include "sufile.h"
-
-struct nilfs_segment_entry {
- __u64 segnum;
-
-#define NILFS_SLH_FREED 0x0001 /* The segment was freed provisonally.
- It must be cancelled if
- construction aborted */
-
- unsigned flags;
- struct list_head list;
- struct buffer_head *bh_su;
- struct nilfs_segment_usage *raw_su;
-};
-
-
-void nilfs_dispose_segment_list(struct list_head *);
-
-static inline struct nilfs_segment_entry *
-nilfs_alloc_segment_entry(__u64 segnum)
-{
- struct nilfs_segment_entry *ent = kmalloc(sizeof(*ent), GFP_NOFS);
-
- if (likely(ent)) {
- ent->segnum = segnum;
- ent->flags = 0;
- ent->bh_su = NULL;
- ent->raw_su = NULL;
- INIT_LIST_HEAD(&ent->list);
- }
- return ent;
-}
-
-static inline int nilfs_open_segment_entry(struct nilfs_segment_entry *ent,
- struct inode *sufile)
-{
- return nilfs_sufile_get_segment_usage(sufile, ent->segnum,
- &ent->raw_su, &ent->bh_su);
-}
-
-static inline void nilfs_close_segment_entry(struct nilfs_segment_entry *ent,
- struct inode *sufile)
-{
- if (!ent->bh_su)
- return;
- nilfs_sufile_put_segment_usage(sufile, ent->segnum, ent->bh_su);
- ent->bh_su = NULL;
- ent->raw_su = NULL;
-}
-
-static inline void nilfs_free_segment_entry(struct nilfs_segment_entry *ent)
-{
- kfree(ent);
-}
-
-#endif /* _NILFS_SEGLIST_H */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 22c7f65c240..aa977549919 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -39,7 +39,6 @@
#include "sufile.h"
#include "cpfile.h"
#include "ifile.h"
-#include "seglist.h"
#include "segbuf.h"
@@ -79,7 +78,8 @@ enum {
/* State flags of collection */
#define NILFS_CF_NODE 0x0001 /* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED 0x0002 /* IFILE stage has started */
-#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED)
+#define NILFS_CF_SUFREED 0x0004 /* segment usages has been freed */
+#define NILFS_CF_HISTORY_MASK (NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)
/* Operations depending on the construction mode and file type */
struct nilfs_sc_operations {
@@ -810,7 +810,7 @@ static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
{
return list_empty(&sci->sc_dirty_files) &&
!test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
- list_empty(&sci->sc_cleaning_segments) &&
+ sci->sc_nfreesegs == 0 &&
(!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
}
@@ -1005,44 +1005,6 @@ static void nilfs_drop_collected_inodes(struct list_head *head)
}
}
-static void nilfs_segctor_cancel_free_segments(struct nilfs_sc_info *sci,
- struct inode *sufile)
-
-{
- struct list_head *head = &sci->sc_cleaning_segments;
- struct nilfs_segment_entry *ent;
- int err;
-
- list_for_each_entry(ent, head, list) {
- if (!(ent->flags & NILFS_SLH_FREED))
- break;
- err = nilfs_sufile_cancel_free(sufile, ent->segnum);
- WARN_ON(err); /* do not happen */
- ent->flags &= ~NILFS_SLH_FREED;
- }
-}
-
-static int nilfs_segctor_prepare_free_segments(struct nilfs_sc_info *sci,
- struct inode *sufile)
-{
- struct list_head *head = &sci->sc_cleaning_segments;
- struct nilfs_segment_entry *ent;
- int err;
-
- list_for_each_entry(ent, head, list) {
- err = nilfs_sufile_free(sufile, ent->segnum);
- if (unlikely(err))
- return err;
- ent->flags |= NILFS_SLH_FREED;
- }
- return 0;
-}
-
-static void nilfs_segctor_commit_free_segments(struct nilfs_sc_info *sci)
-{
- nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
-}
-
static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
struct inode *inode,
struct list_head *listp,
@@ -1161,6 +1123,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
struct the_nilfs *nilfs = sbi->s_nilfs;
struct list_head *head;
struct nilfs_inode_info *ii;
+ size_t ndone;
int err = 0;
switch (sci->sc_stage.scnt) {
@@ -1250,10 +1213,16 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
break;
sci->sc_stage.scnt++; /* Fall through */
case NILFS_ST_SUFILE:
- err = nilfs_segctor_prepare_free_segments(sci,
- nilfs->ns_sufile);
- if (unlikely(err))
+ err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
+ sci->sc_nfreesegs, &ndone);
+ if (unlikely(err)) {
+ nilfs_sufile_cancel_freev(nilfs->ns_sufile,
+ sci->sc_freesegs, ndone,
+ NULL);
break;
+ }
+ sci->sc_stage.flags |= NILFS_CF_SUFREED;
+
err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
&nilfs_sc_file_ops);
if (unlikely(err))
@@ -1486,7 +1455,15 @@ static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci,
{
if (unlikely(err)) {
nilfs_segctor_free_incomplete_segments(sci, nilfs);
- nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile);
+ if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
+ int ret;
+
+ ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
+ sci->sc_freesegs,
+ sci->sc_nfreesegs,
+ NULL);
+ WARN_ON(ret); /* do not happen */
+ }
}
nilfs_segctor_clear_segment_buffers(sci);
}
@@ -1585,7 +1562,13 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
if (mode != SC_LSEG_SR || sci->sc_stage.scnt < NILFS_ST_CPFILE)
break;
- nilfs_segctor_cancel_free_segments(sci, nilfs->ns_sufile);
+ if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
+ err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
+ sci->sc_freesegs,
+ sci->sc_nfreesegs,
+ NULL);
+ WARN_ON(err); /* do not happen */
+ }
nilfs_segctor_clear_segment_buffers(sci);
err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
@@ -2224,10 +2207,8 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
nilfs_segctor_complete_write(sci);
/* Commit segments */
- if (has_sr) {
- nilfs_segctor_commit_free_segments(sci);
+ if (has_sr)
nilfs_segctor_clear_metadata_dirty(sci);
- }
nilfs_segctor_end_construction(sci, nilfs, 0);
@@ -2301,48 +2282,6 @@ void nilfs_flush_segment(struct super_block *sb, ino_t ino)
/* assign bit 0 to data files */
}
-int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *sci,
- __u64 *segnum, size_t nsegs)
-{
- struct nilfs_segment_entry *ent;
- struct the_nilfs *nilfs = sci->sc_sbi->s_nilfs;
- struct inode *sufile = nilfs->ns_sufile;
- LIST_HEAD(list);
- __u64 *pnum;
- size_t i;
- int err;
-
- for (pnum = segnum, i = 0; i < nsegs; pnum++, i++) {
- ent = nilfs_alloc_segment_entry(*pnum);
- if (unlikely(!ent)) {
- err = -ENOMEM;
- goto failed;
- }
- list_add_tail(&ent->list, &list);
-
- err = nilfs_open_segment_entry(ent, sufile);
- if (unlikely(err))
- goto failed;
-
- if (unlikely(!nilfs_segment_usage_dirty(ent->raw_su)))
- printk(KERN_WARNING "NILFS: unused segment is "
- "requested to be cleaned (segnum=%llu)\n",
- (unsigned long long)ent->segnum);
- nilfs_close_segment_entry(ent, sufile);
- }
- list_splice(&list, sci->sc_cleaning_segments.prev);
- return 0;
-
- failed:
- nilfs_dispose_segment_list(&list);
- return err;
-}
-
-void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *sci)
-{
- nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
-}
-
struct nilfs_segctor_wait_request {
wait_queue_t wq;
__u32 seq;
@@ -2607,10 +2546,13 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
err = nilfs_init_gcdat_inode(nilfs);
if (unlikely(err))
goto out_unlock;
+
err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
if (unlikely(err))
goto out_unlock;
+ sci->sc_freesegs = kbufs[4];
+ sci->sc_nfreesegs = argv[4].v_nmembs;
list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev);
for (;;) {
@@ -2629,6 +2571,8 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
}
out_unlock:
+ sci->sc_freesegs = NULL;
+ sci->sc_nfreesegs = 0;
nilfs_clear_gcdat_inode(nilfs);
nilfs_transaction_unlock(sbi);
return err;
@@ -2835,7 +2779,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi)
INIT_LIST_HEAD(&sci->sc_dirty_files);
INIT_LIST_HEAD(&sci->sc_segbufs);
INIT_LIST_HEAD(&sci->sc_gc_inodes);
- INIT_LIST_HEAD(&sci->sc_cleaning_segments);
INIT_LIST_HEAD(&sci->sc_copied_buffers);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2901,9 +2844,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
nilfs_dispose_list(sbi, &sci->sc_dirty_files, 1);
}
- if (!list_empty(&sci->sc_cleaning_segments))
- nilfs_dispose_segment_list(&sci->sc_cleaning_segments);
-
WARN_ON(!list_empty(&sci->sc_segbufs));
down_write(&sbi->s_nilfs->ns_segctor_sem);
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 476bdd5df5b..0d2a475a741 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -90,8 +90,9 @@ struct nilfs_segsum_pointer {
* @sc_nblk_inc: Block count of current generation
* @sc_dirty_files: List of files to be written
* @sc_gc_inodes: List of GC inodes having blocks to be written
- * @sc_cleaning_segments: List of segments to be freed through construction
* @sc_copied_buffers: List of copied buffers (buffer heads) to freeze data
+ * @sc_freesegs: array of segment numbers to be freed
+ * @sc_nfreesegs: number of segments on @sc_freesegs
* @sc_dsync_inode: inode whose data pages are written for a sync operation
* @sc_dsync_start: start byte offset of data pages
* @sc_dsync_end: end byte offset of data pages (inclusive)
@@ -131,9 +132,11 @@ struct nilfs_sc_info {
struct list_head sc_dirty_files;
struct list_head sc_gc_inodes;
- struct list_head sc_cleaning_segments;
struct list_head sc_copied_buffers;
+ __u64 *sc_freesegs;
+ size_t sc_nfreesegs;
+
struct nilfs_inode_info *sc_dsync_inode;
loff_t sc_dsync_start;
loff_t sc_dsync_end;
@@ -225,10 +228,6 @@ extern void nilfs_flush_segment(struct super_block *, ino_t);
extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *,
void **);
-extern int nilfs_segctor_add_segments_to_be_freed(struct nilfs_sc_info *,
- __u64 *, size_t);
-extern void nilfs_segctor_clear_segments_to_be_freed(struct nilfs_sc_info *);
-
extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *);
extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *);
@@ -240,5 +239,6 @@ extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *,
extern int nilfs_recover_logical_segments(struct the_nilfs *,
struct nilfs_sb_info *,
struct nilfs_recovery_info *);
+extern void nilfs_dispose_segment_list(struct list_head *);
#endif /* _NILFS_SEGMENT_H */
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index 98e68677f04..37994d4a59c 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -18,6 +18,7 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Written by Koji Sato <koji@osrg.net>.
+ * Rivised by Ryusuke Konishi <ryusuke@osrg.net>.
*/
#include <linux/kernel.h>
@@ -108,6 +109,102 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh,
nilfs_mdt_mark_buffer_dirty(header_bh);
}
+/**
+ * nilfs_sufile_updatev - modify multiple segment usages at a time
+ * @sufile: inode of segment usage file
+ * @segnumv: array of segment numbers
+ * @nsegs: size of @segnumv array
+ * @create: creation flag
+ * @ndone: place to store number of modified segments on @segnumv
+ * @dofunc: primitive operation for the update
+ *
+ * Description: nilfs_sufile_updatev() repeatedly calls @dofunc
+ * against the given array of segments. The @dofunc is called with
+ * buffers of a header block and the sufile block in which the target
+ * segment usage entry is contained. If @ndone is given, the number
+ * of successfully modified segments from the head is stored in the
+ * place @ndone points to.
+ *
+ * Return Value: On success, zero is returned. On error, one of the
+ * following negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOENT - Given segment usage is in hole block (may be returned if
+ * @create is zero)
+ *
+ * %-EINVAL - Invalid segment usage number
+ */
+int nilfs_sufile_updatev(struct inode *sufile, __u64 *segnumv, size_t nsegs,
+ int create, size_t *ndone,
+ void (*dofunc)(struct inode *, __u64,
+ struct buffer_head *,
+ struct buffer_head *))
+{
+ struct buffer_head *header_bh, *bh;
+ unsigned long blkoff, prev_blkoff;
+ __u64 *seg;
+ size_t nerr = 0, n = 0;
+ int ret = 0;
+
+ if (unlikely(nsegs == 0))
+ goto out;
+
+ down_write(&NILFS_MDT(sufile)->mi_sem);
+ for (seg = segnumv; seg < segnumv + nsegs; seg++) {
+ if (unlikely(*seg >= nilfs_sufile_get_nsegments(sufile))) {
+ printk(KERN_WARNING
+ "%s: invalid segment number: %llu\n", __func__,
+ (unsigned long long)*seg);
+ nerr++;
+ }
+ }
+ if (nerr > 0) {
+ ret = -EINVAL;
+ goto out_sem;
+ }
+
+ ret = nilfs_sufile_get_header_block(sufile, &header_bh);
+ if (ret < 0)
+ goto out_sem;
+
+ seg = segnumv;
+ blkoff = nilfs_sufile_get_blkoff(sufile, *seg);
+ ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh);
+ if (ret < 0)
+ goto out_header;
+
+ for (;;) {
+ dofunc(sufile, *seg, header_bh, bh);
+
+ if (++seg >= segnumv + nsegs)
+ break;
+ prev_blkoff = blkoff;
+ blkoff = nilfs_sufile_get_blkoff(sufile, *seg);
+ if (blkoff == prev_blkoff)
+ continue;
+
+ /* get different block */
+ brelse(bh);
+ ret = nilfs_mdt_get_block(sufile, blkoff, create, NULL, &bh);
+ if (unlikely(ret < 0))
+ goto out_header;
+ }
+ brelse(bh);
+
+ out_header:
+ n = seg - segnumv;
+ brelse(header_bh);
+ out_sem:
+ up_write(&NILFS_MDT(sufile)->mi_sem);
+ out:
+ if (ndone)
+ *ndone = n;
+ return ret;
+}
+
int nilfs_sufile_update(struct inode *sufile, __u64 segnum, int create,
void (*dofunc)(struct inode *, __u64,
struct buffer_head *,
@@ -490,7 +587,8 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
* nilfs_sufile_get_suinfo -
* @sufile: inode of segment usage file
* @segnum: segment number to start looking
- * @si: array of suinfo
+ * @buf: array of suinfo
+ * @sisz: byte size of suinfo
* @nsi: size of suinfo array
*
* Description:
@@ -502,11 +600,12 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum,
*
* %-ENOMEM - Insufficient amount of memory available.
*/
-ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum,
- struct nilfs_suinfo *si, size_t nsi)
+ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf,
+ unsigned sisz, size_t nsi)
{
struct buffer_head *su_bh;
struct nilfs_segment_usage *su;
+ struct nilfs_suinfo *si = buf;
size_t susz = NILFS_MDT(sufile)->mi_entry_size;
struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs;
void *kaddr;
@@ -531,20 +630,22 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum,
if (ret != -ENOENT)
goto out;
/* hole */
- memset(&si[i], 0, sizeof(struct nilfs_suinfo) * n);
+ memset(si, 0, sisz * n);
+ si = (void *)si + sisz * n;
continue;
}
kaddr = kmap_atomic(su_bh->b_page, KM_USER0);
su = nilfs_sufile_block_get_segment_usage(
sufile, segnum, su_bh, kaddr);
- for (j = 0; j < n; j++, su = (void *)su + susz) {
- si[i + j].sui_lastmod = le64_to_cpu(su->su_lastmod);
- si[i + j].sui_nblocks = le32_to_cpu(su->su_nblocks);
- si[i + j].sui_flags = le32_to_cpu(su->su_flags) &
+ for (j = 0; j < n;
+ j++, su = (void *)su + susz, si = (void *)si + sisz) {
+ si->sui_lastmod = le64_to_cpu(su->su_lastmod);
+ si->sui_nblocks = le32_to_cpu(su->su_nblocks);
+ si->sui_flags = le32_to_cpu(su->su_flags) &
~(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
if (nilfs_segment_is_active(nilfs, segnum + j))
- si[i + j].sui_flags |=
+ si->sui_flags |=
(1UL << NILFS_SEGMENT_USAGE_ACTIVE);
}
kunmap_atomic(kaddr, KM_USER0);
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index a2e2efd4ade..a2c4d76c336 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -43,43 +43,27 @@ void nilfs_sufile_put_segment_usage(struct inode *, __u64,
struct buffer_head *);
int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *);
int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *);
-ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, struct nilfs_suinfo *,
+ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned,
size_t);
+int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *,
+ void (*dofunc)(struct inode *, __u64,
+ struct buffer_head *,
+ struct buffer_head *));
int nilfs_sufile_update(struct inode *, __u64, int,
void (*dofunc)(struct inode *, __u64,
struct buffer_head *,
struct buffer_head *));
-void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
- struct buffer_head *);
void nilfs_sufile_do_scrap(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
void nilfs_sufile_do_free(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
+void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *,
+ struct buffer_head *);
void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *,
struct buffer_head *);
/**
- * nilfs_sufile_cancel_free -
- * @sufile: inode of segment usage file
- * @segnum: segment number
- *
- * Description:
- *
- * Return Value: On success, 0 is returned. On error, one of the following
- * negative error codes is returned.
- *
- * %-EIO - I/O error.
- *
- * %-ENOMEM - Insufficient amount of memory available.
- */
-static inline int nilfs_sufile_cancel_free(struct inode *sufile, __u64 segnum)
-{
- return nilfs_sufile_update(sufile, segnum, 0,
- nilfs_sufile_do_cancel_free);
-}
-
-/**
* nilfs_sufile_scrap - make a segment garbage
* @sufile: inode of segment usage file
* @segnum: segment number to be freed
@@ -100,6 +84,38 @@ static inline int nilfs_sufile_free(struct inode *sufile, __u64 segnum)
}
/**
+ * nilfs_sufile_freev - free segments
+ * @sufile: inode of segment usage file
+ * @segnumv: array of segment numbers
+ * @nsegs: size of @segnumv array
+ * @ndone: place to store the number of freed segments
+ */
+static inline int nilfs_sufile_freev(struct inode *sufile, __u64 *segnumv,
+ size_t nsegs, size_t *ndone)
+{
+ return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone,
+ nilfs_sufile_do_free);
+}
+
+/**
+ * nilfs_sufile_cancel_freev - reallocate freeing segments
+ * @sufile: inode of segment usage file
+ * @segnumv: array of segment numbers
+ * @nsegs: size of @segnumv array
+ * @ndone: place to store the number of cancelled segments
+ *
+ * Return Value: On success, 0 is returned. On error, a negative error codes
+ * is returned.
+ */
+static inline int nilfs_sufile_cancel_freev(struct inode *sufile,
+ __u64 *segnumv, size_t nsegs,
+ size_t *ndone)
+{
+ return nilfs_sufile_updatev(sufile, segnumv, nsegs, 0, ndone,
+ nilfs_sufile_do_cancel_free);
+}
+
+/**
* nilfs_sufile_set_error - mark a segment as erroneous
* @sufile: inode of segment usage file
* @segnum: segment number
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 6989b03e97a..ab785f85aa5 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -65,9 +65,8 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem "
"(NILFS)");
MODULE_LICENSE("GPL");
+static void nilfs_write_super(struct super_block *sb);
static int nilfs_remount(struct super_block *sb, int *flags, char *data);
-static int test_exclusive_mount(struct file_system_type *fs_type,
- struct block_device *bdev, int flags);
/**
* nilfs_error() - report failure condition on a filesystem
@@ -134,7 +133,7 @@ void nilfs_warning(struct super_block *sb, const char *function,
static struct kmem_cache *nilfs_inode_cachep;
-struct inode *nilfs_alloc_inode(struct super_block *sb)
+struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs)
{
struct nilfs_inode_info *ii;
@@ -144,10 +143,15 @@ struct inode *nilfs_alloc_inode(struct super_block *sb)
ii->i_bh = NULL;
ii->i_state = 0;
ii->vfs_inode.i_version = 1;
- nilfs_btnode_cache_init(&ii->i_btnode_cache);
+ nilfs_btnode_cache_init(&ii->i_btnode_cache, nilfs->ns_bdi);
return &ii->vfs_inode;
}
+struct inode *nilfs_alloc_inode(struct super_block *sb)
+{
+ return nilfs_alloc_inode_common(NILFS_SB(sb)->s_nilfs);
+}
+
void nilfs_destroy_inode(struct inode *inode)
{
kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode));
@@ -315,6 +319,11 @@ static void nilfs_put_super(struct super_block *sb)
struct nilfs_sb_info *sbi = NILFS_SB(sb);
struct the_nilfs *nilfs = sbi->s_nilfs;
+ lock_kernel();
+
+ if (sb->s_dirt)
+ nilfs_write_super(sb);
+
nilfs_detach_segment_constructor(sbi);
if (!(sb->s_flags & MS_RDONLY)) {
@@ -323,12 +332,18 @@ static void nilfs_put_super(struct super_block *sb)
nilfs_commit_super(sbi, 1);
up_write(&nilfs->ns_sem);
}
+ down_write(&nilfs->ns_super_sem);
+ if (nilfs->ns_current == sbi)
+ nilfs->ns_current = NULL;
+ up_write(&nilfs->ns_super_sem);
nilfs_detach_checkpoint(sbi);
put_nilfs(sbi->s_nilfs);
sbi->s_super = NULL;
sb->s_fs_info = NULL;
- kfree(sbi);
+ nilfs_put_sbinfo(sbi);
+
+ unlock_kernel();
}
/**
@@ -383,6 +398,8 @@ static int nilfs_sync_fs(struct super_block *sb, int wait)
{
int err = 0;
+ nilfs_write_super(sb);
+
/* This function is called when super block should be written back */
if (wait)
err = nilfs_construct_segment(sb);
@@ -396,9 +413,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
struct buffer_head *bh_cp;
int err;
- down_write(&nilfs->ns_sem);
+ down_write(&nilfs->ns_super_sem);
list_add(&sbi->s_list, &nilfs->ns_supers);
- up_write(&nilfs->ns_sem);
+ up_write(&nilfs->ns_super_sem);
sbi->s_ifile = nilfs_mdt_new(
nilfs, sbi->s_super, NILFS_IFILE_INO, NILFS_IFILE_GFP);
@@ -436,9 +453,9 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno)
nilfs_mdt_destroy(sbi->s_ifile);
sbi->s_ifile = NULL;
- down_write(&nilfs->ns_sem);
+ down_write(&nilfs->ns_super_sem);
list_del_init(&sbi->s_list);
- up_write(&nilfs->ns_sem);
+ up_write(&nilfs->ns_super_sem);
return err;
}
@@ -450,9 +467,9 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi)
nilfs_mdt_clear(sbi->s_ifile);
nilfs_mdt_destroy(sbi->s_ifile);
sbi->s_ifile = NULL;
- down_write(&nilfs->ns_sem);
+ down_write(&nilfs->ns_super_sem);
list_del_init(&sbi->s_list);
- up_write(&nilfs->ns_sem);
+ up_write(&nilfs->ns_super_sem);
}
static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi)
@@ -752,7 +769,7 @@ int nilfs_store_magic_and_option(struct super_block *sb,
* @silent: silent mode flag
* @nilfs: the_nilfs struct
*
- * This function is called exclusively by bd_mount_mutex.
+ * This function is called exclusively by nilfs->ns_mount_mutex.
* So, the recovery process is protected from other simultaneous mounts.
*/
static int
@@ -773,6 +790,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
get_nilfs(nilfs);
sbi->s_nilfs = nilfs;
sbi->s_super = sb;
+ atomic_set(&sbi->s_count, 1);
err = init_nilfs(nilfs, sbi, (char *)data);
if (err)
@@ -870,6 +888,11 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
goto failed_root;
}
+ down_write(&nilfs->ns_super_sem);
+ if (!nilfs_test_opt(sbi, SNAPSHOT))
+ nilfs->ns_current = sbi;
+ up_write(&nilfs->ns_super_sem);
+
return 0;
failed_root:
@@ -885,7 +908,7 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent,
failed_sbi:
put_nilfs(nilfs);
sb->s_fs_info = NULL;
- kfree(sbi);
+ nilfs_put_sbinfo(sbi);
return err;
}
@@ -898,6 +921,9 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
struct nilfs_mount_options old_opts;
int err;
+ lock_kernel();
+
+ down_write(&nilfs->ns_super_sem);
old_sb_flags = sb->s_flags;
old_opts.mount_opt = sbi->s_mount_opt;
old_opts.snapshot_cno = sbi->s_snapshot_cno;
@@ -945,14 +971,12 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
* store the current valid flag. (It may have been changed
* by fsck since we originally mounted the partition.)
*/
- down(&sb->s_bdev->bd_mount_sem);
- /* Check existing RW-mount */
- if (test_exclusive_mount(sb->s_type, sb->s_bdev, 0)) {
+ if (nilfs->ns_current && nilfs->ns_current != sbi) {
printk(KERN_WARNING "NILFS (device %s): couldn't "
- "remount because a RW-mount exists.\n",
+ "remount because an RW-mount exists.\n",
sb->s_id);
err = -EBUSY;
- goto rw_remount_failed;
+ goto restore_opts;
}
if (sbi->s_snapshot_cno != nilfs_last_cno(nilfs)) {
printk(KERN_WARNING "NILFS (device %s): couldn't "
@@ -960,7 +984,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
"the latest one.\n",
sb->s_id);
err = -EINVAL;
- goto rw_remount_failed;
+ goto restore_opts;
}
sb->s_flags &= ~MS_RDONLY;
nilfs_clear_opt(sbi, SNAPSHOT);
@@ -968,28 +992,31 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
err = nilfs_attach_segment_constructor(sbi);
if (err)
- goto rw_remount_failed;
+ goto restore_opts;
down_write(&nilfs->ns_sem);
nilfs_setup_super(sbi);
up_write(&nilfs->ns_sem);
- up(&sb->s_bdev->bd_mount_sem);
+ nilfs->ns_current = sbi;
}
out:
+ up_write(&nilfs->ns_super_sem);
+ unlock_kernel();
return 0;
- rw_remount_failed:
- up(&sb->s_bdev->bd_mount_sem);
restore_opts:
sb->s_flags = old_sb_flags;
sbi->s_mount_opt = old_opts.mount_opt;
sbi->s_snapshot_cno = old_opts.snapshot_cno;
+ up_write(&nilfs->ns_super_sem);
+ unlock_kernel();
return err;
}
struct nilfs_super_data {
struct block_device *bdev;
+ struct nilfs_sb_info *sbi;
__u64 cno;
int flags;
};
@@ -1048,33 +1075,7 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
{
struct nilfs_super_data *sd = data;
- return s->s_bdev == sd->bdev;
-}
-
-static int nilfs_test_bdev_super2(struct super_block *s, void *data)
-{
- struct nilfs_super_data *sd = data;
- int ret;
-
- if (s->s_bdev != sd->bdev)
- return 0;
-
- if (!((s->s_flags | sd->flags) & MS_RDONLY))
- return 1; /* Reuse an old R/W-mode super_block */
-
- if (s->s_flags & sd->flags & MS_RDONLY) {
- if (down_read_trylock(&s->s_umount)) {
- ret = s->s_root &&
- (sd->cno == NILFS_SB(s)->s_snapshot_cno);
- up_read(&s->s_umount);
- /*
- * This path is locked with sb_lock by sget().
- * So, drop_super() causes deadlock.
- */
- return ret;
- }
- }
- return 0;
+ return sd->sbi && s->s_fs_info == (void *)sd->sbi;
}
static int
@@ -1082,8 +1083,8 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data, struct vfsmount *mnt)
{
struct nilfs_super_data sd;
- struct super_block *s, *s2;
- struct the_nilfs *nilfs = NULL;
+ struct super_block *s;
+ struct the_nilfs *nilfs;
int err, need_to_close = 1;
sd.bdev = open_bdev_exclusive(dev_name, flags, fs_type);
@@ -1095,7 +1096,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
* much more information than normal filesystems to identify mount
* instance. For snapshot mounts, not only a mount type (ro-mount
* or rw-mount) but also a checkpoint number is required.
- * The results are passed in sget() using nilfs_super_data.
*/
sd.cno = 0;
sd.flags = flags;
@@ -1104,64 +1104,59 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
goto failed;
}
- /*
- * once the super is inserted into the list by sget, s_umount
- * will protect the lockfs code from trying to start a snapshot
- * while we are mounting
- */
- down(&sd.bdev->bd_mount_sem);
- if (!sd.cno &&
- (err = test_exclusive_mount(fs_type, sd.bdev, flags ^ MS_RDONLY))) {
- err = (err < 0) ? : -EBUSY;
- goto failed_unlock;
+ nilfs = find_or_create_nilfs(sd.bdev);
+ if (!nilfs) {
+ err = -ENOMEM;
+ goto failed;
}
- /*
- * Phase-1: search any existent instance and get the_nilfs
- */
- s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
- if (IS_ERR(s))
- goto error_s;
-
- if (!s->s_root) {
- err = -ENOMEM;
- nilfs = alloc_nilfs(sd.bdev);
- if (!nilfs)
- goto cancel_new;
- } else {
- struct nilfs_sb_info *sbi = NILFS_SB(s);
+ mutex_lock(&nilfs->ns_mount_mutex);
+ if (!sd.cno) {
/*
- * s_umount protects super_block from unmount process;
- * It covers pointers of nilfs_sb_info and the_nilfs.
+ * Check if an exclusive mount exists or not.
+ * Snapshot mounts coexist with a current mount
+ * (i.e. rw-mount or ro-mount), whereas rw-mount and
+ * ro-mount are mutually exclusive.
*/
- nilfs = sbi->s_nilfs;
- get_nilfs(nilfs);
- up_write(&s->s_umount);
+ down_read(&nilfs->ns_super_sem);
+ if (nilfs->ns_current &&
+ ((nilfs->ns_current->s_super->s_flags ^ flags)
+ & MS_RDONLY)) {
+ up_read(&nilfs->ns_super_sem);
+ err = -EBUSY;
+ goto failed_unlock;
+ }
+ up_read(&nilfs->ns_super_sem);
+ }
- /*
- * Phase-2: search specified snapshot or R/W mode super_block
- */
- if (!sd.cno)
- /* trying to get the latest checkpoint. */
- sd.cno = nilfs_last_cno(nilfs);
+ /*
+ * Find existing nilfs_sb_info struct
+ */
+ sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno);
- s2 = sget(fs_type, nilfs_test_bdev_super2,
- nilfs_set_bdev_super, &sd);
- deactivate_super(s);
- /*
- * Although deactivate_super() invokes close_bdev_exclusive() at
- * kill_block_super(). Here, s is an existent mount; we need
- * one more close_bdev_exclusive() call.
- */
- s = s2;
- if (IS_ERR(s))
- goto error_s;
+ if (!sd.cno)
+ /* trying to get the latest checkpoint. */
+ sd.cno = nilfs_last_cno(nilfs);
+
+ /*
+ * Get super block instance holding the nilfs_sb_info struct.
+ * A new instance is allocated if no existing mount is present or
+ * existing instance has been unmounted.
+ */
+ s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd);
+ if (sd.sbi)
+ nilfs_put_sbinfo(sd.sbi);
+
+ if (IS_ERR(s)) {
+ err = PTR_ERR(s);
+ goto failed_unlock;
}
if (!s->s_root) {
char b[BDEVNAME_SIZE];
+ /* New superblock instance created */
s->s_flags = flags;
strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id));
sb_set_blocksize(s, block_size(sd.bdev));
@@ -1172,26 +1167,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
s->s_flags |= MS_ACTIVE;
need_to_close = 0;
- } else if (!(s->s_flags & MS_RDONLY)) {
- err = -EBUSY;
}
- up(&sd.bdev->bd_mount_sem);
+ mutex_unlock(&nilfs->ns_mount_mutex);
put_nilfs(nilfs);
if (need_to_close)
close_bdev_exclusive(sd.bdev, flags);
simple_set_mnt(mnt, s);
return 0;
- error_s:
- up(&sd.bdev->bd_mount_sem);
- if (nilfs)
- put_nilfs(nilfs);
- close_bdev_exclusive(sd.bdev, flags);
- return PTR_ERR(s);
-
failed_unlock:
- up(&sd.bdev->bd_mount_sem);
+ mutex_unlock(&nilfs->ns_mount_mutex);
+ put_nilfs(nilfs);
failed:
close_bdev_exclusive(sd.bdev, flags);
@@ -1199,70 +1186,18 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
cancel_new:
/* Abandoning the newly allocated superblock */
- up(&sd.bdev->bd_mount_sem);
- if (nilfs)
- put_nilfs(nilfs);
+ mutex_unlock(&nilfs->ns_mount_mutex);
+ put_nilfs(nilfs);
up_write(&s->s_umount);
deactivate_super(s);
/*
* deactivate_super() invokes close_bdev_exclusive().
* We must finish all post-cleaning before this call;
- * put_nilfs() and unlocking bd_mount_sem need the block device.
+ * put_nilfs() needs the block device.
*/
return err;
}
-static int nilfs_test_bdev_super3(struct super_block *s, void *data)
-{
- struct nilfs_super_data *sd = data;
- int ret;
-
- if (s->s_bdev != sd->bdev)
- return 0;
- if (down_read_trylock(&s->s_umount)) {
- ret = (s->s_flags & MS_RDONLY) && s->s_root &&
- nilfs_test_opt(NILFS_SB(s), SNAPSHOT);
- up_read(&s->s_umount);
- if (ret)
- return 0; /* ignore snapshot mounts */
- }
- return !((sd->flags ^ s->s_flags) & MS_RDONLY);
-}
-
-static int __false_bdev_super(struct super_block *s, void *data)
-{
-#if 0 /* XXX: workaround for lock debug. This is not good idea */
- up_write(&s->s_umount);
-#endif
- return -EFAULT;
-}
-
-/**
- * test_exclusive_mount - check whether an exclusive RW/RO mount exists or not.
- * fs_type: filesystem type
- * bdev: block device
- * flag: 0 (check rw-mount) or MS_RDONLY (check ro-mount)
- * res: pointer to an integer to store result
- *
- * This function must be called within a section protected by bd_mount_mutex.
- */
-static int test_exclusive_mount(struct file_system_type *fs_type,
- struct block_device *bdev, int flags)
-{
- struct super_block *s;
- struct nilfs_super_data sd = { .flags = flags, .bdev = bdev };
-
- s = sget(fs_type, nilfs_test_bdev_super3, __false_bdev_super, &sd);
- if (IS_ERR(s)) {
- if (PTR_ERR(s) != -EFAULT)
- return PTR_ERR(s);
- return 0; /* Not found */
- }
- up_write(&s->s_umount);
- deactivate_super(s);
- return 1; /* Found */
-}
-
struct file_system_type nilfs_fs_type = {
.owner = THIS_MODULE,
.name = "nilfs2",
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index a91f15b8673..8b888982571 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -32,9 +32,12 @@
#include "cpfile.h"
#include "sufile.h"
#include "dat.h"
-#include "seglist.h"
#include "segbuf.h"
+
+static LIST_HEAD(nilfs_objects);
+static DEFINE_SPINLOCK(nilfs_lock);
+
void nilfs_set_last_segment(struct the_nilfs *nilfs,
sector_t start_blocknr, u64 seq, __u64 cno)
{
@@ -55,7 +58,7 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs,
* Return Value: On success, pointer to the_nilfs is returned.
* On error, NULL is returned.
*/
-struct the_nilfs *alloc_nilfs(struct block_device *bdev)
+static struct the_nilfs *alloc_nilfs(struct block_device *bdev)
{
struct the_nilfs *nilfs;
@@ -68,7 +71,10 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
atomic_set(&nilfs->ns_writer_refcount, -1);
atomic_set(&nilfs->ns_ndirtyblks, 0);
init_rwsem(&nilfs->ns_sem);
+ init_rwsem(&nilfs->ns_super_sem);
+ mutex_init(&nilfs->ns_mount_mutex);
mutex_init(&nilfs->ns_writer_mutex);
+ INIT_LIST_HEAD(&nilfs->ns_list);
INIT_LIST_HEAD(&nilfs->ns_supers);
spin_lock_init(&nilfs->ns_last_segment_lock);
nilfs->ns_gc_inodes_h = NULL;
@@ -78,6 +84,45 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
}
/**
+ * find_or_create_nilfs - find or create nilfs object
+ * @bdev: block device to which the_nilfs is related
+ *
+ * find_nilfs() looks up an existent nilfs object created on the
+ * device and gets the reference count of the object. If no nilfs object
+ * is found on the device, a new nilfs object is allocated.
+ *
+ * Return Value: On success, pointer to the nilfs object is returned.
+ * On error, NULL is returned.
+ */
+struct the_nilfs *find_or_create_nilfs(struct block_device *bdev)
+{
+ struct the_nilfs *nilfs, *new = NULL;
+
+ retry:
+ spin_lock(&nilfs_lock);
+ list_for_each_entry(nilfs, &nilfs_objects, ns_list) {
+ if (nilfs->ns_bdev == bdev) {
+ get_nilfs(nilfs);
+ spin_unlock(&nilfs_lock);
+ if (new)
+ put_nilfs(new);
+ return nilfs; /* existing object */
+ }
+ }
+ if (new) {
+ list_add_tail(&new->ns_list, &nilfs_objects);
+ spin_unlock(&nilfs_lock);
+ return new; /* new object */
+ }
+ spin_unlock(&nilfs_lock);
+
+ new = alloc_nilfs(bdev);
+ if (new)
+ goto retry;
+ return NULL; /* insufficient memory */
+}
+
+/**
* put_nilfs - release a reference to the_nilfs
* @nilfs: the_nilfs structure to be released
*
@@ -86,13 +131,20 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
*/
void put_nilfs(struct the_nilfs *nilfs)
{
- if (!atomic_dec_and_test(&nilfs->ns_count))
+ spin_lock(&nilfs_lock);
+ if (!atomic_dec_and_test(&nilfs->ns_count)) {
+ spin_unlock(&nilfs_lock);
return;
+ }
+ list_del_init(&nilfs->ns_list);
+ spin_unlock(&nilfs_lock);
+
/*
- * Increment of ns_count never occur below because the caller
+ * Increment of ns_count never occurs below because the caller
* of get_nilfs() holds at least one reference to the_nilfs.
* Thus its exclusion control is not required here.
*/
+
might_sleep();
if (nilfs_loaded(nilfs)) {
nilfs_mdt_clear(nilfs->ns_sufile);
@@ -613,13 +665,63 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs)
return ret;
}
+/**
+ * nilfs_find_sbinfo - find existing nilfs_sb_info structure
+ * @nilfs: nilfs object
+ * @rw_mount: mount type (non-zero value for read/write mount)
+ * @cno: checkpoint number (zero for read-only mount)
+ *
+ * nilfs_find_sbinfo() returns the nilfs_sb_info structure which
+ * @rw_mount and @cno (in case of snapshots) matched. If no instance
+ * was found, NULL is returned. Although the super block instance can
+ * be unmounted after this function returns, the nilfs_sb_info struct
+ * is kept on memory until nilfs_put_sbinfo() is called.
+ */
+struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs,
+ int rw_mount, __u64 cno)
+{
+ struct nilfs_sb_info *sbi;
+
+ down_read(&nilfs->ns_super_sem);
+ /*
+ * The SNAPSHOT flag and sb->s_flags are supposed to be
+ * protected with nilfs->ns_super_sem.
+ */
+ sbi = nilfs->ns_current;
+ if (rw_mount) {
+ if (sbi && !(sbi->s_super->s_flags & MS_RDONLY))
+ goto found; /* read/write mount */
+ else
+ goto out;
+ } else if (cno == 0) {
+ if (sbi && (sbi->s_super->s_flags & MS_RDONLY))
+ goto found; /* read-only mount */
+ else
+ goto out;
+ }
+
+ list_for_each_entry(sbi, &nilfs->ns_supers, s_list) {
+ if (nilfs_test_opt(sbi, SNAPSHOT) &&
+ sbi->s_snapshot_cno == cno)
+ goto found; /* snapshot mount */
+ }
+ out:
+ up_read(&nilfs->ns_super_sem);
+ return NULL;
+
+ found:
+ atomic_inc(&sbi->s_count);
+ up_read(&nilfs->ns_super_sem);
+ return sbi;
+}
+
int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
int snapshot_mount)
{
struct nilfs_sb_info *sbi;
int ret = 0;
- down_read(&nilfs->ns_sem);
+ down_read(&nilfs->ns_super_sem);
if (cno == 0 || cno > nilfs->ns_cno)
goto out_unlock;
@@ -636,6 +738,6 @@ int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno,
ret++;
out_unlock:
- up_read(&nilfs->ns_sem);
+ up_read(&nilfs->ns_super_sem);
return ret;
}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 30fe58778d0..e8adbffc626 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -43,12 +43,16 @@ enum {
* struct the_nilfs - struct to supervise multiple nilfs mount points
* @ns_flags: flags
* @ns_count: reference count
+ * @ns_list: list head for nilfs_list
* @ns_bdev: block device
* @ns_bdi: backing dev info
* @ns_writer: back pointer to writable nilfs_sb_info
* @ns_sem: semaphore for shared states
+ * @ns_super_sem: semaphore for global operations across super block instances
+ * @ns_mount_mutex: mutex protecting mount process of nilfs
* @ns_writer_mutex: mutex protecting ns_writer attach/detach
* @ns_writer_refcount: number of referrers on ns_writer
+ * @ns_current: back pointer to current mount
* @ns_sbh: buffer heads of on-disk super blocks
* @ns_sbp: pointers to super block data
* @ns_sbwtime: previous write time of super blocks
@@ -88,15 +92,24 @@ enum {
struct the_nilfs {
unsigned long ns_flags;
atomic_t ns_count;
+ struct list_head ns_list;
struct block_device *ns_bdev;
struct backing_dev_info *ns_bdi;
struct nilfs_sb_info *ns_writer;
struct rw_semaphore ns_sem;
+ struct rw_semaphore ns_super_sem;
+ struct mutex ns_mount_mutex;
struct mutex ns_writer_mutex;
atomic_t ns_writer_refcount;
/*
+ * components protected by ns_super_sem
+ */
+ struct nilfs_sb_info *ns_current;
+ struct list_head ns_supers;
+
+ /*
* used for
* - loading the latest checkpoint exclusively.
* - allocating a new full segment.
@@ -108,7 +121,6 @@ struct the_nilfs {
time_t ns_sbwtime[2];
unsigned ns_sbsize;
unsigned ns_mount_state;
- struct list_head ns_supers;
/*
* Following fields are dedicated to a writable FS-instance.
@@ -191,11 +203,12 @@ THE_NILFS_FNS(DISCONTINUED, discontinued)
#define NILFS_ALTSB_FREQ 60 /* spare superblock */
void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64);
-struct the_nilfs *alloc_nilfs(struct block_device *);
+struct the_nilfs *find_or_create_nilfs(struct block_device *);
void put_nilfs(struct the_nilfs *);
int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *);
int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *);
int nilfs_count_free_blocks(struct the_nilfs *, sector_t *);
+struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64);
int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int);
int nilfs_near_disk_full(struct the_nilfs *);
void nilfs_fall_back_super_block(struct the_nilfs *);
@@ -238,6 +251,12 @@ nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi)
mutex_unlock(&nilfs->ns_writer_mutex);
}
+static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi)
+{
+ if (!atomic_dec_and_test(&sbi->s_count))
+ kfree(sbi);
+}
+
static inline void
nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum,
sector_t *seg_start, sector_t *seg_end)
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 6aa7c471353..abaaa1cbf8d 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -443,6 +443,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
ntfs_volume *vol = NTFS_SB(sb);
ntfs_debug("Entering with remount options string: %s", opt);
+
+ lock_kernel();
#ifndef NTFS_RW
/* For read-only compiled driver, enforce read-only flag. */
*flags |= MS_RDONLY;
@@ -466,15 +468,18 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
if (NVolErrors(vol)) {
ntfs_error(sb, "Volume has errors and is read-only%s",
es);
+ unlock_kernel();
return -EROFS;
}
if (vol->vol_flags & VOLUME_IS_DIRTY) {
ntfs_error(sb, "Volume is dirty and read-only%s", es);
+ unlock_kernel();
return -EROFS;
}
if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) {
ntfs_error(sb, "Volume has been modified by chkdsk "
"and is read-only%s", es);
+ unlock_kernel();
return -EROFS;
}
if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) {
@@ -482,11 +487,13 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
"(0x%x) and is read-only%s",
(unsigned)le16_to_cpu(vol->vol_flags),
es);
+ unlock_kernel();
return -EROFS;
}
if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) {
ntfs_error(sb, "Failed to set dirty bit in volume "
"information flags%s", es);
+ unlock_kernel();
return -EROFS;
}
#if 0
@@ -506,18 +513,21 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
ntfs_error(sb, "Failed to empty journal $LogFile%s",
es);
NVolSetErrors(vol);
+ unlock_kernel();
return -EROFS;
}
if (!ntfs_mark_quotas_out_of_date(vol)) {
ntfs_error(sb, "Failed to mark quotas out of date%s",
es);
NVolSetErrors(vol);
+ unlock_kernel();
return -EROFS;
}
if (!ntfs_stamp_usnjrnl(vol)) {
ntfs_error(sb, "Failed to stamp transation log "
"($UsnJrnl)%s", es);
NVolSetErrors(vol);
+ unlock_kernel();
return -EROFS;
}
} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
@@ -533,8 +543,11 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt)
// TODO: Deal with *flags.
- if (!parse_options(vol, opt))
+ if (!parse_options(vol, opt)) {
+ unlock_kernel();
return -EINVAL;
+ }
+ unlock_kernel();
ntfs_debug("Done.");
return 0;
}
@@ -2246,6 +2259,9 @@ static void ntfs_put_super(struct super_block *sb)
ntfs_volume *vol = NTFS_SB(sb);
ntfs_debug("Entering.");
+
+ lock_kernel();
+
#ifdef NTFS_RW
/*
* Commit all inodes while they are still open in case some of them
@@ -2373,39 +2389,12 @@ static void ntfs_put_super(struct super_block *sb)
vol->mftmirr_ino = NULL;
}
/*
- * If any dirty inodes are left, throw away all mft data page cache
- * pages to allow a clean umount. This should never happen any more
- * due to mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
- * the underlying mft records are written out and cleaned. If it does,
- * happen anyway, we want to know...
+ * We should have no dirty inodes left, due to
+ * mft.c::ntfs_mft_writepage() cleaning all the dirty pages as
+ * the underlying mft records are written out and cleaned.
*/
ntfs_commit_inode(vol->mft_ino);
write_inode_now(vol->mft_ino, 1);
- if (sb_has_dirty_inodes(sb)) {
- const char *s1, *s2;
-
- mutex_lock(&vol->mft_ino->i_mutex);
- truncate_inode_pages(vol->mft_ino->i_mapping, 0);
- mutex_unlock(&vol->mft_ino->i_mutex);
- write_inode_now(vol->mft_ino, 1);
- if (sb_has_dirty_inodes(sb)) {
- static const char *_s1 = "inodes";
- static const char *_s2 = "";
- s1 = _s1;
- s2 = _s2;
- } else {
- static const char *_s1 = "mft pages";
- static const char *_s2 = "They have been thrown "
- "away. ";
- s1 = _s1;
- s2 = _s2;
- }
- ntfs_error(sb, "Dirty %s found at umount time. %sYou should "
- "run chkdsk. Please email "
- "linux-ntfs-dev@lists.sourceforge.net and say "
- "that you saw this message. Thank you.", s1,
- s2);
- }
#endif /* NTFS_RW */
iput(vol->mft_ino);
@@ -2444,7 +2433,8 @@ static void ntfs_put_super(struct super_block *sb)
}
sb->s_fs_info = NULL;
kfree(vol);
- return;
+
+ unlock_kernel();
}
/**
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 678a067d925..9edcde4974a 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -475,6 +475,12 @@ struct ocfs2_path {
#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
#define path_num_items(_path) ((_path)->p_tree_depth + 1)
+static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
+ u32 cpos);
+static void ocfs2_adjust_rightmost_records(struct inode *inode,
+ handle_t *handle,
+ struct ocfs2_path *path,
+ struct ocfs2_extent_rec *insert_rec);
/*
* Reset the actual path elements so that we can re-use the structure
* to build another path. Generally, this involves freeing the buffer
@@ -1013,6 +1019,54 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
}
/*
+ * Change range of the branches in the right most path according to the leaf
+ * extent block's rightmost record.
+ */
+static int ocfs2_adjust_rightmost_branch(handle_t *handle,
+ struct inode *inode,
+ struct ocfs2_extent_tree *et)
+{
+ int status;
+ struct ocfs2_path *path = NULL;
+ struct ocfs2_extent_list *el;
+ struct ocfs2_extent_rec *rec;
+
+ path = ocfs2_new_path_from_et(et);
+ if (!path) {
+ status = -ENOMEM;
+ return status;
+ }
+
+ status = ocfs2_find_path(inode, path, UINT_MAX);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_extend_trans(handle, path_num_items(path) +
+ handle->h_buffer_credits);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ status = ocfs2_journal_access_path(inode, handle, path);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out;
+ }
+
+ el = path_leaf_el(path);
+ rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
+
+ ocfs2_adjust_rightmost_records(inode, handle, path, rec);
+
+out:
+ ocfs2_free_path(path);
+ return status;
+}
+
+/*
* Add an entire tree branch to our inode. eb_bh is the extent block
* to start at, if we don't want to start the branch at the dinode
* structure.
@@ -1038,7 +1092,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *eb_el;
struct ocfs2_extent_list *el;
- u32 new_cpos;
+ u32 new_cpos, root_end;
mlog_entry_void();
@@ -1055,6 +1109,27 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
new_blocks = le16_to_cpu(el->l_tree_depth);
+ eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
+ new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
+ root_end = ocfs2_sum_rightmost_rec(et->et_root_el);
+
+ /*
+ * If there is a gap before the root end and the real end
+ * of the righmost leaf block, we need to remove the gap
+ * between new_cpos and root_end first so that the tree
+ * is consistent after we add a new branch(it will start
+ * from new_cpos).
+ */
+ if (root_end > new_cpos) {
+ mlog(0, "adjust the cluster end from %u to %u\n",
+ root_end, new_cpos);
+ status = ocfs2_adjust_rightmost_branch(handle, inode, et);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
/* allocate the number of new eb blocks we need */
new_eb_bhs = kcalloc(new_blocks, sizeof(struct buffer_head *),
GFP_KERNEL);
@@ -1071,9 +1146,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
goto bail;
}
- eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
- new_cpos = ocfs2_sum_rightmost_rec(&eb->h_list);
-
/* Note: new_eb_bhs[new_blocks - 1] is the guy which will be
* linked with the rest of the tree.
* conversly, new_eb_bhs[0] is the new bottommost leaf.
diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c
index 2a947c44e59..a1163b8b417 100644
--- a/fs/ocfs2/blockcheck.c
+++ b/fs/ocfs2/blockcheck.c
@@ -22,6 +22,9 @@
#include <linux/crc32.h>
#include <linux/buffer_head.h>
#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/fs.h>
#include <asm/byteorder.h>
#include <cluster/masklog.h>
@@ -222,6 +225,155 @@ void ocfs2_hamming_fix_block(void *data, unsigned int blocksize,
ocfs2_hamming_fix(data, blocksize * 8, 0, fix);
}
+
+/*
+ * Debugfs handling.
+ */
+
+#ifdef CONFIG_DEBUG_FS
+
+static int blockcheck_u64_get(void *data, u64 *val)
+{
+ *val = *(u64 *)data;
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(blockcheck_fops, blockcheck_u64_get, NULL, "%llu\n");
+
+static struct dentry *blockcheck_debugfs_create(const char *name,
+ struct dentry *parent,
+ u64 *value)
+{
+ return debugfs_create_file(name, S_IFREG | S_IRUSR, parent, value,
+ &blockcheck_fops);
+}
+
+static void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
+{
+ if (stats) {
+ debugfs_remove(stats->b_debug_check);
+ stats->b_debug_check = NULL;
+ debugfs_remove(stats->b_debug_failure);
+ stats->b_debug_failure = NULL;
+ debugfs_remove(stats->b_debug_recover);
+ stats->b_debug_recover = NULL;
+ debugfs_remove(stats->b_debug_dir);
+ stats->b_debug_dir = NULL;
+ }
+}
+
+static int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
+ struct dentry *parent)
+{
+ int rc = -EINVAL;
+
+ if (!stats)
+ goto out;
+
+ stats->b_debug_dir = debugfs_create_dir("blockcheck", parent);
+ if (!stats->b_debug_dir)
+ goto out;
+
+ stats->b_debug_check =
+ blockcheck_debugfs_create("blocks_checked",
+ stats->b_debug_dir,
+ &stats->b_check_count);
+
+ stats->b_debug_failure =
+ blockcheck_debugfs_create("checksums_failed",
+ stats->b_debug_dir,
+ &stats->b_failure_count);
+
+ stats->b_debug_recover =
+ blockcheck_debugfs_create("ecc_recoveries",
+ stats->b_debug_dir,
+ &stats->b_recover_count);
+ if (stats->b_debug_check && stats->b_debug_failure &&
+ stats->b_debug_recover)
+ rc = 0;
+
+out:
+ if (rc)
+ ocfs2_blockcheck_debug_remove(stats);
+ return rc;
+}
+#else
+static inline int ocfs2_blockcheck_debug_install(struct ocfs2_blockcheck_stats *stats,
+ struct dentry *parent)
+{
+ return 0;
+}
+
+static inline void ocfs2_blockcheck_debug_remove(struct ocfs2_blockcheck_stats *stats)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+/* Always-called wrappers for starting and stopping the debugfs files */
+int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
+ struct dentry *parent)
+{
+ return ocfs2_blockcheck_debug_install(stats, parent);
+}
+
+void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats)
+{
+ ocfs2_blockcheck_debug_remove(stats);
+}
+
+static void ocfs2_blockcheck_inc_check(struct ocfs2_blockcheck_stats *stats)
+{
+ u64 new_count;
+
+ if (!stats)
+ return;
+
+ spin_lock(&stats->b_lock);
+ stats->b_check_count++;
+ new_count = stats->b_check_count;
+ spin_unlock(&stats->b_lock);
+
+ if (!new_count)
+ mlog(ML_NOTICE, "Block check count has wrapped\n");
+}
+
+static void ocfs2_blockcheck_inc_failure(struct ocfs2_blockcheck_stats *stats)
+{
+ u64 new_count;
+
+ if (!stats)
+ return;
+
+ spin_lock(&stats->b_lock);
+ stats->b_failure_count++;
+ new_count = stats->b_failure_count;
+ spin_unlock(&stats->b_lock);
+
+ if (!new_count)
+ mlog(ML_NOTICE, "Checksum failure count has wrapped\n");
+}
+
+static void ocfs2_blockcheck_inc_recover(struct ocfs2_blockcheck_stats *stats)
+{
+ u64 new_count;
+
+ if (!stats)
+ return;
+
+ spin_lock(&stats->b_lock);
+ stats->b_recover_count++;
+ new_count = stats->b_recover_count;
+ spin_unlock(&stats->b_lock);
+
+ if (!new_count)
+ mlog(ML_NOTICE, "ECC recovery count has wrapped\n");
+}
+
+
+
+/*
+ * These are the low-level APIs for using the ocfs2_block_check structure.
+ */
+
/*
* This function generates check information for a block.
* data is the block to be checked. bc is a pointer to the
@@ -266,12 +418,15 @@ void ocfs2_block_check_compute(void *data, size_t blocksize,
* Again, the data passed in should be the on-disk endian.
*/
int ocfs2_block_check_validate(void *data, size_t blocksize,
- struct ocfs2_block_check *bc)
+ struct ocfs2_block_check *bc,
+ struct ocfs2_blockcheck_stats *stats)
{
int rc = 0;
struct ocfs2_block_check check;
u32 crc, ecc;
+ ocfs2_blockcheck_inc_check(stats);
+
check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
check.bc_ecc = le16_to_cpu(bc->bc_ecc);
@@ -282,6 +437,7 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
if (crc == check.bc_crc32e)
goto out;
+ ocfs2_blockcheck_inc_failure(stats);
mlog(ML_ERROR,
"CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
@@ -292,8 +448,10 @@ int ocfs2_block_check_validate(void *data, size_t blocksize,
/* And check the crc32 again */
crc = crc32_le(~0, data, blocksize);
- if (crc == check.bc_crc32e)
+ if (crc == check.bc_crc32e) {
+ ocfs2_blockcheck_inc_recover(stats);
goto out;
+ }
mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
@@ -366,7 +524,8 @@ void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
* Again, the data passed in should be the on-disk endian.
*/
int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
- struct ocfs2_block_check *bc)
+ struct ocfs2_block_check *bc,
+ struct ocfs2_blockcheck_stats *stats)
{
int i, rc = 0;
struct ocfs2_block_check check;
@@ -377,6 +536,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
if (!nr)
return 0;
+ ocfs2_blockcheck_inc_check(stats);
+
check.bc_crc32e = le32_to_cpu(bc->bc_crc32e);
check.bc_ecc = le16_to_cpu(bc->bc_ecc);
@@ -388,6 +549,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
if (crc == check.bc_crc32e)
goto out;
+ ocfs2_blockcheck_inc_failure(stats);
mlog(ML_ERROR,
"CRC32 failed: stored: %u, computed %u. Applying ECC.\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
@@ -416,8 +578,10 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
/* And check the crc32 again */
for (i = 0, crc = ~0; i < nr; i++)
crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size);
- if (crc == check.bc_crc32e)
+ if (crc == check.bc_crc32e) {
+ ocfs2_blockcheck_inc_recover(stats);
goto out;
+ }
mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n",
(unsigned int)check.bc_crc32e, (unsigned int)crc);
@@ -448,9 +612,11 @@ int ocfs2_validate_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc)
{
int rc = 0;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
- if (ocfs2_meta_ecc(OCFS2_SB(sb)))
- rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc);
+ if (ocfs2_meta_ecc(osb))
+ rc = ocfs2_block_check_validate(data, sb->s_blocksize, bc,
+ &osb->osb_ecc_stats);
return rc;
}
@@ -468,9 +634,11 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
struct ocfs2_block_check *bc)
{
int rc = 0;
+ struct ocfs2_super *osb = OCFS2_SB(sb);
- if (ocfs2_meta_ecc(OCFS2_SB(sb)))
- rc = ocfs2_block_check_validate_bhs(bhs, nr, bc);
+ if (ocfs2_meta_ecc(osb))
+ rc = ocfs2_block_check_validate_bhs(bhs, nr, bc,
+ &osb->osb_ecc_stats);
return rc;
}
diff --git a/fs/ocfs2/blockcheck.h b/fs/ocfs2/blockcheck.h
index 70ec3feda32..d4b69febf70 100644
--- a/fs/ocfs2/blockcheck.h
+++ b/fs/ocfs2/blockcheck.h
@@ -21,6 +21,24 @@
#define OCFS2_BLOCKCHECK_H
+/* Count errors and error correction from blockcheck.c */
+struct ocfs2_blockcheck_stats {
+ spinlock_t b_lock;
+ u64 b_check_count; /* Number of blocks we've checked */
+ u64 b_failure_count; /* Number of failed checksums */
+ u64 b_recover_count; /* Number of blocks fixed by ecc */
+
+ /*
+ * debugfs entries, used if this is passed to
+ * ocfs2_blockcheck_stats_debugfs_install()
+ */
+ struct dentry *b_debug_dir; /* Parent of the debugfs files */
+ struct dentry *b_debug_check; /* Exposes b_check_count */
+ struct dentry *b_debug_failure; /* Exposes b_failure_count */
+ struct dentry *b_debug_recover; /* Exposes b_recover_count */
+};
+
+
/* High level block API */
void ocfs2_compute_meta_ecc(struct super_block *sb, void *data,
struct ocfs2_block_check *bc);
@@ -37,11 +55,18 @@ int ocfs2_validate_meta_ecc_bhs(struct super_block *sb,
void ocfs2_block_check_compute(void *data, size_t blocksize,
struct ocfs2_block_check *bc);
int ocfs2_block_check_validate(void *data, size_t blocksize,
- struct ocfs2_block_check *bc);
+ struct ocfs2_block_check *bc,
+ struct ocfs2_blockcheck_stats *stats);
void ocfs2_block_check_compute_bhs(struct buffer_head **bhs, int nr,
struct ocfs2_block_check *bc);
int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr,
- struct ocfs2_block_check *bc);
+ struct ocfs2_block_check *bc,
+ struct ocfs2_blockcheck_stats *stats);
+
+/* Debug Initialization */
+int ocfs2_blockcheck_stats_debugfs_install(struct ocfs2_blockcheck_stats *stats,
+ struct dentry *parent);
+void ocfs2_blockcheck_stats_debugfs_remove(struct ocfs2_blockcheck_stats *stats);
/*
* Hamming code functions
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 7e72a81bc2d..696c32e5071 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -48,34 +48,33 @@
* only emit the appropriage printk() when the caller passes in a constant
* mask, as is almost always the case.
*
- * All this bitmask nonsense is hidden from the /proc interface so that Joel
- * doesn't have an aneurism. Reading the file gives a straight forward
- * indication of which bits are on or off:
- * ENTRY off
- * EXIT off
+ * All this bitmask nonsense is managed from the files under
+ * /sys/fs/o2cb/logmask/. Reading the files gives a straightforward
+ * indication of which bits are allowed (allow) or denied (off/deny).
+ * ENTRY deny
+ * EXIT deny
* TCP off
* MSG off
* SOCKET off
- * ERROR off
- * NOTICE on
+ * ERROR allow
+ * NOTICE allow
*
* Writing changes the state of a given bit and requires a strictly formatted
* single write() call:
*
- * write(fd, "ENTRY on", 8);
+ * write(fd, "allow", 5);
*
- * would turn the entry bit on. "1" is also accepted in the place of "on", and
- * "off" and "0" behave as expected.
+ * Echoing allow/deny/off string into the logmask files can flip the bits
+ * on or off as expected; here is the bash script for example:
*
- * Some trivial shell can flip all the bits on or off:
+ * log_mask="/sys/fs/o2cb/log_mask"
+ * for node in ENTRY EXIT TCP MSG SOCKET ERROR NOTICE; do
+ * echo allow >"$log_mask"/"$node"
+ * done
*
- * log_mask="/proc/fs/ocfs2_nodemanager/log_mask"
- * cat $log_mask | (
- * while read bit status; do
- * # $1 is "on" or "off", say
- * echo "$bit $1" > $log_mask
- * done
- * )
+ * The debugfs.ocfs2 tool can also flip the bits with the -l option:
+ *
+ * debugfs.ocfs2 -l TCP allow
*/
/* for task_struct */
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 9fbe849f634..334f231a422 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -974,7 +974,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn,
int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
size_t caller_veclen, u8 target_node, int *status)
{
- int ret, error = 0;
+ int ret;
struct o2net_msg *msg = NULL;
size_t veclen, caller_bytes = 0;
struct kvec *vec = NULL;
@@ -1015,10 +1015,7 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec,
o2net_set_nst_sock_time(&nst);
- ret = wait_event_interruptible(nn->nn_sc_wq,
- o2net_tx_can_proceed(nn, &sc, &error));
- if (!ret && error)
- ret = error;
+ wait_event(nn->nn_sc_wq, o2net_tx_can_proceed(nn, &sc, &ret));
if (ret)
goto out;
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c5752305627..b358f3bf896 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2900,6 +2900,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
alloc = ocfs2_clusters_for_bytes(sb, bytes);
dx_alloc = 0;
+ down_write(&oi->ip_alloc_sem);
+
if (ocfs2_supports_indexed_dirs(osb)) {
credits += ocfs2_add_dir_index_credits(sb);
@@ -2940,8 +2942,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
goto out;
}
- down_write(&oi->ip_alloc_sem);
-
/*
* Prepare for worst case allocation scenario of two separate
* extents in the unindexed tree.
@@ -2953,7 +2953,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
mlog_errno(ret);
- goto out_sem;
+ goto out;
}
if (vfs_dq_alloc_space_nodirty(dir,
@@ -3172,10 +3172,8 @@ out_commit:
ocfs2_commit_trans(osb, handle);
-out_sem:
- up_write(&oi->ip_alloc_sem);
-
out:
+ up_write(&oi->ip_alloc_sem);
if (data_ac)
ocfs2_free_alloc_context(data_ac);
if (meta_ac)
@@ -3322,11 +3320,15 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
brelse(new_bh);
new_bh = NULL;
+ down_write(&OCFS2_I(dir)->ip_alloc_sem);
+ drop_alloc_sem = 1;
dir_i_size = i_size_read(dir);
credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
goto do_extend;
}
+ down_write(&OCFS2_I(dir)->ip_alloc_sem);
+ drop_alloc_sem = 1;
dir_i_size = i_size_read(dir);
mlog(0, "extending dir %llu (i_size = %lld)\n",
(unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
@@ -3370,9 +3372,6 @@ do_extend:
credits++; /* For attaching the new dirent block to the
* dx_root */
- down_write(&OCFS2_I(dir)->ip_alloc_sem);
- drop_alloc_sem = 1;
-
handle = ocfs2_start_trans(osb, credits);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
@@ -3435,10 +3434,10 @@ bail_bh:
*new_de_bh = new_bh;
get_bh(*new_de_bh);
bail:
- if (drop_alloc_sem)
- up_write(&OCFS2_I(dir)->ip_alloc_sem);
if (handle)
ocfs2_commit_trans(osb, handle);
+ if (drop_alloc_sem)
+ up_write(&OCFS2_I(dir)->ip_alloc_sem);
if (data_ac)
ocfs2_free_alloc_context(data_ac);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index e15fc7d5082..6cdeaa76f27 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -248,6 +248,10 @@ static struct ocfs2_lock_res_ops ocfs2_nfs_sync_lops = {
.flags = 0,
};
+static struct ocfs2_lock_res_ops ocfs2_orphan_scan_lops = {
+ .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB,
+};
+
static struct ocfs2_lock_res_ops ocfs2_dentry_lops = {
.get_osb = ocfs2_get_dentry_osb,
.post_unlock = ocfs2_dentry_post_unlock,
@@ -637,6 +641,19 @@ static void ocfs2_nfs_sync_lock_res_init(struct ocfs2_lock_res *res,
&ocfs2_nfs_sync_lops, osb);
}
+static void ocfs2_orphan_scan_lock_res_init(struct ocfs2_lock_res *res,
+ struct ocfs2_super *osb)
+{
+ struct ocfs2_orphan_scan_lvb *lvb;
+
+ ocfs2_lock_res_init_once(res);
+ ocfs2_build_lock_name(OCFS2_LOCK_TYPE_ORPHAN_SCAN, 0, 0, res->l_name);
+ ocfs2_lock_res_init_common(osb, res, OCFS2_LOCK_TYPE_ORPHAN_SCAN,
+ &ocfs2_orphan_scan_lops, osb);
+ lvb = ocfs2_dlm_lvb(&res->l_lksb);
+ lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
+}
+
void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
struct ocfs2_file_private *fp)
{
@@ -2352,6 +2369,37 @@ void ocfs2_inode_unlock(struct inode *inode,
mlog_exit_void();
}
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex)
+{
+ struct ocfs2_lock_res *lockres;
+ struct ocfs2_orphan_scan_lvb *lvb;
+ int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+ int status = 0;
+
+ lockres = &osb->osb_orphan_scan.os_lockres;
+ status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
+ if (status < 0)
+ return status;
+
+ lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+ if (lvb->lvb_version == OCFS2_ORPHAN_LVB_VERSION)
+ *seqno = be32_to_cpu(lvb->lvb_os_seqno);
+ return status;
+}
+
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex)
+{
+ struct ocfs2_lock_res *lockres;
+ struct ocfs2_orphan_scan_lvb *lvb;
+ int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
+
+ lockres = &osb->osb_orphan_scan.os_lockres;
+ lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
+ lvb->lvb_version = OCFS2_ORPHAN_LVB_VERSION;
+ lvb->lvb_os_seqno = cpu_to_be32(seqno);
+ ocfs2_cluster_unlock(osb, lockres, level);
+}
+
int ocfs2_super_lock(struct ocfs2_super *osb,
int ex)
{
@@ -2842,6 +2890,7 @@ local:
ocfs2_super_lock_res_init(&osb->osb_super_lockres, osb);
ocfs2_rename_lock_res_init(&osb->osb_rename_lockres, osb);
ocfs2_nfs_sync_lock_res_init(&osb->osb_nfs_sync_lockres, osb);
+ ocfs2_orphan_scan_lock_res_init(&osb->osb_orphan_scan.os_lockres, osb);
osb->cconn = conn;
@@ -2878,6 +2927,7 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb,
ocfs2_lock_res_free(&osb->osb_super_lockres);
ocfs2_lock_res_free(&osb->osb_rename_lockres);
ocfs2_lock_res_free(&osb->osb_nfs_sync_lockres);
+ ocfs2_lock_res_free(&osb->osb_orphan_scan.os_lockres);
ocfs2_cluster_disconnect(osb->cconn, hangup_pending);
osb->cconn = NULL;
@@ -3061,6 +3111,7 @@ static void ocfs2_drop_osb_locks(struct ocfs2_super *osb)
ocfs2_simple_drop_lockres(osb, &osb->osb_super_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_rename_lockres);
ocfs2_simple_drop_lockres(osb, &osb->osb_nfs_sync_lockres);
+ ocfs2_simple_drop_lockres(osb, &osb->osb_orphan_scan.os_lockres);
}
int ocfs2_drop_inode_locks(struct inode *inode)
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index e1fd5721cd7..31b90d7b8f5 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -62,6 +62,14 @@ struct ocfs2_qinfo_lvb {
__be32 lvb_free_entry;
};
+#define OCFS2_ORPHAN_LVB_VERSION 1
+
+struct ocfs2_orphan_scan_lvb {
+ __u8 lvb_version;
+ __u8 lvb_reserved[3];
+ __be32 lvb_os_seqno;
+};
+
/* ocfs2_inode_lock_full() 'arg_flags' flags */
/* don't wait on recovery. */
#define OCFS2_META_LOCK_RECOVERY (0x01)
@@ -113,6 +121,9 @@ int ocfs2_super_lock(struct ocfs2_super *osb,
int ex);
void ocfs2_super_unlock(struct ocfs2_super *osb,
int ex);
+int ocfs2_orphan_scan_lock(struct ocfs2_super *osb, u32 *seqno, int ex);
+void ocfs2_orphan_scan_unlock(struct ocfs2_super *osb, u32 seqno, int ex);
+
int ocfs2_rename_lock(struct ocfs2_super *osb);
void ocfs2_rename_unlock(struct ocfs2_super *osb);
int ocfs2_nfs_sync_lock(struct ocfs2_super *osb, int ex);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index c2a87c885b7..07267e0da90 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -187,6 +187,9 @@ static int ocfs2_sync_file(struct file *file,
if (err)
goto bail;
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ goto bail;
+
journal = osb->journal->j_journal;
err = jbd2_journal_force_commit(journal);
@@ -894,9 +897,9 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
struct ocfs2_super *osb = OCFS2_SB(sb);
struct buffer_head *bh = NULL;
handle_t *handle = NULL;
- int locked[MAXQUOTAS] = {0, 0};
- int credits, qtype;
- struct ocfs2_mem_dqinfo *oinfo;
+ int qtype;
+ struct dquot *transfer_from[MAXQUOTAS] = { };
+ struct dquot *transfer_to[MAXQUOTAS] = { };
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
@@ -969,30 +972,37 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
if ((attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
(attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
- credits = OCFS2_INODE_UPDATE_CREDITS;
+ /*
+ * Gather pointers to quota structures so that allocation /
+ * freeing of quota structures happens here and not inside
+ * vfs_dq_transfer() where we have problems with lock ordering
+ */
if (attr->ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid
&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
- oinfo = sb_dqinfo(sb, USRQUOTA)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0)
+ transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
+ USRQUOTA);
+ transfer_from[USRQUOTA] = dqget(sb, inode->i_uid,
+ USRQUOTA);
+ if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) {
+ status = -ESRCH;
goto bail_unlock;
- credits += ocfs2_calc_qinit_credits(sb, USRQUOTA) +
- ocfs2_calc_qdel_credits(sb, USRQUOTA);
- locked[USRQUOTA] = 1;
+ }
}
if (attr->ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid
&& OCFS2_HAS_RO_COMPAT_FEATURE(sb,
OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
- oinfo = sb_dqinfo(sb, GRPQUOTA)->dqi_priv;
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0)
+ transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
+ GRPQUOTA);
+ transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid,
+ GRPQUOTA);
+ if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) {
+ status = -ESRCH;
goto bail_unlock;
- credits += ocfs2_calc_qinit_credits(sb, GRPQUOTA) +
- ocfs2_calc_qdel_credits(sb, GRPQUOTA);
- locked[GRPQUOTA] = 1;
+ }
}
- handle = ocfs2_start_trans(osb, credits);
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS +
+ 2 * ocfs2_quota_trans_credits(sb));
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@@ -1030,12 +1040,6 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
bail_commit:
ocfs2_commit_trans(osb, handle);
bail_unlock:
- for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
- if (!locked[qtype])
- continue;
- oinfo = sb_dqinfo(sb, qtype)->dqi_priv;
- ocfs2_unlock_global_qf(oinfo, 1);
- }
ocfs2_inode_unlock(inode, 1);
bail_unlock_rw:
if (size_change)
@@ -1043,6 +1047,12 @@ bail_unlock_rw:
bail:
brelse(bh);
+ /* Release quota pointers in case we acquired them */
+ for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
+ dqput(transfer_to[qtype]);
+ dqput(transfer_from[qtype]);
+ }
+
if (!status && attr->ia_valid & ATTR_MODE) {
status = ocfs2_acl_chmod(inode);
if (status < 0)
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index a20a0f1e37f..4a3b9e6b31a 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -28,6 +28,8 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/kthread.h>
+#include <linux/time.h>
+#include <linux/random.h>
#define MLOG_MASK_PREFIX ML_JOURNAL
#include <cluster/masklog.h>
@@ -52,6 +54,8 @@
DEFINE_SPINLOCK(trans_inc_lock);
+#define ORPHAN_SCAN_SCHEDULE_TIMEOUT 300000
+
static int ocfs2_force_read_journal(struct inode *inode);
static int ocfs2_recover_node(struct ocfs2_super *osb,
int node_num, int slot_num);
@@ -1841,6 +1845,113 @@ bail:
return status;
}
+/*
+ * Scan timer should get fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT. Add some
+ * randomness to the timeout to minimize multple nodes firing the timer at the
+ * same time.
+ */
+static inline unsigned long ocfs2_orphan_scan_timeout(void)
+{
+ unsigned long time;
+
+ get_random_bytes(&time, sizeof(time));
+ time = ORPHAN_SCAN_SCHEDULE_TIMEOUT + (time % 5000);
+ return msecs_to_jiffies(time);
+}
+
+/*
+ * ocfs2_queue_orphan_scan calls ocfs2_queue_recovery_completion for
+ * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This
+ * is done to catch any orphans that are left over in orphan directories.
+ *
+ * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT
+ * seconds. It gets an EX lock on os_lockres and checks sequence number
+ * stored in LVB. If the sequence number has changed, it means some other
+ * node has done the scan. This node skips the scan and tracks the
+ * sequence number. If the sequence number didn't change, it means a scan
+ * hasn't happened. The node queues a scan and increments the
+ * sequence number in the LVB.
+ */
+void ocfs2_queue_orphan_scan(struct ocfs2_super *osb)
+{
+ struct ocfs2_orphan_scan *os;
+ int status, i;
+ u32 seqno = 0;
+
+ os = &osb->osb_orphan_scan;
+
+ status = ocfs2_orphan_scan_lock(osb, &seqno, DLM_LOCK_EX);
+ if (status < 0) {
+ if (status != -EAGAIN)
+ mlog_errno(status);
+ goto out;
+ }
+
+ if (os->os_seqno != seqno) {
+ os->os_seqno = seqno;
+ goto unlock;
+ }
+
+ for (i = 0; i < osb->max_slots; i++)
+ ocfs2_queue_recovery_completion(osb->journal, i, NULL, NULL,
+ NULL);
+ /*
+ * We queued a recovery on orphan slots, increment the sequence
+ * number and update LVB so other node will skip the scan for a while
+ */
+ seqno++;
+ os->os_count++;
+ os->os_scantime = CURRENT_TIME;
+unlock:
+ ocfs2_orphan_scan_unlock(osb, seqno, DLM_LOCK_EX);
+out:
+ return;
+}
+
+/* Worker task that gets fired every ORPHAN_SCAN_SCHEDULE_TIMEOUT millsec */
+void ocfs2_orphan_scan_work(struct work_struct *work)
+{
+ struct ocfs2_orphan_scan *os;
+ struct ocfs2_super *osb;
+
+ os = container_of(work, struct ocfs2_orphan_scan,
+ os_orphan_scan_work.work);
+ osb = os->os_osb;
+
+ mutex_lock(&os->os_lock);
+ ocfs2_queue_orphan_scan(osb);
+ schedule_delayed_work(&os->os_orphan_scan_work,
+ ocfs2_orphan_scan_timeout());
+ mutex_unlock(&os->os_lock);
+}
+
+void ocfs2_orphan_scan_stop(struct ocfs2_super *osb)
+{
+ struct ocfs2_orphan_scan *os;
+
+ os = &osb->osb_orphan_scan;
+ mutex_lock(&os->os_lock);
+ cancel_delayed_work(&os->os_orphan_scan_work);
+ mutex_unlock(&os->os_lock);
+}
+
+int ocfs2_orphan_scan_init(struct ocfs2_super *osb)
+{
+ struct ocfs2_orphan_scan *os;
+
+ os = &osb->osb_orphan_scan;
+ os->os_osb = osb;
+ os->os_count = 0;
+ os->os_scantime = CURRENT_TIME;
+ mutex_init(&os->os_lock);
+
+ INIT_DELAYED_WORK(&os->os_orphan_scan_work,
+ ocfs2_orphan_scan_work);
+ schedule_delayed_work(&os->os_orphan_scan_work,
+ ocfs2_orphan_scan_timeout());
+ return 0;
+}
+
struct ocfs2_orphan_filldir_priv {
struct inode *head;
struct ocfs2_super *osb;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index eb7b76331eb..61045eeb3f6 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -144,6 +144,10 @@ static inline void ocfs2_inode_set_new(struct ocfs2_super *osb,
}
/* Exported only for the journal struct init code in super.c. Do not call. */
+int ocfs2_orphan_scan_init(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_stop(struct ocfs2_super *osb);
+void ocfs2_orphan_scan_exit(struct ocfs2_super *osb);
+
void ocfs2_complete_recovery(struct work_struct *work);
void ocfs2_wait_for_recovery(struct ocfs2_super *osb);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 1386281950d..18c1d9ec1c9 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -47,6 +47,9 @@
#include "ocfs2_fs.h"
#include "ocfs2_lockid.h"
+/* For struct ocfs2_blockcheck_stats */
+#include "blockcheck.h"
+
/* Most user visible OCFS2 inodes will have very few pieces of
* metadata, but larger files (including bitmaps, etc) must be taken
* into account when designing an access scheme. We allow a small
@@ -151,6 +154,16 @@ struct ocfs2_lock_res {
#endif
};
+struct ocfs2_orphan_scan {
+ struct mutex os_lock;
+ struct ocfs2_super *os_osb;
+ struct ocfs2_lock_res os_lockres; /* lock to synchronize scans */
+ struct delayed_work os_orphan_scan_work;
+ struct timespec os_scantime; /* time this node ran the scan */
+ u32 os_count; /* tracks node specific scans */
+ u32 os_seqno; /* tracks cluster wide scans */
+};
+
struct ocfs2_dlm_debug {
struct kref d_refcnt;
struct dentry *d_locking_state;
@@ -295,6 +308,7 @@ struct ocfs2_super
struct ocfs2_dinode *local_alloc_copy;
struct ocfs2_quota_recovery *quota_rec;
+ struct ocfs2_blockcheck_stats osb_ecc_stats;
struct ocfs2_alloc_stats alloc_stats;
char dev_str[20]; /* "major,minor" of the device */
@@ -341,6 +355,8 @@ struct ocfs2_super
unsigned int *osb_orphan_wipes;
wait_queue_head_t osb_wipe_event;
+ struct ocfs2_orphan_scan osb_orphan_scan;
+
/* used to protect metaecc calculation check of xattr. */
spinlock_t osb_xattr_lock;
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index a53ce87481b..fcdba091af3 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -48,6 +48,7 @@ enum ocfs2_lock_type {
OCFS2_LOCK_TYPE_FLOCK,
OCFS2_LOCK_TYPE_QINFO,
OCFS2_LOCK_TYPE_NFS_SYNC,
+ OCFS2_LOCK_TYPE_ORPHAN_SCAN,
OCFS2_NUM_LOCK_TYPES
};
@@ -85,6 +86,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
case OCFS2_LOCK_TYPE_NFS_SYNC:
c = 'Y';
break;
+ case OCFS2_LOCK_TYPE_ORPHAN_SCAN:
+ c = 'P';
+ break;
default:
c = '\0';
}
@@ -104,6 +108,7 @@ static char *ocfs2_lock_type_strings[] = {
[OCFS2_LOCK_TYPE_OPEN] = "Open",
[OCFS2_LOCK_TYPE_FLOCK] = "Flock",
[OCFS2_LOCK_TYPE_QINFO] = "Quota",
+ [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
};
static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 1ed0f7c8686..edfa60cd155 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -421,6 +421,7 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
if (!dquot->dq_off) { /* No real quota entry? */
/* Upgrade to exclusive lock for allocation */
+ ocfs2_qinfo_unlock(info, 0);
err = ocfs2_qinfo_lock(info, 1);
if (err < 0)
goto out_qlock;
@@ -435,7 +436,8 @@ int ocfs2_global_read_dquot(struct dquot *dquot)
out_qlock:
if (ex)
ocfs2_qinfo_unlock(info, 1);
- ocfs2_qinfo_unlock(info, 0);
+ else
+ ocfs2_qinfo_unlock(info, 0);
out:
if (err < 0)
mlog_errno(err);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 07deec5e972..5a460fa8255 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -444,10 +444,6 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
mlog_entry("ino=%lu type=%u", (unsigned long)lqinode->i_ino, type);
- status = ocfs2_lock_global_qf(oinfo, 1);
- if (status < 0)
- goto out;
-
list_for_each_entry_safe(rchunk, next, &(rec->r_list[type]), rc_list) {
chunk = rchunk->rc_chunk;
hbh = NULL;
@@ -480,12 +476,18 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
type);
goto out_put_bh;
}
+ status = ocfs2_lock_global_qf(oinfo, 1);
+ if (status < 0) {
+ mlog_errno(status);
+ goto out_put_dquot;
+ }
+
handle = ocfs2_start_trans(OCFS2_SB(sb),
OCFS2_QSYNC_CREDITS);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
- goto out_put_dquot;
+ goto out_drop_lock;
}
mutex_lock(&sb_dqopt(sb)->dqio_mutex);
spin_lock(&dq_data_lock);
@@ -523,6 +525,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
out_commit:
mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
ocfs2_commit_trans(OCFS2_SB(sb), handle);
+out_drop_lock:
+ ocfs2_unlock_global_qf(oinfo, 1);
out_put_dquot:
dqput(dquot);
out_put_bh:
@@ -537,8 +541,6 @@ out_put_bh:
if (status < 0)
break;
}
- ocfs2_unlock_global_qf(oinfo, 1);
-out:
if (status < 0)
free_recovery_list(&(rec->r_list[type]));
mlog_exit(status);
@@ -655,6 +657,9 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
struct ocfs2_quota_recovery *rec;
int locked = 0;
+ /* We don't need the lock and we have to acquire quota file locks
+ * which will later depend on this lock */
+ mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
info->dqi_maxblimit = 0x7fffffffffffffffLL;
info->dqi_maxilimit = 0x7fffffffffffffffLL;
oinfo = kmalloc(sizeof(struct ocfs2_mem_dqinfo), GFP_NOFS);
@@ -733,6 +738,7 @@ static int ocfs2_local_read_info(struct super_block *sb, int type)
goto out_err;
}
+ mutex_lock(&sb_dqopt(sb)->dqio_mutex);
return 0;
out_err:
if (oinfo) {
@@ -746,6 +752,7 @@ out_err:
kfree(oinfo);
}
brelse(bh);
+ mutex_lock(&sb_dqopt(sb)->dqio_mutex);
return -1;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 5c6163f5503..d33767f17ba 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -42,6 +42,7 @@
#include <linux/mount.h>
#include <linux/seq_file.h>
#include <linux/quotaops.h>
+#include <linux/smp_lock.h>
#define MLOG_MASK_PREFIX ML_SUPER
#include <cluster/masklog.h>
@@ -118,15 +119,16 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
static int ocfs2_check_volume(struct ocfs2_super *osb);
static int ocfs2_verify_volume(struct ocfs2_dinode *di,
struct buffer_head *bh,
- u32 sectsize);
+ u32 sectsize,
+ struct ocfs2_blockcheck_stats *stats);
static int ocfs2_initialize_super(struct super_block *sb,
struct buffer_head *bh,
- int sector_size);
+ int sector_size,
+ struct ocfs2_blockcheck_stats *stats);
static int ocfs2_get_sector(struct super_block *sb,
struct buffer_head **bh,
int block,
int sect_size);
-static void ocfs2_write_super(struct super_block *sb);
static struct inode *ocfs2_alloc_inode(struct super_block *sb);
static void ocfs2_destroy_inode(struct inode *inode);
static int ocfs2_susp_quotas(struct ocfs2_super *osb, int unsuspend);
@@ -141,7 +143,6 @@ static const struct super_operations ocfs2_sops = {
.clear_inode = ocfs2_clear_inode,
.delete_inode = ocfs2_delete_inode,
.sync_fs = ocfs2_sync_fs,
- .write_super = ocfs2_write_super,
.put_super = ocfs2_put_super,
.remount_fs = ocfs2_remount,
.show_options = ocfs2_show_options,
@@ -208,6 +209,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
int i;
struct ocfs2_cluster_connection *cconn = osb->cconn;
struct ocfs2_recovery_map *rm = osb->recovery_map;
+ struct ocfs2_orphan_scan *os;
out += snprintf(buf + out, len - out,
"%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
@@ -309,6 +311,13 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
i, osb->slot_recovery_generations[i]);
}
+ os = &osb->osb_orphan_scan;
+ out += snprintf(buf + out, len - out, "Orphan Scan=> ");
+ out += snprintf(buf + out, len - out, "Local: %u Global: %u ",
+ os->os_count, os->os_seqno);
+ out += snprintf(buf + out, len - out, " Last Scan: %lu seconds ago\n",
+ (get_seconds() - os->os_scantime.tv_sec));
+
return out;
}
@@ -365,24 +374,12 @@ static struct file_operations ocfs2_osb_debug_fops = {
.llseek = generic_file_llseek,
};
-/*
- * write_super and sync_fs ripped right out of ext3.
- */
-static void ocfs2_write_super(struct super_block *sb)
-{
- if (mutex_trylock(&sb->s_lock) != 0)
- BUG();
- sb->s_dirt = 0;
-}
-
static int ocfs2_sync_fs(struct super_block *sb, int wait)
{
int status;
tid_t target;
struct ocfs2_super *osb = OCFS2_SB(sb);
- sb->s_dirt = 0;
-
if (ocfs2_is_hard_readonly(osb))
return -EROFS;
@@ -595,6 +592,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
struct mount_options parsed_options;
struct ocfs2_super *osb = OCFS2_SB(sb);
+ lock_kernel();
+
if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
ret = -EINVAL;
goto out;
@@ -698,12 +697,14 @@ unlock_osb:
ocfs2_set_journal_params(osb);
}
out:
+ unlock_kernel();
return ret;
}
static int ocfs2_sb_probe(struct super_block *sb,
struct buffer_head **bh,
- int *sector_size)
+ int *sector_size,
+ struct ocfs2_blockcheck_stats *stats)
{
int status, tmpstat;
struct ocfs1_vol_disk_hdr *hdr;
@@ -769,7 +770,8 @@ static int ocfs2_sb_probe(struct super_block *sb,
goto bail;
}
di = (struct ocfs2_dinode *) (*bh)->b_data;
- status = ocfs2_verify_volume(di, *bh, blksize);
+ memset(stats, 0, sizeof(struct ocfs2_blockcheck_stats));
+ status = ocfs2_verify_volume(di, *bh, blksize, stats);
if (status >= 0)
goto bail;
brelse(*bh);
@@ -975,6 +977,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
struct ocfs2_super *osb = NULL;
struct buffer_head *bh = NULL;
char nodestr[8];
+ struct ocfs2_blockcheck_stats stats;
mlog_entry("%p, %p, %i", sb, data, silent);
@@ -984,13 +987,13 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
}
/* probe for superblock */
- status = ocfs2_sb_probe(sb, &bh, &sector_size);
+ status = ocfs2_sb_probe(sb, &bh, &sector_size, &stats);
if (status < 0) {
mlog(ML_ERROR, "superblock probe failed!\n");
goto read_super_error;
}
- status = ocfs2_initialize_super(sb, bh, sector_size);
+ status = ocfs2_initialize_super(sb, bh, sector_size, &stats);
osb = OCFS2_SB(sb);
if (status < 0) {
mlog_errno(status);
@@ -1100,6 +1103,18 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
goto read_super_error;
}
+ if (ocfs2_meta_ecc(osb)) {
+ status = ocfs2_blockcheck_stats_debugfs_install(
+ &osb->osb_ecc_stats,
+ osb->osb_debug_root);
+ if (status) {
+ mlog(ML_ERROR,
+ "Unable to create blockcheck statistics "
+ "files\n");
+ goto read_super_error;
+ }
+ }
+
status = ocfs2_mount_volume(sb);
if (osb->root_inode)
inode = igrab(osb->root_inode);
@@ -1550,9 +1565,13 @@ static void ocfs2_put_super(struct super_block *sb)
{
mlog_entry("(0x%p)\n", sb);
+ lock_kernel();
+
ocfs2_sync_blockdev(sb);
ocfs2_dismount_volume(sb, 0);
+ unlock_kernel();
+
mlog_exit_void();
}
@@ -1766,13 +1785,8 @@ static int ocfs2_mount_volume(struct super_block *sb)
}
status = ocfs2_truncate_log_init(osb);
- if (status < 0) {
+ if (status < 0)
mlog_errno(status);
- goto leave;
- }
-
- if (ocfs2_mount_local(osb))
- goto leave;
leave:
if (unlock_super)
@@ -1802,6 +1816,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
ocfs2_truncate_log_shutdown(osb);
+ ocfs2_orphan_scan_stop(osb);
+
/* This will disable recovery and flush any recovery work. */
ocfs2_recovery_exit(osb);
@@ -1839,6 +1855,7 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
if (osb->cconn)
ocfs2_dlm_shutdown(osb, hangup_needed);
+ ocfs2_blockcheck_stats_debugfs_remove(&osb->osb_ecc_stats);
debugfs_remove(osb->osb_debug_root);
if (hangup_needed)
@@ -1886,7 +1903,8 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu
static int ocfs2_initialize_super(struct super_block *sb,
struct buffer_head *bh,
- int sector_size)
+ int sector_size,
+ struct ocfs2_blockcheck_stats *stats)
{
int status;
int i, cbits, bbits;
@@ -1945,6 +1963,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
atomic_set(&osb->alloc_stats.bg_allocs, 0);
atomic_set(&osb->alloc_stats.bg_extends, 0);
+ /* Copy the blockcheck stats from the superblock probe */
+ osb->osb_ecc_stats = *stats;
+
ocfs2_init_node_maps(osb);
snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u",
@@ -1957,6 +1978,13 @@ static int ocfs2_initialize_super(struct super_block *sb,
goto bail;
}
+ status = ocfs2_orphan_scan_init(osb);
+ if (status) {
+ mlog(ML_ERROR, "Unable to initialize delayed orphan scan\n");
+ mlog_errno(status);
+ goto bail;
+ }
+
init_waitqueue_head(&osb->checkpoint_event);
atomic_set(&osb->needs_checkpoint, 0);
@@ -2175,7 +2203,8 @@ bail:
*/
static int ocfs2_verify_volume(struct ocfs2_dinode *di,
struct buffer_head *bh,
- u32 blksz)
+ u32 blksz,
+ struct ocfs2_blockcheck_stats *stats)
{
int status = -EAGAIN;
@@ -2188,7 +2217,8 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
OCFS2_FEATURE_INCOMPAT_META_ECC) {
status = ocfs2_block_check_validate(bh->b_data,
bh->b_size,
- &di->i_check);
+ &di->i_check,
+ stats);
if (status)
goto out;
}
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 15631019dc6..ba320e25074 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3154,7 +3154,7 @@ static int ocfs2_iterate_xattr_buckets(struct inode *inode,
le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
if (func) {
ret = func(inode, bucket, para);
- if (ret)
+ if (ret && ret != -ERANGE)
mlog_errno(ret);
/* Fall through to bucket_relse() */
}
@@ -3261,7 +3261,8 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
ocfs2_list_xattr_bucket,
&xl);
if (ret) {
- mlog_errno(ret);
+ if (ret != -ERANGE)
+ mlog_errno(ret);
goto out;
}
diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index 834b2331f6b..d17e774eaf4 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -11,21 +11,6 @@
#include <linux/mpage.h>
#include "omfs.h"
-static int omfs_sync_file(struct file *file, struct dentry *dentry,
- int datasync)
-{
- struct inode *inode = dentry->d_inode;
- int err;
-
- err = sync_mapping_buffers(inode->i_mapping);
- if (!(inode->i_state & I_DIRTY))
- return err;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return err;
- err |= omfs_sync_inode(inode);
- return err ? -EIO : 0;
-}
-
static u32 omfs_max_extents(struct omfs_sb_info *sbi, int offset)
{
return (sbi->s_sys_blocksize - offset -
@@ -344,7 +329,7 @@ struct file_operations omfs_file_operations = {
.aio_read = generic_file_aio_read,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = omfs_sync_file,
+ .fsync = simple_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/open.c b/fs/open.c
index bdfbf03615a..7200e23d925 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -612,7 +612,7 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, mode_t, mode)
audit_inode(NULL, dentry);
- err = mnt_want_write(file->f_path.mnt);
+ err = mnt_want_write_file(file);
if (err)
goto out_putf;
mutex_lock(&inode->i_mutex);
@@ -761,7 +761,7 @@ SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
if (!file)
goto out;
- error = mnt_want_write(file->f_path.mnt);
+ error = mnt_want_write_file(file);
if (error)
goto out_fput;
dentry = file->f_path.dentry;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 0af36085eb2..1a9c7878f86 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -556,27 +556,49 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
/* add partitions */
for (p = 1; p < state->limit; p++) {
- sector_t size = state->parts[p].size;
- sector_t from = state->parts[p].from;
+ sector_t size, from;
+try_scan:
+ size = state->parts[p].size;
if (!size)
continue;
+
+ from = state->parts[p].from;
if (from >= get_capacity(disk)) {
printk(KERN_WARNING
"%s: p%d ignored, start %llu is behind the end of the disk\n",
disk->disk_name, p, (unsigned long long) from);
continue;
}
+
if (from + size > get_capacity(disk)) {
- /*
- * we can not ignore partitions of broken tables
- * created by for example camera firmware, but we
- * limit them to the end of the disk to avoid
- * creating invalid block devices
- */
+ struct block_device_operations *bdops = disk->fops;
+ unsigned long long capacity;
+
printk(KERN_WARNING
- "%s: p%d size %llu limited to end of disk\n",
+ "%s: p%d size %llu exceeds device capacity, ",
disk->disk_name, p, (unsigned long long) size);
- size = get_capacity(disk) - from;
+
+ if (bdops->set_capacity &&
+ (disk->flags & GENHD_FL_NATIVE_CAPACITY) == 0) {
+ printk(KERN_CONT "enabling native capacity\n");
+ capacity = bdops->set_capacity(disk, ~0ULL);
+ disk->flags |= GENHD_FL_NATIVE_CAPACITY;
+ if (capacity > get_capacity(disk)) {
+ set_capacity(disk, capacity);
+ check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
+ }
+ goto try_scan;
+ } else {
+ /*
+ * we can not ignore partitions of broken tables
+ * created by for example camera firmware, but
+ * we limit them to the end of the disk to avoid
+ * creating invalid block devices
+ */
+ printk(KERN_CONT "limited to end of disk\n");
+ size = get_capacity(disk) - from;
+ }
}
part = add_partition(disk, p, from, size,
state->parts[p].flags);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index f6db9618a88..753ca37002c 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -92,3 +92,28 @@ struct pde_opener {
struct list_head lh;
};
void pde_users_dec(struct proc_dir_entry *pde);
+
+extern spinlock_t proc_subdir_lock;
+
+struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *);
+int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
+unsigned long task_vsize(struct mm_struct *);
+int task_statm(struct mm_struct *, int *, int *, int *, int *);
+void task_mem(struct seq_file *, struct mm_struct *);
+
+struct proc_dir_entry *de_get(struct proc_dir_entry *de);
+void de_put(struct proc_dir_entry *de);
+
+extern struct vfsmount *proc_mnt;
+int proc_fill_super(struct super_block *);
+struct inode *proc_get_inode(struct super_block *, unsigned int, struct proc_dir_entry *);
+
+/*
+ * These are generic /proc routines that use the internal
+ * "struct proc_dir_entry" tree to traverse the filesystem.
+ *
+ * The /proc root directory has extended versions to take care
+ * of the /proc/<pid> subdirectories.
+ */
+int proc_readdir(struct file *, void *, filldir_t);
+struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *);
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index de2bba5a344..fc6c3025bef 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -11,6 +11,7 @@
#include <linux/string.h>
#include <asm/prom.h>
#include <asm/uaccess.h>
+#include "internal.h"
#ifndef HAVE_ARCH_DEVTREE_FIXUPS
static inline void set_node_proc_entry(struct device_node *np,
diff --git a/fs/qnx4/Makefile b/fs/qnx4/Makefile
index 502d7fe98ba..e4d408cc547 100644
--- a/fs/qnx4/Makefile
+++ b/fs/qnx4/Makefile
@@ -4,4 +4,4 @@
obj-$(CONFIG_QNX4FS_FS) += qnx4.o
-qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o fsync.o
+qnx4-objs := inode.o dir.o namei.o file.o bitmap.o truncate.o
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 8425cf6e962..e1cd061a25f 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -13,14 +13,9 @@
* 28-06-1998 by Frank Denis : qnx4_free_inode (to be fixed) .
*/
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
#include <linux/buffer_head.h>
#include <linux/bitops.h>
+#include "qnx4.h"
#if 0
int qnx4_new_block(struct super_block *sb)
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c
index ea9ffefb48a..003c68f3238 100644
--- a/fs/qnx4/dir.c
+++ b/fs/qnx4/dir.c
@@ -11,14 +11,9 @@
* 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support.
*/
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/stat.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
-
+#include "qnx4.h"
static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
@@ -84,7 +79,7 @@ const struct file_operations qnx4_dir_operations =
{
.read = generic_read_dir,
.readdir = qnx4_readdir,
- .fsync = file_fsync,
+ .fsync = simple_fsync,
};
const struct inode_operations qnx4_dir_inode_operations =
diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c
index 867f42b0203..09b170ac936 100644
--- a/fs/qnx4/file.c
+++ b/fs/qnx4/file.c
@@ -12,8 +12,7 @@
* 27-06-1998 by Frank Denis : file overwriting.
*/
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
+#include "qnx4.h"
/*
* We have mostly NULL's here: the current defaults are ok for
@@ -29,7 +28,7 @@ const struct file_operations qnx4_file_operations =
#ifdef CONFIG_QNX4FS_RW
.write = do_sync_write,
.aio_write = generic_file_aio_write,
- .fsync = qnx4_sync_file,
+ .fsync = simple_fsync,
#endif
};
diff --git a/fs/qnx4/fsync.c b/fs/qnx4/fsync.c
deleted file mode 100644
index aa3b19544be..00000000000
--- a/fs/qnx4/fsync.c
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * QNX4 file system, Linux implementation.
- *
- * Version : 0.1
- *
- * Using parts of the xiafs filesystem.
- *
- * History :
- *
- * 24-03-1998 by Richard Frowijn : first release.
- */
-
-#include <linux/errno.h>
-#include <linux/time.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/smp_lock.h>
-#include <linux/buffer_head.h>
-
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-
-#include <asm/system.h>
-
-/*
- * The functions for qnx4 fs file synchronization.
- */
-
-#ifdef CONFIG_QNX4FS_RW
-
-static int sync_block(struct inode *inode, unsigned short *block, int wait)
-{
- struct buffer_head *bh;
- unsigned short tmp;
-
- if (!*block)
- return 0;
- tmp = *block;
- bh = sb_find_get_block(inode->i_sb, *block);
- if (!bh)
- return 0;
- if (*block != tmp) {
- brelse(bh);
- return 1;
- }
- if (wait && buffer_req(bh) && !buffer_uptodate(bh)) {
- brelse(bh);
- return -1;
- }
- if (wait || !buffer_uptodate(bh) || !buffer_dirty(bh)) {
- brelse(bh);
- return 0;
- }
- ll_rw_block(WRITE, 1, &bh);
- atomic_dec(&bh->b_count);
- return 0;
-}
-
-#ifdef WTF
-static int sync_iblock(struct inode *inode, unsigned short *iblock,
- struct buffer_head **bh, int wait)
-{
- int rc;
- unsigned short tmp;
-
- *bh = NULL;
- tmp = *iblock;
- if (!tmp)
- return 0;
- rc = sync_block(inode, iblock, wait);
- if (rc)
- return rc;
- *bh = sb_bread(inode->i_sb, tmp);
- if (tmp != *iblock) {
- brelse(*bh);
- *bh = NULL;
- return 1;
- }
- if (!*bh)
- return -1;
- return 0;
-}
-#endif
-
-static int sync_direct(struct inode *inode, int wait)
-{
- int i;
- int rc, err = 0;
-
- for (i = 0; i < 7; i++) {
- rc = sync_block(inode,
- (unsigned short *) qnx4_raw_inode(inode)->di_first_xtnt.xtnt_blk + i, wait);
- if (rc > 0)
- break;
- if (rc)
- err = rc;
- }
- return err;
-}
-
-#ifdef WTF
-static int sync_indirect(struct inode *inode, unsigned short *iblock, int wait)
-{
- int i;
- struct buffer_head *ind_bh;
- int rc, err = 0;
-
- rc = sync_iblock(inode, iblock, &ind_bh, wait);
- if (rc || !ind_bh)
- return rc;
-
- for (i = 0; i < 512; i++) {
- rc = sync_block(inode,
- ((unsigned short *) ind_bh->b_data) + i,
- wait);
- if (rc > 0)
- break;
- if (rc)
- err = rc;
- }
- brelse(ind_bh);
- return err;
-}
-
-static int sync_dindirect(struct inode *inode, unsigned short *diblock,
- int wait)
-{
- int i;
- struct buffer_head *dind_bh;
- int rc, err = 0;
-
- rc = sync_iblock(inode, diblock, &dind_bh, wait);
- if (rc || !dind_bh)
- return rc;
-
- for (i = 0; i < 512; i++) {
- rc = sync_indirect(inode,
- ((unsigned short *) dind_bh->b_data) + i,
- wait);
- if (rc > 0)
- break;
- if (rc)
- err = rc;
- }
- brelse(dind_bh);
- return err;
-}
-#endif
-
-int qnx4_sync_file(struct file *file, struct dentry *dentry, int unused)
-{
- struct inode *inode = dentry->d_inode;
- int wait, err = 0;
-
- (void) file;
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)))
- return -EINVAL;
-
- lock_kernel();
- for (wait = 0; wait <= 1; wait++) {
- err |= sync_direct(inode, wait);
- }
- err |= qnx4_sync_inode(inode);
- unlock_kernel();
- return (err < 0) ? -EIO : 0;
-}
-
-#endif
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index fe1f0f31d11..681df5fcd16 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -13,19 +13,15 @@
*/
#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/highuid.h>
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/buffer_head.h>
-#include <linux/vfs.h>
-#include <asm/uaccess.h>
+#include <linux/writeback.h>
+#include <linux/statfs.h>
+#include "qnx4.h"
#define QNX4_VERSION 4
#define QNX4_BMNAME ".bitmap"
@@ -34,31 +30,6 @@ static const struct super_operations qnx4_sops;
#ifdef CONFIG_QNX4FS_RW
-int qnx4_sync_inode(struct inode *inode)
-{
- int err = 0;
-# if 0
- struct buffer_head *bh;
-
- bh = qnx4_update_inode(inode);
- if (bh && buffer_dirty(bh))
- {
- sync_dirty_buffer(bh);
- if (buffer_req(bh) && !buffer_uptodate(bh))
- {
- printk ("IO error syncing qnx4 inode [%s:%08lx]\n",
- inode->i_sb->s_id, inode->i_ino);
- err = -1;
- }
- brelse (bh);
- } else if (!bh) {
- err = -1;
- }
-# endif
-
- return err;
-}
-
static void qnx4_delete_inode(struct inode *inode)
{
QNX4DEBUG(("qnx4: deleting inode [%lu]\n", (unsigned long) inode->i_ino));
@@ -70,15 +41,7 @@ static void qnx4_delete_inode(struct inode *inode)
unlock_kernel();
}
-static void qnx4_write_super(struct super_block *sb)
-{
- lock_kernel();
- QNX4DEBUG(("qnx4: write_super\n"));
- sb->s_dirt = 0;
- unlock_kernel();
-}
-
-static int qnx4_write_inode(struct inode *inode, int unused)
+static int qnx4_write_inode(struct inode *inode, int do_sync)
{
struct qnx4_inode_entry *raw_inode;
int block, ino;
@@ -115,6 +78,16 @@ static int qnx4_write_inode(struct inode *inode, int unused)
raw_inode->di_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
raw_inode->di_first_xtnt.xtnt_size = cpu_to_le32(inode->i_blocks);
mark_buffer_dirty(bh);
+ if (do_sync) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
+ printk("qnx4: IO error syncing inode [%s:%08x]\n",
+ inode->i_sb->s_id, ino);
+ brelse(bh);
+ unlock_kernel();
+ return -EIO;
+ }
+ }
brelse(bh);
unlock_kernel();
return 0;
@@ -138,7 +111,6 @@ static const struct super_operations qnx4_sops =
#ifdef CONFIG_QNX4FS_RW
.write_inode = qnx4_write_inode,
.delete_inode = qnx4_delete_inode,
- .write_super = qnx4_write_super,
#endif
};
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c
index 775eed3a408..5972ed21493 100644
--- a/fs/qnx4/namei.c
+++ b/fs/qnx4/namei.c
@@ -12,16 +12,9 @@
* 04-07-1998 by Frank Denis : first step for rmdir/unlink.
*/
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/errno.h>
#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
+#include "qnx4.h"
/*
@@ -187,7 +180,7 @@ int qnx4_rmdir(struct inode *dir, struct dentry *dentry)
de->di_status = 0;
memset(de->di_fname, 0, sizeof de->di_fname);
de->di_mode = 0;
- mark_buffer_dirty(bh);
+ mark_buffer_dirty_inode(bh, dir);
clear_nlink(inode);
mark_inode_dirty(inode);
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
@@ -232,7 +225,7 @@ int qnx4_unlink(struct inode *dir, struct dentry *dentry)
de->di_status = 0;
memset(de->di_fname, 0, sizeof de->di_fname);
de->di_mode = 0;
- mark_buffer_dirty(bh);
+ mark_buffer_dirty_inode(bh, dir);
dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC;
mark_inode_dirty(dir);
inode->i_ctime = dir->i_ctime;
diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h
new file mode 100644
index 00000000000..9efc089454f
--- /dev/null
+++ b/fs/qnx4/qnx4.h
@@ -0,0 +1,57 @@
+#include <linux/fs.h>
+#include <linux/qnx4_fs.h>
+
+#define QNX4_DEBUG 0
+
+#if QNX4_DEBUG
+#define QNX4DEBUG(X) printk X
+#else
+#define QNX4DEBUG(X) (void) 0
+#endif
+
+struct qnx4_sb_info {
+ struct buffer_head *sb_buf; /* superblock buffer */
+ struct qnx4_super_block *sb; /* our superblock */
+ unsigned int Version; /* may be useful */
+ struct qnx4_inode_entry *BitMap; /* useful */
+};
+
+struct qnx4_inode_info {
+ struct qnx4_inode_entry raw;
+ loff_t mmu_private;
+ struct inode vfs_inode;
+};
+
+extern struct inode *qnx4_iget(struct super_block *, unsigned long);
+extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd);
+extern unsigned long qnx4_count_free_blocks(struct super_block *sb);
+extern unsigned long qnx4_block_map(struct inode *inode, long iblock);
+
+extern struct buffer_head *qnx4_bread(struct inode *, int, int);
+
+extern const struct inode_operations qnx4_file_inode_operations;
+extern const struct inode_operations qnx4_dir_inode_operations;
+extern const struct file_operations qnx4_file_operations;
+extern const struct file_operations qnx4_dir_operations;
+extern int qnx4_is_free(struct super_block *sb, long block);
+extern int qnx4_set_bitmap(struct super_block *sb, long block, int busy);
+extern int qnx4_create(struct inode *inode, struct dentry *dentry, int mode, struct nameidata *nd);
+extern void qnx4_truncate(struct inode *inode);
+extern void qnx4_free_inode(struct inode *inode);
+extern int qnx4_unlink(struct inode *dir, struct dentry *dentry);
+extern int qnx4_rmdir(struct inode *dir, struct dentry *dentry);
+
+static inline struct qnx4_sb_info *qnx4_sb(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+
+static inline struct qnx4_inode_info *qnx4_i(struct inode *inode)
+{
+ return container_of(inode, struct qnx4_inode_info, vfs_inode);
+}
+
+static inline struct qnx4_inode_entry *qnx4_raw_inode(struct inode *inode)
+{
+ return &qnx4_i(inode)->raw;
+}
diff --git a/fs/qnx4/truncate.c b/fs/qnx4/truncate.c
index 6437c1c3d1d..d94d9ee241f 100644
--- a/fs/qnx4/truncate.c
+++ b/fs/qnx4/truncate.c
@@ -10,12 +10,8 @@
* 30-06-1998 by Frank DENIS : ugly filler.
*/
-#include <linux/types.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/qnx4_fs.h>
#include <linux/smp_lock.h>
-#include <asm/uaccess.h>
+#include "qnx4.h"
#ifdef CONFIG_QNX4FS_RW
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index b7f5a468f07..95c5b42384b 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -159,10 +159,14 @@ static int check_quotactl_valid(struct super_block *sb, int type, int cmd,
return error;
}
-static void quota_sync_sb(struct super_block *sb, int type)
+#ifdef CONFIG_QUOTA
+void sync_quota_sb(struct super_block *sb, int type)
{
int cnt;
+ if (!sb->s_qcop->quota_sync)
+ return;
+
sb->s_qcop->quota_sync(sb, type);
if (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE)
@@ -191,17 +195,13 @@ static void quota_sync_sb(struct super_block *sb, int type)
}
mutex_unlock(&sb_dqopt(sb)->dqonoff_mutex);
}
+#endif
-void sync_dquots(struct super_block *sb, int type)
+static void sync_dquots(int type)
{
+ struct super_block *sb;
int cnt;
- if (sb) {
- if (sb->s_qcop->quota_sync)
- quota_sync_sb(sb, type);
- return;
- }
-
spin_lock(&sb_lock);
restart:
list_for_each_entry(sb, &super_blocks, s_list) {
@@ -222,8 +222,8 @@ restart:
sb->s_count++;
spin_unlock(&sb_lock);
down_read(&sb->s_umount);
- if (sb->s_root && sb->s_qcop->quota_sync)
- quota_sync_sb(sb, type);
+ if (sb->s_root)
+ sync_quota_sb(sb, type);
up_read(&sb->s_umount);
spin_lock(&sb_lock);
if (__put_super_and_need_restart(sb))
@@ -301,7 +301,10 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
return sb->s_qcop->set_dqblk(sb, type, id, &idq);
}
case Q_SYNC:
- sync_dquots(sb, type);
+ if (sb)
+ sync_quota_sb(sb, type);
+ else
+ sync_dquots(type);
return 0;
case Q_XQUOTAON:
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 3a6b193d844..0ff7566c767 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -202,9 +202,12 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
return -EINVAL;
opts->mode = option & S_IALLUGO;
break;
- default:
- printk(KERN_ERR "ramfs: bad mount option: %s\n", p);
- return -EINVAL;
+ /*
+ * We might like to report bad mount options here;
+ * but traditionally ramfs has ignored all mount options,
+ * and as it is used as a !CONFIG_SHMEM simple substitute
+ * for tmpfs, better continue to ignore other mount options.
+ */
}
}
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 45ee3d357c7..6d2668fdc38 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -44,13 +44,11 @@ static int reiserfs_dir_fsync(struct file *filp, struct dentry *dentry,
static inline bool is_privroot_deh(struct dentry *dir,
struct reiserfs_de_head *deh)
{
- int ret = 0;
-#ifdef CONFIG_REISERFS_FS_XATTR
struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root;
- ret = (dir == dir->d_parent && privroot->d_inode &&
- deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
-#endif
- return ret;
+ if (reiserfs_expose_privroot(dir->d_sb))
+ return 0;
+ return (dir == dir->d_parent && privroot->d_inode &&
+ deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid);
}
int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 3567fb9e3fb..2969773cfc2 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,6 +28,7 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/crc32.h>
+#include <linux/smp_lock.h>
struct file_system_type reiserfs_fs_type;
@@ -64,18 +65,15 @@ static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf);
static int reiserfs_sync_fs(struct super_block *s, int wait)
{
- if (!(s->s_flags & MS_RDONLY)) {
- struct reiserfs_transaction_handle th;
- reiserfs_write_lock(s);
- if (!journal_begin(&th, s, 1))
- if (!journal_end_sync(&th, s, 1))
- reiserfs_flush_old_commits(s);
- s->s_dirt = 0; /* Even if it's not true.
- * We'll loop forever in sync_supers otherwise */
- reiserfs_write_unlock(s);
- } else {
- s->s_dirt = 0;
- }
+ struct reiserfs_transaction_handle th;
+
+ reiserfs_write_lock(s);
+ if (!journal_begin(&th, s, 1))
+ if (!journal_end_sync(&th, s, 1))
+ reiserfs_flush_old_commits(s);
+ s->s_dirt = 0; /* Even if it's not true.
+ * We'll loop forever in sync_supers otherwise */
+ reiserfs_write_unlock(s);
return 0;
}
@@ -468,6 +466,11 @@ static void reiserfs_put_super(struct super_block *s)
struct reiserfs_transaction_handle th;
th.t_trans_id = 0;
+ lock_kernel();
+
+ if (s->s_dirt)
+ reiserfs_write_super(s);
+
/* change file system state to current state if it was mounted with read-write permissions */
if (!(s->s_flags & MS_RDONLY)) {
if (!journal_begin(&th, s, 10)) {
@@ -500,7 +503,7 @@ static void reiserfs_put_super(struct super_block *s)
kfree(s->s_fs_info);
s->s_fs_info = NULL;
- return;
+ unlock_kernel();
}
static struct kmem_cache *reiserfs_inode_cachep;
@@ -898,6 +901,7 @@ static int reiserfs_parse_options(struct super_block *s, char *options, /* strin
{"conv",.setmask = 1 << REISERFS_CONVERT},
{"attrs",.setmask = 1 << REISERFS_ATTRS},
{"noattrs",.clrmask = 1 << REISERFS_ATTRS},
+ {"expose_privroot", .setmask = 1 << REISERFS_EXPOSE_PRIVROOT},
#ifdef CONFIG_REISERFS_FS_XATTR
{"user_xattr",.setmask = 1 << REISERFS_XATTRS_USER},
{"nouser_xattr",.clrmask = 1 << REISERFS_XATTRS_USER},
@@ -1193,6 +1197,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
memcpy(qf_names, REISERFS_SB(s)->s_qf_names, sizeof(qf_names));
#endif
+ lock_kernel();
rs = SB_DISK_SUPER_BLOCK(s);
if (!reiserfs_parse_options
@@ -1315,10 +1320,12 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg)
out_ok:
replace_mount_options(s, new_opts);
+ unlock_kernel();
return 0;
out_err:
kfree(new_opts);
+ unlock_kernel();
return err;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8e7deb0e696..f3d47d85684 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -981,7 +981,8 @@ int reiserfs_lookup_privroot(struct super_block *s)
strlen(PRIVROOT_NAME));
if (!IS_ERR(dentry)) {
REISERFS_SB(s)->priv_root = dentry;
- s->s_root->d_op = &xattr_lookup_poison_ops;
+ if (!reiserfs_expose_privroot(s))
+ s->s_root->d_op = &xattr_lookup_poison_ops;
if (dentry->d_inode)
dentry->d_inode->i_flags |= S_PRIVATE;
} else
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
index fc27fbfc539..1402d2d54f5 100644
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -474,6 +474,8 @@ smb_put_super(struct super_block *sb)
{
struct smb_sb_info *server = SMB_SB(sb);
+ lock_kernel();
+
smb_lock_server(server);
server->state = CONN_INVALID;
smbiod_unregister_server(server);
@@ -489,6 +491,8 @@ smb_put_super(struct super_block *sb)
smb_unlock_server(server);
put_pid(server->conn_pid);
kfree(server);
+
+ unlock_kernel();
}
static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 0adc624c956..3b52770f46f 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -338,6 +338,8 @@ static int squashfs_remount(struct super_block *sb, int *flags, char *data)
static void squashfs_put_super(struct super_block *sb)
{
+ lock_kernel();
+
if (sb->s_fs_info) {
struct squashfs_sb_info *sbi = sb->s_fs_info;
squashfs_cache_delete(sbi->block_cache);
@@ -350,6 +352,8 @@ static void squashfs_put_super(struct super_block *sb)
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
}
+
+ unlock_kernel();
}
diff --git a/fs/super.c b/fs/super.c
index 1943fdf655f..83b47416d00 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -28,7 +28,6 @@
#include <linux/blkdev.h>
#include <linux/quotaops.h>
#include <linux/namei.h>
-#include <linux/buffer_head.h> /* for fsync_super() */
#include <linux/mount.h>
#include <linux/security.h>
#include <linux/syscalls.h>
@@ -38,7 +37,6 @@
#include <linux/kobject.h>
#include <linux/mutex.h>
#include <linux/file.h>
-#include <linux/async.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -72,7 +70,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
INIT_LIST_HEAD(&s->s_dentry_lru);
- INIT_LIST_HEAD(&s->s_async_list);
init_rwsem(&s->s_umount);
mutex_init(&s->s_lock);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -285,38 +282,6 @@ void unlock_super(struct super_block * sb)
EXPORT_SYMBOL(lock_super);
EXPORT_SYMBOL(unlock_super);
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock. Filesystem data as well as the underlying block
- * device. Takes the superblock lock. Requires a second blkdev
- * flush by the caller to complete the operation.
- */
-void __fsync_super(struct super_block *sb)
-{
- sync_inodes_sb(sb, 0);
- vfs_dq_sync(sb);
- lock_super(sb);
- if (sb->s_dirt && sb->s_op->write_super)
- sb->s_op->write_super(sb);
- unlock_super(sb);
- if (sb->s_op->sync_fs)
- sb->s_op->sync_fs(sb, 1);
- sync_blockdev(sb->s_bdev);
- sync_inodes_sb(sb, 1);
-}
-
-/*
- * Write out and wait upon all dirty data associated with this
- * superblock. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_super(struct super_block *sb)
-{
- __fsync_super(sb);
- return sync_blockdev(sb->s_bdev);
-}
-EXPORT_SYMBOL_GPL(fsync_super);
-
/**
* generic_shutdown_super - common helper for ->kill_sb()
* @sb: superblock to kill
@@ -338,21 +303,13 @@ void generic_shutdown_super(struct super_block *sb)
if (sb->s_root) {
shrink_dcache_for_umount(sb);
- fsync_super(sb);
- lock_super(sb);
+ sync_filesystem(sb);
+ get_fs_excl();
sb->s_flags &= ~MS_ACTIVE;
- /*
- * wait for asynchronous fs operations to finish before going further
- */
- async_synchronize_full_domain(&sb->s_async_list);
-
/* bad name - it should be evict_inodes() */
invalidate_inodes(sb);
- lock_kernel();
- if (sop->write_super && sb->s_dirt)
- sop->write_super(sb);
if (sop->put_super)
sop->put_super(sb);
@@ -362,9 +319,7 @@ void generic_shutdown_super(struct super_block *sb)
"Self-destruct in 5 seconds. Have a nice day...\n",
sb->s_id);
}
-
- unlock_kernel();
- unlock_super(sb);
+ put_fs_excl();
}
spin_lock(&sb_lock);
/* should be initialized for __put_super_and_need_restart() */
@@ -441,16 +396,14 @@ void drop_super(struct super_block *sb)
EXPORT_SYMBOL(drop_super);
-static inline void write_super(struct super_block *sb)
-{
- lock_super(sb);
- if (sb->s_root && sb->s_dirt)
- if (sb->s_op->write_super)
- sb->s_op->write_super(sb);
- unlock_super(sb);
-}
-
-/*
+/**
+ * sync_supers - helper for periodic superblock writeback
+ *
+ * Call the write_super method if present on all dirty superblocks in
+ * the system. This is for the periodic writeback used by most older
+ * filesystems. For data integrity superblock writeback use
+ * sync_filesystems() instead.
+ *
* Note: check the dirty flag before waiting, so we don't
* hold up the sync while mounting a device. (The newly
* mounted device won't need syncing.)
@@ -462,12 +415,15 @@ void sync_supers(void)
spin_lock(&sb_lock);
restart:
list_for_each_entry(sb, &super_blocks, s_list) {
- if (sb->s_dirt) {
+ if (sb->s_op->write_super && sb->s_dirt) {
sb->s_count++;
spin_unlock(&sb_lock);
+
down_read(&sb->s_umount);
- write_super(sb);
+ if (sb->s_root && sb->s_dirt)
+ sb->s_op->write_super(sb);
up_read(&sb->s_umount);
+
spin_lock(&sb_lock);
if (__put_super_and_need_restart(sb))
goto restart;
@@ -476,60 +432,6 @@ restart:
spin_unlock(&sb_lock);
}
-/*
- * Call the ->sync_fs super_op against all filesystems which are r/w and
- * which implement it.
- *
- * This operation is careful to avoid the livelock which could easily happen
- * if two or more filesystems are being continuously dirtied. s_need_sync_fs
- * is used only here. We set it against all filesystems and then clear it as
- * we sync them. So redirtied filesystems are skipped.
- *
- * But if process A is currently running sync_filesystems and then process B
- * calls sync_filesystems as well, process B will set all the s_need_sync_fs
- * flags again, which will cause process A to resync everything. Fix that with
- * a local mutex.
- *
- * (Fabian) Avoid sync_fs with clean fs & wait mode 0
- */
-void sync_filesystems(int wait)
-{
- struct super_block *sb;
- static DEFINE_MUTEX(mutex);
-
- mutex_lock(&mutex); /* Could be down_interruptible */
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (!sb->s_op->sync_fs)
- continue;
- if (sb->s_flags & MS_RDONLY)
- continue;
- sb->s_need_sync_fs = 1;
- }
-
-restart:
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (!sb->s_need_sync_fs)
- continue;
- sb->s_need_sync_fs = 0;
- if (sb->s_flags & MS_RDONLY)
- continue; /* hm. Was remounted r/o meanwhile */
- sb->s_count++;
- spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
- async_synchronize_full_domain(&sb->s_async_list);
- if (sb->s_root && (wait || sb->s_dirt))
- sb->s_op->sync_fs(sb, wait);
- up_read(&sb->s_umount);
- /* restart only when sb is no longer on the list */
- spin_lock(&sb_lock);
- if (__put_super_and_need_restart(sb))
- goto restart;
- }
- spin_unlock(&sb_lock);
- mutex_unlock(&mutex);
-}
-
/**
* get_super - get the superblock of a device
* @bdev: device to get the superblock for
@@ -616,45 +518,6 @@ out:
}
/**
- * mark_files_ro - mark all files read-only
- * @sb: superblock in question
- *
- * All files are marked read-only. We don't care about pending
- * delete files so this should be used in 'force' mode only.
- */
-
-static void mark_files_ro(struct super_block *sb)
-{
- struct file *f;
-
-retry:
- file_list_lock();
- list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
- struct vfsmount *mnt;
- if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
- continue;
- if (!file_count(f))
- continue;
- if (!(f->f_mode & FMODE_WRITE))
- continue;
- f->f_mode &= ~FMODE_WRITE;
- if (file_check_writeable(f) != 0)
- continue;
- file_release_write(f);
- mnt = mntget(f->f_path.mnt);
- file_list_unlock();
- /*
- * This can sleep, so we can't hold
- * the file_list_lock() spinlock.
- */
- mnt_drop_write(mnt);
- mntput(mnt);
- goto retry;
- }
- file_list_unlock();
-}
-
-/**
* do_remount_sb - asks filesystem to change mount options.
* @sb: superblock in question
* @flags: numeric part of options
@@ -675,27 +538,31 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
if (flags & MS_RDONLY)
acct_auto_close(sb);
shrink_dcache_sb(sb);
- fsync_super(sb);
+ sync_filesystem(sb);
/* If we are remounting RDONLY and current sb is read/write,
make sure there are no rw files opened */
if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) {
if (force)
mark_files_ro(sb);
- else if (!fs_may_remount_ro(sb))
+ else if (!fs_may_remount_ro(sb)) {
+ unlock_kernel();
return -EBUSY;
+ }
retval = vfs_dq_off(sb, 1);
- if (retval < 0 && retval != -ENOSYS)
+ if (retval < 0 && retval != -ENOSYS) {
+ unlock_kernel();
return -EBUSY;
+ }
}
remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY);
if (sb->s_op->remount_fs) {
- lock_super(sb);
retval = sb->s_op->remount_fs(sb, &flags, data);
- unlock_super(sb);
- if (retval)
+ if (retval) {
+ unlock_kernel();
return retval;
+ }
}
sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
if (remount_rw)
@@ -711,18 +578,17 @@ static void do_emergency_remount(struct work_struct *work)
list_for_each_entry(sb, &super_blocks, s_list) {
sb->s_count++;
spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
+ down_write(&sb->s_umount);
if (sb->s_root && sb->s_bdev && !(sb->s_flags & MS_RDONLY)) {
/*
* ->remount_fs needs lock_kernel().
*
* What lock protects sb->s_flags??
*/
- lock_kernel();
do_remount_sb(sb, MS_RDONLY, NULL, 1);
- unlock_kernel();
}
- drop_super(sb);
+ up_write(&sb->s_umount);
+ put_super(sb);
spin_lock(&sb_lock);
}
spin_unlock(&sb_lock);
diff --git a/fs/sync.c b/fs/sync.c
index 7abc65fbf21..dd200025af8 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -13,38 +13,123 @@
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
+#include "internal.h"
#define VALID_FLAGS (SYNC_FILE_RANGE_WAIT_BEFORE|SYNC_FILE_RANGE_WRITE| \
SYNC_FILE_RANGE_WAIT_AFTER)
/*
- * sync everything. Start out by waking pdflush, because that writes back
- * all queues in parallel.
+ * Do the filesystem syncing work. For simple filesystems sync_inodes_sb(sb, 0)
+ * just dirties buffers with inodes so we have to submit IO for these buffers
+ * via __sync_blockdev(). This also speeds up the wait == 1 case since in that
+ * case write_inode() functions do sync_dirty_buffer() and thus effectively
+ * write one block at a time.
*/
-static void do_sync(unsigned long wait)
+static int __sync_filesystem(struct super_block *sb, int wait)
{
- wakeup_pdflush(0);
- sync_inodes(0); /* All mappings, inodes and their blockdevs */
- vfs_dq_sync(NULL);
- sync_supers(); /* Write the superblocks */
- sync_filesystems(0); /* Start syncing the filesystems */
- sync_filesystems(wait); /* Waitingly sync the filesystems */
- sync_inodes(wait); /* Mappings, inodes and blockdevs, again. */
+ /* Avoid doing twice syncing and cache pruning for quota sync */
if (!wait)
- printk("Emergency Sync complete\n");
- if (unlikely(laptop_mode))
- laptop_sync_completion();
+ writeout_quota_sb(sb, -1);
+ else
+ sync_quota_sb(sb, -1);
+ sync_inodes_sb(sb, wait);
+ if (sb->s_op->sync_fs)
+ sb->s_op->sync_fs(sb, wait);
+ return __sync_blockdev(sb->s_bdev, wait);
+}
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * superblock. Filesystem data as well as the underlying block
+ * device. Takes the superblock lock.
+ */
+int sync_filesystem(struct super_block *sb)
+{
+ int ret;
+
+ /*
+ * We need to be protected against the filesystem going from
+ * r/o to r/w or vice versa.
+ */
+ WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+ /*
+ * No point in syncing out anything if the filesystem is read-only.
+ */
+ if (sb->s_flags & MS_RDONLY)
+ return 0;
+
+ ret = __sync_filesystem(sb, 0);
+ if (ret < 0)
+ return ret;
+ return __sync_filesystem(sb, 1);
+}
+EXPORT_SYMBOL_GPL(sync_filesystem);
+
+/*
+ * Sync all the data for all the filesystems (called by sys_sync() and
+ * emergency sync)
+ *
+ * This operation is careful to avoid the livelock which could easily happen
+ * if two or more filesystems are being continuously dirtied. s_need_sync
+ * is used only here. We set it against all filesystems and then clear it as
+ * we sync them. So redirtied filesystems are skipped.
+ *
+ * But if process A is currently running sync_filesystems and then process B
+ * calls sync_filesystems as well, process B will set all the s_need_sync
+ * flags again, which will cause process A to resync everything. Fix that with
+ * a local mutex.
+ */
+static void sync_filesystems(int wait)
+{
+ struct super_block *sb;
+ static DEFINE_MUTEX(mutex);
+
+ mutex_lock(&mutex); /* Could be down_interruptible */
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &super_blocks, s_list)
+ sb->s_need_sync = 1;
+
+restart:
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (!sb->s_need_sync)
+ continue;
+ sb->s_need_sync = 0;
+ sb->s_count++;
+ spin_unlock(&sb_lock);
+
+ down_read(&sb->s_umount);
+ if (!(sb->s_flags & MS_RDONLY) && sb->s_root)
+ __sync_filesystem(sb, wait);
+ up_read(&sb->s_umount);
+
+ /* restart only when sb is no longer on the list */
+ spin_lock(&sb_lock);
+ if (__put_super_and_need_restart(sb))
+ goto restart;
+ }
+ spin_unlock(&sb_lock);
+ mutex_unlock(&mutex);
}
SYSCALL_DEFINE0(sync)
{
- do_sync(1);
+ sync_filesystems(0);
+ sync_filesystems(1);
+ if (unlikely(laptop_mode))
+ laptop_sync_completion();
return 0;
}
static void do_sync_work(struct work_struct *work)
{
- do_sync(0);
+ /*
+ * Sync twice to reduce the possibility we skipped some inodes / pages
+ * because they were temporarily locked
+ */
+ sync_filesystems(0);
+ sync_filesystems(0);
+ printk("Emergency Sync complete\n");
kfree(work);
}
@@ -75,10 +160,8 @@ int file_fsync(struct file *filp, struct dentry *dentry, int datasync)
/* sync the superblock to buffers */
sb = inode->i_sb;
- lock_super(sb);
if (sb->s_dirt && sb->s_op->write_super)
sb->s_op->write_super(sb);
- unlock_super(sb);
/* .. finally sync the buffers to disk */
err = sync_blockdev(sb->s_bdev);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 56f655254bf..c7798079e64 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -24,7 +24,7 @@ static int sysv_readdir(struct file *, void *, filldir_t);
const struct file_operations sysv_dir_operations = {
.read = generic_read_dir,
.readdir = sysv_readdir,
- .fsync = sysv_sync_file,
+ .fsync = simple_fsync,
};
static inline void dir_put_page(struct page *page)
diff --git a/fs/sysv/file.c b/fs/sysv/file.c
index 589be21d884..96340c01f4a 100644
--- a/fs/sysv/file.c
+++ b/fs/sysv/file.c
@@ -26,7 +26,7 @@ const struct file_operations sysv_file_operations = {
.write = do_sync_write,
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
- .fsync = sysv_sync_file,
+ .fsync = simple_fsync,
.splice_read = generic_file_splice_read,
};
@@ -34,18 +34,3 @@ const struct inode_operations sysv_file_inode_operations = {
.truncate = sysv_truncate,
.getattr = sysv_getattr,
};
-
-int sysv_sync_file(struct file * file, struct dentry *dentry, int datasync)
-{
- struct inode *inode = dentry->d_inode;
- int err;
-
- err = sync_mapping_buffers(inode->i_mapping);
- if (!(inode->i_state & I_DIRTY))
- return err;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return err;
-
- err |= sysv_sync_inode(inode);
- return err ? -EIO : 0;
-}
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index da20b48d350..479923456a5 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -31,15 +31,13 @@
#include <asm/byteorder.h>
#include "sysv.h"
-/* This is only called on sync() and umount(), when s_dirt=1. */
-static void sysv_write_super(struct super_block *sb)
+static int sysv_sync_fs(struct super_block *sb, int wait)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
unsigned long time = get_seconds(), old_time;
+ lock_super(sb);
lock_kernel();
- if (sb->s_flags & MS_RDONLY)
- goto clean;
/*
* If we are going to write out the super block,
@@ -53,18 +51,30 @@ static void sysv_write_super(struct super_block *sb)
*sbi->s_sb_time = cpu_to_fs32(sbi, time);
mark_buffer_dirty(sbi->s_bh2);
}
-clean:
- sb->s_dirt = 0;
+
unlock_kernel();
+ unlock_super(sb);
+
+ return 0;
+}
+
+static void sysv_write_super(struct super_block *sb)
+{
+ if (!(sb->s_flags & MS_RDONLY))
+ sysv_sync_fs(sb, 1);
+ else
+ sb->s_dirt = 0;
}
static int sysv_remount(struct super_block *sb, int *flags, char *data)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
+ lock_super(sb);
if (sbi->s_forced_ro)
*flags |= MS_RDONLY;
if (!(*flags & MS_RDONLY))
sb->s_dirt = 1;
+ unlock_super(sb);
return 0;
}
@@ -72,6 +82,11 @@ static void sysv_put_super(struct super_block *sb)
{
struct sysv_sb_info *sbi = SYSV_SB(sb);
+ lock_kernel();
+
+ if (sb->s_dirt)
+ sysv_write_super(sb);
+
if (!(sb->s_flags & MS_RDONLY)) {
/* XXX ext2 also updates the state here */
mark_buffer_dirty(sbi->s_bh1);
@@ -84,6 +99,8 @@ static void sysv_put_super(struct super_block *sb)
brelse(sbi->s_bh2);
kfree(sbi);
+
+ unlock_kernel();
}
static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -236,7 +253,7 @@ bad_inode:
return ERR_PTR(-EIO);
}
-static struct buffer_head * sysv_update_inode(struct inode * inode)
+int sysv_write_inode(struct inode *inode, int wait)
{
struct super_block * sb = inode->i_sb;
struct sysv_sb_info * sbi = SYSV_SB(sb);
@@ -244,19 +261,21 @@ static struct buffer_head * sysv_update_inode(struct inode * inode)
struct sysv_inode * raw_inode;
struct sysv_inode_info * si;
unsigned int ino, block;
+ int err = 0;
ino = inode->i_ino;
if (!ino || ino > sbi->s_ninodes) {
printk("Bad inode number on dev %s: %d is out of range\n",
inode->i_sb->s_id, ino);
- return NULL;
+ return -EIO;
}
raw_inode = sysv_raw_inode(sb, ino, &bh);
if (!raw_inode) {
printk("unable to read i-node block\n");
- return NULL;
+ return -EIO;
}
+ lock_kernel();
raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode);
raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid));
raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid));
@@ -272,38 +291,23 @@ static struct buffer_head * sysv_update_inode(struct inode * inode)
for (block = 0; block < 10+1+1+1; block++)
write3byte(sbi, (u8 *)&si->i_data[block],
&raw_inode->i_data[3*block]);
+ unlock_kernel();
mark_buffer_dirty(bh);
- return bh;
-}
-
-int sysv_write_inode(struct inode * inode, int wait)
-{
- struct buffer_head *bh;
- lock_kernel();
- bh = sysv_update_inode(inode);
+ if (wait) {
+ sync_dirty_buffer(bh);
+ if (buffer_req(bh) && !buffer_uptodate(bh)) {
+ printk ("IO error syncing sysv inode [%s:%08x]\n",
+ sb->s_id, ino);
+ err = -EIO;
+ }
+ }
brelse(bh);
- unlock_kernel();
return 0;
}
-int sysv_sync_inode(struct inode * inode)
+int sysv_sync_inode(struct inode *inode)
{
- int err = 0;
- struct buffer_head *bh;
-
- bh = sysv_update_inode(inode);
- if (bh && buffer_dirty(bh)) {
- sync_dirty_buffer(bh);
- if (buffer_req(bh) && !buffer_uptodate(bh)) {
- printk ("IO error syncing sysv inode [%s:%08lx]\n",
- inode->i_sb->s_id, inode->i_ino);
- err = -1;
- }
- }
- else if (!bh)
- err = -1;
- brelse (bh);
- return err;
+ return sysv_write_inode(inode, 1);
}
static void sysv_delete_inode(struct inode *inode)
@@ -347,6 +351,7 @@ const struct super_operations sysv_sops = {
.delete_inode = sysv_delete_inode,
.put_super = sysv_put_super,
.write_super = sysv_write_super,
+ .sync_fs = sysv_sync_fs,
.remount_fs = sysv_remount,
.statfs = sysv_statfs,
};
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 5784a318c88..53786eb5cf6 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -144,7 +144,6 @@ extern int __sysv_write_begin(struct file *file, struct address_space *mapping,
extern struct inode *sysv_iget(struct super_block *, unsigned int);
extern int sysv_write_inode(struct inode *, int);
extern int sysv_sync_inode(struct inode *);
-extern int sysv_sync_file(struct file *, struct dentry *, int);
extern void sysv_set_inode(struct inode *, dev_t);
extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int sysv_init_icache(void);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index e9f7a754c4f..3260b73abe2 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -36,6 +36,7 @@
#include <linux/mount.h>
#include <linux/math64.h>
#include <linux/writeback.h>
+#include <linux/smp_lock.h>
#include "ubifs.h"
/*
@@ -447,9 +448,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
if (!wait)
return 0;
- if (sb->s_flags & MS_RDONLY)
- return 0;
-
/*
* VFS calls '->sync_fs()' before synchronizing all dirty inodes and
* pages, so synchronize them first, then commit the journal. Strictly
@@ -1687,6 +1685,9 @@ static void ubifs_put_super(struct super_block *sb)
ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
c->vi.vol_id);
+
+ lock_kernel();
+
/*
* The following asserts are only valid if there has not been a failure
* of the media. For example, there will be dirty inodes if we failed
@@ -1753,6 +1754,8 @@ static void ubifs_put_super(struct super_block *sb)
ubi_close_volume(c->ubi);
mutex_unlock(&c->umount_mutex);
kfree(c);
+
+ unlock_kernel();
}
static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1768,17 +1771,22 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
return err;
}
+ lock_kernel();
if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
if (c->ro_media) {
ubifs_msg("cannot re-mount due to prior errors");
+ unlock_kernel();
return -EROFS;
}
err = ubifs_remount_rw(c);
- if (err)
+ if (err) {
+ unlock_kernel();
return err;
+ }
} else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
if (c->ro_media) {
ubifs_msg("cannot re-mount due to prior errors");
+ unlock_kernel();
return -EROFS;
}
ubifs_remount_ro(c);
@@ -1793,6 +1801,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
}
ubifs_assert(c->lst.taken_empty_lebs > 0);
+ unlock_kernel();
return 0;
}
@@ -1928,6 +1937,9 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
err = bdi_init(&c->bdi);
if (err)
goto out_close;
+ err = bdi_register(&c->bdi, NULL, "ubifs");
+ if (err)
+ goto out_bdi;
err = ubifs_parse_options(c, data, 0);
if (err)
diff --git a/fs/udf/Makefile b/fs/udf/Makefile
index 0d4503f7446..eb880f66c23 100644
--- a/fs/udf/Makefile
+++ b/fs/udf/Makefile
@@ -5,5 +5,5 @@
obj-$(CONFIG_UDF_FS) += udf.o
udf-objs := balloc.o dir.o file.o ialloc.o inode.o lowlevel.o namei.o \
- partition.o super.o truncate.o symlink.o fsync.o \
+ partition.o super.o truncate.o symlink.o \
directory.o misc.o udftime.o unicode.o
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 2efd4d5291b..61d9a76a3a6 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -210,5 +210,5 @@ const struct file_operations udf_dir_operations = {
.read = generic_read_dir,
.readdir = udf_readdir,
.ioctl = udf_ioctl,
- .fsync = udf_fsync_file,
+ .fsync = simple_fsync,
};
diff --git a/fs/udf/file.c b/fs/udf/file.c
index eb91f3b7032..7464305382b 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -209,7 +209,7 @@ const struct file_operations udf_file_operations = {
.write = do_sync_write,
.aio_write = udf_file_aio_write,
.release = udf_release_file,
- .fsync = udf_fsync_file,
+ .fsync = simple_fsync,
.splice_read = generic_file_splice_read,
.llseek = generic_file_llseek,
};
diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c
deleted file mode 100644
index b2c472b733b..00000000000
--- a/fs/udf/fsync.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * fsync.c
- *
- * PURPOSE
- * Fsync handling routines for the OSTA-UDF(tm) filesystem.
- *
- * COPYRIGHT
- * This file is distributed under the terms of the GNU General Public
- * License (GPL). Copies of the GPL can be obtained from:
- * ftp://prep.ai.mit.edu/pub/gnu/GPL
- * Each contributing author retains all rights to their own work.
- *
- * (C) 1999-2001 Ben Fennema
- * (C) 1999-2000 Stelias Computing Inc
- *
- * HISTORY
- *
- * 05/22/99 blf Created.
- */
-
-#include "udfdecl.h"
-
-#include <linux/fs.h>
-
-static int udf_fsync_inode(struct inode *, int);
-
-/*
- * File may be NULL when we are called. Perhaps we shouldn't
- * even pass file to fsync ?
- */
-
-int udf_fsync_file(struct file *file, struct dentry *dentry, int datasync)
-{
- struct inode *inode = dentry->d_inode;
-
- return udf_fsync_inode(inode, datasync);
-}
-
-static int udf_fsync_inode(struct inode *inode, int datasync)
-{
- int err;
-
- err = sync_mapping_buffers(inode->i_mapping);
- if (!(inode->i_state & I_DIRTY))
- return err;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return err;
-
- err |= udf_sync_inode(inode);
-
- return err ? -EIO : 0;
-}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 0ba44107d8f..6832135159b 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -568,6 +568,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
if (!udf_parse_options(options, &uopt, true))
return -EINVAL;
+ lock_kernel();
sbi->s_flags = uopt.flags;
sbi->s_uid = uopt.uid;
sbi->s_gid = uopt.gid;
@@ -581,13 +582,16 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
*flags |= MS_RDONLY;
}
- if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
+ if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+ unlock_kernel();
return 0;
+ }
if (*flags & MS_RDONLY)
udf_close_lvid(sb);
else
udf_open_lvid(sb);
+ unlock_kernel();
return 0;
}
@@ -2062,6 +2066,9 @@ static void udf_put_super(struct super_block *sb)
struct udf_sb_info *sbi;
sbi = UDF_SB(sb);
+
+ lock_kernel();
+
if (sbi->s_vat_inode)
iput(sbi->s_vat_inode);
if (sbi->s_partitions)
@@ -2077,6 +2084,8 @@ static void udf_put_super(struct super_block *sb)
kfree(sbi->s_partmaps);
kfree(sb->s_fs_info);
sb->s_fs_info = NULL;
+
+ unlock_kernel();
}
static int udf_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index cac51b77a5d..8d46f4294ee 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -223,9 +223,6 @@ extern int udf_prealloc_blocks(struct super_block *, struct inode *, uint16_t,
extern int udf_new_block(struct super_block *, struct inode *, uint16_t,
uint32_t, int *);
-/* fsync.c */
-extern int udf_fsync_file(struct file *, struct dentry *, int);
-
/* directory.c */
extern struct fileIdentDesc *udf_fileident_read(struct inode *, loff_t *,
struct udf_fileident_bh *,
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 6321b797061..6f671f1ac27 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -666,6 +666,6 @@ not_empty:
const struct file_operations ufs_dir_operations = {
.read = generic_read_dir,
.readdir = ufs_readdir,
- .fsync = ufs_sync_file,
+ .fsync = simple_fsync,
.llseek = generic_file_llseek,
};
diff --git a/fs/ufs/file.c b/fs/ufs/file.c
index 2bd3a161571..73655c61240 100644
--- a/fs/ufs/file.c
+++ b/fs/ufs/file.c
@@ -24,31 +24,10 @@
*/
#include <linux/fs.h>
-#include <linux/buffer_head.h> /* for sync_mapping_buffers() */
#include "ufs_fs.h"
#include "ufs.h"
-
-int ufs_sync_file(struct file *file, struct dentry *dentry, int datasync)
-{
- struct inode *inode = dentry->d_inode;
- int err;
- int ret;
-
- ret = sync_mapping_buffers(inode->i_mapping);
- if (!(inode->i_state & I_DIRTY))
- return ret;
- if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
- return ret;
-
- err = ufs_sync_inode(inode);
- if (ret == 0)
- ret = err;
- return ret;
-}
-
-
/*
* We have mostly NULL's here: the current defaults are ok for
* the ufs filesystem.
@@ -62,6 +41,6 @@ const struct file_operations ufs_file_operations = {
.aio_write = generic_file_aio_write,
.mmap = generic_file_mmap,
.open = generic_file_open,
- .fsync = ufs_sync_file,
+ .fsync = simple_fsync,
.splice_read = generic_file_splice_read,
};
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 60359291761..5faed7954d0 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -263,6 +263,7 @@ void ufs_panic (struct super_block * sb, const char * function,
struct ufs_super_block_first * usb1;
va_list args;
+ lock_kernel();
uspi = UFS_SB(sb)->s_uspi;
usb1 = ubh_get_usb_first(uspi);
@@ -594,6 +595,9 @@ static void ufs_put_super_internal(struct super_block *sb)
UFSD("ENTER\n");
+
+ lock_kernel();
+
ufs_put_cstotal(sb);
size = uspi->s_cssize;
blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift;
@@ -621,6 +625,9 @@ static void ufs_put_super_internal(struct super_block *sb)
brelse (sbi->s_ucg[i]);
kfree (sbi->s_ucg);
kfree (base);
+
+ unlock_kernel();
+
UFSD("EXIT\n");
}
@@ -1118,32 +1125,45 @@ failed_nomem:
return -ENOMEM;
}
-static void ufs_write_super(struct super_block *sb)
+static int ufs_sync_fs(struct super_block *sb, int wait)
{
struct ufs_sb_private_info * uspi;
struct ufs_super_block_first * usb1;
struct ufs_super_block_third * usb3;
unsigned flags;
+ lock_super(sb);
lock_kernel();
+
UFSD("ENTER\n");
+
flags = UFS_SB(sb)->s_flags;
uspi = UFS_SB(sb)->s_uspi;
usb1 = ubh_get_usb_first(uspi);
usb3 = ubh_get_usb_third(uspi);
- if (!(sb->s_flags & MS_RDONLY)) {
- usb1->fs_time = cpu_to_fs32(sb, get_seconds());
- if ((flags & UFS_ST_MASK) == UFS_ST_SUN
- || (flags & UFS_ST_MASK) == UFS_ST_SUNOS
- || (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
- ufs_set_fs_state(sb, usb1, usb3,
- UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
- ufs_put_cstotal(sb);
- }
+ usb1->fs_time = cpu_to_fs32(sb, get_seconds());
+ if ((flags & UFS_ST_MASK) == UFS_ST_SUN ||
+ (flags & UFS_ST_MASK) == UFS_ST_SUNOS ||
+ (flags & UFS_ST_MASK) == UFS_ST_SUNx86)
+ ufs_set_fs_state(sb, usb1, usb3,
+ UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time));
+ ufs_put_cstotal(sb);
sb->s_dirt = 0;
+
UFSD("EXIT\n");
unlock_kernel();
+ unlock_super(sb);
+
+ return 0;
+}
+
+static void ufs_write_super(struct super_block *sb)
+{
+ if (!(sb->s_flags & MS_RDONLY))
+ ufs_sync_fs(sb, 1);
+ else
+ sb->s_dirt = 0;
}
static void ufs_put_super(struct super_block *sb)
@@ -1152,6 +1172,9 @@ static void ufs_put_super(struct super_block *sb)
UFSD("ENTER\n");
+ if (sb->s_dirt)
+ ufs_write_super(sb);
+
if (!(sb->s_flags & MS_RDONLY))
ufs_put_super_internal(sb);
@@ -1171,7 +1194,9 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
struct ufs_super_block_third * usb3;
unsigned new_mount_opt, ufstype;
unsigned flags;
-
+
+ lock_kernel();
+ lock_super(sb);
uspi = UFS_SB(sb)->s_uspi;
flags = UFS_SB(sb)->s_flags;
usb1 = ubh_get_usb_first(uspi);
@@ -1184,17 +1209,24 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype = UFS_SB(sb)->s_mount_opt & UFS_MOUNT_UFSTYPE;
new_mount_opt = 0;
ufs_set_opt (new_mount_opt, ONERROR_LOCK);
- if (!ufs_parse_options (data, &new_mount_opt))
+ if (!ufs_parse_options (data, &new_mount_opt)) {
+ unlock_super(sb);
+ unlock_kernel();
return -EINVAL;
+ }
if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) {
new_mount_opt |= ufstype;
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
printk("ufstype can't be changed during remount\n");
+ unlock_super(sb);
+ unlock_kernel();
return -EINVAL;
}
if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
UFS_SB(sb)->s_mount_opt = new_mount_opt;
+ unlock_super(sb);
+ unlock_kernel();
return 0;
}
@@ -1219,6 +1251,8 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
#ifndef CONFIG_UFS_FS_WRITE
printk("ufs was compiled with read-only support, "
"can't be mounted as read-write\n");
+ unlock_super(sb);
+ unlock_kernel();
return -EINVAL;
#else
if (ufstype != UFS_MOUNT_UFSTYPE_SUN &&
@@ -1227,16 +1261,22 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
printk("this ufstype is read-only supported\n");
+ unlock_super(sb);
+ unlock_kernel();
return -EINVAL;
}
if (!ufs_read_cylinder_structures(sb)) {
printk("failed during remounting\n");
+ unlock_super(sb);
+ unlock_kernel();
return -EPERM;
}
sb->s_flags &= ~MS_RDONLY;
#endif
}
UFS_SB(sb)->s_mount_opt = new_mount_opt;
+ unlock_super(sb);
+ unlock_kernel();
return 0;
}
@@ -1352,6 +1392,7 @@ static const struct super_operations ufs_super_ops = {
.delete_inode = ufs_delete_inode,
.put_super = ufs_put_super,
.write_super = ufs_write_super,
+ .sync_fs = ufs_sync_fs,
.statfs = ufs_statfs,
.remount_fs = ufs_remount,
.show_options = ufs_show_options,
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index d0c4acd4f1f..644e77e1359 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -99,7 +99,6 @@ extern void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
extern const struct inode_operations ufs_file_inode_operations;
extern const struct file_operations ufs_file_operations;
extern const struct address_space_operations ufs_aops;
-extern int ufs_sync_file(struct file *, struct dentry *, int);
/* ialloc.c */
extern void ufs_free_inode (struct inode *inode);
diff --git a/fs/xattr.c b/fs/xattr.c
index d51b8f9db92..1c3d0af59dd 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -297,7 +297,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry);
- error = mnt_want_write(f->f_path.mnt);
+ error = mnt_want_write_file(f);
if (!error) {
error = setxattr(dentry, name, value, size, flags);
mnt_drop_write(f->f_path.mnt);
@@ -524,7 +524,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
return error;
dentry = f->f_path.dentry;
audit_inode(NULL, dentry);
- error = mnt_want_write(f->f_path.mnt);
+ error = mnt_want_write_file(f);
if (!error) {
error = removexattr(dentry, name);
mnt_drop_write(f->f_path.mnt);
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 29228f5899c..480f28127f0 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -39,6 +39,7 @@ config XFS_QUOTA
config XFS_POSIX_ACL
bool "XFS POSIX ACL support"
depends on XFS_FS
+ select FS_POSIX_ACL
help
POSIX Access Control Lists (ACLs) support permissions for users and
groups beyond the owner/group/world scheme.
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 60f107e47fe..7a59daed178 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -40,7 +40,7 @@ xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
endif
xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
+xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o
xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
@@ -88,8 +88,7 @@ xfs-y += xfs_alloc.o \
xfs_utils.o \
xfs_vnodeops.o \
xfs_rw.o \
- xfs_dmops.o \
- xfs_qmops.o
+ xfs_dmops.o
xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o \
xfs_dir2_trace.o
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
new file mode 100644
index 00000000000..1e9d1246eeb
--- /dev/null
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -0,0 +1,523 @@
+/*
+ * Copyright (c) 2008, Christoph Hellwig
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "xfs.h"
+#include "xfs_acl.h"
+#include "xfs_attr.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_vnodeops.h"
+#include <linux/xattr.h>
+#include <linux/posix_acl_xattr.h>
+
+
+#define XFS_ACL_NOT_CACHED ((void *)-1)
+
+/*
+ * Locking scheme:
+ * - all ACL updates are protected by inode->i_mutex, which is taken before
+ * calling into this file.
+ * - access and updates to the ip->i_acl and ip->i_default_acl pointers are
+ * protected by inode->i_lock.
+ */
+
+STATIC struct posix_acl *
+xfs_acl_from_disk(struct xfs_acl *aclp)
+{
+ struct posix_acl_entry *acl_e;
+ struct posix_acl *acl;
+ struct xfs_acl_entry *ace;
+ int count, i;
+
+ count = be32_to_cpu(aclp->acl_cnt);
+
+ acl = posix_acl_alloc(count, GFP_KERNEL);
+ if (!acl)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < count; i++) {
+ acl_e = &acl->a_entries[i];
+ ace = &aclp->acl_entry[i];
+
+ /*
+ * The tag is 32 bits on disk and 16 bits in core.
+ *
+ * Because every access to it goes through the core
+ * format first this is not a problem.
+ */
+ acl_e->e_tag = be32_to_cpu(ace->ae_tag);
+ acl_e->e_perm = be16_to_cpu(ace->ae_perm);
+
+ switch (acl_e->e_tag) {
+ case ACL_USER:
+ case ACL_GROUP:
+ acl_e->e_id = be32_to_cpu(ace->ae_id);
+ break;
+ case ACL_USER_OBJ:
+ case ACL_GROUP_OBJ:
+ case ACL_MASK:
+ case ACL_OTHER:
+ acl_e->e_id = ACL_UNDEFINED_ID;
+ break;
+ default:
+ goto fail;
+ }
+ }
+ return acl;
+
+fail:
+ posix_acl_release(acl);
+ return ERR_PTR(-EINVAL);
+}
+
+STATIC void
+xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
+{
+ const struct posix_acl_entry *acl_e;
+ struct xfs_acl_entry *ace;
+ int i;
+
+ aclp->acl_cnt = cpu_to_be32(acl->a_count);
+ for (i = 0; i < acl->a_count; i++) {
+ ace = &aclp->acl_entry[i];
+ acl_e = &acl->a_entries[i];
+
+ ace->ae_tag = cpu_to_be32(acl_e->e_tag);
+ ace->ae_id = cpu_to_be32(acl_e->e_id);
+ ace->ae_perm = cpu_to_be16(acl_e->e_perm);
+ }
+}
+
+/*
+ * Update the cached ACL pointer in the inode.
+ *
+ * Because we don't hold any locks while reading/writing the attribute
+ * from/to disk another thread could have raced and updated the cached
+ * ACL value before us. In that case we release the previous cached value
+ * and update it with our new value.
+ */
+STATIC void
+xfs_update_cached_acl(struct inode *inode, struct posix_acl **p_acl,
+ struct posix_acl *acl)
+{
+ spin_lock(&inode->i_lock);
+ if (*p_acl && *p_acl != XFS_ACL_NOT_CACHED)
+ posix_acl_release(*p_acl);
+ *p_acl = posix_acl_dup(acl);
+ spin_unlock(&inode->i_lock);
+}
+
+struct posix_acl *
+xfs_get_acl(struct inode *inode, int type)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct posix_acl *acl = NULL, **p_acl;
+ struct xfs_acl *xfs_acl;
+ int len = sizeof(struct xfs_acl);
+ char *ea_name;
+ int error;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ea_name = SGI_ACL_FILE;
+ p_acl = &ip->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ ea_name = SGI_ACL_DEFAULT;
+ p_acl = &ip->i_default_acl;
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
+
+ spin_lock(&inode->i_lock);
+ if (*p_acl != XFS_ACL_NOT_CACHED)
+ acl = posix_acl_dup(*p_acl);
+ spin_unlock(&inode->i_lock);
+
+ /*
+ * If we have a cached ACLs value just return it, not need to
+ * go out to the disk.
+ */
+ if (acl)
+ return acl;
+
+ xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+ if (!xfs_acl)
+ return ERR_PTR(-ENOMEM);
+
+ error = -xfs_attr_get(ip, ea_name, (char *)xfs_acl, &len, ATTR_ROOT);
+ if (error) {
+ /*
+ * If the attribute doesn't exist make sure we have a negative
+ * cache entry, for any other error assume it is transient and
+ * leave the cache entry as XFS_ACL_NOT_CACHED.
+ */
+ if (error == -ENOATTR) {
+ acl = NULL;
+ goto out_update_cache;
+ }
+ goto out;
+ }
+
+ acl = xfs_acl_from_disk(xfs_acl);
+ if (IS_ERR(acl))
+ goto out;
+
+ out_update_cache:
+ xfs_update_cached_acl(inode, p_acl, acl);
+ out:
+ kfree(xfs_acl);
+ return acl;
+}
+
+STATIC int
+xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct posix_acl **p_acl;
+ char *ea_name;
+ int error;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ ea_name = SGI_ACL_FILE;
+ p_acl = &ip->i_acl;
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ ea_name = SGI_ACL_DEFAULT;
+ p_acl = &ip->i_default_acl;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (acl) {
+ struct xfs_acl *xfs_acl;
+ int len;
+
+ xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
+ if (!xfs_acl)
+ return -ENOMEM;
+
+ xfs_acl_to_disk(xfs_acl, acl);
+ len = sizeof(struct xfs_acl) -
+ (sizeof(struct xfs_acl_entry) *
+ (XFS_ACL_MAX_ENTRIES - acl->a_count));
+
+ error = -xfs_attr_set(ip, ea_name, (char *)xfs_acl,
+ len, ATTR_ROOT);
+
+ kfree(xfs_acl);
+ } else {
+ /*
+ * A NULL ACL argument means we want to remove the ACL.
+ */
+ error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+
+ /*
+ * If the attribute didn't exist to start with that's fine.
+ */
+ if (error == -ENOATTR)
+ error = 0;
+ }
+
+ if (!error)
+ xfs_update_cached_acl(inode, p_acl, acl);
+ return error;
+}
+
+int
+xfs_check_acl(struct inode *inode, int mask)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct posix_acl *acl;
+ int error = -EAGAIN;
+
+ xfs_itrace_entry(ip);
+
+ /*
+ * If there is no attribute fork no ACL exists on this inode and
+ * we can skip the whole exercise.
+ */
+ if (!XFS_IFORK_Q(ip))
+ return -EAGAIN;
+
+ acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl) {
+ error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ }
+
+ return error;
+}
+
+static int
+xfs_set_mode(struct inode *inode, mode_t mode)
+{
+ int error = 0;
+
+ if (mode != inode->i_mode) {
+ struct iattr iattr;
+
+ iattr.ia_valid = ATTR_MODE;
+ iattr.ia_mode = mode;
+
+ error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
+ }
+
+ return error;
+}
+
+static int
+xfs_acl_exists(struct inode *inode, char *name)
+{
+ int len = sizeof(struct xfs_acl);
+
+ return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
+ ATTR_ROOT|ATTR_KERNOVAL) == 0);
+}
+
+int
+posix_acl_access_exists(struct inode *inode)
+{
+ return xfs_acl_exists(inode, SGI_ACL_FILE);
+}
+
+int
+posix_acl_default_exists(struct inode *inode)
+{
+ if (!S_ISDIR(inode->i_mode))
+ return 0;
+ return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
+}
+
+/*
+ * No need for i_mutex because the inode is not yet exposed to the VFS.
+ */
+int
+xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
+{
+ struct posix_acl *clone;
+ mode_t mode;
+ int error = 0, inherit = 0;
+
+ if (S_ISDIR(inode->i_mode)) {
+ error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl);
+ if (error)
+ return error;
+ }
+
+ clone = posix_acl_clone(default_acl, GFP_KERNEL);
+ if (!clone)
+ return -ENOMEM;
+
+ mode = inode->i_mode;
+ error = posix_acl_create_masq(clone, &mode);
+ if (error < 0)
+ goto out_release_clone;
+
+ /*
+ * If posix_acl_create_masq returns a positive value we need to
+ * inherit a permission that can't be represented using the Unix
+ * mode bits and we actually need to set an ACL.
+ */
+ if (error > 0)
+ inherit = 1;
+
+ error = xfs_set_mode(inode, mode);
+ if (error)
+ goto out_release_clone;
+
+ if (inherit)
+ error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+
+ out_release_clone:
+ posix_acl_release(clone);
+ return error;
+}
+
+int
+xfs_acl_chmod(struct inode *inode)
+{
+ struct posix_acl *acl, *clone;
+ int error;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+
+ clone = posix_acl_clone(acl, GFP_KERNEL);
+ posix_acl_release(acl);
+ if (!clone)
+ return -ENOMEM;
+
+ error = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!error)
+ error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
+
+ posix_acl_release(clone);
+ return error;
+}
+
+void
+xfs_inode_init_acls(struct xfs_inode *ip)
+{
+ /*
+ * No need for locking, inode is not live yet.
+ */
+ ip->i_acl = XFS_ACL_NOT_CACHED;
+ ip->i_default_acl = XFS_ACL_NOT_CACHED;
+}
+
+void
+xfs_inode_clear_acls(struct xfs_inode *ip)
+{
+ /*
+ * No need for locking here, the inode is not live anymore
+ * and just about to be freed.
+ */
+ if (ip->i_acl != XFS_ACL_NOT_CACHED)
+ posix_acl_release(ip->i_acl);
+ if (ip->i_default_acl != XFS_ACL_NOT_CACHED)
+ posix_acl_release(ip->i_default_acl);
+}
+
+
+/*
+ * System xattr handlers.
+ *
+ * Currently Posix ACLs are the only system namespace extended attribute
+ * handlers supported by XFS, so we just implement the handlers here.
+ * If we ever support other system extended attributes this will need
+ * some refactoring.
+ */
+
+static int
+xfs_decode_acl(const char *name)
+{
+ if (strcmp(name, "posix_acl_access") == 0)
+ return ACL_TYPE_ACCESS;
+ else if (strcmp(name, "posix_acl_default") == 0)
+ return ACL_TYPE_DEFAULT;
+ return -EINVAL;
+}
+
+static int
+xfs_xattr_system_get(struct inode *inode, const char *name,
+ void *value, size_t size)
+{
+ struct posix_acl *acl;
+ int type, error;
+
+ type = xfs_decode_acl(name);
+ if (type < 0)
+ return type;
+
+ acl = xfs_get_acl(inode, type);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ if (acl == NULL)
+ return -ENODATA;
+
+ error = posix_acl_to_xattr(acl, value, size);
+ posix_acl_release(acl);
+
+ return error;
+}
+
+static int
+xfs_xattr_system_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct posix_acl *acl = NULL;
+ int error = 0, type;
+
+ type = xfs_decode_acl(name);
+ if (type < 0)
+ return type;
+ if (flags & XATTR_CREATE)
+ return -EINVAL;
+ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+ return value ? -EACCES : 0;
+ if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
+ return -EPERM;
+
+ if (!value)
+ goto set_acl;
+
+ acl = posix_acl_from_xattr(value, size);
+ if (!acl) {
+ /*
+ * acl_set_file(3) may request that we set default ACLs with
+ * zero length -- defend (gracefully) against that here.
+ */
+ goto out;
+ }
+ if (IS_ERR(acl)) {
+ error = PTR_ERR(acl);
+ goto out;
+ }
+
+ error = posix_acl_valid(acl);
+ if (error)
+ goto out_release;
+
+ error = -EINVAL;
+ if (acl->a_count > XFS_ACL_MAX_ENTRIES)
+ goto out_release;
+
+ if (type == ACL_TYPE_ACCESS) {
+ mode_t mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+
+ if (error <= 0) {
+ posix_acl_release(acl);
+ acl = NULL;
+
+ if (error < 0)
+ return error;
+ }
+
+ error = xfs_set_mode(inode, mode);
+ if (error)
+ goto out_release;
+ }
+
+ set_acl:
+ error = xfs_set_acl(inode, type, acl);
+ out_release:
+ posix_acl_release(acl);
+ out:
+ return error;
+}
+
+struct xattr_handler xfs_xattr_system_handler = {
+ .prefix = XATTR_SYSTEM_PREFIX,
+ .get = xfs_xattr_system_get,
+ .set = xfs_xattr_system_set,
+};
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 34eaab608e6..5bb523d7f37 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -41,7 +41,6 @@
#include "xfs_itable.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
@@ -899,7 +898,8 @@ xfs_ioctl_setattr(
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
unsigned int lock_flags = 0;
- struct xfs_dquot *udqp = NULL, *gdqp = NULL;
+ struct xfs_dquot *udqp = NULL;
+ struct xfs_dquot *gdqp = NULL;
struct xfs_dquot *olddquot = NULL;
int code;
@@ -919,7 +919,7 @@ xfs_ioctl_setattr(
* because the i_*dquot fields will get updated anyway.
*/
if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
- code = XFS_QM_DQVOPALLOC(mp, ip, ip->i_d.di_uid,
+ code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
ip->i_d.di_gid, fa->fsx_projid,
XFS_QMOPT_PQUOTA, &udqp, &gdqp);
if (code)
@@ -954,10 +954,11 @@ xfs_ioctl_setattr(
* Do a quota reservation only if projid is actually going to change.
*/
if (mask & FSX_PROJID) {
- if (XFS_IS_PQUOTA_ON(mp) &&
+ if (XFS_IS_QUOTA_RUNNING(mp) &&
+ XFS_IS_PQUOTA_ON(mp) &&
ip->i_d.di_projid != fa->fsx_projid) {
ASSERT(tp);
- code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
capable(CAP_FOWNER) ?
XFS_QMOPT_FORCE_RES : 0);
if (code) /* out of quota */
@@ -1059,8 +1060,8 @@ xfs_ioctl_setattr(
* in the transaction.
*/
if (ip->i_d.di_projid != fa->fsx_projid) {
- if (XFS_IS_PQUOTA_ON(mp)) {
- olddquot = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
+ olddquot = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
ip->i_d.di_projid = fa->fsx_projid;
@@ -1106,9 +1107,9 @@ xfs_ioctl_setattr(
/*
* Release any dquot(s) the inode had kept before chown.
*/
- XFS_QM_DQRELE(mp, olddquot);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(olddquot);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (code)
return code;
@@ -1122,8 +1123,8 @@ xfs_ioctl_setattr(
return 0;
error_return:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
xfs_trans_cancel(tp, 0);
if (lock_flags)
xfs_iunlock(ip, lock_flags);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 6075382336d..58973bb4603 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -17,6 +17,7 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
+#include "xfs_acl.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
@@ -51,6 +52,7 @@
#include <linux/capability.h>
#include <linux/xattr.h>
#include <linux/namei.h>
+#include <linux/posix_acl.h>
#include <linux/security.h>
#include <linux/falloc.h>
#include <linux/fiemap.h>
@@ -202,9 +204,8 @@ xfs_vn_mknod(
{
struct inode *inode;
struct xfs_inode *ip = NULL;
- xfs_acl_t *default_acl = NULL;
+ struct posix_acl *default_acl = NULL;
struct xfs_name name;
- int (*test_default_acl)(struct inode *) = _ACL_DEFAULT_EXISTS;
int error;
/*
@@ -219,18 +220,14 @@ xfs_vn_mknod(
rdev = 0;
}
- if (test_default_acl && test_default_acl(dir)) {
- if (!_ACL_ALLOC(default_acl)) {
- return -ENOMEM;
- }
- if (!_ACL_GET_DEFAULT(dir, default_acl)) {
- _ACL_FREE(default_acl);
- default_acl = NULL;
- }
- }
+ if (IS_POSIXACL(dir)) {
+ default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
+ if (IS_ERR(default_acl))
+ return -PTR_ERR(default_acl);
- if (IS_POSIXACL(dir) && !default_acl)
- mode &= ~current_umask();
+ if (!default_acl)
+ mode &= ~current_umask();
+ }
xfs_dentry_to_name(&name, dentry);
error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
@@ -244,10 +241,10 @@ xfs_vn_mknod(
goto out_cleanup_inode;
if (default_acl) {
- error = _ACL_INHERIT(inode, mode, default_acl);
+ error = -xfs_inherit_acl(inode, default_acl);
if (unlikely(error))
goto out_cleanup_inode;
- _ACL_FREE(default_acl);
+ posix_acl_release(default_acl);
}
@@ -257,8 +254,7 @@ xfs_vn_mknod(
out_cleanup_inode:
xfs_cleanup_inode(dir, inode, dentry);
out_free_acl:
- if (default_acl)
- _ACL_FREE(default_acl);
+ posix_acl_release(default_acl);
return -error;
}
@@ -488,26 +484,6 @@ xfs_vn_put_link(
kfree(s);
}
-#ifdef CONFIG_XFS_POSIX_ACL
-STATIC int
-xfs_check_acl(
- struct inode *inode,
- int mask)
-{
- struct xfs_inode *ip = XFS_I(inode);
- int error;
-
- xfs_itrace_entry(ip);
-
- if (XFS_IFORK_Q(ip)) {
- error = xfs_acl_iaccess(ip, mask, NULL);
- if (error != -1)
- return -error;
- }
-
- return -EAGAIN;
-}
-
STATIC int
xfs_vn_permission(
struct inode *inode,
@@ -515,9 +491,6 @@ xfs_vn_permission(
{
return generic_permission(inode, mask, xfs_check_acl);
}
-#else
-#define xfs_vn_permission NULL
-#endif
STATIC int
xfs_vn_getattr(
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 9142192ccbe..7078974a6ee 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_inode_item.h"
#include "xfs_buf_item.h"
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 94d9a633d3d..cb6e2cca214 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -50,9 +50,11 @@ xfs_fs_quota_sync(
{
struct xfs_mount *mp = XFS_M(sb);
+ if (sb->s_flags & MS_RDONLY)
+ return -EROFS;
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- return -xfs_sync_inodes(mp, SYNC_DELWRI);
+ return -xfs_sync_data(mp, 0);
}
STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index bb685269f83..2e09efbca8d 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -43,7 +43,6 @@
#include "xfs_itable.h"
#include "xfs_fsops.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -405,6 +404,14 @@ xfs_parseargs(
return EINVAL;
}
+#ifndef CONFIG_XFS_QUOTA
+ if (XFS_IS_QUOTA_RUNNING(mp)) {
+ cmn_err(CE_WARN,
+ "XFS: quota support not available in this kernel.");
+ return EINVAL;
+ }
+#endif
+
if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
(mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
cmn_err(CE_WARN,
@@ -1063,7 +1070,18 @@ xfs_fs_put_super(
int unmount_event_flags = 0;
xfs_syncd_stop(mp);
- xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI);
+
+ if (!(sb->s_flags & MS_RDONLY)) {
+ /*
+ * XXX(hch): this should be SYNC_WAIT.
+ *
+ * Or more likely not needed at all because the VFS is already
+ * calling ->sync_fs after shutting down all filestem
+ * operations and just before calling ->put_super.
+ */
+ xfs_sync_data(mp, 0);
+ xfs_sync_attr(mp, 0);
+ }
#ifdef HAVE_DMAPI
if (mp->m_flags & XFS_MOUNT_DMAPI) {
@@ -1098,21 +1116,11 @@ xfs_fs_put_super(
xfs_freesb(mp);
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- xfs_qmops_put(mp);
xfs_dmops_put(mp);
xfs_free_fsname(mp);
kfree(mp);
}
-STATIC void
-xfs_fs_write_super(
- struct super_block *sb)
-{
- if (!(sb->s_flags & MS_RDONLY))
- xfs_sync_fsdata(XFS_M(sb), 0);
- sb->s_dirt = 0;
-}
-
STATIC int
xfs_fs_sync_super(
struct super_block *sb,
@@ -1137,7 +1145,6 @@ xfs_fs_sync_super(
error = xfs_quiesce_data(mp);
else
error = xfs_sync_fsdata(mp, 0);
- sb->s_dirt = 0;
if (unlikely(laptop_mode)) {
int prev_sync_seq = mp->m_sync_seq;
@@ -1168,6 +1175,7 @@ xfs_fs_statfs(
{
struct xfs_mount *mp = XFS_M(dentry->d_sb);
xfs_sb_t *sbp = &mp->m_sb;
+ struct xfs_inode *ip = XFS_I(dentry->d_inode);
__uint64_t fakeinos, id;
xfs_extlen_t lsize;
@@ -1196,7 +1204,10 @@ xfs_fs_statfs(
statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
spin_unlock(&mp->m_sb_lock);
- XFS_QM_DQSTATVFS(XFS_I(dentry->d_inode), statp);
+ if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
+ ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
+ (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
+ xfs_qm_statvfs(ip, statp);
return 0;
}
@@ -1404,16 +1415,13 @@ xfs_fs_fill_super(
error = xfs_dmops_get(mp);
if (error)
goto out_free_fsname;
- error = xfs_qmops_get(mp);
- if (error)
- goto out_put_dmops;
if (silent)
flags |= XFS_MFSI_QUIET;
error = xfs_open_devices(mp);
if (error)
- goto out_put_qmops;
+ goto out_put_dmops;
if (xfs_icsb_init_counters(mp))
mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
@@ -1443,7 +1451,6 @@ xfs_fs_fill_super(
XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
- sb->s_dirt = 1;
sb->s_magic = XFS_SB_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1482,8 +1489,6 @@ xfs_fs_fill_super(
out_destroy_counters:
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- out_put_qmops:
- xfs_qmops_put(mp);
out_put_dmops:
xfs_dmops_put(mp);
out_free_fsname:
@@ -1533,7 +1538,6 @@ static struct super_operations xfs_super_operations = {
.write_inode = xfs_fs_write_inode,
.clear_inode = xfs_fs_clear_inode,
.put_super = xfs_fs_put_super,
- .write_super = xfs_fs_write_super,
.sync_fs = xfs_fs_sync_super,
.freeze_fs = xfs_fs_freeze,
.statfs = xfs_fs_statfs,
@@ -1718,18 +1722,8 @@ xfs_init_zones(void)
if (!xfs_ili_zone)
goto out_destroy_inode_zone;
-#ifdef CONFIG_XFS_POSIX_ACL
- xfs_acl_zone = kmem_zone_init(sizeof(xfs_acl_t), "xfs_acl");
- if (!xfs_acl_zone)
- goto out_destroy_ili_zone;
-#endif
-
return 0;
-#ifdef CONFIG_XFS_POSIX_ACL
- out_destroy_ili_zone:
-#endif
- kmem_zone_destroy(xfs_ili_zone);
out_destroy_inode_zone:
kmem_zone_destroy(xfs_inode_zone);
out_destroy_efi_zone:
@@ -1763,9 +1757,6 @@ xfs_init_zones(void)
STATIC void
xfs_destroy_zones(void)
{
-#ifdef CONFIG_XFS_POSIX_ACL
- kmem_zone_destroy(xfs_acl_zone);
-#endif
kmem_zone_destroy(xfs_ili_zone);
kmem_zone_destroy(xfs_inode_zone);
kmem_zone_destroy(xfs_efi_zone);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index f7ba76633c2..b619d6b8ca4 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -43,166 +43,267 @@
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_rw.h"
+#include "xfs_quota.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
-/*
- * Sync all the inodes in the given AG according to the
- * direction given by the flags.
- */
-STATIC int
-xfs_sync_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- int flags)
-{
- xfs_perag_t *pag = &mp->m_perag[ag];
- int nr_found;
- uint32_t first_index = 0;
- int error = 0;
- int last_error = 0;
- do {
- struct inode *inode;
- xfs_inode_t *ip = NULL;
- int lock_flags = XFS_ILOCK_SHARED;
+STATIC xfs_inode_t *
+xfs_inode_ag_lookup(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ uint32_t *first_index,
+ int tag)
+{
+ int nr_found;
+ struct xfs_inode *ip;
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
+ /*
+ * use a gang lookup to find the next inode in the tree
+ * as the tree is sparse and a gang lookup walks to find
+ * the number of objects requested.
+ */
+ read_lock(&pag->pag_ici_lock);
+ if (tag == XFS_ICI_NO_TAG) {
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void**)&ip, first_index, 1);
+ (void **)&ip, *first_index, 1);
+ } else {
+ nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
+ (void **)&ip, *first_index, 1, tag);
+ }
+ if (!nr_found)
+ goto unlock;
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ goto unlock;
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
+ return ip;
- /* nothing to sync during shutdown */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- read_unlock(&pag->pag_ici_lock);
- return 0;
- }
+unlock:
+ read_unlock(&pag->pag_ici_lock);
+ return NULL;
+}
- /*
- * If we can't get a reference on the inode, it must be
- * in reclaim. Leave it for the reclaim code to flush.
- */
- inode = VFS_I(ip);
- if (!igrab(inode)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- read_unlock(&pag->pag_ici_lock);
+STATIC int
+xfs_inode_ag_walk(
+ struct xfs_mount *mp,
+ xfs_agnumber_t ag,
+ int (*execute)(struct xfs_inode *ip,
+ struct xfs_perag *pag, int flags),
+ int flags,
+ int tag)
+{
+ struct xfs_perag *pag = &mp->m_perag[ag];
+ uint32_t first_index;
+ int last_error = 0;
+ int skipped;
- /* avoid new or bad inodes */
- if (is_bad_inode(inode) ||
- xfs_iflags_test(ip, XFS_INEW)) {
- IRELE(ip);
- continue;
- }
+restart:
+ skipped = 0;
+ first_index = 0;
+ do {
+ int error = 0;
+ xfs_inode_t *ip;
- /*
- * If we have to flush data or wait for I/O completion
- * we need to hold the iolock.
- */
- if (flags & SYNC_DELWRI) {
- if (VN_DIRTY(inode)) {
- if (flags & SYNC_TRYLOCK) {
- if (xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
- lock_flags |= XFS_IOLOCK_SHARED;
- } else {
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- lock_flags |= XFS_IOLOCK_SHARED;
- }
- if (lock_flags & XFS_IOLOCK_SHARED) {
- error = xfs_flush_pages(ip, 0, -1,
- (flags & SYNC_WAIT) ? 0
- : XFS_B_ASYNC,
- FI_NONE);
- }
- }
- if (VN_CACHED(inode) && (flags & SYNC_IOWAIT))
- xfs_ioend_wait(ip);
- }
- xfs_ilock(ip, XFS_ILOCK_SHARED);
-
- if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) {
- if (flags & SYNC_WAIT) {
- xfs_iflock(ip);
- if (!xfs_inode_clean(ip))
- error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
- else
- xfs_ifunlock(ip);
- } else if (xfs_iflock_nowait(ip)) {
- if (!xfs_inode_clean(ip))
- error = xfs_iflush(ip, XFS_IFLUSH_DELWRI);
- else
- xfs_ifunlock(ip);
- }
- }
- xfs_iput(ip, lock_flags);
+ ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
+ if (!ip)
+ break;
+ error = execute(ip, pag, flags);
+ if (error == EAGAIN) {
+ skipped++;
+ continue;
+ }
if (error)
last_error = error;
/*
* bail out if the filesystem is corrupted.
*/
if (error == EFSCORRUPTED)
- return XFS_ERROR(error);
+ break;
- } while (nr_found);
+ } while (1);
+
+ if (skipped) {
+ delay(1);
+ goto restart;
+ }
+ xfs_put_perag(mp, pag);
return last_error;
}
int
-xfs_sync_inodes(
- xfs_mount_t *mp,
- int flags)
+xfs_inode_ag_iterator(
+ struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip,
+ struct xfs_perag *pag, int flags),
+ int flags,
+ int tag)
{
- int error;
- int last_error;
- int i;
- int lflags = XFS_LOG_FORCE;
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return 0;
- error = 0;
- last_error = 0;
+ for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+ if (!mp->m_perag[ag].pag_ici_init)
+ continue;
+ error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+ if (error) {
+ last_error = error;
+ if (error == EFSCORRUPTED)
+ break;
+ }
+ }
+ return XFS_ERROR(last_error);
+}
+
+/* must be called with pag_ici_lock held and releases it */
+int
+xfs_sync_inode_valid(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag)
+{
+ struct inode *inode = VFS_I(ip);
+
+ /* nothing to sync during shutdown */
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ read_unlock(&pag->pag_ici_lock);
+ return EFSCORRUPTED;
+ }
+
+ /*
+ * If we can't get a reference on the inode, it must be in reclaim.
+ * Leave it for the reclaim code to flush. Also avoid inodes that
+ * haven't been fully initialised.
+ */
+ if (!igrab(inode)) {
+ read_unlock(&pag->pag_ici_lock);
+ return ENOENT;
+ }
+ read_unlock(&pag->pag_ici_lock);
+
+ if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+ IRELE(ip);
+ return ENOENT;
+ }
+
+ return 0;
+}
+
+STATIC int
+xfs_sync_inode_data(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ struct inode *inode = VFS_I(ip);
+ struct address_space *mapping = inode->i_mapping;
+ int error = 0;
+
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
+
+ if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ goto out_wait;
+
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
+ if (flags & SYNC_TRYLOCK)
+ goto out_wait;
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
+
+ error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
+ 0 : XFS_B_ASYNC, FI_NONE);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ out_wait:
if (flags & SYNC_WAIT)
- lflags |= XFS_LOG_SYNC;
+ xfs_ioend_wait(ip);
+ IRELE(ip);
+ return error;
+}
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- error = xfs_sync_inodes_ag(mp, i, flags);
- if (error)
- last_error = error;
- if (error == EFSCORRUPTED)
- break;
+STATIC int
+xfs_sync_inode_attr(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ int error = 0;
+
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
+
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ if (xfs_inode_clean(ip))
+ goto out_unlock;
+ if (!xfs_iflock_nowait(ip)) {
+ if (!(flags & SYNC_WAIT))
+ goto out_unlock;
+ xfs_iflock(ip);
}
- if (flags & SYNC_DELWRI)
- xfs_log_force(mp, 0, lflags);
- return XFS_ERROR(last_error);
+ if (xfs_inode_clean(ip)) {
+ xfs_ifunlock(ip);
+ goto out_unlock;
+ }
+
+ error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
+ XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
+
+ out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ IRELE(ip);
+ return error;
+}
+
+/*
+ * Write out pagecache data for the whole filesystem.
+ */
+int
+xfs_sync_data(
+ struct xfs_mount *mp,
+ int flags)
+{
+ int error;
+
+ ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
+
+ error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
+ XFS_ICI_NO_TAG);
+ if (error)
+ return XFS_ERROR(error);
+
+ xfs_log_force(mp, 0,
+ (flags & SYNC_WAIT) ?
+ XFS_LOG_FORCE | XFS_LOG_SYNC :
+ XFS_LOG_FORCE);
+ return 0;
+}
+
+/*
+ * Write out inode metadata (attributes) for the whole filesystem.
+ */
+int
+xfs_sync_attr(
+ struct xfs_mount *mp,
+ int flags)
+{
+ ASSERT((flags & ~SYNC_WAIT) == 0);
+
+ return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
+ XFS_ICI_NO_TAG);
}
STATIC int
@@ -252,7 +353,7 @@ xfs_sync_fsdata(
* If this is xfssyncd() then only sync the superblock if we can
* lock it without sleeping and it is not pinned.
*/
- if (flags & SYNC_BDFLUSH) {
+ if (flags & SYNC_TRYLOCK) {
ASSERT(!(flags & SYNC_WAIT));
bp = xfs_getsb(mp, XFS_BUF_TRYLOCK);
@@ -316,13 +417,13 @@ xfs_quiesce_data(
int error;
/* push non-blocking */
- xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH);
- XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
+ xfs_sync_data(mp, 0);
+ xfs_qm_sync(mp, SYNC_TRYLOCK);
xfs_filestream_flush(mp);
/* push and block */
- xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT);
- XFS_QM_DQSYNC(mp, SYNC_WAIT);
+ xfs_sync_data(mp, SYNC_WAIT);
+ xfs_qm_sync(mp, SYNC_WAIT);
/* write superblock and hoover up shutdown errors */
error = xfs_sync_fsdata(mp, 0);
@@ -341,7 +442,7 @@ xfs_quiesce_fs(
int count = 0, pincount;
xfs_flush_buftarg(mp->m_ddev_targp, 0);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
/*
* This loop must run at least twice. The first instance of the loop
@@ -350,7 +451,7 @@ xfs_quiesce_fs(
* logged before we can write the unmount record.
*/
do {
- xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT);
+ xfs_sync_attr(mp, SYNC_WAIT);
pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
if (!pincount) {
delay(50);
@@ -433,8 +534,8 @@ xfs_flush_inodes_work(
void *arg)
{
struct inode *inode = arg;
- xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK);
- xfs_sync_inodes(mp, SYNC_DELWRI | SYNC_TRYLOCK | SYNC_IOWAIT);
+ xfs_sync_data(mp, SYNC_TRYLOCK);
+ xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
iput(inode);
}
@@ -465,10 +566,10 @@ xfs_sync_worker(
if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
/* dgc: errors ignored here */
- error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH);
- error = xfs_sync_fsdata(mp, SYNC_BDFLUSH);
+ error = xfs_qm_sync(mp, SYNC_TRYLOCK);
+ error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
if (xfs_log_need_covered(mp))
error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE);
}
@@ -569,7 +670,7 @@ xfs_reclaim_inode(
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- return 1;
+ return -EAGAIN;
}
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
@@ -654,101 +755,27 @@ xfs_inode_clear_reclaim_tag(
xfs_put_perag(mp, pag);
}
-
-STATIC void
-xfs_reclaim_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- int noblock,
- int mode)
+STATIC int
+xfs_reclaim_inode_now(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
{
- xfs_inode_t *ip = NULL;
- xfs_perag_t *pag = &mp->m_perag[ag];
- int nr_found;
- uint32_t first_index;
- int skipped;
-
-restart:
- first_index = 0;
- skipped = 0;
- do {
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
- nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
- (void**)&ip, first_index, 1,
- XFS_ICI_RECLAIM_TAG);
-
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /* ignore if already under reclaim */
- if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
-
- if (noblock) {
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- if (xfs_ipincount(ip) ||
- !xfs_iflock_nowait(ip)) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
- }
+ /* ignore if already under reclaim */
+ if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
read_unlock(&pag->pag_ici_lock);
-
- /*
- * hmmm - this is an inode already in reclaim. Do
- * we even bother catching it here?
- */
- if (xfs_reclaim_inode(ip, noblock, mode))
- skipped++;
- } while (nr_found);
-
- if (skipped) {
- delay(1);
- goto restart;
+ return 0;
}
- return;
+ read_unlock(&pag->pag_ici_lock);
+ return xfs_reclaim_inode(ip, 0, flags);
}
int
xfs_reclaim_inodes(
xfs_mount_t *mp,
- int noblock,
int mode)
{
- int i;
-
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- xfs_reclaim_inodes_ag(mp, i, noblock, mode);
- }
- return 0;
+ return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
+ XFS_ICI_RECLAIM_TAG);
}
-
-
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index 308d5bf6dfb..2a10301c99c 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -29,17 +29,14 @@ typedef struct xfs_sync_work {
struct completion *w_completion;
} xfs_sync_work_t;
-#define SYNC_ATTR 0x0001 /* sync attributes */
-#define SYNC_DELWRI 0x0002 /* look at delayed writes */
-#define SYNC_WAIT 0x0004 /* wait for i/o to complete */
-#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */
-#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */
-#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */
+#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
+#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
int xfs_syncd_init(struct xfs_mount *mp);
void xfs_syncd_stop(struct xfs_mount *mp);
-int xfs_sync_inodes(struct xfs_mount *mp, int flags);
+int xfs_sync_attr(struct xfs_mount *mp, int flags);
+int xfs_sync_data(struct xfs_mount *mp, int flags);
int xfs_sync_fsdata(struct xfs_mount *mp, int flags);
int xfs_quiesce_data(struct xfs_mount *mp);
@@ -48,10 +45,16 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode);
-int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode);
+int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
+
+int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_inode_ag_iterator(struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
+ int flags, int tag);
+
#endif
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 964621fde6e..497c7fb75cc 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -29,67 +29,6 @@
#include <linux/xattr.h>
-/*
- * ACL handling. Should eventually be moved into xfs_acl.c
- */
-
-static int
-xfs_decode_acl(const char *name)
-{
- if (strcmp(name, "posix_acl_access") == 0)
- return _ACL_TYPE_ACCESS;
- else if (strcmp(name, "posix_acl_default") == 0)
- return _ACL_TYPE_DEFAULT;
- return -EINVAL;
-}
-
-/*
- * Get system extended attributes which at the moment only
- * includes Posix ACLs.
- */
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- int acl;
-
- acl = xfs_decode_acl(name);
- if (acl < 0)
- return acl;
-
- return xfs_acl_vget(inode, buffer, size, acl);
-}
-
-static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- int acl;
-
- acl = xfs_decode_acl(name);
- if (acl < 0)
- return acl;
- if (flags & XATTR_CREATE)
- return -EINVAL;
-
- if (!value)
- return xfs_acl_vremove(inode, acl);
-
- return xfs_acl_vset(inode, (void *)value, size, acl);
-}
-
-static struct xattr_handler xfs_xattr_system_handler = {
- .prefix = XATTR_SYSTEM_PREFIX,
- .get = xfs_xattr_system_get,
- .set = xfs_xattr_system_set,
-};
-
-
-/*
- * Real xattr handling. The only difference between the namespaces is
- * a flag passed to the low-level attr code.
- */
-
static int
__xfs_xattr_get(struct inode *inode, const char *name,
void *value, size_t size, int xflags)
@@ -199,7 +138,9 @@ struct xattr_handler *xfs_xattr_handlers[] = {
&xfs_xattr_user_handler,
&xfs_xattr_trusted_handler,
&xfs_xattr_security_handler,
+#ifdef CONFIG_XFS_POSIX_ACL
&xfs_xattr_system_handler,
+#endif
NULL
};
@@ -310,7 +251,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
/*
* Then add the two synthetic ACL attributes.
*/
- if (xfs_acl_vhasacl_access(inode)) {
+ if (posix_acl_access_exists(inode)) {
error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
strlen(POSIX_ACL_XATTR_ACCESS) + 1,
data, size, &context.count);
@@ -318,7 +259,7 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
return error;
}
- if (xfs_acl_vhasacl_default(inode)) {
+ if (posix_acl_default_exists(inode)) {
error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
data, size, &context.count);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index e4babcc6342..2f3f2229eaa 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -1194,7 +1193,9 @@ void
xfs_qm_dqrele(
xfs_dquot_t *dqp)
{
- ASSERT(dqp);
+ if (!dqp)
+ return;
+
xfs_dqtrace_entry(dqp, "DQRELE");
xfs_dqlock(dqp);
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
index de0f402ddb4..6533ead9b88 100644
--- a/fs/xfs/quota/xfs_dquot.h
+++ b/fs/xfs/quota/xfs_dquot.h
@@ -181,7 +181,6 @@ extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
xfs_dqid_t, uint, uint, xfs_dquot_t **);
extern void xfs_qm_dqput(xfs_dquot_t *);
-extern void xfs_qm_dqrele(xfs_dquot_t *);
extern void xfs_dqlock(xfs_dquot_t *);
extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
extern void xfs_dqunlock(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
index 1728f6a7c4f..d0d4a9a0bbd 100644
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ b/fs/xfs/quota/xfs_dquot_item.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 5b6695049e0..45b1bfef738 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_bmap.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -287,11 +286,13 @@ xfs_qm_rele_quotafs_ref(
* Just destroy the quotainfo structure.
*/
void
-xfs_qm_unmount_quotadestroy(
- xfs_mount_t *mp)
+xfs_qm_unmount(
+ struct xfs_mount *mp)
{
- if (mp->m_quotainfo)
+ if (mp->m_quotainfo) {
+ xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
xfs_qm_destroy_quotainfo(mp);
+ }
}
@@ -385,8 +386,13 @@ xfs_qm_mount_quotas(
if (error) {
xfs_fs_cmn_err(CE_WARN, mp,
"Failed to initialize disk quotas.");
+ return;
}
- return;
+
+#ifdef QUOTADEBUG
+ if (XFS_IS_QUOTA_ON(mp))
+ xfs_qm_internalqcheck(mp);
+#endif
}
/*
@@ -774,12 +780,11 @@ xfs_qm_dqattach_grouphint(
* Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
* into account.
* If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
* Inode may get unlocked and relocked in here, and the caller must deal with
* the consequences.
*/
int
-xfs_qm_dqattach(
+xfs_qm_dqattach_locked(
xfs_inode_t *ip,
uint flags)
{
@@ -787,17 +792,14 @@ xfs_qm_dqattach(
uint nquotas = 0;
int error = 0;
- if ((! XFS_IS_QUOTA_ON(mp)) ||
- (! XFS_NOT_DQATTACHED(mp, ip)) ||
- (ip->i_ino == mp->m_sb.sb_uquotino) ||
- (ip->i_ino == mp->m_sb.sb_gquotino))
+ if (!XFS_IS_QUOTA_RUNNING(mp) ||
+ !XFS_IS_QUOTA_ON(mp) ||
+ !XFS_NOT_DQATTACHED(mp, ip) ||
+ ip->i_ino == mp->m_sb.sb_uquotino ||
+ ip->i_ino == mp->m_sb.sb_gquotino)
return 0;
- ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
- xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
- if (! (flags & XFS_QMOPT_ILOCKED))
- xfs_ilock(ip, XFS_ILOCK_EXCL);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (XFS_IS_UQUOTA_ON(mp)) {
error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
@@ -849,8 +851,7 @@ xfs_qm_dqattach(
xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
}
- done:
-
+ done:
#ifdef QUOTADEBUG
if (! error) {
if (XFS_IS_UQUOTA_ON(mp))
@@ -858,15 +859,22 @@ xfs_qm_dqattach(
if (XFS_IS_OQUOTA_ON(mp))
ASSERT(ip->i_gdquot);
}
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
#endif
+ return error;
+}
- if (! (flags & XFS_QMOPT_ILOCKED))
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
+int
+xfs_qm_dqattach(
+ struct xfs_inode *ip,
+ uint flags)
+{
+ int error;
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ error = xfs_qm_dqattach_locked(ip, flags);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
-#ifdef QUOTADEBUG
- else
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
return error;
}
@@ -896,11 +904,6 @@ xfs_qm_dqdetach(
}
}
-/*
- * This is called to sync quotas. We can be told to use non-blocking
- * semantics by either the SYNC_BDFLUSH flag or the absence of the
- * SYNC_WAIT flag.
- */
int
xfs_qm_sync(
xfs_mount_t *mp,
@@ -909,17 +912,13 @@ xfs_qm_sync(
int recl, restarts;
xfs_dquot_t *dqp;
uint flush_flags;
- boolean_t nowait;
int error;
- if (! XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
+ flush_flags = (flags & SYNC_WAIT) ? XFS_QMOPT_SYNC : XFS_QMOPT_DELWRI;
restarts = 0;
- /*
- * We won't block unless we are asked to.
- */
- nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
again:
xfs_qm_mplist_lock(mp);
@@ -939,18 +938,10 @@ xfs_qm_sync(
* don't 'seem' to be dirty. ie. don't acquire dqlock.
* This is very similar to what xfs_sync does with inodes.
*/
- if (flags & SYNC_BDFLUSH) {
- if (! XFS_DQ_IS_DIRTY(dqp))
+ if (flags & SYNC_TRYLOCK) {
+ if (!XFS_DQ_IS_DIRTY(dqp))
continue;
- }
-
- if (nowait) {
- /*
- * Try to acquire the dquot lock. We are NOT out of
- * lock order, but we just don't want to wait for this
- * lock, unless somebody wanted us to.
- */
- if (! xfs_qm_dqlock_nowait(dqp))
+ if (!xfs_qm_dqlock_nowait(dqp))
continue;
} else {
xfs_dqlock(dqp);
@@ -967,7 +958,7 @@ xfs_qm_sync(
/* XXX a sentinel would be better */
recl = XFS_QI_MPLRECLAIMS(mp);
if (!xfs_dqflock_nowait(dqp)) {
- if (nowait) {
+ if (flags & SYNC_TRYLOCK) {
xfs_dqunlock(dqp);
continue;
}
@@ -985,7 +976,6 @@ xfs_qm_sync(
* Let go of the mplist lock. We don't want to hold it
* across a disk write
*/
- flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
xfs_qm_mplist_unlock(mp);
xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
error = xfs_qm_dqflush(dqp, flush_flags);
@@ -2319,20 +2309,20 @@ xfs_qm_write_sb_changes(
*/
int
xfs_qm_vop_dqalloc(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- uid_t uid,
- gid_t gid,
- prid_t prid,
- uint flags,
- xfs_dquot_t **O_udqpp,
- xfs_dquot_t **O_gdqpp)
+ struct xfs_inode *ip,
+ uid_t uid,
+ gid_t gid,
+ prid_t prid,
+ uint flags,
+ struct xfs_dquot **O_udqpp,
+ struct xfs_dquot **O_gdqpp)
{
- int error;
- xfs_dquot_t *uq, *gq;
- uint lockflags;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_dquot *uq, *gq;
+ int error;
+ uint lockflags;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
lockflags = XFS_ILOCK_EXCL;
@@ -2346,8 +2336,8 @@ xfs_qm_vop_dqalloc(
* if necessary. The dquot(s) will not be locked.
*/
if (XFS_NOT_DQATTACHED(mp, ip)) {
- if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
- XFS_QMOPT_ILOCKED))) {
+ error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
+ if (error) {
xfs_iunlock(ip, lockflags);
return error;
}
@@ -2469,6 +2459,7 @@ xfs_qm_vop_chown(
uint bfield = XFS_IS_REALTIME_INODE(ip) ?
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
+
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
@@ -2508,13 +2499,13 @@ xfs_qm_vop_chown_reserve(
xfs_dquot_t *gdqp,
uint flags)
{
- int error;
- xfs_mount_t *mp;
+ xfs_mount_t *mp = ip->i_mount;
uint delblks, blkflags, prjflags = 0;
xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
+ int error;
+
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- mp = ip->i_mount;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
delblks = ip->i_delayed_blks;
@@ -2582,28 +2573,23 @@ xfs_qm_vop_chown_reserve(
int
xfs_qm_vop_rename_dqattach(
- xfs_inode_t **i_tab)
+ struct xfs_inode **i_tab)
{
- xfs_inode_t *ip;
- int i;
- int error;
+ struct xfs_mount *mp = i_tab[0]->i_mount;
+ int i;
- ip = i_tab[0];
-
- if (! XFS_IS_QUOTA_ON(ip->i_mount))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
- if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
- }
- for (i = 1; (i < 4 && i_tab[i]); i++) {
+ for (i = 0; (i < 4 && i_tab[i]); i++) {
+ struct xfs_inode *ip = i_tab[i];
+ int error;
+
/*
* Watch out for duplicate entries in the table.
*/
- if ((ip = i_tab[i]) != i_tab[i-1]) {
- if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
+ if (i == 0 || ip != i_tab[i-1]) {
+ if (XFS_NOT_DQATTACHED(mp, ip)) {
error = xfs_qm_dqattach(ip, 0);
if (error)
return error;
@@ -2614,17 +2600,19 @@ xfs_qm_vop_rename_dqattach(
}
void
-xfs_qm_vop_dqattach_and_dqmod_newinode(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t *udqp,
- xfs_dquot_t *gdqp)
+xfs_qm_vop_create_dqattach(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ struct xfs_dquot *udqp,
+ struct xfs_dquot *gdqp)
{
- if (!XFS_IS_QUOTA_ON(tp->t_mountp))
+ struct xfs_mount *mp = tp->t_mountp;
+
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
+ ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if (udqp) {
xfs_dqlock(udqp);
@@ -2632,7 +2620,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
xfs_dqunlock(udqp);
ASSERT(ip->i_udquot == NULL);
ip->i_udquot = udqp;
- ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
+ ASSERT(XFS_IS_UQUOTA_ON(mp));
ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
@@ -2642,8 +2630,8 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
xfs_dqunlock(gdqp);
ASSERT(ip->i_gdquot == NULL);
ip->i_gdquot = gdqp;
- ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
- ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
+ ASSERT(XFS_IS_OQUOTA_ON(mp));
+ ASSERT((XFS_IS_GQUOTA_ON(mp) ?
ip->i_d.di_gid : ip->i_d.di_projid) ==
be32_to_cpu(gdqp->q_core.d_id));
xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
index a371954cae1..495564b8af3 100644
--- a/fs/xfs/quota/xfs_qm.h
+++ b/fs/xfs/quota/xfs_qm.h
@@ -127,8 +127,6 @@ typedef struct xfs_quotainfo {
} xfs_quotainfo_t;
-extern xfs_dqtrxops_t xfs_trans_dquot_ops;
-
extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
@@ -159,17 +157,11 @@ typedef struct xfs_dquot_acct {
#define XFS_QM_RTBWARNLIMIT 5
extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern void xfs_qm_mount_quotas(xfs_mount_t *);
extern int xfs_qm_quotacheck(xfs_mount_t *);
-extern void xfs_qm_unmount_quotadestroy(xfs_mount_t *);
-extern void xfs_qm_unmount_quotas(xfs_mount_t *);
extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-extern int xfs_qm_sync(xfs_mount_t *, int);
/* dquot stuff */
extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int xfs_qm_dqattach(xfs_inode_t *, uint);
-extern void xfs_qm_dqdetach(xfs_inode_t *);
extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
@@ -183,19 +175,6 @@ extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-/* vop stuff */
-extern int xfs_qm_vop_dqalloc(xfs_mount_t *, xfs_inode_t *,
- uid_t, gid_t, prid_t, uint,
- xfs_dquot_t **, xfs_dquot_t **);
-extern void xfs_qm_vop_dqattach_and_dqmod_newinode(
- xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t *, xfs_dquot_t *);
-extern int xfs_qm_vop_rename_dqattach(xfs_inode_t **);
-extern xfs_dquot_t * xfs_qm_vop_chown(xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t **, xfs_dquot_t *);
-extern int xfs_qm_vop_chown_reserve(xfs_trans_t *, xfs_inode_t *,
- xfs_dquot_t *, xfs_dquot_t *, uint);
-
/* list stuff */
extern void xfs_qm_freelist_append(xfs_frlist_t *, xfs_dquot_t *);
extern void xfs_qm_freelist_unlink(xfs_dquot_t *);
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
index 63037c689a4..a5346630dfa 100644
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ b/fs/xfs/quota/xfs_qm_bhv.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
@@ -84,7 +83,7 @@ xfs_fill_statvfs_from_dquot(
* return a statvfs of the project, not the entire filesystem.
* This makes such trees appear as if they are filesystems in themselves.
*/
-STATIC void
+void
xfs_qm_statvfs(
xfs_inode_t *ip,
struct kstatfs *statp)
@@ -92,20 +91,13 @@ xfs_qm_statvfs(
xfs_mount_t *mp = ip->i_mount;
xfs_dquot_t *dqp;
- if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
- !((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
- (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
- return;
-
if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) {
- xfs_disk_dquot_t *dp = &dqp->q_core;
-
- xfs_fill_statvfs_from_dquot(statp, dp);
+ xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
xfs_qm_dqput(dqp);
}
}
-STATIC int
+int
xfs_qm_newmount(
xfs_mount_t *mp,
uint *needquotamount,
@@ -114,9 +106,6 @@ xfs_qm_newmount(
uint quotaondisk;
uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
- *quotaflags = 0;
- *needquotamount = B_FALSE;
-
quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
(mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
@@ -179,66 +168,6 @@ xfs_qm_newmount(
return 0;
}
-STATIC int
-xfs_qm_endmount(
- xfs_mount_t *mp,
- uint needquotamount,
- uint quotaflags)
-{
- if (needquotamount) {
- ASSERT(mp->m_qflags == 0);
- mp->m_qflags = quotaflags;
- xfs_qm_mount_quotas(mp);
- }
-
-#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
- if (! (XFS_IS_QUOTA_ON(mp)))
- xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
- else
- xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
-#endif
-
-#ifdef QUOTADEBUG
- if (XFS_IS_QUOTA_ON(mp) && xfs_qm_internalqcheck(mp))
- cmn_err(CE_WARN, "XFS: mount internalqcheck failed");
-#endif
-
- return 0;
-}
-
-STATIC void
-xfs_qm_dqrele_null(
- xfs_dquot_t *dq)
-{
- /*
- * Called from XFS, where we always check first for a NULL dquot.
- */
- if (!dq)
- return;
- xfs_qm_dqrele(dq);
-}
-
-
-struct xfs_qmops xfs_qmcore_xfs = {
- .xfs_qminit = xfs_qm_newmount,
- .xfs_qmdone = xfs_qm_unmount_quotadestroy,
- .xfs_qmmount = xfs_qm_endmount,
- .xfs_qmunmount = xfs_qm_unmount_quotas,
- .xfs_dqrele = xfs_qm_dqrele_null,
- .xfs_dqattach = xfs_qm_dqattach,
- .xfs_dqdetach = xfs_qm_dqdetach,
- .xfs_dqpurgeall = xfs_qm_dqpurge_all,
- .xfs_dqvopalloc = xfs_qm_vop_dqalloc,
- .xfs_dqvopcreate = xfs_qm_vop_dqattach_and_dqmod_newinode,
- .xfs_dqvoprename = xfs_qm_vop_rename_dqattach,
- .xfs_dqvopchown = xfs_qm_vop_chown,
- .xfs_dqvopchownresv = xfs_qm_vop_chown_reserve,
- .xfs_dqstatvfs = xfs_qm_statvfs,
- .xfs_dqsync = xfs_qm_sync,
- .xfs_dqtrxops = &xfs_trans_dquot_ops,
-};
-EXPORT_SYMBOL(xfs_qmcore_xfs);
-
void __init
xfs_qm_init(void)
{
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
index 709f5f545cf..21b08c0396a 100644
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ b/fs/xfs/quota/xfs_qm_stats.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_qm.h"
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index c7b66f6506c..4e4276b956e 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -45,7 +45,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
@@ -847,105 +846,55 @@ xfs_qm_export_flags(
}
-/*
- * Release all the dquots on the inodes in an AG.
- */
-STATIC void
-xfs_qm_dqrele_inodes_ag(
- xfs_mount_t *mp,
- int ag,
- uint flags)
+STATIC int
+xfs_dqrele_inode(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
{
- xfs_inode_t *ip = NULL;
- xfs_perag_t *pag = &mp->m_perag[ag];
- int first_index = 0;
- int nr_found;
-
- do {
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- read_lock(&pag->pag_ici_lock);
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void**)&ip, first_index, 1);
-
- if (!nr_found) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) {
- read_unlock(&pag->pag_ici_lock);
- break;
- }
-
- /* skip quota inodes */
- if (ip == XFS_QI_UQIP(mp) || ip == XFS_QI_GQIP(mp)) {
- ASSERT(ip->i_udquot == NULL);
- ASSERT(ip->i_gdquot == NULL);
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
+ int error;
- /*
- * If we can't get a reference on the inode, it must be
- * in reclaim. Leave it for the reclaim code to flush.
- */
- if (!igrab(VFS_I(ip))) {
- read_unlock(&pag->pag_ici_lock);
- continue;
- }
+ /* skip quota inodes */
+ if (ip == XFS_QI_UQIP(ip->i_mount) || ip == XFS_QI_GQIP(ip->i_mount)) {
+ ASSERT(ip->i_udquot == NULL);
+ ASSERT(ip->i_gdquot == NULL);
read_unlock(&pag->pag_ici_lock);
+ return 0;
+ }
- /* avoid new inodes though we shouldn't find any here */
- if (xfs_iflags_test(ip, XFS_INEW)) {
- IRELE(ip);
- continue;
- }
+ error = xfs_sync_inode_valid(ip, pag);
+ if (error)
+ return error;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
- xfs_qm_dqrele(ip->i_udquot);
- ip->i_udquot = NULL;
- }
- if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) &&
- ip->i_gdquot) {
- xfs_qm_dqrele(ip->i_gdquot);
- ip->i_gdquot = NULL;
- }
- xfs_iput(ip, XFS_ILOCK_EXCL);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
+ xfs_qm_dqrele(ip->i_udquot);
+ ip->i_udquot = NULL;
+ }
+ if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
+ xfs_qm_dqrele(ip->i_gdquot);
+ ip->i_gdquot = NULL;
+ }
+ xfs_iput(ip, XFS_ILOCK_EXCL);
+ IRELE(ip);
- } while (nr_found);
+ return 0;
}
+
/*
* Go thru all the inodes in the file system, releasing their dquots.
+ *
* Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff. This also gets called from
- * xfs_rootumount.
+ * AFTER this, in the case of quotaoff.
*/
void
xfs_qm_dqrele_all_inodes(
struct xfs_mount *mp,
uint flags)
{
- int i;
-
ASSERT(mp->m_quotainfo);
- for (i = 0; i < mp->m_sb.sb_agcount; i++) {
- if (!mp->m_perag[i].pag_ici_init)
- continue;
- xfs_qm_dqrele_inodes_ag(mp, i, flags);
- }
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index 447173bcf96..97ac9640be9 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -42,7 +42,6 @@
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
@@ -111,7 +110,7 @@ xfs_trans_log_dquot(
* Carry forward whatever is left of the quota blk reservation to
* the spanky new transaction
*/
-STATIC void
+void
xfs_trans_dup_dqinfo(
xfs_trans_t *otp,
xfs_trans_t *ntp)
@@ -167,19 +166,17 @@ xfs_trans_dup_dqinfo(
/*
* Wrap around mod_dquot to account for both user and group quotas.
*/
-STATIC void
+void
xfs_trans_mod_dquot_byino(
xfs_trans_t *tp,
xfs_inode_t *ip,
uint field,
long delta)
{
- xfs_mount_t *mp;
-
- ASSERT(tp);
- mp = tp->t_mountp;
+ xfs_mount_t *mp = tp->t_mountp;
- if (!XFS_IS_QUOTA_ON(mp) ||
+ if (!XFS_IS_QUOTA_RUNNING(mp) ||
+ !XFS_IS_QUOTA_ON(mp) ||
ip->i_ino == mp->m_sb.sb_uquotino ||
ip->i_ino == mp->m_sb.sb_gquotino)
return;
@@ -229,6 +226,7 @@ xfs_trans_mod_dquot(
xfs_dqtrx_t *qtrx;
ASSERT(tp);
+ ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
qtrx = NULL;
if (tp->t_dqinfo == NULL)
@@ -346,7 +344,7 @@ xfs_trans_dqlockedjoin(
* Unreserve just the reservations done by this transaction.
* dquot is still left locked at exit.
*/
-STATIC void
+void
xfs_trans_apply_dquot_deltas(
xfs_trans_t *tp)
{
@@ -357,7 +355,7 @@ xfs_trans_apply_dquot_deltas(
long totalbdelta;
long totalrtbdelta;
- if (! (tp->t_flags & XFS_TRANS_DQ_DIRTY))
+ if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
return;
ASSERT(tp->t_dqinfo);
@@ -531,7 +529,7 @@ xfs_trans_apply_dquot_deltas(
* we simply throw those away, since that's the expected behavior
* when a transaction is curtailed without a commit.
*/
-STATIC void
+void
xfs_trans_unreserve_and_mod_dquots(
xfs_trans_t *tp)
{
@@ -768,7 +766,7 @@ xfs_trans_reserve_quota_bydquots(
{
int resvd = 0, error;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
if (tp && tp->t_dqinfo == NULL)
@@ -811,18 +809,17 @@ xfs_trans_reserve_quota_bydquots(
* This doesn't change the actual usage, just the reservation.
* The inode sent in is locked.
*/
-STATIC int
+int
xfs_trans_reserve_quota_nblks(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- long nblks,
- long ninos,
- uint flags)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ long nblks,
+ long ninos,
+ uint flags)
{
- int error;
+ struct xfs_mount *mp = ip->i_mount;
- if (!XFS_IS_QUOTA_ON(mp))
+ if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
return 0;
if (XFS_IS_PQUOTA_ON(mp))
flags |= XFS_QMOPT_ENOSPC;
@@ -831,7 +828,6 @@ xfs_trans_reserve_quota_nblks(
ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
XFS_TRANS_DQ_RES_RTBLKS ||
(flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
@@ -840,11 +836,9 @@ xfs_trans_reserve_quota_nblks(
/*
* Reserve nblks against these dquots, with trans as the mediator.
*/
- error = xfs_trans_reserve_quota_bydquots(tp, mp,
- ip->i_udquot, ip->i_gdquot,
- nblks, ninos,
- flags);
- return error;
+ return xfs_trans_reserve_quota_bydquots(tp, mp,
+ ip->i_udquot, ip->i_gdquot,
+ nblks, ninos, flags);
}
/*
@@ -895,25 +889,15 @@ STATIC void
xfs_trans_alloc_dqinfo(
xfs_trans_t *tp)
{
- (tp)->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
+ tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
}
-STATIC void
+void
xfs_trans_free_dqinfo(
xfs_trans_t *tp)
{
if (!tp->t_dqinfo)
return;
- kmem_zone_free(xfs_Gqm->qm_dqtrxzone, (tp)->t_dqinfo);
- (tp)->t_dqinfo = NULL;
+ kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
+ tp->t_dqinfo = NULL;
}
-
-xfs_dqtrxops_t xfs_trans_dquot_ops = {
- .qo_dup_dqinfo = xfs_trans_dup_dqinfo,
- .qo_free_dqinfo = xfs_trans_free_dqinfo,
- .qo_mod_dquot_byino = xfs_trans_mod_dquot_byino,
- .qo_apply_dquot_deltas = xfs_trans_apply_dquot_deltas,
- .qo_reserve_quota_nblks = xfs_trans_reserve_quota_nblks,
- .qo_reserve_quota_bydquots = xfs_trans_reserve_quota_bydquots,
- .qo_unreserve_and_mod_dquots = xfs_trans_unreserve_and_mod_dquots,
-};
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
deleted file mode 100644
index a8cdd73999a..00000000000
--- a/fs/xfs/xfs_acl.c
+++ /dev/null
@@ -1,874 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_inum.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/capability.h>
-#include <linux/posix_acl_xattr.h>
-
-STATIC int xfs_acl_setmode(struct inode *, xfs_acl_t *, int *);
-STATIC void xfs_acl_filter_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_endian(xfs_acl_t *);
-STATIC int xfs_acl_access(uid_t, gid_t, xfs_acl_t *, mode_t, cred_t *);
-STATIC int xfs_acl_invalid(xfs_acl_t *);
-STATIC void xfs_acl_sync_mode(mode_t, xfs_acl_t *);
-STATIC void xfs_acl_get_attr(struct inode *, xfs_acl_t *, int, int, int *);
-STATIC void xfs_acl_set_attr(struct inode *, xfs_acl_t *, int, int *);
-STATIC int xfs_acl_allow_set(struct inode *, int);
-
-kmem_zone_t *xfs_acl_zone;
-
-
-/*
- * Test for existence of access ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_access(
- struct inode *vp)
-{
- int error;
-
- xfs_acl_get_attr(vp, NULL, _ACL_TYPE_ACCESS, ATTR_KERNOVAL, &error);
- return (error == 0);
-}
-
-/*
- * Test for existence of default ACL attribute as efficiently as possible.
- */
-int
-xfs_acl_vhasacl_default(
- struct inode *vp)
-{
- int error;
-
- if (!S_ISDIR(vp->i_mode))
- return 0;
- xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
- return (error == 0);
-}
-
-/*
- * Convert from extended attribute representation to in-memory for XFS.
- */
-STATIC int
-posix_acl_xattr_to_xfs(
- posix_acl_xattr_header *src,
- size_t size,
- xfs_acl_t *dest)
-{
- posix_acl_xattr_entry *src_entry;
- xfs_acl_entry_t *dest_entry;
- int n;
-
- if (!src || !dest)
- return EINVAL;
-
- if (size < sizeof(posix_acl_xattr_header))
- return EINVAL;
-
- if (src->a_version != cpu_to_le32(POSIX_ACL_XATTR_VERSION))
- return EOPNOTSUPP;
-
- memset(dest, 0, sizeof(xfs_acl_t));
- dest->acl_cnt = posix_acl_xattr_count(size);
- if (dest->acl_cnt < 0 || dest->acl_cnt > XFS_ACL_MAX_ENTRIES)
- return EINVAL;
-
- /*
- * acl_set_file(3) may request that we set default ACLs with
- * zero length -- defend (gracefully) against that here.
- */
- if (!dest->acl_cnt)
- return 0;
-
- src_entry = (posix_acl_xattr_entry *)((char *)src + sizeof(*src));
- dest_entry = &dest->acl_entry[0];
-
- for (n = 0; n < dest->acl_cnt; n++, src_entry++, dest_entry++) {
- dest_entry->ae_perm = le16_to_cpu(src_entry->e_perm);
- if (_ACL_PERM_INVALID(dest_entry->ae_perm))
- return EINVAL;
- dest_entry->ae_tag = le16_to_cpu(src_entry->e_tag);
- switch(dest_entry->ae_tag) {
- case ACL_USER:
- case ACL_GROUP:
- dest_entry->ae_id = le32_to_cpu(src_entry->e_id);
- break;
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- dest_entry->ae_id = ACL_UNDEFINED_ID;
- break;
- default:
- return EINVAL;
- }
- }
- if (xfs_acl_invalid(dest))
- return EINVAL;
-
- return 0;
-}
-
-/*
- * Comparison function called from xfs_sort().
- * Primary key is ae_tag, secondary key is ae_id.
- */
-STATIC int
-xfs_acl_entry_compare(
- const void *va,
- const void *vb)
-{
- xfs_acl_entry_t *a = (xfs_acl_entry_t *)va,
- *b = (xfs_acl_entry_t *)vb;
-
- if (a->ae_tag == b->ae_tag)
- return (a->ae_id - b->ae_id);
- return (a->ae_tag - b->ae_tag);
-}
-
-/*
- * Convert from in-memory XFS to extended attribute representation.
- */
-STATIC int
-posix_acl_xfs_to_xattr(
- xfs_acl_t *src,
- posix_acl_xattr_header *dest,
- size_t size)
-{
- int n;
- size_t new_size = posix_acl_xattr_size(src->acl_cnt);
- posix_acl_xattr_entry *dest_entry;
- xfs_acl_entry_t *src_entry;
-
- if (size < new_size)
- return -ERANGE;
-
- /* Need to sort src XFS ACL by <ae_tag,ae_id> */
- xfs_sort(src->acl_entry, src->acl_cnt, sizeof(src->acl_entry[0]),
- xfs_acl_entry_compare);
-
- dest->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION);
- dest_entry = &dest->a_entries[0];
- src_entry = &src->acl_entry[0];
- for (n = 0; n < src->acl_cnt; n++, dest_entry++, src_entry++) {
- dest_entry->e_perm = cpu_to_le16(src_entry->ae_perm);
- if (_ACL_PERM_INVALID(src_entry->ae_perm))
- return -EINVAL;
- dest_entry->e_tag = cpu_to_le16(src_entry->ae_tag);
- switch (src_entry->ae_tag) {
- case ACL_USER:
- case ACL_GROUP:
- dest_entry->e_id = cpu_to_le32(src_entry->ae_id);
- break;
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- dest_entry->e_id = cpu_to_le32(ACL_UNDEFINED_ID);
- break;
- default:
- return -EINVAL;
- }
- }
- return new_size;
-}
-
-int
-xfs_acl_vget(
- struct inode *vp,
- void *acl,
- size_t size,
- int kind)
-{
- int error;
- xfs_acl_t *xfs_acl = NULL;
- posix_acl_xattr_header *ext_acl = acl;
- int flags = 0;
-
- if(size) {
- if (!(_ACL_ALLOC(xfs_acl))) {
- error = ENOMEM;
- goto out;
- }
- memset(xfs_acl, 0, sizeof(xfs_acl_t));
- } else
- flags = ATTR_KERNOVAL;
-
- xfs_acl_get_attr(vp, xfs_acl, kind, flags, &error);
- if (error)
- goto out;
-
- if (!size) {
- error = -posix_acl_xattr_size(XFS_ACL_MAX_ENTRIES);
- } else {
- if (xfs_acl_invalid(xfs_acl)) {
- error = EINVAL;
- goto out;
- }
- if (kind == _ACL_TYPE_ACCESS)
- xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, xfs_acl);
- error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
- }
-out:
- if(xfs_acl)
- _ACL_FREE(xfs_acl);
- return -error;
-}
-
-int
-xfs_acl_vremove(
- struct inode *vp,
- int kind)
-{
- int error;
-
- error = xfs_acl_allow_set(vp, kind);
- if (!error) {
- error = xfs_attr_remove(XFS_I(vp),
- kind == _ACL_TYPE_DEFAULT?
- SGI_ACL_DEFAULT: SGI_ACL_FILE,
- ATTR_ROOT);
- if (error == ENOATTR)
- error = 0; /* 'scool */
- }
- return -error;
-}
-
-int
-xfs_acl_vset(
- struct inode *vp,
- void *acl,
- size_t size,
- int kind)
-{
- posix_acl_xattr_header *ext_acl = acl;
- xfs_acl_t *xfs_acl;
- int error;
- int basicperms = 0; /* more than std unix perms? */
-
- if (!acl)
- return -EINVAL;
-
- if (!(_ACL_ALLOC(xfs_acl)))
- return -ENOMEM;
-
- error = posix_acl_xattr_to_xfs(ext_acl, size, xfs_acl);
- if (error) {
- _ACL_FREE(xfs_acl);
- return -error;
- }
- if (!xfs_acl->acl_cnt) {
- _ACL_FREE(xfs_acl);
- return 0;
- }
-
- error = xfs_acl_allow_set(vp, kind);
-
- /* Incoming ACL exists, set file mode based on its value */
- if (!error && kind == _ACL_TYPE_ACCESS)
- error = xfs_acl_setmode(vp, xfs_acl, &basicperms);
-
- if (error)
- goto out;
-
- /*
- * If we have more than std unix permissions, set up the actual attr.
- * Otherwise, delete any existing attr. This prevents us from
- * having actual attrs for permissions that can be stored in the
- * standard permission bits.
- */
- if (!basicperms) {
- xfs_acl_set_attr(vp, xfs_acl, kind, &error);
- } else {
- error = -xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
- }
-
-out:
- _ACL_FREE(xfs_acl);
- return -error;
-}
-
-int
-xfs_acl_iaccess(
- xfs_inode_t *ip,
- mode_t mode,
- cred_t *cr)
-{
- xfs_acl_t *acl;
- int rval;
- struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
-
- if (!(_ACL_ALLOC(acl)))
- return -1;
-
- /* If the file has no ACL return -1. */
- rval = sizeof(xfs_acl_t);
- if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval, ATTR_ROOT)) {
- _ACL_FREE(acl);
- return -1;
- }
- xfs_acl_get_endian(acl);
-
- /* If the file has an empty ACL return -1. */
- if (acl->acl_cnt == XFS_ACL_NOT_PRESENT) {
- _ACL_FREE(acl);
- return -1;
- }
-
- /* Synchronize ACL with mode bits */
- xfs_acl_sync_mode(ip->i_d.di_mode, acl);
-
- rval = xfs_acl_access(ip->i_d.di_uid, ip->i_d.di_gid, acl, mode, cr);
- _ACL_FREE(acl);
- return rval;
-}
-
-STATIC int
-xfs_acl_allow_set(
- struct inode *vp,
- int kind)
-{
- if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
- return EPERM;
- if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
- return ENOTDIR;
- if (vp->i_sb->s_flags & MS_RDONLY)
- return EROFS;
- if (XFS_I(vp)->i_d.di_uid != current_fsuid() && !capable(CAP_FOWNER))
- return EPERM;
- return 0;
-}
-
-/*
- * Note: cr is only used here for the capability check if the ACL test fails.
- * It is not used to find out the credentials uid or groups etc, as was
- * done in IRIX. It is assumed that the uid and groups for the current
- * thread are taken from "current" instead of the cr parameter.
- */
-STATIC int
-xfs_acl_access(
- uid_t fuid,
- gid_t fgid,
- xfs_acl_t *fap,
- mode_t md,
- cred_t *cr)
-{
- xfs_acl_entry_t matched;
- int i, allows;
- int maskallows = -1; /* true, but not 1, either */
- int seen_userobj = 0;
-
- matched.ae_tag = 0; /* Invalid type */
- matched.ae_perm = 0;
-
- for (i = 0; i < fap->acl_cnt; i++) {
- /*
- * Break out if we've got a user_obj entry or
- * a user entry and the mask (and have processed USER_OBJ)
- */
- if (matched.ae_tag == ACL_USER_OBJ)
- break;
- if (matched.ae_tag == ACL_USER) {
- if (maskallows != -1 && seen_userobj)
- break;
- if (fap->acl_entry[i].ae_tag != ACL_MASK &&
- fap->acl_entry[i].ae_tag != ACL_USER_OBJ)
- continue;
- }
- /* True if this entry allows the requested access */
- allows = ((fap->acl_entry[i].ae_perm & md) == md);
-
- switch (fap->acl_entry[i].ae_tag) {
- case ACL_USER_OBJ:
- seen_userobj = 1;
- if (fuid != current_fsuid())
- continue;
- matched.ae_tag = ACL_USER_OBJ;
- matched.ae_perm = allows;
- break;
- case ACL_USER:
- if (fap->acl_entry[i].ae_id != current_fsuid())
- continue;
- matched.ae_tag = ACL_USER;
- matched.ae_perm = allows;
- break;
- case ACL_GROUP_OBJ:
- if ((matched.ae_tag == ACL_GROUP_OBJ ||
- matched.ae_tag == ACL_GROUP) && !allows)
- continue;
- if (!in_group_p(fgid))
- continue;
- matched.ae_tag = ACL_GROUP_OBJ;
- matched.ae_perm = allows;
- break;
- case ACL_GROUP:
- if ((matched.ae_tag == ACL_GROUP_OBJ ||
- matched.ae_tag == ACL_GROUP) && !allows)
- continue;
- if (!in_group_p(fap->acl_entry[i].ae_id))
- continue;
- matched.ae_tag = ACL_GROUP;
- matched.ae_perm = allows;
- break;
- case ACL_MASK:
- maskallows = allows;
- break;
- case ACL_OTHER:
- if (matched.ae_tag != 0)
- continue;
- matched.ae_tag = ACL_OTHER;
- matched.ae_perm = allows;
- break;
- }
- }
- /*
- * First possibility is that no matched entry allows access.
- * The capability to override DAC may exist, so check for it.
- */
- switch (matched.ae_tag) {
- case ACL_OTHER:
- case ACL_USER_OBJ:
- if (matched.ae_perm)
- return 0;
- break;
- case ACL_USER:
- case ACL_GROUP_OBJ:
- case ACL_GROUP:
- if (maskallows && matched.ae_perm)
- return 0;
- break;
- case 0:
- break;
- }
-
- /* EACCES tells generic_permission to check for capability overrides */
- return EACCES;
-}
-
-/*
- * ACL validity checker.
- * This acl validation routine checks each ACL entry read in makes sense.
- */
-STATIC int
-xfs_acl_invalid(
- xfs_acl_t *aclp)
-{
- xfs_acl_entry_t *entry, *e;
- int user = 0, group = 0, other = 0, mask = 0;
- int mask_required = 0;
- int i, j;
-
- if (!aclp)
- goto acl_invalid;
-
- if (aclp->acl_cnt > XFS_ACL_MAX_ENTRIES)
- goto acl_invalid;
-
- for (i = 0; i < aclp->acl_cnt; i++) {
- entry = &aclp->acl_entry[i];
- switch (entry->ae_tag) {
- case ACL_USER_OBJ:
- if (user++)
- goto acl_invalid;
- break;
- case ACL_GROUP_OBJ:
- if (group++)
- goto acl_invalid;
- break;
- case ACL_OTHER:
- if (other++)
- goto acl_invalid;
- break;
- case ACL_USER:
- case ACL_GROUP:
- for (j = i + 1; j < aclp->acl_cnt; j++) {
- e = &aclp->acl_entry[j];
- if (e->ae_id == entry->ae_id &&
- e->ae_tag == entry->ae_tag)
- goto acl_invalid;
- }
- mask_required++;
- break;
- case ACL_MASK:
- if (mask++)
- goto acl_invalid;
- break;
- default:
- goto acl_invalid;
- }
- }
- if (!user || !group || !other || (mask_required && !mask))
- goto acl_invalid;
- else
- return 0;
-acl_invalid:
- return EINVAL;
-}
-
-/*
- * Do ACL endian conversion.
- */
-STATIC void
-xfs_acl_get_endian(
- xfs_acl_t *aclp)
-{
- xfs_acl_entry_t *ace, *end;
-
- INT_SET(aclp->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- end = &aclp->acl_entry[0]+aclp->acl_cnt;
- for (ace = &aclp->acl_entry[0]; ace < end; ace++) {
- INT_SET(ace->ae_tag, ARCH_CONVERT, ace->ae_tag);
- INT_SET(ace->ae_id, ARCH_CONVERT, ace->ae_id);
- INT_SET(ace->ae_perm, ARCH_CONVERT, ace->ae_perm);
- }
-}
-
-/*
- * Get the ACL from the EA and do endian conversion.
- */
-STATIC void
-xfs_acl_get_attr(
- struct inode *vp,
- xfs_acl_t *aclp,
- int kind,
- int flags,
- int *error)
-{
- int len = sizeof(xfs_acl_t);
-
- ASSERT((flags & ATTR_KERNOVAL) ? (aclp == NULL) : 1);
- flags |= ATTR_ROOT;
- *error = xfs_attr_get(XFS_I(vp),
- kind == _ACL_TYPE_ACCESS ?
- SGI_ACL_FILE : SGI_ACL_DEFAULT,
- (char *)aclp, &len, flags);
- if (*error || (flags & ATTR_KERNOVAL))
- return;
- xfs_acl_get_endian(aclp);
-}
-
-/*
- * Set the EA with the ACL and do endian conversion.
- */
-STATIC void
-xfs_acl_set_attr(
- struct inode *vp,
- xfs_acl_t *aclp,
- int kind,
- int *error)
-{
- xfs_acl_entry_t *ace, *newace, *end;
- xfs_acl_t *newacl;
- int len;
-
- if (!(_ACL_ALLOC(newacl))) {
- *error = ENOMEM;
- return;
- }
-
- len = sizeof(xfs_acl_t) -
- (sizeof(xfs_acl_entry_t) * (XFS_ACL_MAX_ENTRIES - aclp->acl_cnt));
- end = &aclp->acl_entry[0]+aclp->acl_cnt;
- for (ace = &aclp->acl_entry[0], newace = &newacl->acl_entry[0];
- ace < end;
- ace++, newace++) {
- INT_SET(newace->ae_tag, ARCH_CONVERT, ace->ae_tag);
- INT_SET(newace->ae_id, ARCH_CONVERT, ace->ae_id);
- INT_SET(newace->ae_perm, ARCH_CONVERT, ace->ae_perm);
- }
- INT_SET(newacl->acl_cnt, ARCH_CONVERT, aclp->acl_cnt);
- *error = xfs_attr_set(XFS_I(vp),
- kind == _ACL_TYPE_ACCESS ?
- SGI_ACL_FILE: SGI_ACL_DEFAULT,
- (char *)newacl, len, ATTR_ROOT);
- _ACL_FREE(newacl);
-}
-
-int
-xfs_acl_vtoacl(
- struct inode *vp,
- xfs_acl_t *access_acl,
- xfs_acl_t *default_acl)
-{
- int error = 0;
-
- if (access_acl) {
- /*
- * Get the Access ACL and the mode. If either cannot
- * be obtained for some reason, invalidate the access ACL.
- */
- xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
- if (error)
- access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
- else /* We have a good ACL and the file mode, synchronize. */
- xfs_acl_sync_mode(XFS_I(vp)->i_d.di_mode, access_acl);
- }
-
- if (default_acl) {
- xfs_acl_get_attr(vp, default_acl, _ACL_TYPE_DEFAULT, 0, &error);
- if (error)
- default_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
- }
- return error;
-}
-
-/*
- * This function retrieves the parent directory's acl, processes it
- * and lets the child inherit the acl(s) that it should.
- */
-int
-xfs_acl_inherit(
- struct inode *vp,
- mode_t mode,
- xfs_acl_t *pdaclp)
-{
- xfs_acl_t *cacl;
- int error = 0;
- int basicperms = 0;
-
- /*
- * If the parent does not have a default ACL, or it's an
- * invalid ACL, we're done.
- */
- if (!vp)
- return 0;
- if (!pdaclp || xfs_acl_invalid(pdaclp))
- return 0;
-
- /*
- * Copy the default ACL of the containing directory to
- * the access ACL of the new file and use the mode that
- * was passed in to set up the correct initial values for
- * the u::,g::[m::], and o:: entries. This is what makes
- * umask() "work" with ACL's.
- */
-
- if (!(_ACL_ALLOC(cacl)))
- return ENOMEM;
-
- memcpy(cacl, pdaclp, sizeof(xfs_acl_t));
- xfs_acl_filter_mode(mode, cacl);
- error = xfs_acl_setmode(vp, cacl, &basicperms);
- if (error)
- goto out_error;
-
- /*
- * Set the Default and Access ACL on the file. The mode is already
- * set on the file, so we don't need to worry about that.
- *
- * If the new file is a directory, its default ACL is a copy of
- * the containing directory's default ACL.
- */
- if (S_ISDIR(vp->i_mode))
- xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
- if (!error && !basicperms)
- xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
-out_error:
- _ACL_FREE(cacl);
- return error;
-}
-
-/*
- * Set up the correct mode on the file based on the supplied ACL. This
- * makes sure that the mode on the file reflects the state of the
- * u::,g::[m::], and o:: entries in the ACL. Since the mode is where
- * the ACL is going to get the permissions for these entries, we must
- * synchronize the mode whenever we set the ACL on a file.
- */
-STATIC int
-xfs_acl_setmode(
- struct inode *vp,
- xfs_acl_t *acl,
- int *basicperms)
-{
- struct iattr iattr;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
- int i, nomask = 1;
-
- *basicperms = 1;
-
- if (acl->acl_cnt == XFS_ACL_NOT_PRESENT)
- return 0;
-
- /*
- * Copy the u::, g::, o::, and m:: bits from the ACL into the
- * mode. The m:: bits take precedence over the g:: bits.
- */
- iattr.ia_valid = ATTR_MODE;
- iattr.ia_mode = XFS_I(vp)->i_d.di_mode;
- iattr.ia_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
- ap = acl->acl_entry;
- for (i = 0; i < acl->acl_cnt; ++i) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- iattr.ia_mode |= ap->ae_perm << 6;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK: /* more than just standard modes */
- nomask = 0;
- iattr.ia_mode |= ap->ae_perm << 3;
- *basicperms = 0;
- break;
- case ACL_OTHER:
- iattr.ia_mode |= ap->ae_perm;
- break;
- default: /* more than just standard modes */
- *basicperms = 0;
- break;
- }
- ap++;
- }
-
- /* Set the group bits from ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- iattr.ia_mode |= gap->ae_perm << 3;
-
- return xfs_setattr(XFS_I(vp), &iattr, 0);
-}
-
-/*
- * The permissions for the special ACL entries (u::, g::[m::], o::) are
- * actually stored in the file mode (if there is both a group and a mask,
- * the group is stored in the ACL entry and the mask is stored on the file).
- * This allows the mode to remain automatically in sync with the ACL without
- * the need for a call-back to the ACL system at every point where the mode
- * could change. This function takes the permissions from the specified mode
- * and places it in the supplied ACL.
- *
- * This implementation draws its validity from the fact that, when the ACL
- * was assigned, the mode was copied from the ACL.
- * If the mode did not change, therefore, the mode remains exactly what was
- * taken from the special ACL entries at assignment.
- * If a subsequent chmod() was done, the POSIX spec says that the change in
- * mode must cause an update to the ACL seen at user level and used for
- * access checks. Before and after a mode change, therefore, the file mode
- * most accurately reflects what the special ACL entries should permit/deny.
- *
- * CAVEAT: If someone sets the SGI_ACL_FILE attribute directly,
- * the existing mode bits will override whatever is in the
- * ACL. Similarly, if there is a pre-existing ACL that was
- * never in sync with its mode (owing to a bug in 6.5 and
- * before), it will now magically (or mystically) be
- * synchronized. This could cause slight astonishment, but
- * it is better than inconsistent permissions.
- *
- * The supplied ACL is a template that may contain any combination
- * of special entries. These are treated as place holders when we fill
- * out the ACL. This routine does not add or remove special entries, it
- * simply unites each special entry with its associated set of permissions.
- */
-STATIC void
-xfs_acl_sync_mode(
- mode_t mode,
- xfs_acl_t *acl)
-{
- int i, nomask = 1;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
-
- /*
- * Set ACL entries. POSIX1003.1eD16 requires that the MASK
- * be set instead of the GROUP entry, if there is a MASK.
- */
- for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- ap->ae_perm = (mode >> 6) & 0x7;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK:
- nomask = 0;
- ap->ae_perm = (mode >> 3) & 0x7;
- break;
- case ACL_OTHER:
- ap->ae_perm = mode & 0x7;
- break;
- default:
- break;
- }
- }
- /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- gap->ae_perm = (mode >> 3) & 0x7;
-}
-
-/*
- * When inheriting an Access ACL from a directory Default ACL,
- * the ACL bits are set to the intersection of the ACL default
- * permission bits and the file permission bits in mode. If there
- * are no permission bits on the file then we must not give them
- * the ACL. This is what what makes umask() work with ACLs.
- */
-STATIC void
-xfs_acl_filter_mode(
- mode_t mode,
- xfs_acl_t *acl)
-{
- int i, nomask = 1;
- xfs_acl_entry_t *ap;
- xfs_acl_entry_t *gap = NULL;
-
- /*
- * Set ACL entries. POSIX1003.1eD16 requires that the MASK
- * be merged with GROUP entry, if there is a MASK.
- */
- for (ap = acl->acl_entry, i = 0; i < acl->acl_cnt; ap++, i++) {
- switch (ap->ae_tag) {
- case ACL_USER_OBJ:
- ap->ae_perm &= (mode >> 6) & 0x7;
- break;
- case ACL_GROUP_OBJ:
- gap = ap;
- break;
- case ACL_MASK:
- nomask = 0;
- ap->ae_perm &= (mode >> 3) & 0x7;
- break;
- case ACL_OTHER:
- ap->ae_perm &= mode & 0x7;
- break;
- default:
- break;
- }
- }
- /* Set the ACL_GROUP_OBJ if there's no ACL_MASK */
- if (gap && nomask)
- gap->ae_perm &= (mode >> 3) & 0x7;
-}
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 642f1db4def..63dc1f2efad 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -18,81 +18,48 @@
#ifndef __XFS_ACL_H__
#define __XFS_ACL_H__
-/*
- * Access Control Lists
- */
-typedef __uint16_t xfs_acl_perm_t;
-typedef __int32_t xfs_acl_tag_t;
-typedef __int32_t xfs_acl_id_t;
+struct inode;
+struct posix_acl;
+struct xfs_inode;
#define XFS_ACL_MAX_ENTRIES 25
#define XFS_ACL_NOT_PRESENT (-1)
-typedef struct xfs_acl_entry {
- xfs_acl_tag_t ae_tag;
- xfs_acl_id_t ae_id;
- xfs_acl_perm_t ae_perm;
-} xfs_acl_entry_t;
-
-typedef struct xfs_acl {
- __int32_t acl_cnt;
- xfs_acl_entry_t acl_entry[XFS_ACL_MAX_ENTRIES];
-} xfs_acl_t;
+/* On-disk XFS access control list structure */
+struct xfs_acl {
+ __be32 acl_cnt;
+ struct xfs_acl_entry {
+ __be32 ae_tag;
+ __be32 ae_id;
+ __be16 ae_perm;
+ } acl_entry[XFS_ACL_MAX_ENTRIES];
+};
/* On-disk XFS extended attribute names */
-#define SGI_ACL_FILE "SGI_ACL_FILE"
-#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
+#define SGI_ACL_FILE "SGI_ACL_FILE"
+#define SGI_ACL_DEFAULT "SGI_ACL_DEFAULT"
#define SGI_ACL_FILE_SIZE (sizeof(SGI_ACL_FILE)-1)
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
-#define _ACL_TYPE_ACCESS 1
-#define _ACL_TYPE_DEFAULT 2
-
#ifdef CONFIG_XFS_POSIX_ACL
+extern int xfs_check_acl(struct inode *inode, int mask);
+extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
+extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
+extern int xfs_acl_chmod(struct inode *inode);
+extern void xfs_inode_init_acls(struct xfs_inode *ip);
+extern void xfs_inode_clear_acls(struct xfs_inode *ip);
+extern int posix_acl_access_exists(struct inode *inode);
+extern int posix_acl_default_exists(struct inode *inode);
-struct vattr;
-struct xfs_inode;
-
-extern struct kmem_zone *xfs_acl_zone;
-#define xfs_acl_zone_init(zone, name) \
- (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name))
-#define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone)
-
-extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *);
-extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *);
-extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *);
-extern int xfs_acl_vhasacl_access(struct inode *);
-extern int xfs_acl_vhasacl_default(struct inode *);
-extern int xfs_acl_vset(struct inode *, void *, size_t, int);
-extern int xfs_acl_vget(struct inode *, void *, size_t, int);
-extern int xfs_acl_vremove(struct inode *, int);
-
-#define _ACL_PERM_INVALID(perm) ((perm) & ~(ACL_READ|ACL_WRITE|ACL_EXECUTE))
-
-#define _ACL_INHERIT(c,m,d) (xfs_acl_inherit(c,m,d))
-#define _ACL_GET_ACCESS(pv,pa) (xfs_acl_vtoacl(pv,pa,NULL) == 0)
-#define _ACL_GET_DEFAULT(pv,pd) (xfs_acl_vtoacl(pv,NULL,pd) == 0)
-#define _ACL_ACCESS_EXISTS xfs_acl_vhasacl_access
-#define _ACL_DEFAULT_EXISTS xfs_acl_vhasacl_default
-
-#define _ACL_ALLOC(a) ((a) = kmem_zone_alloc(xfs_acl_zone, KM_SLEEP))
-#define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0)
-
+extern struct xattr_handler xfs_xattr_system_handler;
#else
-#define xfs_acl_zone_init(zone,name)
-#define xfs_acl_zone_destroy(zone)
-#define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP)
-#define xfs_acl_vget(v,p,sz,t) (-EOPNOTSUPP)
-#define xfs_acl_vremove(v,t) (-EOPNOTSUPP)
-#define xfs_acl_vhasacl_access(v) (0)
-#define xfs_acl_vhasacl_default(v) (0)
-#define _ACL_ALLOC(a) (1) /* successfully allocate nothing */
-#define _ACL_FREE(a) ((void)0)
-#define _ACL_INHERIT(c,m,d) (0)
-#define _ACL_GET_ACCESS(pv,pa) (0)
-#define _ACL_GET_DEFAULT(pv,pd) (0)
-#define _ACL_ACCESS_EXISTS (NULL)
-#define _ACL_DEFAULT_EXISTS (NULL)
-#endif
-
+# define xfs_check_acl NULL
+# define xfs_get_acl(inode, type) NULL
+# define xfs_inherit_acl(inode, default_acl) 0
+# define xfs_acl_chmod(inode) 0
+# define xfs_inode_init_acls(ip)
+# define xfs_inode_clear_acls(ip)
+# define posix_acl_access_exists(inode) 0
+# define posix_acl_default_exists(inode) 0
+#endif /* CONFIG_XFS_POSIX_ACL */
#endif /* __XFS_ACL_H__ */
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index c8641f713ca..f24b50b68d0 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -212,6 +212,8 @@ typedef struct xfs_perag
/*
* tags for inode radix tree
*/
+#define XFS_ICI_NO_TAG (-1) /* special flag for an untagged lookup
+ in xfs_inode_ag_iterator */
#define XFS_ICI_RECLAIM_TAG 0 /* inode is to be reclaimed */
#define XFS_AG_MAXLEVELS(mp) ((mp)->m_ag_maxlevels)
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
index 53d5e70d136..0902249354a 100644
--- a/fs/xfs/xfs_arch.h
+++ b/fs/xfs/xfs_arch.h
@@ -73,28 +73,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
#endif /* __KERNEL__ */
-/* do we need conversion? */
-#define ARCH_NOCONVERT 1
-#ifdef XFS_NATIVE_HOST
-# define ARCH_CONVERT ARCH_NOCONVERT
-#else
-# define ARCH_CONVERT 0
-#endif
-
-/* generic swapping macros */
-
-#ifndef HAVE_SWABMACROS
-#define INT_SWAP16(type,var) ((typeof(type))(__swab16((__u16)(var))))
-#define INT_SWAP32(type,var) ((typeof(type))(__swab32((__u32)(var))))
-#define INT_SWAP64(type,var) ((typeof(type))(__swab64((__u64)(var))))
-#endif
-
-#define INT_SWAP(type, var) \
- ((sizeof(type) == 8) ? INT_SWAP64(type,var) : \
- ((sizeof(type) == 4) ? INT_SWAP32(type,var) : \
- ((sizeof(type) == 2) ? INT_SWAP16(type,var) : \
- (var))))
-
/*
* get and set integers from potentially unaligned locations
*/
@@ -107,16 +85,6 @@ static inline void be64_add_cpu(__be64 *a, __s64 b)
((__u8*)(pointer))[1] = (((value) ) & 0xff); \
}
-/* does not return a value */
-#define INT_SET(reference,arch,valueref) \
- (__builtin_constant_p(valueref) ? \
- (void)( (reference) = ( ((arch) != ARCH_NOCONVERT) ? (INT_SWAP((reference),(valueref))) : (valueref)) ) : \
- (void)( \
- ((reference) = (valueref)), \
- ( ((arch) != ARCH_NOCONVERT) ? (reference) = INT_SWAP((reference),(reference)) : 0 ) \
- ) \
- )
-
/*
* In directories inode numbers are stored as unaligned arrays of unsigned
* 8bit integers on disk.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 5fde1654b43..db15feb906f 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -45,7 +45,6 @@
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
-#include "xfs_acl.h"
#include "xfs_rw.h"
#include "xfs_vnodeops.h"
@@ -249,8 +248,9 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
/*
* Attach the dquots to the inode.
*/
- if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
- return (error);
+ error = xfs_qm_dqattach(dp, 0);
+ if (error)
+ return error;
/*
* If the inode doesn't have an attribute fork, add one.
@@ -311,7 +311,7 @@ xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
}
xfs_ilock(dp, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, args.total, 0,
+ error = xfs_trans_reserve_quota_nblks(args.trans, dp, args.total, 0,
rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error) {
@@ -501,8 +501,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
/*
* Attach the dquots to the inode.
*/
- if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
- return (error);
+ error = xfs_qm_dqattach(dp, 0);
+ if (error)
+ return error;
/*
* Start our first transaction of the day.
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index ca7c6005a48..7928b9983c1 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2691,7 +2691,7 @@ xfs_bmap_rtalloc(
* Adjust the disk quota also. This was reserved
* earlier.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+ xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
} else {
@@ -2995,7 +2995,7 @@ xfs_bmap_btalloc(
* Adjust the disk quota also. This was reserved
* earlier.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, ap->tp, ap->ip,
+ xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELBCOUNT :
XFS_TRANS_DQ_BCOUNT,
(long) args.len);
@@ -3066,7 +3066,7 @@ xfs_bmap_btree_to_extents(
return error;
xfs_bmap_add_free(cbno, 1, cur->bc_private.b.flist, mp);
ip->i_d.di_nblocks--;
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, cbp);
if (cur->bc_bufs[0] == cbp)
cur->bc_bufs[0] = NULL;
@@ -3386,7 +3386,7 @@ xfs_bmap_del_extent(
* Adjust quota data.
*/
if (qfield)
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, qfield, (long)-nblks);
+ xfs_trans_mod_dquot_byino(tp, ip, qfield, (long)-nblks);
/*
* Account for change in delayed indirect blocks.
@@ -3523,7 +3523,7 @@ xfs_bmap_extents_to_btree(
*firstblock = cur->bc_private.b.firstblock = args.fsbno;
cur->bc_private.b.allocated++;
ip->i_d.di_nblocks++;
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
abp = xfs_btree_get_bufl(mp, tp, args.fsbno, 0);
/*
* Fill in the child block.
@@ -3690,7 +3690,7 @@ xfs_bmap_local_to_extents(
XFS_BMAP_TRACE_POST_UPDATE("new", ip, 0, whichfork);
XFS_IFORK_NEXT_SET(ip, whichfork, 1);
ip->i_d.di_nblocks = 1;
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, tp, ip,
+ xfs_trans_mod_dquot_byino(tp, ip,
XFS_TRANS_DQ_BCOUNT, 1L);
flags |= xfs_ilog_fext(whichfork);
} else {
@@ -4048,7 +4048,7 @@ xfs_bmap_add_attrfork(
XFS_TRANS_PERM_LOG_RES, XFS_ADDAFORK_LOG_COUNT)))
goto error0;
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, blks, 0, rsvd ?
+ error = xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?
XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
XFS_QMOPT_RES_REGBLKS);
if (error) {
@@ -4983,10 +4983,11 @@ xfs_bmapi(
* adjusted later. We return if we haven't
* allocated blocks already inside this loop.
*/
- if ((error = XFS_TRANS_RESERVE_QUOTA_NBLKS(
- mp, NULL, ip, (long)alen, 0,
+ error = xfs_trans_reserve_quota_nblks(
+ NULL, ip, (long)alen, 0,
rt ? XFS_QMOPT_RES_RTBLKS :
- XFS_QMOPT_RES_REGBLKS))) {
+ XFS_QMOPT_RES_REGBLKS);
+ if (error) {
if (n == 0) {
*nmap = 0;
ASSERT(cur == NULL);
@@ -5035,8 +5036,8 @@ xfs_bmapi(
if (XFS_IS_QUOTA_ON(mp))
/* unreserve the blocks now */
(void)
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(
- mp, NULL, ip,
+ xfs_trans_unreserve_quota_nblks(
+ NULL, ip,
(long)alen, 0, rt ?
XFS_QMOPT_RES_RTBLKS :
XFS_QMOPT_RES_REGBLKS);
@@ -5691,14 +5692,14 @@ xfs_bunmapi(
do_div(rtexts, mp->m_sb.sb_rextsize);
xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
(int64_t)rtexts, rsvd);
- (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
- NULL, ip, -((long)del.br_blockcount), 0,
+ (void)xfs_trans_reserve_quota_nblks(NULL,
+ ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS,
(int64_t)del.br_blockcount, rsvd);
- (void)XFS_TRANS_RESERVE_QUOTA_NBLKS(mp,
- NULL, ip, -((long)del.br_blockcount), 0,
+ (void)xfs_trans_reserve_quota_nblks(NULL,
+ ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
}
ip->i_delayed_blks -= del.br_blockcount;
@@ -6085,6 +6086,7 @@ xfs_getbmap(
break;
}
+ kmem_free(out);
return error;
}
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 0760d352586..5c1ade06578 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -590,7 +590,7 @@ xfs_bmbt_alloc_block(
cur->bc_private.b.allocated++;
cur->bc_private.b.ip->i_d.di_nblocks++;
xfs_trans_log_inode(args.tp, cur->bc_private.b.ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(args.mp, args.tp, cur->bc_private.b.ip,
+ xfs_trans_mod_dquot_byino(args.tp, cur->bc_private.b.ip,
XFS_TRANS_DQ_BCOUNT, 1L);
new->l = cpu_to_be64(args.fsbno);
@@ -618,7 +618,7 @@ xfs_bmbt_free_block(
ip->i_d.di_nblocks--;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
xfs_trans_binval(tp, bp);
return 0;
}
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 6c87c8f304e..edf8bdf4141 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -542,10 +542,8 @@ xfs_filestream_associate(
* waiting for the lock because someone else is waiting on the lock we
* hold and we cannot drop that as we are in a transaction here.
*
- * Lucky for us, this inversion is rarely a problem because it's a
- * directory inode that we are trying to lock here and that means the
- * only place that matters is xfs_sync_inodes() and SYNC_DELWRI is
- * used. i.e. freeze, remount-ro, quotasync or unmount.
+ * Lucky for us, this inversion is not a problem because it's a
+ * directory inode that we are trying to lock here.
*
* So, if we can't get the iolock without sleeping then just give up
*/
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index f7c06fac822..c4ea51b55dc 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -239,10 +239,13 @@ typedef struct xfs_fsop_resblks {
* Minimum and maximum sizes need for growth checks
*/
#define XFS_MIN_AG_BLOCKS 64
-#define XFS_MIN_LOG_BLOCKS 512
-#define XFS_MAX_LOG_BLOCKS (64 * 1024)
-#define XFS_MIN_LOG_BYTES (256 * 1024)
-#define XFS_MAX_LOG_BYTES (128 * 1024 * 1024)
+#define XFS_MIN_LOG_BLOCKS 512ULL
+#define XFS_MAX_LOG_BLOCKS (1024 * 1024ULL)
+#define XFS_MIN_LOG_BYTES (10 * 1024 * 1024ULL)
+
+/* keep the maximum size under 2^31 by a small amount */
+#define XFS_MAX_LOG_BYTES \
+ ((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
/*
* Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 89b81eedce6..76c540f719e 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -18,6 +18,7 @@
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
+#include "xfs_acl.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
@@ -82,6 +83,7 @@ xfs_inode_alloc(
memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
ip->i_size = 0;
ip->i_new_size = 0;
+ xfs_inode_init_acls(ip);
/*
* Initialize inode's trace buffers.
@@ -500,10 +502,7 @@ xfs_ireclaim(
* ilock one but will still hold the iolock.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- /*
- * Release dquots (and their references) if any.
- */
- XFS_QM_DQDETACH(ip->i_mount, ip);
+ xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
switch (ip->i_d.di_mode & S_IFMT) {
@@ -561,6 +560,7 @@ xfs_ireclaim(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
+ xfs_inode_clear_acls(ip);
kmem_zone_free(xfs_inode_zone, ip);
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 123b20c8cbf..1f22d65fed0 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -49,7 +49,6 @@
#include "xfs_utils.h"
#include "xfs_dir2_trace.h"
#include "xfs_quota.h"
-#include "xfs_acl.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index f879c1bc4b9..77016702938 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -18,6 +18,7 @@
#ifndef __XFS_INODE_H__
#define __XFS_INODE_H__
+struct posix_acl;
struct xfs_dinode;
struct xfs_inode;
@@ -272,6 +273,11 @@ typedef struct xfs_inode {
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
+#ifdef CONFIG_XFS_POSIX_ACL
+ struct posix_acl *i_acl;
+ struct posix_acl *i_default_acl;
+#endif
+
/* Trace buffers per inode. */
#ifdef XFS_INODE_TRACE
struct ktrace *i_trace; /* general inode trace */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5aaa2d7ec15..67ae5555a30 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -42,7 +42,6 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_rw.h"
-#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_trans_space.h"
@@ -385,7 +384,7 @@ xfs_iomap_write_direct(
* Make sure that the dquots are there. This doesn't hold
* the ilock across a disk read.
*/
- error = XFS_QM_DQATTACH(ip->i_mount, ip, XFS_QMOPT_ILOCKED);
+ error = xfs_qm_dqattach_locked(ip, 0);
if (error)
return XFS_ERROR(error);
@@ -444,8 +443,7 @@ xfs_iomap_write_direct(
if (error)
goto error_out;
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
- qblocks, 0, quota_flag);
+ error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
if (error)
goto error1;
@@ -495,7 +493,7 @@ xfs_iomap_write_direct(
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
xfs_bmap_cancel(&free_list);
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+ xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -582,7 +580,7 @@ xfs_iomap_write_delay(
* Make sure that the dquots are there. This doesn't hold
* the ilock across a disk read.
*/
- error = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+ error = xfs_qm_dqattach_locked(ip, 0);
if (error)
return XFS_ERROR(error);
@@ -684,7 +682,8 @@ xfs_iomap_write_allocate(
/*
* Make sure that the dquots are there.
*/
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return XFS_ERROR(error);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 7ba450116d4..47da2fb4537 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1975,16 +1975,30 @@ xlog_recover_do_reg_buffer(
error = 0;
if (buf_f->blf_flags &
(XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
+ if (item->ri_buf[i].i_addr == NULL) {
+ cmn_err(CE_ALERT,
+ "XFS: NULL dquot in %s.", __func__);
+ goto next;
+ }
+ if (item->ri_buf[i].i_len < sizeof(xfs_dqblk_t)) {
+ cmn_err(CE_ALERT,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[i].i_len, __func__);
+ goto next;
+ }
error = xfs_qm_dqcheck((xfs_disk_dquot_t *)
item->ri_buf[i].i_addr,
-1, 0, XFS_QMOPT_DOWARN,
"dquot_buf_recover");
+ if (error)
+ goto next;
}
- if (!error)
- memcpy(xfs_buf_offset(bp,
- (uint)bit << XFS_BLI_SHIFT), /* dest */
- item->ri_buf[i].i_addr, /* source */
- nbits<<XFS_BLI_SHIFT); /* length */
+
+ memcpy(xfs_buf_offset(bp,
+ (uint)bit << XFS_BLI_SHIFT), /* dest */
+ item->ri_buf[i].i_addr, /* source */
+ nbits<<XFS_BLI_SHIFT); /* length */
+ next:
i++;
bit += nbits;
}
@@ -2615,7 +2629,19 @@ xlog_recover_do_dquot_trans(
return (0);
recddq = (xfs_disk_dquot_t *)item->ri_buf[1].i_addr;
- ASSERT(recddq);
+
+ if (item->ri_buf[1].i_addr == NULL) {
+ cmn_err(CE_ALERT,
+ "XFS: NULL dquot in %s.", __func__);
+ return XFS_ERROR(EIO);
+ }
+ if (item->ri_buf[1].i_len < sizeof(xfs_dqblk_t)) {
+ cmn_err(CE_ALERT,
+ "XFS: dquot too small (%d) in %s.",
+ item->ri_buf[1].i_len, __func__);
+ return XFS_ERROR(EIO);
+ }
+
/*
* This type of quotas was turned off, so ignore this record.
*/
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 65a99725d0c..5c6f092659c 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -960,6 +960,53 @@ xfs_check_sizes(xfs_mount_t *mp)
}
/*
+ * Clear the quotaflags in memory and in the superblock.
+ */
+int
+xfs_mount_reset_sbqflags(
+ struct xfs_mount *mp)
+{
+ int error;
+ struct xfs_trans *tp;
+
+ mp->m_qflags = 0;
+
+ /*
+ * It is OK to look at sb_qflags here in mount path,
+ * without m_sb_lock.
+ */
+ if (mp->m_sb.sb_qflags == 0)
+ return 0;
+ spin_lock(&mp->m_sb_lock);
+ mp->m_sb.sb_qflags = 0;
+ spin_unlock(&mp->m_sb_lock);
+
+ /*
+ * If the fs is readonly, let the incore superblock run
+ * with quotas off but don't flush the update out to disk
+ */
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return 0;
+
+#ifdef QUOTADEBUG
+ xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
+#endif
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
+ error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+ XFS_DEFAULT_LOG_COUNT);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ xfs_fs_cmn_err(CE_ALERT, mp,
+ "xfs_mount_reset_sbqflags: Superblock update failed!");
+ return error;
+ }
+
+ xfs_mod_sb(tp, XFS_SB_QFLAGS);
+ return xfs_trans_commit(tp, 0);
+}
+
+/*
* This function does the following on an initial mount of a file system:
* - reads the superblock from disk and init the mount struct
* - if we're a 32-bit kernel, do a size check on the superblock
@@ -976,7 +1023,8 @@ xfs_mountfs(
xfs_sb_t *sbp = &(mp->m_sb);
xfs_inode_t *rip;
__uint64_t resblks;
- uint quotamount, quotaflags;
+ uint quotamount = 0;
+ uint quotaflags = 0;
int error = 0;
xfs_mount_common(mp, sbp);
@@ -1210,9 +1258,28 @@ xfs_mountfs(
/*
* Initialise the XFS quota management subsystem for this mount
*/
- error = XFS_QM_INIT(mp, &quotamount, &quotaflags);
- if (error)
- goto out_rtunmount;
+ if (XFS_IS_QUOTA_RUNNING(mp)) {
+ error = xfs_qm_newmount(mp, &quotamount, &quotaflags);
+ if (error)
+ goto out_rtunmount;
+ } else {
+ ASSERT(!XFS_IS_QUOTA_ON(mp));
+
+ /*
+ * If a file system had quotas running earlier, but decided to
+ * mount without -o uquota/pquota/gquota options, revoke the
+ * quotachecked license.
+ */
+ if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
+ cmn_err(CE_NOTE,
+ "XFS: resetting qflags for filesystem %s",
+ mp->m_fsname);
+
+ error = xfs_mount_reset_sbqflags(mp);
+ if (error)
+ return error;
+ }
+ }
/*
* Finish recovering the file system. This part needed to be
@@ -1228,9 +1295,19 @@ xfs_mountfs(
/*
* Complete the quota initialisation, post-log-replay component.
*/
- error = XFS_QM_MOUNT(mp, quotamount, quotaflags);
- if (error)
- goto out_rtunmount;
+ if (quotamount) {
+ ASSERT(mp->m_qflags == 0);
+ mp->m_qflags = quotaflags;
+
+ xfs_qm_mount_quotas(mp);
+ }
+
+#if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
+ if (XFS_IS_QUOTA_ON(mp))
+ xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas turned on");
+ else
+ xfs_fs_cmn_err(CE_NOTE, mp, "Disk quotas not turned on");
+#endif
/*
* Now we are mounted, reserve a small amount of unused space for
@@ -1279,12 +1356,7 @@ xfs_unmountfs(
__uint64_t resblks;
int error;
- /*
- * Release dquot that rootinode, rbmino and rsumino might be holding,
- * and release the quota inodes.
- */
- XFS_QM_UNMOUNT(mp);
-
+ xfs_qm_unmount_quotas(mp);
xfs_rtunmount_inodes(mp);
IRELE(mp->m_rootip);
@@ -1299,12 +1371,9 @@ xfs_unmountfs(
* need to force the log first.
*/
xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
- xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_ASYNC);
-
- XFS_QM_DQPURGEALL(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_UMOUNTING);
+ xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);
- if (mp->m_quotainfo)
- XFS_QM_DONE(mp);
+ xfs_qm_unmount(mp);
/*
* Flush out the log synchronously so that we know for sure
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index d6a64392f98..a5122382afd 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -64,6 +64,8 @@ struct xfs_swapext;
struct xfs_mru_cache;
struct xfs_nameops;
struct xfs_ail;
+struct xfs_quotainfo;
+
/*
* Prototypes and functions for the Data Migration subsystem.
@@ -107,86 +109,6 @@ typedef struct xfs_dmops {
(*(mp)->m_dm_ops->xfs_send_unmount)(mp,ip,right,mode,rval,fl)
-/*
- * Prototypes and functions for the Quota Management subsystem.
- */
-
-struct xfs_dquot;
-struct xfs_dqtrxops;
-struct xfs_quotainfo;
-
-typedef int (*xfs_qminit_t)(struct xfs_mount *, uint *, uint *);
-typedef int (*xfs_qmmount_t)(struct xfs_mount *, uint, uint);
-typedef void (*xfs_qmunmount_t)(struct xfs_mount *);
-typedef void (*xfs_qmdone_t)(struct xfs_mount *);
-typedef void (*xfs_dqrele_t)(struct xfs_dquot *);
-typedef int (*xfs_dqattach_t)(struct xfs_inode *, uint);
-typedef void (*xfs_dqdetach_t)(struct xfs_inode *);
-typedef int (*xfs_dqpurgeall_t)(struct xfs_mount *, uint);
-typedef int (*xfs_dqvopalloc_t)(struct xfs_mount *,
- struct xfs_inode *, uid_t, gid_t, prid_t, uint,
- struct xfs_dquot **, struct xfs_dquot **);
-typedef void (*xfs_dqvopcreate_t)(struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot *, struct xfs_dquot *);
-typedef int (*xfs_dqvoprename_t)(struct xfs_inode **);
-typedef struct xfs_dquot * (*xfs_dqvopchown_t)(
- struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot **, struct xfs_dquot *);
-typedef int (*xfs_dqvopchownresv_t)(struct xfs_trans *, struct xfs_inode *,
- struct xfs_dquot *, struct xfs_dquot *, uint);
-typedef void (*xfs_dqstatvfs_t)(struct xfs_inode *, struct kstatfs *);
-typedef int (*xfs_dqsync_t)(struct xfs_mount *, int flags);
-
-typedef struct xfs_qmops {
- xfs_qminit_t xfs_qminit;
- xfs_qmdone_t xfs_qmdone;
- xfs_qmmount_t xfs_qmmount;
- xfs_qmunmount_t xfs_qmunmount;
- xfs_dqrele_t xfs_dqrele;
- xfs_dqattach_t xfs_dqattach;
- xfs_dqdetach_t xfs_dqdetach;
- xfs_dqpurgeall_t xfs_dqpurgeall;
- xfs_dqvopalloc_t xfs_dqvopalloc;
- xfs_dqvopcreate_t xfs_dqvopcreate;
- xfs_dqvoprename_t xfs_dqvoprename;
- xfs_dqvopchown_t xfs_dqvopchown;
- xfs_dqvopchownresv_t xfs_dqvopchownresv;
- xfs_dqstatvfs_t xfs_dqstatvfs;
- xfs_dqsync_t xfs_dqsync;
- struct xfs_dqtrxops *xfs_dqtrxops;
-} xfs_qmops_t;
-
-#define XFS_QM_INIT(mp, mnt, fl) \
- (*(mp)->m_qm_ops->xfs_qminit)(mp, mnt, fl)
-#define XFS_QM_MOUNT(mp, mnt, fl) \
- (*(mp)->m_qm_ops->xfs_qmmount)(mp, mnt, fl)
-#define XFS_QM_UNMOUNT(mp) \
- (*(mp)->m_qm_ops->xfs_qmunmount)(mp)
-#define XFS_QM_DONE(mp) \
- (*(mp)->m_qm_ops->xfs_qmdone)(mp)
-#define XFS_QM_DQRELE(mp, dq) \
- (*(mp)->m_qm_ops->xfs_dqrele)(dq)
-#define XFS_QM_DQATTACH(mp, ip, fl) \
- (*(mp)->m_qm_ops->xfs_dqattach)(ip, fl)
-#define XFS_QM_DQDETACH(mp, ip) \
- (*(mp)->m_qm_ops->xfs_dqdetach)(ip)
-#define XFS_QM_DQPURGEALL(mp, fl) \
- (*(mp)->m_qm_ops->xfs_dqpurgeall)(mp, fl)
-#define XFS_QM_DQVOPALLOC(mp, ip, uid, gid, prid, fl, dq1, dq2) \
- (*(mp)->m_qm_ops->xfs_dqvopalloc)(mp, ip, uid, gid, prid, fl, dq1, dq2)
-#define XFS_QM_DQVOPCREATE(mp, tp, ip, dq1, dq2) \
- (*(mp)->m_qm_ops->xfs_dqvopcreate)(tp, ip, dq1, dq2)
-#define XFS_QM_DQVOPRENAME(mp, ip) \
- (*(mp)->m_qm_ops->xfs_dqvoprename)(ip)
-#define XFS_QM_DQVOPCHOWN(mp, tp, ip, dqp, dq) \
- (*(mp)->m_qm_ops->xfs_dqvopchown)(tp, ip, dqp, dq)
-#define XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, dq1, dq2, fl) \
- (*(mp)->m_qm_ops->xfs_dqvopchownresv)(tp, ip, dq1, dq2, fl)
-#define XFS_QM_DQSTATVFS(ip, statp) \
- (*(ip)->i_mount->m_qm_ops->xfs_dqstatvfs)(ip, statp)
-#define XFS_QM_DQSYNC(mp, flags) \
- (*(mp)->m_qm_ops->xfs_dqsync)(mp, flags)
-
#ifdef HAVE_PERCPU_SB
/*
@@ -510,8 +432,6 @@ extern int xfs_sb_validate_fsb_count(struct xfs_sb *, __uint64_t);
extern int xfs_dmops_get(struct xfs_mount *);
extern void xfs_dmops_put(struct xfs_mount *);
-extern int xfs_qmops_get(struct xfs_mount *);
-extern void xfs_qmops_put(struct xfs_mount *);
extern struct xfs_dmops xfs_dmcore_xfs;
diff --git a/fs/xfs/xfs_qmops.c b/fs/xfs/xfs_qmops.c
deleted file mode 100644
index e101790ea8e..00000000000
--- a/fs/xfs/xfs_qmops.c
+++ /dev/null
@@ -1,152 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_error.h"
-
-
-STATIC struct xfs_dquot *
-xfs_dqvopchown_default(
- struct xfs_trans *tp,
- struct xfs_inode *ip,
- struct xfs_dquot **dqp,
- struct xfs_dquot *dq)
-{
- return NULL;
-}
-
-/*
- * Clear the quotaflags in memory and in the superblock.
- */
-int
-xfs_mount_reset_sbqflags(xfs_mount_t *mp)
-{
- int error;
- xfs_trans_t *tp;
-
- mp->m_qflags = 0;
- /*
- * It is OK to look at sb_qflags here in mount path,
- * without m_sb_lock.
- */
- if (mp->m_sb.sb_qflags == 0)
- return 0;
- spin_lock(&mp->m_sb_lock);
- mp->m_sb.sb_qflags = 0;
- spin_unlock(&mp->m_sb_lock);
-
- /*
- * if the fs is readonly, let the incore superblock run
- * with quotas off but don't flush the update out to disk
- */
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return 0;
-#ifdef QUOTADEBUG
- xfs_fs_cmn_err(CE_NOTE, mp, "Writing superblock quota changes");
-#endif
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
- if ((error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
- XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- xfs_fs_cmn_err(CE_ALERT, mp,
- "xfs_mount_reset_sbqflags: Superblock update failed!");
- return error;
- }
- xfs_mod_sb(tp, XFS_SB_QFLAGS);
- error = xfs_trans_commit(tp, 0);
- return error;
-}
-
-STATIC int
-xfs_noquota_init(
- xfs_mount_t *mp,
- uint *needquotamount,
- uint *quotaflags)
-{
- int error = 0;
-
- *quotaflags = 0;
- *needquotamount = B_FALSE;
-
- ASSERT(!XFS_IS_QUOTA_ON(mp));
-
- /*
- * If a file system had quotas running earlier, but decided to
- * mount without -o uquota/pquota/gquota options, revoke the
- * quotachecked license.
- */
- if (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT) {
- cmn_err(CE_NOTE,
- "XFS resetting qflags for filesystem %s",
- mp->m_fsname);
-
- error = xfs_mount_reset_sbqflags(mp);
- }
- return error;
-}
-
-static struct xfs_qmops xfs_qmcore_stub = {
- .xfs_qminit = (xfs_qminit_t) xfs_noquota_init,
- .xfs_qmdone = (xfs_qmdone_t) fs_noerr,
- .xfs_qmmount = (xfs_qmmount_t) fs_noerr,
- .xfs_qmunmount = (xfs_qmunmount_t) fs_noerr,
- .xfs_dqrele = (xfs_dqrele_t) fs_noerr,
- .xfs_dqattach = (xfs_dqattach_t) fs_noerr,
- .xfs_dqdetach = (xfs_dqdetach_t) fs_noerr,
- .xfs_dqpurgeall = (xfs_dqpurgeall_t) fs_noerr,
- .xfs_dqvopalloc = (xfs_dqvopalloc_t) fs_noerr,
- .xfs_dqvopcreate = (xfs_dqvopcreate_t) fs_noerr,
- .xfs_dqvoprename = (xfs_dqvoprename_t) fs_noerr,
- .xfs_dqvopchown = xfs_dqvopchown_default,
- .xfs_dqvopchownresv = (xfs_dqvopchownresv_t) fs_noerr,
- .xfs_dqstatvfs = (xfs_dqstatvfs_t) fs_noval,
- .xfs_dqsync = (xfs_dqsync_t) fs_noerr,
-};
-
-int
-xfs_qmops_get(struct xfs_mount *mp)
-{
- if (XFS_IS_QUOTA_RUNNING(mp)) {
-#ifdef CONFIG_XFS_QUOTA
- mp->m_qm_ops = &xfs_qmcore_xfs;
-#else
- cmn_err(CE_WARN,
- "XFS: qouta support not available in this kernel.");
- return EINVAL;
-#endif
- } else {
- mp->m_qm_ops = &xfs_qmcore_stub;
- }
-
- return 0;
-}
-
-void
-xfs_qmops_put(struct xfs_mount *mp)
-{
-}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index f5d1202dde2..3ec91ac74c2 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -197,7 +197,6 @@ typedef struct xfs_qoff_logformat {
#define XFS_QMOPT_UMOUNTING 0x0000100 /* filesys is being unmounted */
#define XFS_QMOPT_DOLOG 0x0000200 /* log buf changes (in quotacheck) */
#define XFS_QMOPT_DOWARN 0x0000400 /* increase warning cnt if needed */
-#define XFS_QMOPT_ILOCKED 0x0000800 /* inode is already locked (excl) */
#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
@@ -302,69 +301,79 @@ typedef struct xfs_dqtrx {
long qt_delrtb_delta; /* delayed RT blk count changes */
} xfs_dqtrx_t;
-/*
- * Dquot transaction functions, used if quota is enabled.
- */
-typedef void (*qo_dup_dqinfo_t)(struct xfs_trans *, struct xfs_trans *);
-typedef void (*qo_mod_dquot_byino_t)(struct xfs_trans *,
- struct xfs_inode *, uint, long);
-typedef void (*qo_free_dqinfo_t)(struct xfs_trans *);
-typedef void (*qo_apply_dquot_deltas_t)(struct xfs_trans *);
-typedef void (*qo_unreserve_and_mod_dquots_t)(struct xfs_trans *);
-typedef int (*qo_reserve_quota_nblks_t)(
- struct xfs_trans *, struct xfs_mount *,
- struct xfs_inode *, long, long, uint);
-typedef int (*qo_reserve_quota_bydquots_t)(
- struct xfs_trans *, struct xfs_mount *,
- struct xfs_dquot *, struct xfs_dquot *,
- long, long, uint);
-typedef struct xfs_dqtrxops {
- qo_dup_dqinfo_t qo_dup_dqinfo;
- qo_free_dqinfo_t qo_free_dqinfo;
- qo_mod_dquot_byino_t qo_mod_dquot_byino;
- qo_apply_dquot_deltas_t qo_apply_dquot_deltas;
- qo_reserve_quota_nblks_t qo_reserve_quota_nblks;
- qo_reserve_quota_bydquots_t qo_reserve_quota_bydquots;
- qo_unreserve_and_mod_dquots_t qo_unreserve_and_mod_dquots;
-} xfs_dqtrxops_t;
-
-#define XFS_DQTRXOP(mp, tp, op, args...) \
- ((mp)->m_qm_ops->xfs_dqtrxops ? \
- ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : 0)
-
-#define XFS_DQTRXOP_VOID(mp, tp, op, args...) \
- ((mp)->m_qm_ops->xfs_dqtrxops ? \
- ((mp)->m_qm_ops->xfs_dqtrxops->op)(tp, ## args) : (void)0)
-
-#define XFS_TRANS_DUP_DQINFO(mp, otp, ntp) \
- XFS_DQTRXOP_VOID(mp, otp, qo_dup_dqinfo, ntp)
-#define XFS_TRANS_FREE_DQINFO(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_free_dqinfo)
-#define XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, field, delta) \
- XFS_DQTRXOP_VOID(mp, tp, qo_mod_dquot_byino, ip, field, delta)
-#define XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_apply_dquot_deltas)
-#define XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, fl) \
- XFS_DQTRXOP(mp, tp, qo_reserve_quota_nblks, mp, ip, nblks, ninos, fl)
-#define XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, fl) \
- XFS_DQTRXOP(mp, tp, qo_reserve_quota_bydquots, mp, ud, gd, nb, ni, fl)
-#define XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp) \
- XFS_DQTRXOP_VOID(mp, tp, qo_unreserve_and_mod_dquots)
-
-#define XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, nblks, ninos, flags) \
- XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip, -(nblks), -(ninos), flags)
-#define XFS_TRANS_RESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
- XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, nb, ni, \
- f | XFS_QMOPT_RES_REGBLKS)
-#define XFS_TRANS_UNRESERVE_QUOTA(mp, tp, ud, gd, nb, ni, f) \
- XFS_TRANS_RESERVE_QUOTA_BYDQUOTS(mp, tp, ud, gd, -(nb), -(ni), \
+#ifdef CONFIG_XFS_QUOTA
+extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
+extern void xfs_trans_free_dqinfo(struct xfs_trans *);
+extern void xfs_trans_mod_dquot_byino(struct xfs_trans *, struct xfs_inode *,
+ uint, long);
+extern void xfs_trans_apply_dquot_deltas(struct xfs_trans *);
+extern void xfs_trans_unreserve_and_mod_dquots(struct xfs_trans *);
+extern int xfs_trans_reserve_quota_nblks(struct xfs_trans *,
+ struct xfs_inode *, long, long, uint);
+extern int xfs_trans_reserve_quota_bydquots(struct xfs_trans *,
+ struct xfs_mount *, struct xfs_dquot *,
+ struct xfs_dquot *, long, long, uint);
+
+extern int xfs_qm_vop_dqalloc(struct xfs_inode *, uid_t, gid_t, prid_t, uint,
+ struct xfs_dquot **, struct xfs_dquot **);
+extern void xfs_qm_vop_create_dqattach(struct xfs_trans *, struct xfs_inode *,
+ struct xfs_dquot *, struct xfs_dquot *);
+extern int xfs_qm_vop_rename_dqattach(struct xfs_inode **);
+extern struct xfs_dquot *xfs_qm_vop_chown(struct xfs_trans *,
+ struct xfs_inode *, struct xfs_dquot **, struct xfs_dquot *);
+extern int xfs_qm_vop_chown_reserve(struct xfs_trans *, struct xfs_inode *,
+ struct xfs_dquot *, struct xfs_dquot *, uint);
+extern int xfs_qm_dqattach(struct xfs_inode *, uint);
+extern int xfs_qm_dqattach_locked(struct xfs_inode *, uint);
+extern void xfs_qm_dqdetach(struct xfs_inode *);
+extern void xfs_qm_dqrele(struct xfs_dquot *);
+extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
+extern int xfs_qm_sync(struct xfs_mount *, int);
+extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
+extern void xfs_qm_mount_quotas(struct xfs_mount *);
+extern void xfs_qm_unmount(struct xfs_mount *);
+extern void xfs_qm_unmount_quotas(struct xfs_mount *);
+
+#else
+static inline int
+xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
+ uint flags, struct xfs_dquot **udqp, struct xfs_dquot **gdqp)
+{
+ *udqp = NULL;
+ *gdqp = NULL;
+ return 0;
+}
+#define xfs_trans_dup_dqinfo(tp, tp2)
+#define xfs_trans_free_dqinfo(tp)
+#define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
+#define xfs_trans_apply_dquot_deltas(tp)
+#define xfs_trans_unreserve_and_mod_dquots(tp)
+#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags) (0)
+#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl) (0)
+#define xfs_qm_vop_create_dqattach(tp, ip, u, g)
+#define xfs_qm_vop_rename_dqattach(it) (0)
+#define xfs_qm_vop_chown(tp, ip, old, new) (NULL)
+#define xfs_qm_vop_chown_reserve(tp, ip, u, g, fl) (0)
+#define xfs_qm_dqattach(ip, fl) (0)
+#define xfs_qm_dqattach_locked(ip, fl) (0)
+#define xfs_qm_dqdetach(ip)
+#define xfs_qm_dqrele(d)
+#define xfs_qm_statvfs(ip, s)
+#define xfs_qm_sync(mp, fl) (0)
+#define xfs_qm_newmount(mp, a, b) (0)
+#define xfs_qm_mount_quotas(mp)
+#define xfs_qm_unmount(mp)
+#define xfs_qm_unmount_quotas(mp) (0)
+#endif /* CONFIG_XFS_QUOTA */
+
+#define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
+ xfs_trans_reserve_quota_nblks(tp, ip, -(nblks), -(ninos), flags)
+#define xfs_trans_reserve_quota(tp, mp, ud, gd, nb, ni, f) \
+ xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, nb, ni, \
f | XFS_QMOPT_RES_REGBLKS)
extern int xfs_qm_dqcheck(xfs_disk_dquot_t *, xfs_dqid_t, uint, uint, char *);
extern int xfs_mount_reset_sbqflags(struct xfs_mount *);
-extern struct xfs_qmops xfs_qmcore_xfs;
-
#endif /* __KERNEL__ */
-
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 58f85e9cd11..b81deea0ce1 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -166,7 +166,8 @@ xfs_rename(
/*
* Attach the dquots to the inodes
*/
- if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
+ error = xfs_qm_vop_rename_dqattach(inodes);
+ if (error) {
xfs_trans_cancel(tp, cancel_flags);
goto std_return;
}
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index 36f3a21c54d..fea68615ed2 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -41,7 +41,6 @@
#include "xfs_ialloc.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
-#include "xfs_acl.h"
#include "xfs_error.h"
#include "xfs_buf_item.h"
#include "xfs_rw.h"
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 8570b826fed..66b849358e6 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -297,7 +297,7 @@ xfs_trans_dup(
tp->t_rtx_res = tp->t_rtx_res_used;
ntp->t_pflags = tp->t_pflags;
- XFS_TRANS_DUP_DQINFO(tp->t_mountp, tp, ntp);
+ xfs_trans_dup_dqinfo(tp, ntp);
atomic_inc(&tp->t_mountp->m_active_trans);
return ntp;
@@ -628,8 +628,6 @@ xfs_trans_apply_sb_deltas(
xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount),
offsetof(xfs_dsb_t, sb_frextents) +
sizeof(sbp->sb_frextents) - 1);
-
- tp->t_mountp->m_super->s_dirt = 1;
}
/*
@@ -831,7 +829,7 @@ shut_us_down:
* means is that we have some (non-persistent) quota
* reservations that need to be unreserved.
*/
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) {
commit_lsn = xfs_log_done(mp, tp->t_ticket,
NULL, log_flags);
@@ -850,10 +848,9 @@ shut_us_down:
/*
* If we need to update the superblock, then do it now.
*/
- if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+ if (tp->t_flags & XFS_TRANS_SB_DIRTY)
xfs_trans_apply_sb_deltas(tp);
- }
- XFS_TRANS_APPLY_DQUOT_DELTAS(mp, tp);
+ xfs_trans_apply_dquot_deltas(tp);
/*
* Ask each log item how many log_vector entries it will
@@ -1058,7 +1055,7 @@ xfs_trans_uncommit(
}
xfs_trans_unreserve_and_mod_sb(tp);
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(tp->t_mountp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
xfs_trans_free_items(tp, flags);
xfs_trans_free_busy(tp);
@@ -1183,7 +1180,7 @@ xfs_trans_cancel(
}
#endif
xfs_trans_unreserve_and_mod_sb(tp);
- XFS_TRANS_UNRESERVE_AND_MOD_DQUOTS(mp, tp);
+ xfs_trans_unreserve_and_mod_dquots(tp);
if (tp->t_ticket) {
if (flags & XFS_TRANS_RELEASE_LOG_RES) {
@@ -1213,7 +1210,7 @@ xfs_trans_free(
xfs_trans_t *tp)
{
atomic_dec(&tp->t_mountp->m_active_trans);
- XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+ xfs_trans_free_dqinfo(tp);
kmem_zone_free(xfs_trans_zone, tp);
}
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 79b9e5ea535..4d88616bde9 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -166,7 +166,7 @@ xfs_dir_ialloc(
xfs_buf_relse(ialloc_context);
if (dqinfo) {
tp->t_dqinfo = dqinfo;
- XFS_TRANS_FREE_DQINFO(tp->t_mountp, tp);
+ xfs_trans_free_dqinfo(tp);
}
*tpp = ntp;
*ipp = NULL;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 19cf90a9c76..c4eca5ed5da 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -42,6 +42,7 @@
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
+#include "xfs_acl.h"
#include "xfs_attr.h"
#include "xfs_rw.h"
#include "xfs_error.h"
@@ -118,7 +119,7 @@ xfs_setattr(
*/
ASSERT(udqp == NULL);
ASSERT(gdqp == NULL);
- code = XFS_QM_DQVOPALLOC(mp, ip, uid, gid, ip->i_d.di_projid,
+ code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid,
qflags, &udqp, &gdqp);
if (code)
return code;
@@ -180,10 +181,11 @@ xfs_setattr(
* Do a quota reservation only if uid/gid is actually
* going to change.
*/
- if ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
- (XFS_IS_GQUOTA_ON(mp) && igid != gid)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) &&
+ ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
+ (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
ASSERT(tp);
- code = XFS_QM_DQVOPCHOWNRESV(mp, tp, ip, udqp, gdqp,
+ code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
capable(CAP_FOWNER) ?
XFS_QMOPT_FORCE_RES : 0);
if (code) /* out of quota */
@@ -217,7 +219,7 @@ xfs_setattr(
/*
* Make sure that the dquots are attached to the inode.
*/
- code = XFS_QM_DQATTACH(mp, ip, XFS_QMOPT_ILOCKED);
+ code = xfs_qm_dqattach_locked(ip, 0);
if (code)
goto error_return;
@@ -351,21 +353,21 @@ xfs_setattr(
* in the transaction.
*/
if (iuid != uid) {
- if (XFS_IS_UQUOTA_ON(mp)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
ASSERT(mask & ATTR_UID);
ASSERT(udqp);
- olddquot1 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ olddquot1 = xfs_qm_vop_chown(tp, ip,
&ip->i_udquot, udqp);
}
ip->i_d.di_uid = uid;
inode->i_uid = uid;
}
if (igid != gid) {
- if (XFS_IS_GQUOTA_ON(mp)) {
+ if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
ASSERT(!XFS_IS_PQUOTA_ON(mp));
ASSERT(mask & ATTR_GID);
ASSERT(gdqp);
- olddquot2 = XFS_QM_DQVOPCHOWN(mp, tp, ip,
+ olddquot2 = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
ip->i_d.di_gid = gid;
@@ -461,13 +463,25 @@ xfs_setattr(
/*
* Release any dquot(s) the inode had kept before chown.
*/
- XFS_QM_DQRELE(mp, olddquot1);
- XFS_QM_DQRELE(mp, olddquot2);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(olddquot1);
+ xfs_qm_dqrele(olddquot2);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
- if (code) {
+ if (code)
return code;
+
+ /*
+ * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
+ * update. We could avoid this with linked transactions
+ * and passing down the transaction pointer all the way
+ * to attr_set. No previous user of the generic
+ * Posix ACL code seems to care about this issue either.
+ */
+ if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+ code = -xfs_acl_chmod(inode);
+ if (code)
+ return XFS_ERROR(code);
}
if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE) &&
@@ -482,8 +496,8 @@ xfs_setattr(
commit_flags |= XFS_TRANS_ABORT;
/* FALLTHROUGH */
error_return:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (tp) {
xfs_trans_cancel(tp, commit_flags);
}
@@ -739,7 +753,8 @@ xfs_free_eofblocks(
/*
* Attach the dquots to the inode up front.
*/
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
/*
@@ -1181,7 +1196,8 @@ xfs_inactive(
ASSERT(ip->i_d.di_nlink == 0);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return VN_INACTIVE_CACHE;
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
@@ -1307,7 +1323,7 @@ xfs_inactive(
/*
* Credit the quota account(s). The inode is gone.
*/
- XFS_TRANS_MOD_DQUOT_BYINO(mp, tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
/*
* Just ignore errors at this point. There is nothing we can
@@ -1323,11 +1339,11 @@ xfs_inactive(
xfs_fs_cmn_err(CE_NOTE, mp, "xfs_inactive: "
"xfs_trans_commit() returned error %d", error);
}
+
/*
* Release the dquots held by inode, if any.
*/
- XFS_QM_DQDETACH(mp, ip);
-
+ xfs_qm_dqdetach(ip);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
out:
@@ -1427,8 +1443,7 @@ xfs_create(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(), current_fsgid(), prid,
+ error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -1489,7 +1504,7 @@ xfs_create(
/*
* Reserve disk quota and the inode.
*/
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+ error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
if (error)
goto out_trans_cancel;
@@ -1561,7 +1576,7 @@ xfs_create(
* These ids of the inode couldn't have changed since the new
* inode has been locked ever since it was created.
*/
- XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
/*
* xfs_trans_commit normally decrements the vnode ref count
@@ -1580,8 +1595,8 @@ xfs_create(
goto out_dqrele;
}
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
*ipp = ip;
@@ -1602,8 +1617,8 @@ xfs_create(
out_trans_cancel:
xfs_trans_cancel(tp, cancel_flags);
out_dqrele:
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -1837,11 +1852,11 @@ xfs_remove(
return error;
}
- error = XFS_QM_DQATTACH(mp, dp, 0);
+ error = xfs_qm_dqattach(dp, 0);
if (error)
goto std_return;
- error = XFS_QM_DQATTACH(mp, ip, 0);
+ error = xfs_qm_dqattach(ip, 0);
if (error)
goto std_return;
@@ -2028,11 +2043,11 @@ xfs_link(
/* Return through std_return after this point. */
- error = XFS_QM_DQATTACH(mp, sip, 0);
+ error = xfs_qm_dqattach(sip, 0);
if (error)
goto std_return;
- error = XFS_QM_DQATTACH(mp, tdp, 0);
+ error = xfs_qm_dqattach(tdp, 0);
if (error)
goto std_return;
@@ -2205,8 +2220,7 @@ xfs_symlink(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = XFS_QM_DQVOPALLOC(mp, dp,
- current_fsuid(), current_fsgid(), prid,
+ error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp);
if (error)
goto std_return;
@@ -2248,7 +2262,7 @@ xfs_symlink(
/*
* Reserve disk quota : blocks and inode.
*/
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp, udqp, gdqp, resblks, 1, 0);
+ error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, resblks, 1, 0);
if (error)
goto error_return;
@@ -2288,7 +2302,7 @@ xfs_symlink(
/*
* Also attach the dquot(s) to it, if applicable.
*/
- XFS_QM_DQVOPCREATE(mp, tp, ip, udqp, gdqp);
+ xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp);
if (resblks)
resblks -= XFS_IALLOC_SPACE_RES(mp);
@@ -2376,8 +2390,8 @@ xfs_symlink(
goto error2;
}
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
/* Fall through to std_return with error = 0 or errno from
* xfs_trans_commit */
@@ -2401,8 +2415,8 @@ std_return:
cancel_flags |= XFS_TRANS_ABORT;
error_return:
xfs_trans_cancel(tp, cancel_flags);
- XFS_QM_DQRELE(mp, udqp);
- XFS_QM_DQRELE(mp, gdqp);
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
if (unlock_dp_on_error)
xfs_iunlock(dp, XFS_ILOCK_EXCL);
@@ -2541,7 +2555,8 @@ xfs_alloc_file_space(
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
if (len <= 0)
@@ -2628,8 +2643,8 @@ retry:
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, tp, ip,
- qblocks, 0, quota_flag);
+ error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
+ 0, quota_flag);
if (error)
goto error1;
@@ -2688,7 +2703,7 @@ dmapi_enospc_check:
error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
xfs_bmap_cancel(&free_list);
- XFS_TRANS_UNRESERVE_QUOTA_NBLKS(mp, tp, ip, qblocks, 0, quota_flag);
+ xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
error1: /* Just cancel transaction */
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
@@ -2827,7 +2842,8 @@ xfs_free_file_space(
xfs_itrace_entry(ip);
- if ((error = XFS_QM_DQATTACH(mp, ip, 0)))
+ error = xfs_qm_dqattach(ip, 0);
+ if (error)
return error;
error = 0;
@@ -2953,9 +2969,9 @@ xfs_free_file_space(
break;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = XFS_TRANS_RESERVE_QUOTA(mp, tp,
- ip->i_udquot, ip->i_gdquot, resblks, 0,
- XFS_QMOPT_RES_REGBLKS);
+ error = xfs_trans_reserve_quota(tp, mp,
+ ip->i_udquot, ip->i_gdquot,
+ resblks, 0, XFS_QMOPT_RES_REGBLKS);
if (error)
goto error1;
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 04373c6c61f..a9e102de71a 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -18,6 +18,7 @@ int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
+#define XFS_ATTR_NOACL 0x08 /* Don't call xfs_acl_chmod */
int xfs_readlink(struct xfs_inode *ip, char *link);
int xfs_fsync(struct xfs_inode *ip);