about summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 3
-rw-r--r-- fs/adfs/adfs.h 2
-rw-r--r-- fs/affs/file.c 4
-rw-r--r-- fs/afs/dir.c 4
-rw-r--r-- fs/afs/internal.h 8
-rw-r--r-- fs/aio.c 5
-rw-r--r-- fs/anon_inodes.c 13
-rw-r--r-- fs/autofs4/autofs_i.h 2
-rw-r--r-- fs/autofs4/expire.c 26
-rw-r--r-- fs/autofs4/root.c 38
-rw-r--r-- fs/autofs4/waitq.c 2
-rw-r--r-- fs/bfs/bfs.h 2
-rw-r--r-- fs/buffer.c 2
-rw-r--r-- fs/compat.c 15
-rw-r--r-- fs/compat_ioctl.c 4
-rw-r--r-- fs/configfs/file.c 2
-rw-r--r-- fs/configfs/inode.c 2
-rw-r--r-- fs/configfs/mount.c 2
-rw-r--r-- fs/configfs/symlink.c 4
-rw-r--r-- fs/debugfs/file.c 2
-rw-r--r-- fs/devpts/inode.c 43
-rw-r--r-- fs/dlm/lockspace.c 2
-rw-r--r-- fs/dnotify.c 11
-rw-r--r-- fs/eventfd.c 15
-rw-r--r-- fs/eventpoll.c 30
-rw-r--r-- fs/exec.c 18
-rw-r--r-- fs/exportfs/expfs.c 10
-rw-r--r-- fs/ext3/inode.c 14
-rw-r--r-- fs/ext4/acl.c 12
-rw-r--r-- fs/ext4/balloc.c 33
-rw-r--r-- fs/ext4/bitmap.c 2
-rw-r--r-- fs/ext4/dir.c 4
-rw-r--r-- fs/ext4/ext4.h 1205
-rw-r--r-- fs/ext4/ext4_extents.h 232
-rw-r--r-- fs/ext4/ext4_i.h 167
-rw-r--r-- fs/ext4/ext4_jbd2.c 14
-rw-r--r-- fs/ext4/ext4_jbd2.h 231
-rw-r--r-- fs/ext4/ext4_sb.h 148
-rw-r--r-- fs/ext4/extents.c 354
-rw-r--r-- fs/ext4/file.c 6
-rw-r--r-- fs/ext4/fsync.c 7
-rw-r--r-- fs/ext4/hash.c 2
-rw-r--r-- fs/ext4/ialloc.c 44
-rw-r--r-- fs/ext4/inode.c 57
-rw-r--r-- fs/ext4/ioctl.c 16
-rw-r--r-- fs/ext4/mballoc.c 437
-rw-r--r-- fs/ext4/mballoc.h 304
-rw-r--r-- fs/ext4/migrate.c 43
-rw-r--r-- fs/ext4/namei.c 44
-rw-r--r-- fs/ext4/resize.c 83
-rw-r--r-- fs/ext4/super.c 66
-rw-r--r-- fs/ext4/symlink.c 2
-rw-r--r-- fs/ext4/xattr.c 40
-rw-r--r-- fs/ext4/xattr.h 7
-rw-r--r-- fs/ext4/xattr_security.c 4
-rw-r--r-- fs/ext4/xattr_trusted.c 4
-rw-r--r-- fs/ext4/xattr_user.c 4
-rw-r--r-- fs/fat/cache.c 6
-rw-r--r-- fs/fat/fatent.c 2
-rw-r--r-- fs/fat/file.c 2
-rw-r--r-- fs/fcntl.c 1
-rw-r--r-- fs/file.c 23
-rw-r--r-- fs/file_table.c 1
-rw-r--r-- fs/fuse/control.c 2
-rw-r--r-- fs/fuse/dev.c 23
-rw-r--r-- fs/fuse/dir.c 86
-rw-r--r-- fs/fuse/file.c 633
-rw-r--r-- fs/fuse/fuse_i.h 52
-rw-r--r-- fs/fuse/inode.c 90
-rw-r--r-- fs/gfs2/locking/dlm/sysfs.c 2
-rw-r--r-- fs/gfs2/util.h 18
-rw-r--r-- fs/hfs/btree.c 10
-rw-r--r-- fs/hfs/mdb.c 2
-rw-r--r-- fs/hfsplus/btree.c 10
-rw-r--r-- fs/hfsplus/super.c 2
-rw-r--r-- fs/hugetlbfs/inode.c 2
-rw-r--r-- fs/isofs/dir.c 8
-rw-r--r-- fs/isofs/namei.c 7
-rw-r--r-- fs/jbd2/commit.c 19
-rw-r--r-- fs/jbd2/journal.c 38
-rw-r--r-- fs/jbd2/revoke.c 165
-rw-r--r-- fs/jbd2/transaction.c 41
-rw-r--r-- fs/jffs2/build.c 31
-rw-r--r-- fs/jffs2/debug.h 8
-rw-r--r-- fs/jffs2/dir.c 42
-rw-r--r-- fs/jffs2/erase.c 9
-rw-r--r-- fs/jffs2/fs.c 14
-rw-r--r-- fs/jffs2/gc.c 8
-rw-r--r-- fs/jffs2/nodelist.h 5
-rw-r--r-- fs/jffs2/nodemgmt.c 2
-rw-r--r-- fs/jffs2/os-linux.h 2
-rw-r--r-- fs/jffs2/readinode.c 16
-rw-r--r-- fs/jffs2/scan.c 9
-rw-r--r-- fs/jffs2/super.c 15
-rw-r--r-- fs/jffs2/wbuf.c 2
-rw-r--r-- fs/jffs2/write.c 17
-rw-r--r-- fs/jffs2/xattr.c 8
-rw-r--r-- fs/lockd/clntproc.c 2
-rw-r--r-- fs/lockd/svclock.c 2
-rw-r--r-- fs/locks.c 1
-rw-r--r-- fs/msdos/namei.c 2
-rw-r--r-- fs/namespace.c 4
-rw-r--r-- fs/nfs/super.c 26
-rw-r--r-- fs/nfsd/nfs4callback.c 4
-rw-r--r-- fs/ntfs/debug.h 6
-rw-r--r-- fs/ocfs2/cluster/sys.c 2
-rw-r--r-- fs/ocfs2/dlm/dlmdebug.c 8
-rw-r--r-- fs/ocfs2/dlm/dlmfs.c 2
-rw-r--r-- fs/ocfs2/file.c 4
-rw-r--r-- fs/ocfs2/localalloc.c 4
-rw-r--r-- fs/ocfs2/stack_o2cb.c 2
-rw-r--r-- fs/ocfs2/stack_user.c 2
-rw-r--r-- fs/ocfs2/symlink.c 2
-rw-r--r-- fs/open.c 1
-rw-r--r-- fs/partitions/ldm.c 8
-rw-r--r-- fs/pipe.c 17
-rw-r--r-- fs/proc/array.c 7
-rw-r--r-- fs/proc/base.c 1
-rw-r--r-- fs/proc/generic.c 17
-rw-r--r-- fs/proc/proc_misc.c 2
-rw-r--r-- fs/proc/proc_net.c 11
-rw-r--r-- fs/proc/proc_tty.c 6
-rw-r--r-- fs/proc/task_nommu.c 1
-rw-r--r-- fs/quota_v2.c 4
-rw-r--r-- fs/ramfs/inode.c 2
-rw-r--r-- fs/reiserfs/journal.c 50
-rw-r--r-- fs/select.c 15
-rw-r--r-- fs/signalfd.c 17
-rw-r--r-- fs/smbfs/smb_debug.h 6
-rw-r--r-- fs/sysfs/file.c 2
-rw-r--r-- fs/sysfs/inode.c 4
-rw-r--r-- fs/sysfs/mount.c 2
-rw-r--r-- fs/sysv/sysv.h 8
-rw-r--r-- fs/timerfd.c 11
-rw-r--r-- fs/udf/super.c 4
-rw-r--r-- fs/utimes.c 17
-rw-r--r-- fs/vfat/namei.c 2
-rw-r--r-- fs/xfs/Kconfig 13
-rw-r--r-- fs/xfs/linux-2.6/mrlock.h 60
-rw-r--r-- fs/xfs/linux-2.6/xfs_buf.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_export.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_file.c 75
-rw-r--r-- fs/xfs/linux-2.6/xfs_ioctl.c 8
-rw-r--r-- fs/xfs/linux-2.6/xfs_iops.c 3
-rw-r--r-- fs/xfs/linux-2.6/xfs_linux.h 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.c 21
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.h 1
-rw-r--r-- fs/xfs/linux-2.6/xfs_super.c 2
-rw-r--r-- fs/xfs/linux-2.6/xfs_vnode.h 24
-rw-r--r-- fs/xfs/quota/xfs_dquot.c 4
-rw-r--r-- fs/xfs/quota/xfs_qm.c 27
-rw-r--r-- fs/xfs/quota/xfs_qm_syscalls.c 6
-rw-r--r-- fs/xfs/quota/xfs_quota_priv.h 5
-rw-r--r-- fs/xfs/quota/xfs_trans_dquot.c 2
-rw-r--r-- fs/xfs/support/debug.h 2
-rw-r--r-- fs/xfs/xfs.h 2
-rw-r--r-- fs/xfs/xfs_acl.c 53
-rw-r--r-- fs/xfs/xfs_attr.c 93
-rw-r--r-- fs/xfs/xfs_attr.h 6
-rw-r--r-- fs/xfs/xfs_bmap.c 1
-rw-r--r-- fs/xfs/xfs_dfrag.c 4
-rw-r--r-- fs/xfs/xfs_fsops.c 8
-rw-r--r-- fs/xfs/xfs_ialloc.c 10
-rw-r--r-- fs/xfs/xfs_iget.c 140
-rw-r--r-- fs/xfs/xfs_inode.c 25
-rw-r--r-- fs/xfs/xfs_inode.h 16
-rw-r--r-- fs/xfs/xfs_inode_item.c 12
-rw-r--r-- fs/xfs/xfs_iomap.c 19
-rw-r--r-- fs/xfs/xfs_itable.c 6
-rw-r--r-- fs/xfs/xfs_mount.c 83
-rw-r--r-- fs/xfs/xfs_mount.h 7
-rw-r--r-- fs/xfs/xfs_rename.c 252
-rw-r--r-- fs/xfs/xfs_trans_inode.c 12
-rw-r--r-- fs/xfs/xfs_utils.c 45
-rw-r--r-- fs/xfs/xfs_utils.h 2
-rw-r--r-- fs/xfs/xfs_vfsops.c 1
-rw-r--r-- fs/xfs/xfs_vnodeops.c 274
-rw-r--r-- fs/xfs/xfs_vnodeops.h 8
178 files changed, 4754 insertions, 2162 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 2e43d46f65d..cf12c403b8c 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1005,7 +1005,8 @@ config TMPFS_POSIX_ACL
config HUGETLBFS
bool "HugeTLB file system support"
- depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || BROKEN
+ depends on X86 || IA64 || PPC64 || SPARC64 || (SUPERH && MMU) || \
+ (S390 && 64BIT) || BROKEN
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h
index 936f2af39c4..831157502d5 100644
--- a/fs/adfs/adfs.h
+++ b/fs/adfs/adfs.h
@@ -75,7 +75,7 @@ extern unsigned int adfs_map_free(struct super_block *sb);
/* Misc */
void __adfs_error(struct super_block *sb, const char *function,
const char *fmt, ...);
-#define adfs_error(sb, fmt...) __adfs_error(sb, __FUNCTION__, fmt)
+#define adfs_error(sb, fmt...) __adfs_error(sb, __func__, fmt)
/* super.c */
diff --git a/fs/affs/file.c b/fs/affs/file.c
index e87ede608f7..1a4f092f24e 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -539,7 +539,7 @@ affs_extent_file_ofs(struct inode *inode, u32 newsize)
tmp = min(bsize - boff, newsize - size);
BUG_ON(boff + tmp > bsize || tmp > bsize);
memset(AFFS_DATA(bh) + boff, 0, tmp);
- AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp);
+ be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
size += tmp;
@@ -680,7 +680,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
tmp = min(bsize - boff, to - from);
BUG_ON(boff + tmp > bsize || tmp > bsize);
memcpy(AFFS_DATA(bh) + boff, data + from, tmp);
- AFFS_DATA_HEAD(bh)->size = cpu_to_be32(be32_to_cpu(AFFS_DATA_HEAD(bh)->size) + tmp);
+ be32_add_cpu(&AFFS_DATA_HEAD(bh)->size, tmp);
affs_fix_checksum(sb, bh);
mark_buffer_dirty_inode(bh, inode);
written += tmp;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index b58af8f18bc..dfda03d4397 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -140,7 +140,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
if (page->index == 0 && qty != ntohs(dbuf->blocks[0].pagehdr.npages)) {
printk("kAFS: %s(%lu): wrong number of dir blocks %d!=%hu\n",
- __FUNCTION__, dir->i_ino, qty,
+ __func__, dir->i_ino, qty,
ntohs(dbuf->blocks[0].pagehdr.npages));
goto error;
}
@@ -159,7 +159,7 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page)
for (tmp = 0; tmp < qty; tmp++) {
if (dbuf->blocks[tmp].pagehdr.magic != AFS_DIR_MAGIC) {
printk("kAFS: %s(%lu): bad magic %d/%d is %04hx\n",
- __FUNCTION__, dir->i_ino, tmp, qty,
+ __func__, dir->i_ino, tmp, qty,
ntohs(dbuf->blocks[tmp].pagehdr.magic));
goto error;
}
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index eec41c76de7..7102824ba84 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -757,8 +757,8 @@ void _dbprintk(const char *fmt, ...)
{
}
-#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
-#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define kenter(FMT,...) dbgprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define kleave(FMT,...) dbgprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
#define kdebug(FMT,...) dbgprintk(" "FMT ,##__VA_ARGS__)
@@ -791,8 +791,8 @@ do { \
} while (0)
#else
-#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__FUNCTION__ ,##__VA_ARGS__)
-#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__FUNCTION__ ,##__VA_ARGS__)
+#define _enter(FMT,...) _dbprintk("==> %s("FMT")",__func__ ,##__VA_ARGS__)
+#define _leave(FMT,...) _dbprintk("<== %s()"FMT"",__func__ ,##__VA_ARGS__)
#define _debug(FMT,...) _dbprintk(" "FMT ,##__VA_ARGS__)
#endif
diff --git a/fs/aio.c b/fs/aio.c
index 99c2352906a..b5253e77eb2 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1078,9 +1078,7 @@ static void timeout_func(unsigned long data)
static inline void init_timeout(struct aio_timeout *to)
{
- init_timer(&to->timer);
- to->timer.data = (unsigned long)to;
- to->timer.function = timeout_func;
+ setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to);
to->timed_out = 0;
to->p = current;
}
@@ -1213,6 +1211,7 @@ retry:
if (timeout)
clear_timeout(&to);
out:
+ destroy_timer_on_stack(&to.timer);
return i ? i : ret;
}
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index f42be069e08..977ef208c05 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -57,9 +57,6 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
* anonymous inode, and a dentry that describe the "class"
* of the file
*
- * @pfd: [out] pointer to the file descriptor
- * @dpinode: [out] pointer to the inode
- * @pfile: [out] pointer to the file struct
* @name: [in] name of the "class" of the new file
* @fops [in] file operations for the new file
* @priv [in] private data for the new file (will be file's private_data)
@@ -68,10 +65,9 @@ static struct dentry_operations anon_inodefs_dentry_operations = {
* that do not need to have a full-fledged inode in order to operate correctly.
* All the files created with anon_inode_getfd() will share a single inode,
* hence saving memory and avoiding code duplication for the file/inode/dentry
- * setup.
+ * setup. Returns new descriptor or -error.
*/
-int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
- const char *name, const struct file_operations *fops,
+int anon_inode_getfd(const char *name, const struct file_operations *fops,
void *priv)
{
struct qstr this;
@@ -125,10 +121,7 @@ int anon_inode_getfd(int *pfd, struct inode **pinode, struct file **pfile,
fd_install(fd, file);
- *pfd = fd;
- *pinode = anon_inode_inode;
- *pfile = file;
- return 0;
+ return fd;
err_dput:
dput(dentry);
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 2d4ae40718d..c3d352d7fa9 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -35,7 +35,7 @@
/* #define DEBUG */
#ifdef DEBUG
-#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __FUNCTION__ , ##args); } while(0)
+#define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0)
#else
#define DPRINTK(fmt,args...) do {} while(0)
#endif
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index d96e5c14a9c..894fee54d4d 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -73,8 +73,8 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
status = 0;
done:
DPRINTK("returning = %d", status);
- mntput(mnt);
dput(dentry);
+ mntput(mnt);
return status;
}
@@ -333,7 +333,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
/* Can we expire this guy */
if (autofs4_can_expire(dentry, timeout, do_now)) {
expired = dentry;
- break;
+ goto found;
}
goto next;
}
@@ -352,7 +352,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
inf->flags |= AUTOFS_INF_EXPIRING;
spin_unlock(&sbi->fs_lock);
expired = dentry;
- break;
+ goto found;
}
spin_unlock(&sbi->fs_lock);
/*
@@ -363,7 +363,7 @@ static struct dentry *autofs4_expire_indirect(struct super_block *sb,
expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
if (expired) {
dput(dentry);
- break;
+ goto found;
}
}
next:
@@ -371,18 +371,16 @@ next:
spin_lock(&dcache_lock);
next = next->next;
}
-
- if (expired) {
- DPRINTK("returning %p %.*s",
- expired, (int)expired->d_name.len, expired->d_name.name);
- spin_lock(&dcache_lock);
- list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
- spin_unlock(&dcache_lock);
- return expired;
- }
spin_unlock(&dcache_lock);
-
return NULL;
+
+found:
+ DPRINTK("returning %p %.*s",
+ expired, (int)expired->d_name.len, expired->d_name.name);
+ spin_lock(&dcache_lock);
+ list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
+ spin_unlock(&dcache_lock);
+ return expired;
}
/* Perform an expiry operation */
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index aa4c5ff8a40..edf5b6bddb5 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -146,17 +146,17 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
if (d_mountpoint(dentry)) {
struct file *fp = NULL;
- struct vfsmount *fp_mnt = mntget(mnt);
- struct dentry *fp_dentry = dget(dentry);
+ struct path fp_path = { .dentry = dentry, .mnt = mnt };
- if (!autofs4_follow_mount(&fp_mnt, &fp_dentry)) {
- dput(fp_dentry);
- mntput(fp_mnt);
+ path_get(&fp_path);
+
+ if (!autofs4_follow_mount(&fp_path.mnt, &fp_path.dentry)) {
+ path_put(&fp_path);
dcache_dir_close(inode, file);
goto out;
}
- fp = dentry_open(fp_dentry, fp_mnt, file->f_flags);
+ fp = dentry_open(fp_path.dentry, fp_path.mnt, file->f_flags);
status = PTR_ERR(fp);
if (IS_ERR(fp)) {
dcache_dir_close(inode, file);
@@ -242,7 +242,8 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
- int status = 0;
+ struct dentry *new;
+ int status;
/* Block on any pending expiry here; invalidate the dentry
when expiration is done to trigger mount request with a new
@@ -318,7 +319,28 @@ static int try_to_fill_dentry(struct dentry *dentry, int flags)
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
spin_unlock(&dentry->d_lock);
- return status;
+
+ /*
+ * The dentry that is passed in from lookup may not be the one
+ * we end up using, as mkdir can create a new one. If this
+ * happens, and another process tries the lookup at the same time,
+ * it will set the PENDING flag on this new dentry, but add itself
+ * to our waitq. Then, if after the lookup succeeds, the first
+ * process that requested the mount performs another lookup of the
+ * same directory, it will show up as still pending! So, we need
+ * to redo the lookup here and clear pending on that dentry.
+ */
+ if (d_unhashed(dentry)) {
+ new = d_lookup(dentry->d_parent, &dentry->d_name);
+ if (new) {
+ spin_lock(&new->d_lock);
+ new->d_flags &= ~DCACHE_AUTOFS_PENDING;
+ spin_unlock(&new->d_lock);
+ dput(new);
+ }
+ }
+
+ return 0;
}
/* For autofs direct mounts the follow link triggers the mount */
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1fe28e4754c..75e5955c3f6 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -171,7 +171,7 @@ static int autofs4_getpath(struct autofs_sb_info *sbi,
for (tmp = dentry ; tmp != root ; tmp = tmp->d_parent)
len += tmp->d_name.len + 1;
- if (--len > NAME_MAX) {
+ if (!len || --len > NAME_MAX) {
spin_unlock(&dcache_lock);
return 0;
}
diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h
index 71faf4d2390..70f5d3a8eed 100644
--- a/fs/bfs/bfs.h
+++ b/fs/bfs/bfs.h
@@ -42,7 +42,7 @@ static inline struct bfs_inode_info *BFS_I(struct inode *inode)
#define printf(format, args...) \
- printk(KERN_ERR "BFS-fs: %s(): " format, __FUNCTION__, ## args)
+ printk(KERN_ERR "BFS-fs: %s(): " format, __func__, ## args)
/* inode.c */
extern struct inode *bfs_iget(struct super_block *sb, unsigned long ino);
diff --git a/fs/buffer.c b/fs/buffer.c
index 189efa4efc6..a073f3f4f01 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1101,7 +1101,7 @@ grow_buffers(struct block_device *bdev, sector_t block, int size)
printk(KERN_ERR "%s: requested out-of-range block %llu for "
"device %s\n",
- __FUNCTION__, (unsigned long long)block,
+ __func__, (unsigned long long)block,
bdevname(bdev, b));
return -EIO;
}
diff --git a/fs/compat.c b/fs/compat.c
index 2ce4456aad3..332a869d2c5 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -24,6 +24,7 @@
#include <linux/fcntl.h>
#include <linux/namei.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/vfs.h>
#include <linux/ioctl.h>
#include <linux/init.h>
@@ -1634,7 +1635,7 @@ sticky:
return ret;
}
-#ifdef TIF_RESTORE_SIGMASK
+#ifdef HAVE_SET_RESTORE_SIGMASK
asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp,
compat_ulong_t __user *outp, compat_ulong_t __user *exp,
struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
@@ -1720,7 +1721,7 @@ sticky:
if (sigmask) {
memcpy(&current->saved_sigmask, &sigsaved,
sizeof(sigsaved));
- set_thread_flag(TIF_RESTORE_SIGMASK);
+ set_restore_sigmask();
}
} else if (sigmask)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -1791,7 +1792,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
if (sigmask) {
memcpy(&current->saved_sigmask, &sigsaved,
sizeof(sigsaved));
- set_thread_flag(TIF_RESTORE_SIGMASK);
+ set_restore_sigmask();
}
ret = -ERESTARTNOHAND;
} else if (sigmask)
@@ -1825,7 +1826,7 @@ sticky:
return ret;
}
-#endif /* TIF_RESTORE_SIGMASK */
+#endif /* HAVE_SET_RESTORE_SIGMASK */
#if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
/* Stuff for NFS server syscalls... */
@@ -2080,7 +2081,7 @@ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2)
#ifdef CONFIG_EPOLL
-#ifdef TIF_RESTORE_SIGMASK
+#ifdef HAVE_SET_RESTORE_SIGMASK
asmlinkage long compat_sys_epoll_pwait(int epfd,
struct compat_epoll_event __user *events,
int maxevents, int timeout,
@@ -2117,14 +2118,14 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
if (err == -EINTR) {
memcpy(&current->saved_sigmask, &sigsaved,
sizeof(sigsaved));
- set_thread_flag(TIF_RESTORE_SIGMASK);
+ set_restore_sigmask();
} else
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
return err;
}
-#endif /* TIF_RESTORE_SIGMASK */
+#endif /* HAVE_SET_RESTORE_SIGMASK */
#endif /* CONFIG_EPOLL */
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index c6e72aebd16..97dba0d9234 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1046,14 +1046,14 @@ static int vt_check(struct file *file)
struct inode *inode = file->f_path.dentry->d_inode;
struct vc_data *vc;
- if (file->f_op->ioctl != tty_ioctl)
+ if (file->f_op->unlocked_ioctl != tty_ioctl)
return -EINVAL;
tty = (struct tty_struct *)file->private_data;
if (tty_paranoia_check(tty, inode, "tty_ioctl"))
return -EINVAL;
- if (tty->driver->ioctl != vt_ioctl)
+ if (tty->ops->ioctl != vt_ioctl)
return -EINVAL;
vc = (struct vc_data *)tty->driver_data;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 397cb503a18..2b6cb23dd14 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -115,7 +115,7 @@ configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *pp
goto out;
}
pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
- __FUNCTION__, count, *ppos, buffer->page);
+ __func__, count, *ppos, buffer->page);
retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
buffer->count);
out:
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 4c1ebff778e..b9a1d810346 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -47,7 +47,7 @@ static const struct address_space_operations configfs_aops = {
static struct backing_dev_info configfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
static const struct inode_operations configfs_inode_operations ={
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index de3b31d0a37..8421cea7d8c 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -92,7 +92,7 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
root = d_alloc_root(inode);
if (!root) {
- pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
+ pr_debug("%s: could not get root dentry!\n",__func__);
iput(inode);
return -ENOMEM;
}
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 78929ea84ff..2a731ef5f30 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -210,13 +210,13 @@ static int configfs_get_target_path(struct config_item * item, struct config_ite
if (size > PATH_MAX)
return -ENAMETOOLONG;
- pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size);
+ pr_debug("%s: depth = %d, size = %d\n", __func__, depth, size);
for (s = path; depth--; s += 3)
strcpy(s,"../");
fill_item_path(target, path, size);
- pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
+ pr_debug("%s: path = '%s'\n", __func__, path);
return 0;
}
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c
index fddffe4851f..159a5efd6a8 100644
--- a/fs/debugfs/file.c
+++ b/fs/debugfs/file.c
@@ -9,7 +9,7 @@
* 2 as published by the Free Software Foundation.
*
* debugfs is for people to use instead of /proc or /sys.
- * See Documentation/DocBook/kernel-api for more details.
+ * See Documentation/DocBook/filesystems for more details.
*
*/
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index f120e120787..285b64a8b06 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -17,6 +17,8 @@
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/tty.h>
+#include <linux/mutex.h>
+#include <linux/idr.h>
#include <linux/devpts_fs.h>
#include <linux/parser.h>
#include <linux/fsnotify.h>
@@ -26,6 +28,10 @@
#define DEVPTS_DEFAULT_MODE 0600
+extern int pty_limit; /* Config limit on Unix98 ptys */
+static DEFINE_IDR(allocated_ptys);
+static DEFINE_MUTEX(allocated_ptys_lock);
+
static struct vfsmount *devpts_mnt;
static struct dentry *devpts_root;
@@ -171,9 +177,44 @@ static struct dentry *get_node(int num)
return lookup_one_len(s, root, sprintf(s, "%d", num));
}
+int devpts_new_index(void)
+{
+ int index;
+ int idr_ret;
+
+retry:
+ if (!idr_pre_get(&allocated_ptys, GFP_KERNEL)) {
+ return -ENOMEM;
+ }
+
+ mutex_lock(&allocated_ptys_lock);
+ idr_ret = idr_get_new(&allocated_ptys, NULL, &index);
+ if (idr_ret < 0) {
+ mutex_unlock(&allocated_ptys_lock);
+ if (idr_ret == -EAGAIN)
+ goto retry;
+ return -EIO;
+ }
+
+ if (index >= pty_limit) {
+ idr_remove(&allocated_ptys, index);
+ mutex_unlock(&allocated_ptys_lock);
+ return -EIO;
+ }
+ mutex_unlock(&allocated_ptys_lock);
+ return index;
+}
+
+void devpts_kill_index(int idx)
+{
+ mutex_lock(&allocated_ptys_lock);
+ idr_remove(&allocated_ptys, idx);
+ mutex_unlock(&allocated_ptys_lock);
+}
+
int devpts_pty_new(struct tty_struct *tty)
{
- int number = tty->index;
+ int number = tty->index; /* tty layer puts index from devpts_new_index() in here */
struct tty_driver *driver = tty->driver;
dev_t device = MKDEV(driver->major, driver->minor_start+number);
struct dentry *dentry;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index b64e55e0515..499e16759e9 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -200,7 +200,7 @@ int __init dlm_lockspace_init(void)
dlm_kset = kset_create_and_add("dlm", NULL, kernel_kobj);
if (!dlm_kset) {
- printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: can not create kset\n", __func__);
return -ENOMEM;
}
return 0;
diff --git a/fs/dnotify.c b/fs/dnotify.c
index 28d01ed66de..676073b8dda 100644
--- a/fs/dnotify.c
+++ b/fs/dnotify.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
+#include <linux/fdtable.h>
int dir_notify_enable __read_mostly = 1;
@@ -66,6 +67,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
struct dnotify_struct **prev;
struct inode *inode;
fl_owner_t id = current->files;
+ struct file *f;
int error = 0;
if ((arg & ~DN_MULTISHOT) == 0) {
@@ -92,6 +94,15 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
prev = &odn->dn_next;
}
+ rcu_read_lock();
+ f = fcheck(fd);
+ rcu_read_unlock();
+ /* we'd lost the race with close(), sod off silently */
+ /* note that inode->i_lock prevents reordering problems
+ * between accesses to descriptor table and ->i_dnotify */
+ if (f != filp)
+ goto out_free;
+
error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
if (error)
goto out_free;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index a9f130cd50a..343942deeec 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -200,10 +200,8 @@ struct file *eventfd_fget(int fd)
asmlinkage long sys_eventfd(unsigned int count)
{
- int error, fd;
+ int fd;
struct eventfd_ctx *ctx;
- struct file *file;
- struct inode *inode;
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
@@ -216,12 +214,9 @@ asmlinkage long sys_eventfd(unsigned int count)
* When we call this, the initialization must be complete, since
* anon_inode_getfd() will install the fd.
*/
- error = anon_inode_getfd(&fd, &inode, &file, "[eventfd]",
- &eventfd_fops, ctx);
- if (!error)
- return fd;
-
- kfree(ctx);
- return error;
+ fd = anon_inode_getfd("[eventfd]", &eventfd_fops, ctx);
+ if (fd < 0)
+ kfree(ctx);
+ return fd;
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 0d237182d72..990c01d2d66 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1050,8 +1050,6 @@ asmlinkage long sys_epoll_create(int size)
{
int error, fd = -1;
struct eventpoll *ep;
- struct inode *inode;
- struct file *file;
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d)\n",
current, size));
@@ -1061,29 +1059,24 @@ asmlinkage long sys_epoll_create(int size)
* structure ( "struct eventpoll" ).
*/
error = -EINVAL;
- if (size <= 0 || (error = ep_alloc(&ep)) != 0)
+ if (size <= 0 || (error = ep_alloc(&ep)) < 0) {
+ fd = error;
goto error_return;
+ }
/*
* Creates all the items needed to setup an eventpoll file. That is,
- * a file structure, and inode and a free file descriptor.
+ * a file structure and a free file descriptor.
*/
- error = anon_inode_getfd(&fd, &inode, &file, "[eventpoll]",
- &eventpoll_fops, ep);
- if (error)
- goto error_free;
+ fd = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep);
+ if (fd < 0)
+ ep_free(ep);
+error_return:
DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
current, size, fd));
return fd;
-
-error_free:
- ep_free(ep);
-error_return:
- DNPRINTK(3, (KERN_INFO "[%p] eventpoll: sys_epoll_create(%d) = %d\n",
- current, size, error));
- return error;
}
/*
@@ -1241,7 +1234,7 @@ error_return:
return error;
}
-#ifdef TIF_RESTORE_SIGMASK
+#ifdef HAVE_SET_RESTORE_SIGMASK
/*
* Implement the event wait interface for the eventpoll file. It is the kernel
@@ -1279,7 +1272,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
if (error == -EINTR) {
memcpy(&current->saved_sigmask, &sigsaved,
sizeof(sigsaved));
- set_thread_flag(TIF_RESTORE_SIGMASK);
+ set_restore_sigmask();
} else
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
@@ -1287,7 +1280,7 @@ asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events,
return error;
}
-#endif /* #ifdef TIF_RESTORE_SIGMASK */
+#endif /* HAVE_SET_RESTORE_SIGMASK */
static int __init eventpoll_init(void)
{
@@ -1309,4 +1302,3 @@ static int __init eventpoll_init(void)
return 0;
}
fs_initcall(eventpoll_init);
-
diff --git a/fs/exec.c b/fs/exec.c
index a13883903ee..aeaa9791d8b 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -24,6 +24,7 @@
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/mman.h>
#include <linux/a.out.h>
#include <linux/stat.h>
@@ -766,9 +767,7 @@ static int de_thread(struct task_struct *tsk)
/*
* Kill all other threads in the thread group.
- * We must hold tasklist_lock to call zap_other_threads.
*/
- read_lock(&tasklist_lock);
spin_lock_irq(lock);
if (signal_group_exit(sig)) {
/*
@@ -776,21 +775,10 @@ static int de_thread(struct task_struct *tsk)
* return so that the signal is processed.
*/
spin_unlock_irq(lock);
- read_unlock(&tasklist_lock);
return -EAGAIN;
}
-
- /*
- * child_reaper ignores SIGKILL, change it now.
- * Reparenting needs write_lock on tasklist_lock,
- * so it is safe to do it under read_lock.
- */
- if (unlikely(tsk->group_leader == task_child_reaper(tsk)))
- task_active_pid_ns(tsk)->child_reaper = tsk;
-
sig->group_exit_task = tsk;
zap_other_threads(tsk);
- read_unlock(&tasklist_lock);
/* Account for the thread group leader hanging around: */
count = thread_group_leader(tsk) ? 1 : 2;
@@ -811,7 +799,7 @@ static int de_thread(struct task_struct *tsk)
if (!thread_group_leader(tsk)) {
leader = tsk->group_leader;
- sig->notify_count = -1;
+ sig->notify_count = -1; /* for exit_notify() */
for (;;) {
write_lock_irq(&tasklist_lock);
if (likely(leader->exit_state))
@@ -821,6 +809,8 @@ static int de_thread(struct task_struct *tsk)
schedule();
}
+ if (unlikely(task_child_reaper(tsk) == leader))
+ task_active_pid_ns(tsk)->child_reaper = tsk;
/*
* The only record we have of the real-time age of a
* process, regardless of execs it's done, is start_time.
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 109ab5e44ec..cc91227d3bb 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -150,12 +150,12 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
if (IS_ERR(ppd)) {
err = PTR_ERR(ppd);
dprintk("%s: get_parent of %ld failed, err %d\n",
- __FUNCTION__, pd->d_inode->i_ino, err);
+ __func__, pd->d_inode->i_ino, err);
dput(pd);
break;
}
- dprintk("%s: find name of %lu in %lu\n", __FUNCTION__,
+ dprintk("%s: find name of %lu in %lu\n", __func__,
pd->d_inode->i_ino, ppd->d_inode->i_ino);
err = exportfs_get_name(mnt, ppd, nbuf, pd);
if (err) {
@@ -168,14 +168,14 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
continue;
break;
}
- dprintk("%s: found name: %s\n", __FUNCTION__, nbuf);
+ dprintk("%s: found name: %s\n", __func__, nbuf);
mutex_lock(&ppd->d_inode->i_mutex);
npd = lookup_one_len(nbuf, ppd, strlen(nbuf));
mutex_unlock(&ppd->d_inode->i_mutex);
if (IS_ERR(npd)) {
err = PTR_ERR(npd);
dprintk("%s: lookup failed: %d\n",
- __FUNCTION__, err);
+ __func__, err);
dput(ppd);
dput(pd);
break;
@@ -188,7 +188,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir)
if (npd == pd)
noprogress = 0;
else
- printk("%s: npd != pd\n", __FUNCTION__);
+ printk("%s: npd != pd\n", __func__);
dput(npd);
dput(ppd);
if (IS_ROOT(pd)) {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index cc47b76091b..6ae4ecf3ce4 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1261,10 +1261,11 @@ static int ext3_ordered_write_end(struct file *file,
new_i_size = pos + copied;
if (new_i_size > EXT3_I(inode)->i_disksize)
EXT3_I(inode)->i_disksize = new_i_size;
- copied = ext3_generic_write_end(file, mapping, pos, len, copied,
+ ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
- if (copied < 0)
- ret = copied;
+ copied = ret2;
+ if (ret2 < 0)
+ ret = ret2;
}
ret2 = ext3_journal_stop(handle);
if (!ret)
@@ -1289,10 +1290,11 @@ static int ext3_writeback_write_end(struct file *file,
if (new_i_size > EXT3_I(inode)->i_disksize)
EXT3_I(inode)->i_disksize = new_i_size;
- copied = ext3_generic_write_end(file, mapping, pos, len, copied,
+ ret2 = ext3_generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
- if (copied < 0)
- ret = copied;
+ copied = ret2;
+ if (ret2 < 0)
+ ret = ret2;
ret2 = ext3_journal_stop(handle);
if (!ret)
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index a8bae8cd1d5..3c8dab880d9 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -9,8 +9,8 @@
#include <linux/slab.h>
#include <linux/capability.h>
#include <linux/fs.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
#include "xattr.h"
#include "acl.h"
@@ -37,7 +37,7 @@ ext4_acl_from_disk(const void *value, size_t size)
return ERR_PTR(-EINVAL);
if (count == 0)
return NULL;
- acl = posix_acl_alloc(count, GFP_KERNEL);
+ acl = posix_acl_alloc(count, GFP_NOFS);
if (!acl)
return ERR_PTR(-ENOMEM);
for (n=0; n < count; n++) {
@@ -91,7 +91,7 @@ ext4_acl_to_disk(const struct posix_acl *acl, size_t *size)
*size = ext4_acl_size(acl->a_count);
ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count *
- sizeof(ext4_acl_entry), GFP_KERNEL);
+ sizeof(ext4_acl_entry), GFP_NOFS);
if (!ext_acl)
return ERR_PTR(-ENOMEM);
ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION);
@@ -187,7 +187,7 @@ ext4_get_acl(struct inode *inode, int type)
}
retval = ext4_xattr_get(inode, name_index, "", NULL, 0);
if (retval > 0) {
- value = kmalloc(retval, GFP_KERNEL);
+ value = kmalloc(retval, GFP_NOFS);
if (!value)
return ERR_PTR(-ENOMEM);
retval = ext4_xattr_get(inode, name_index, "", value, retval);
@@ -335,7 +335,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
if (error)
goto cleanup;
}
- clone = posix_acl_clone(acl, GFP_KERNEL);
+ clone = posix_acl_clone(acl, GFP_NOFS);
error = -ENOMEM;
if (!clone)
goto cleanup;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 0737e05ba3d..da994374ec3 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -15,12 +15,12 @@
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
-
+#include "ext4.h"
+#include "ext4_jbd2.h"
#include "group.h"
+
/*
* balloc.c contains the blocks allocation and deallocation routines
*/
@@ -48,7 +48,6 @@ void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
ext4_group_t block_group, struct ext4_group_desc *gdp)
{
- unsigned long start;
int bit, bit_max;
unsigned free_blocks, group_blocks;
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -59,7 +58,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"Checksum bad for group %lu\n", block_group);
gdp->bg_free_blocks_count = 0;
gdp->bg_free_inodes_count = 0;
@@ -106,11 +105,12 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
free_blocks = group_blocks - bit_max;
if (bh) {
+ ext4_fsblk_t start;
+
for (bit = 0; bit < bit_max; bit++)
ext4_set_bit(bit, bh->b_data);
- start = block_group * EXT4_BLOCKS_PER_GROUP(sb) +
- le32_to_cpu(sbi->s_es->s_first_data_block);
+ start = ext4_group_first_block_no(sb, block_group);
/* Set bits for block and inode bitmaps, and inode table */
ext4_set_bit(ext4_block_bitmap(sb, gdp) - start, bh->b_data);
@@ -235,7 +235,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
return 1;
err_out:
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"Invalid block bitmap - "
"block_group = %d, block = %llu",
block_group, bitmap_blk);
@@ -264,7 +264,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
bitmap_blk = ext4_block_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"Cannot read block bitmap - "
"block_group = %d, block_bitmap = %llu",
(int)block_group, (unsigned long long)bitmap_blk);
@@ -281,7 +281,7 @@ read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
}
if (bh_submit_read(bh) < 0) {
put_bh(bh);
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"Cannot read block bitmap - "
"block_group = %d, block_bitmap = %llu",
(int)block_group, (unsigned long long)bitmap_blk);
@@ -360,7 +360,7 @@ restart:
BUG();
}
#define rsv_window_dump(root, verbose) \
- __rsv_window_dump((root), (verbose), __FUNCTION__)
+ __rsv_window_dump((root), (verbose), __func__)
#else
#define rsv_window_dump(root, verbose) do {} while (0)
#endif
@@ -740,7 +740,7 @@ do_more:
if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
bit + i, bitmap_bh->b_data)) {
jbd_unlock_bh_state(bitmap_bh);
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"bit already cleared for block %llu",
(ext4_fsblk_t)(block + i));
jbd_lock_bh_state(bitmap_bh);
@@ -752,9 +752,7 @@ do_more:
jbd_unlock_bh_state(bitmap_bh);
spin_lock(sb_bgl_lock(sbi, block_group));
- desc->bg_free_blocks_count =
- cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
- group_freed);
+ le16_add_cpu(&desc->bg_free_blocks_count, group_freed);
desc->bg_checksum = ext4_group_desc_csum(sbi, block_group, desc);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, count);
@@ -1798,7 +1796,7 @@ allocated:
if (ext4_test_bit(grp_alloc_blk+i,
bh2jh(bitmap_bh)->b_committed_data)) {
printk("%s: block was unexpectedly set in "
- "b_committed_data\n", __FUNCTION__);
+ "b_committed_data\n", __func__);
}
}
}
@@ -1823,8 +1821,7 @@ allocated:
spin_lock(sb_bgl_lock(sbi, group_no));
if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
- gdp->bg_free_blocks_count =
- cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num);
+ le16_add_cpu(&gdp->bg_free_blocks_count, -num);
gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
spin_unlock(sb_bgl_lock(sbi, group_no));
percpu_counter_sub(&sbi->s_freeblocks_counter, num);
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 420554f8f79..d37ea675045 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -9,7 +9,7 @@
#include <linux/buffer_head.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
+#include "ext4.h"
#ifdef EXT4FS_DEBUG
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 2c23bade9aa..2bf0331ea19 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -23,10 +23,10 @@
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
#include <linux/buffer_head.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
+#include "ext4.h"
static unsigned char ext4_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
@@ -42,7 +42,7 @@ const struct file_operations ext4_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
.readdir = ext4_readdir, /* we take BKL. needed?*/
- .ioctl = ext4_ioctl, /* BKL held */
+ .unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
new file mode 100644
index 00000000000..8158083f7ac
--- /dev/null
+++ b/fs/ext4/ext4.h
@@ -0,0 +1,1205 @@
+/*
+ * ext4.h
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/include/linux/minix_fs.h
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#ifndef _EXT4_H
+#define _EXT4_H
+
+#include <linux/types.h>
+#include <linux/blkdev.h>
+#include <linux/magic.h>
+#include "ext4_i.h"
+
+/*
+ * The fourth extended filesystem constants/structures
+ */
+
+/*
+ * Define EXT4FS_DEBUG to produce debug messages
+ */
+#undef EXT4FS_DEBUG
+
+/*
+ * Define EXT4_RESERVATION to reserve data blocks for expanding files
+ */
+#define EXT4_DEFAULT_RESERVE_BLOCKS 8
+/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */
+#define EXT4_MAX_RESERVE_BLOCKS 1027
+#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0
+
+/*
+ * Debug code
+ */
+#ifdef EXT4FS_DEBUG
+#define ext4_debug(f, a...) \
+ do { \
+ printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \
+ __FILE__, __LINE__, __FUNCTION__); \
+ printk (KERN_DEBUG f, ## a); \
+ } while (0)
+#else
+#define ext4_debug(f, a...) do {} while (0)
+#endif
+
+#define EXT4_MULTIBLOCK_ALLOCATOR 1
+
+/* prefer goal again. length */
+#define EXT4_MB_HINT_MERGE 1
+/* blocks already reserved */
+#define EXT4_MB_HINT_RESERVED 2
+/* metadata is being allocated */
+#define EXT4_MB_HINT_METADATA 4
+/* first blocks in the file */
+#define EXT4_MB_HINT_FIRST 8
+/* search for the best chunk */
+#define EXT4_MB_HINT_BEST 16
+/* data is being allocated */
+#define EXT4_MB_HINT_DATA 32
+/* don't preallocate (for tails) */
+#define EXT4_MB_HINT_NOPREALLOC 64
+/* allocate for locality group */
+#define EXT4_MB_HINT_GROUP_ALLOC 128
+/* allocate goal blocks or none */
+#define EXT4_MB_HINT_GOAL_ONLY 256
+/* goal is meaningful */
+#define EXT4_MB_HINT_TRY_GOAL 512
+
+struct ext4_allocation_request {
+ /* target inode for block we're allocating */
+ struct inode *inode;
+ /* logical block in target inode */
+ ext4_lblk_t logical;
+ /* phys. target (a hint) */
+ ext4_fsblk_t goal;
+ /* the closest logical allocated block to the left */
+ ext4_lblk_t lleft;
+ /* phys. block for ^^^ */
+ ext4_fsblk_t pleft;
+ /* the closest logical allocated block to the right */
+ ext4_lblk_t lright;
+ /* phys. block for ^^^ */
+ ext4_fsblk_t pright;
+ /* how many blocks we want to allocate */
+ unsigned long len;
+ /* flags. see above EXT4_MB_HINT_* */
+ unsigned long flags;
+};
+
+/*
+ * Special inode numbers
+ */
+#define EXT4_BAD_INO 1 /* Bad blocks inode */
+#define EXT4_ROOT_INO 2 /* Root inode */
+#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */
+#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */
+#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */
+#define EXT4_JOURNAL_INO 8 /* Journal inode */
+
+/* First non-reserved inode for old ext4 filesystems */
+#define EXT4_GOOD_OLD_FIRST_INO 11
+
+/*
+ * Maximal count of links to a file
+ */
+#define EXT4_LINK_MAX 65000
+
+/*
+ * Macro-instructions used to manage several block sizes
+ */
+#define EXT4_MIN_BLOCK_SIZE 1024
+#define EXT4_MAX_BLOCK_SIZE 65536
+#define EXT4_MIN_BLOCK_LOG_SIZE 10
+#ifdef __KERNEL__
+# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
+#else
+# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+#endif
+#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32))
+#ifdef __KERNEL__
+# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
+#else
+# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
+#endif
+#ifdef __KERNEL__
+#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits)
+#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size)
+#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino)
+#else
+#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
+ EXT4_GOOD_OLD_INODE_SIZE : \
+ (s)->s_inode_size)
+#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \
+ EXT4_GOOD_OLD_FIRST_INO : \
+ (s)->s_first_ino)
+#endif
+#define EXT4_BLOCK_ALIGN(size, blkbits) ALIGN((size), (1 << (blkbits)))
+
+/*
+ * Structure of a block group descriptor
+ */
+struct ext4_group_desc
+{
+ __le32 bg_block_bitmap_lo; /* Blocks bitmap block */
+ __le32 bg_inode_bitmap_lo; /* Inodes bitmap block */
+ __le32 bg_inode_table_lo; /* Inodes table block */
+ __le16 bg_free_blocks_count; /* Free blocks count */
+ __le16 bg_free_inodes_count; /* Free inodes count */
+ __le16 bg_used_dirs_count; /* Directories count */
+ __le16 bg_flags; /* EXT4_BG_flags (INODE_UNINIT, etc) */
+ __u32 bg_reserved[2]; /* Likely block/inode bitmap checksum */
+ __le16 bg_itable_unused; /* Unused inodes count */
+ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */
+ __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */
+ __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */
+ __le32 bg_inode_table_hi; /* Inodes table block MSB */
+ __le16 bg_free_blocks_count_hi;/* Free blocks count MSB */
+ __le16 bg_free_inodes_count_hi;/* Free inodes count MSB */
+ __le16 bg_used_dirs_count_hi; /* Directories count MSB */
+ __le16 bg_itable_unused_hi; /* Unused inodes count MSB */
+ __u32 bg_reserved2[3];
+};
+
+#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
+#define EXT4_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */
+#define EXT4_BG_INODE_ZEROED 0x0004 /* On-disk itable initialized to zero */
+
+#ifdef __KERNEL__
+#include "ext4_sb.h"
+#endif
+/*
+ * Macro-instructions used to manage group descriptors
+ */
+#define EXT4_MIN_DESC_SIZE 32
+#define EXT4_MIN_DESC_SIZE_64BIT 64
+#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE
+#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size)
+#ifdef __KERNEL__
+# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group)
+# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block)
+# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group)
+# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits)
+#else
+# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
+# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s))
+# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
+#endif
+
+/*
+ * Constants relative to the data blocks
+ */
+#define EXT4_NDIR_BLOCKS 12
+#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS
+#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1)
+#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1)
+#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1)
+
+/*
+ * Inode flags
+ */
+#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */
+#define EXT4_UNRM_FL 0x00000002 /* Undelete */
+#define EXT4_COMPR_FL 0x00000004 /* Compress file */
+#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */
+#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */
+#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */
+#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */
+#define EXT4_NOATIME_FL 0x00000080 /* do not update atime */
+/* Reserved for compression usage... */
+#define EXT4_DIRTY_FL 0x00000100
+#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
+#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */
+#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */
+/* End compression flags --- maybe not all used */
+#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */
+#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */
+#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
+#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */
+#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
+#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
+#define EXT4_HUGE_FILE_FL 0x00040000 /* Set to each huge file */
+#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */
+#define EXT4_EXT_MIGRATE 0x00100000 /* Inode is migrating */
+#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
+
+#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */
+#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
+
+/*
+ * Inode dynamic state flags
+ */
+#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */
+#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */
+#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */
+#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
+
+/* Used to pass group descriptor data when online resize is done */
+struct ext4_new_group_input {
+ __u32 group; /* Group number for this data */
+ __u64 block_bitmap; /* Absolute block number of block bitmap */
+ __u64 inode_bitmap; /* Absolute block number of inode bitmap */
+ __u64 inode_table; /* Absolute block number of inode table start */
+ __u32 blocks_count; /* Total number of blocks in this group */
+ __u16 reserved_blocks; /* Number of reserved blocks in this group */
+ __u16 unused;
+};
+
+/* The struct ext4_new_group_input in kernel space, with free_blocks_count */
+struct ext4_new_group_data {
+ __u32 group;
+ __u64 block_bitmap;
+ __u64 inode_bitmap;
+ __u64 inode_table;
+ __u32 blocks_count;
+ __u16 reserved_blocks;
+ __u16 unused;
+ __u32 free_blocks_count;
+};
+
+/*
+ * Following is used by preallocation code to tell get_blocks() that we
+ * want uninitialized extents.
+ */
+#define EXT4_CREATE_UNINITIALIZED_EXT 2
+
+/*
+ * ioctl commands
+ */
+#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
+#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS
+#define EXT4_IOC_GETVERSION _IOR('f', 3, long)
+#define EXT4_IOC_SETVERSION _IOW('f', 4, long)
+#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long)
+#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input)
+#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION
+#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION
+#ifdef CONFIG_JBD2_DEBUG
+#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long)
+#endif
+#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long)
+#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long)
+#define EXT4_IOC_MIGRATE _IO('f', 7)
+
+/*
+ * ioctl commands in 32 bit emulation
+ */
+#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS
+#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS
+#define EXT4_IOC32_GETVERSION _IOR('f', 3, int)
+#define EXT4_IOC32_SETVERSION _IOW('f', 4, int)
+#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int)
+#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int)
+#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int)
+#ifdef CONFIG_JBD2_DEBUG
+#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int)
+#endif
+#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION
+#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION
+
+
+/*
+ * Mount options
+ */
+struct ext4_mount_options {
+ unsigned long s_mount_opt;
+ uid_t s_resuid;
+ gid_t s_resgid;
+ unsigned long s_commit_interval;
+#ifdef CONFIG_QUOTA
+ int s_jquota_fmt;
+ char *s_qf_names[MAXQUOTAS];
+#endif
+};
+
+/*
+ * Structure of an inode on the disk
+ */
+struct ext4_inode {
+ __le16 i_mode; /* File mode */
+ __le16 i_uid; /* Low 16 bits of Owner Uid */
+ __le32 i_size_lo; /* Size in bytes */
+ __le32 i_atime; /* Access time */
+ __le32 i_ctime; /* Inode Change time */
+ __le32 i_mtime; /* Modification time */
+ __le32 i_dtime; /* Deletion Time */
+ __le16 i_gid; /* Low 16 bits of Group Id */
+ __le16 i_links_count; /* Links count */
+ __le32 i_blocks_lo; /* Blocks count */
+ __le32 i_flags; /* File flags */
+ union {
+ struct {
+ __le32 l_i_version;
+ } linux1;
+ struct {
+ __u32 h_i_translator;
+ } hurd1;
+ struct {
+ __u32 m_i_reserved1;
+ } masix1;
+ } osd1; /* OS dependent 1 */
+ __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */
+ __le32 i_generation; /* File version (for NFS) */
+ __le32 i_file_acl_lo; /* File ACL */
+ __le32 i_size_high;
+ __le32 i_obso_faddr; /* Obsoleted fragment address */
+ union {
+ struct {
+ __le16 l_i_blocks_high; /* were l_i_reserved1 */
+ __le16 l_i_file_acl_high;
+ __le16 l_i_uid_high; /* these 2 fields */
+ __le16 l_i_gid_high; /* were reserved2[0] */
+ __u32 l_i_reserved2;
+ } linux2;
+ struct {
+ __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
+ __u16 h_i_mode_high;
+ __u16 h_i_uid_high;
+ __u16 h_i_gid_high;
+ __u32 h_i_author;
+ } hurd2;
+ struct {
+ __le16 h_i_reserved1; /* Obsoleted fragment number/size which are removed in ext4 */
+ __le16 m_i_file_acl_high;
+ __u32 m_i_reserved2[2];
+ } masix2;
+ } osd2; /* OS dependent 2 */
+ __le16 i_extra_isize;
+ __le16 i_pad1;
+ __le32 i_ctime_extra; /* extra Change time (nsec << 2 | epoch) */
+ __le32 i_mtime_extra; /* extra Modification time(nsec << 2 | epoch) */
+ __le32 i_atime_extra; /* extra Access time (nsec << 2 | epoch) */
+ __le32 i_crtime; /* File Creation time */
+ __le32 i_crtime_extra; /* extra FileCreationtime (nsec << 2 | epoch) */
+ __le32 i_version_hi; /* high 32 bits for 64-bit version */
+};
+
+
+#define EXT4_EPOCH_BITS 2
+#define EXT4_EPOCH_MASK ((1 << EXT4_EPOCH_BITS) - 1)
+#define EXT4_NSEC_MASK (~0UL << EXT4_EPOCH_BITS)
+
+/*
+ * Extended fields will fit into an inode if the filesystem was formatted
+ * with large inodes (-I 256 or larger) and there are not currently any EAs
+ * consuming all of the available space. For new inodes we always reserve
+ * enough space for the kernel's known extended fields, but for inodes
+ * created with an old kernel this might not have been the case. None of
+ * the extended inode fields is critical for correct filesystem operation.
+ * This macro checks if a certain field fits in the inode. Note that
+ * inode-size = GOOD_OLD_INODE_SIZE + i_extra_isize
+ */
+#define EXT4_FITS_IN_INODE(ext4_inode, einode, field) \
+ ((offsetof(typeof(*ext4_inode), field) + \
+ sizeof((ext4_inode)->field)) \
+ <= (EXT4_GOOD_OLD_INODE_SIZE + \
+ (einode)->i_extra_isize)) \
+
+static inline __le32 ext4_encode_extra_time(struct timespec *time)
+{
+ return cpu_to_le32((sizeof(time->tv_sec) > 4 ?
+ time->tv_sec >> 32 : 0) |
+ ((time->tv_nsec << 2) & EXT4_NSEC_MASK));
+}
+
+static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra)
+{
+ if (sizeof(time->tv_sec) > 4)
+ time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK)
+ << 32;
+ time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> 2;
+}
+
+#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
+do { \
+ (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
+ (raw_inode)->xtime ## _extra = \
+ ext4_encode_extra_time(&(inode)->xtime); \
+} while (0)
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
+do { \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
+ (raw_inode)->xtime ## _extra = \
+ ext4_encode_extra_time(&(einode)->xtime); \
+} while (0)
+
+#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
+do { \
+ (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) \
+ ext4_decode_extra_time(&(inode)->xtime, \
+ raw_inode->xtime ## _extra); \
+} while (0)
+
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
+do { \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (einode)->xtime.tv_sec = \
+ (signed)le32_to_cpu((raw_inode)->xtime); \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
+ ext4_decode_extra_time(&(einode)->xtime, \
+ raw_inode->xtime ## _extra); \
+} while (0)
+
+#define i_disk_version osd1.linux1.l_i_version
+
+#if defined(__KERNEL__) || defined(__linux__)
+#define i_reserved1 osd1.linux1.l_i_reserved1
+#define i_file_acl_high osd2.linux2.l_i_file_acl_high
+#define i_blocks_high osd2.linux2.l_i_blocks_high
+#define i_uid_low i_uid
+#define i_gid_low i_gid
+#define i_uid_high osd2.linux2.l_i_uid_high
+#define i_gid_high osd2.linux2.l_i_gid_high
+#define i_reserved2 osd2.linux2.l_i_reserved2
+
+#elif defined(__GNU__)
+
+#define i_translator osd1.hurd1.h_i_translator
+#define i_uid_high osd2.hurd2.h_i_uid_high
+#define i_gid_high osd2.hurd2.h_i_gid_high
+#define i_author osd2.hurd2.h_i_author
+
+#elif defined(__masix__)
+
+#define i_reserved1 osd1.masix1.m_i_reserved1
+#define i_file_acl_high osd2.masix2.m_i_file_acl_high
+#define i_reserved2 osd2.masix2.m_i_reserved2
+
+#endif /* defined(__KERNEL__) || defined(__linux__) */
+
+/*
+ * File system states
+ */
+#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */
+#define EXT4_ERROR_FS 0x0002 /* Errors detected */
+#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */
+
+/*
+ * Misc. filesystem flags
+ */
+#define EXT2_FLAGS_SIGNED_HASH 0x0001 /* Signed dirhash in use */
+#define EXT2_FLAGS_UNSIGNED_HASH 0x0002 /* Unsigned dirhash in use */
+#define EXT2_FLAGS_TEST_FILESYS 0x0004 /* to test development code */
+
+/*
+ * Mount flags
+ */
+#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */
+#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */
+#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */
+#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */
+#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */
+#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */
+#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */
+#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */
+#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/
+#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */
+#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */
+#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */
+#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */
+#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */
+#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */
+#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */
+#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */
+#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
+#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */
+#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */
+#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */
+#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
+#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
+#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
+#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */
+#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
+#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
+#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
+#define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */
+/* Compatibility, for having both ext2_fs.h and ext4.h included at once */
+#ifndef _LINUX_EXT2_FS_H
+#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
+#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
+#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \
+ EXT4_MOUNT_##opt)
+#else
+#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD
+#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT
+#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS
+#endif
+
+#define ext4_set_bit ext2_set_bit
+#define ext4_set_bit_atomic ext2_set_bit_atomic
+#define ext4_clear_bit ext2_clear_bit
+#define ext4_clear_bit_atomic ext2_clear_bit_atomic
+#define ext4_test_bit ext2_test_bit
+#define ext4_find_first_zero_bit ext2_find_first_zero_bit
+#define ext4_find_next_zero_bit ext2_find_next_zero_bit
+#define ext4_find_next_bit ext2_find_next_bit
+
+/*
+ * Maximal mount counts between two filesystem checks
+ */
+#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */
+#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */
+
+/*
+ * Behaviour when detecting errors
+ */
+#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */
+#define EXT4_ERRORS_RO 2 /* Remount fs read-only */
+#define EXT4_ERRORS_PANIC 3 /* Panic */
+#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE
+
+/*
+ * Structure of the super block
+ */
+struct ext4_super_block {
+/*00*/ __le32 s_inodes_count; /* Inodes count */
+ __le32 s_blocks_count_lo; /* Blocks count */
+ __le32 s_r_blocks_count_lo; /* Reserved blocks count */
+ __le32 s_free_blocks_count_lo; /* Free blocks count */
+/*10*/ __le32 s_free_inodes_count; /* Free inodes count */
+ __le32 s_first_data_block; /* First Data Block */
+ __le32 s_log_block_size; /* Block size */
+ __le32 s_obso_log_frag_size; /* Obsoleted fragment size */
+/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */
+ __le32 s_obso_frags_per_group; /* Obsoleted fragments per group */
+ __le32 s_inodes_per_group; /* # Inodes per group */
+ __le32 s_mtime; /* Mount time */
+/*30*/ __le32 s_wtime; /* Write time */
+ __le16 s_mnt_count; /* Mount count */
+ __le16 s_max_mnt_count; /* Maximal mount count */
+ __le16 s_magic; /* Magic signature */
+ __le16 s_state; /* File system state */
+ __le16 s_errors; /* Behaviour when detecting errors */
+ __le16 s_minor_rev_level; /* minor revision level */
+/*40*/ __le32 s_lastcheck; /* time of last check */
+ __le32 s_checkinterval; /* max. time between checks */
+ __le32 s_creator_os; /* OS */
+ __le32 s_rev_level; /* Revision level */
+/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */
+ __le16 s_def_resgid; /* Default gid for reserved blocks */
+ /*
+ * These fields are for EXT4_DYNAMIC_REV superblocks only.
+ *
+ * Note: the difference between the compatible feature set and
+ * the incompatible feature set is that if there is a bit set
+ * in the incompatible feature set that the kernel doesn't
+ * know about, it should refuse to mount the filesystem.
+ *
+ * e2fsck's requirements are more strict; if it doesn't know
+ * about a feature in either the compatible or incompatible
+ * feature set, it must abort and not try to meddle with
+ * things it doesn't understand...
+ */
+ __le32 s_first_ino; /* First non-reserved inode */
+ __le16 s_inode_size; /* size of inode structure */
+ __le16 s_block_group_nr; /* block group # of this superblock */
+ __le32 s_feature_compat; /* compatible feature set */
+/*60*/ __le32 s_feature_incompat; /* incompatible feature set */
+ __le32 s_feature_ro_compat; /* readonly-compatible feature set */
+/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */
+/*78*/ char s_volume_name[16]; /* volume name */
+/*88*/ char s_last_mounted[64]; /* directory where last mounted */
+/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */
+ /*
+ * Performance hints. Directory preallocation should only
+ * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
+ */
+ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/
+ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */
+ __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */
+ /*
+ * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set.
+ */
+/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */
+/*E0*/ __le32 s_journal_inum; /* inode number of journal file */
+ __le32 s_journal_dev; /* device number of journal file */
+ __le32 s_last_orphan; /* start of list of inodes to delete */
+ __le32 s_hash_seed[4]; /* HTREE hash seed */
+ __u8 s_def_hash_version; /* Default hash version to use */
+ __u8 s_reserved_char_pad;
+ __le16 s_desc_size; /* size of group descriptor */
+/*100*/ __le32 s_default_mount_opts;
+ __le32 s_first_meta_bg; /* First metablock block group */
+ __le32 s_mkfs_time; /* When the filesystem was created */
+ __le32 s_jnl_blocks[17]; /* Backup of the journal inode */
+ /* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */
+/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
+ __le32 s_r_blocks_count_hi; /* Reserved blocks count */
+ __le32 s_free_blocks_count_hi; /* Free blocks count */
+ __le16 s_min_extra_isize; /* All inodes have at least # bytes */
+ __le16 s_want_extra_isize; /* New inodes should reserve # bytes */
+ __le32 s_flags; /* Miscellaneous flags */
+ __le16 s_raid_stride; /* RAID stride */
+ __le16 s_mmp_interval; /* # seconds to wait in MMP checking */
+ __le64 s_mmp_block; /* Block for multi-mount protection */
+ __le32 s_raid_stripe_width; /* blocks on all data disks (N*stride)*/
+ __u32 s_reserved[163]; /* Padding to the end of the block */
+};
+
+#ifdef __KERNEL__
+static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb)
+{
+ return sb->s_fs_info;
+}
+static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
+{
+ return container_of(inode, struct ext4_inode_info, vfs_inode);
+}
+
+static inline struct timespec ext4_current_time(struct inode *inode)
+{
+ return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
+ current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
+}
+
+
+static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
+{
+ return ino == EXT4_ROOT_INO ||
+ ino == EXT4_JOURNAL_INO ||
+ ino == EXT4_RESIZE_INO ||
+ (ino >= EXT4_FIRST_INO(sb) &&
+ ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
+}
+#else
+/* Assume that user mode programs are passing in an ext4fs superblock, not
+ * a kernel struct super_block. This will allow us to call the feature-test
+ * macros from user land. */
+#define EXT4_SB(sb) (sb)
+#endif
+
+#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime
+
+/*
+ * Codes for operating systems
+ */
+#define EXT4_OS_LINUX 0
+#define EXT4_OS_HURD 1
+#define EXT4_OS_MASIX 2
+#define EXT4_OS_FREEBSD 3
+#define EXT4_OS_LITES 4
+
+/*
+ * Revision levels
+ */
+#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */
+#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */
+
+#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV
+#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV
+
+#define EXT4_GOOD_OLD_INODE_SIZE 128
+
+/*
+ * Feature set definitions
+ */
+
+#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \
+ ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) )
+#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \
+ ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) )
+#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \
+ ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) )
+#define EXT4_SET_COMPAT_FEATURE(sb,mask) \
+ EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask)
+#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \
+ EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask)
+#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \
+ EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask)
+#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \
+ EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask)
+#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \
+ EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask)
+#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \
+ EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask)
+
+#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001
+#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002
+#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004
+#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008
+#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010
+#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020
+
+#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001
+#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002
+#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004
+#define EXT4_FEATURE_RO_COMPAT_HUGE_FILE 0x0008
+#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010
+#define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020
+#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
+
+#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
+#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
+#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
+#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
+#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010
+#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */
+#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080
+#define EXT4_FEATURE_INCOMPAT_MMP 0x0100
+#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
+
+#define EXT4_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
+#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
+ EXT4_FEATURE_INCOMPAT_RECOVER| \
+ EXT4_FEATURE_INCOMPAT_META_BG| \
+ EXT4_FEATURE_INCOMPAT_EXTENTS| \
+ EXT4_FEATURE_INCOMPAT_64BIT| \
+ EXT4_FEATURE_INCOMPAT_FLEX_BG)
+#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
+ EXT4_FEATURE_RO_COMPAT_DIR_NLINK | \
+ EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
+ EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
+ EXT4_FEATURE_RO_COMPAT_HUGE_FILE)
+
+/*
+ * Default values for user and/or group using reserved blocks
+ */
+#define EXT4_DEF_RESUID 0
+#define EXT4_DEF_RESGID 0
+
+/*
+ * Default mount options
+ */
+#define EXT4_DEFM_DEBUG 0x0001
+#define EXT4_DEFM_BSDGROUPS 0x0002
+#define EXT4_DEFM_XATTR_USER 0x0004
+#define EXT4_DEFM_ACL 0x0008
+#define EXT4_DEFM_UID16 0x0010
+#define EXT4_DEFM_JMODE 0x0060
+#define EXT4_DEFM_JMODE_DATA 0x0020
+#define EXT4_DEFM_JMODE_ORDERED 0x0040
+#define EXT4_DEFM_JMODE_WBACK 0x0060
+
+/*
+ * Structure of a directory entry
+ */
+#define EXT4_NAME_LEN 255
+
+struct ext4_dir_entry {
+ __le32 inode; /* Inode number */
+ __le16 rec_len; /* Directory entry length */
+ __le16 name_len; /* Name length */
+ char name[EXT4_NAME_LEN]; /* File name */
+};
+
+/*
+ * The new version of the directory entry. Since EXT4 structures are
+ * stored in intel byte order, and the name_len field could never be
+ * bigger than 255 chars, it's safe to reclaim the extra byte for the
+ * file_type field.
+ */
+struct ext4_dir_entry_2 {
+ __le32 inode; /* Inode number */
+ __le16 rec_len; /* Directory entry length */
+ __u8 name_len; /* Name length */
+ __u8 file_type;
+ char name[EXT4_NAME_LEN]; /* File name */
+};
+
+/*
+ * Ext4 directory file types. Only the low 3 bits are used. The
+ * other bits are reserved for now.
+ */
+#define EXT4_FT_UNKNOWN 0
+#define EXT4_FT_REG_FILE 1
+#define EXT4_FT_DIR 2
+#define EXT4_FT_CHRDEV 3
+#define EXT4_FT_BLKDEV 4
+#define EXT4_FT_FIFO 5
+#define EXT4_FT_SOCK 6
+#define EXT4_FT_SYMLINK 7
+
+#define EXT4_FT_MAX 8
+
+/*
+ * EXT4_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 4
+ */
+#define EXT4_DIR_PAD 4
+#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1)
+#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \
+ ~EXT4_DIR_ROUND)
+#define EXT4_MAX_REC_LEN ((1<<16)-1)
+
+static inline unsigned ext4_rec_len_from_disk(__le16 dlen)
+{
+ unsigned len = le16_to_cpu(dlen);
+
+ if (len == EXT4_MAX_REC_LEN)
+ return 1 << 16;
+ return len;
+}
+
+static inline __le16 ext4_rec_len_to_disk(unsigned len)
+{
+ if (len == (1 << 16))
+ return cpu_to_le16(EXT4_MAX_REC_LEN);
+ else if (len > (1 << 16))
+ BUG();
+ return cpu_to_le16(len);
+}
+
+/*
+ * Hash Tree Directory indexing
+ * (c) Daniel Phillips, 2001
+ */
+
+#define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE((dir)->i_sb, \
+ EXT4_FEATURE_COMPAT_DIR_INDEX) && \
+ (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) /* NB: evaluates dir twice */
+#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX)
+#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1)
+
+/* Legal values for the dx_root hash_version field: */
+
+#define DX_HASH_LEGACY 0
+#define DX_HASH_HALF_MD4 1
+#define DX_HASH_TEA 2
+
+#ifdef __KERNEL__
+
+/* hash info structure used by the directory hash */
+struct dx_hash_info
+{
+ u32 hash;
+ u32 minor_hash;
+ int hash_version;
+ u32 *seed;
+};
+
+#define EXT4_HTREE_EOF 0x7fffffff
+
+/*
+ * Control parameters used by ext4_htree_next_block
+ */
+#define HASH_NB_ALWAYS 1
+
+
+/*
+ * Describe an inode's exact location on disk and in memory
+ */
+struct ext4_iloc
+{
+ struct buffer_head *bh;
+ unsigned long offset;
+ ext4_group_t block_group;
+};
+
+static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc)
+{
+ return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset);
+}
+
+/*
+ * This structure is stuffed into the struct file's private_data field
+ * for directories. It is where we put information so that we can do
+ * readdir operations in hash tree order.
+ */
+struct dir_private_info {
+ struct rb_root root;
+ struct rb_node *curr_node;
+ struct fname *extra_fname;
+ loff_t last_pos;
+ __u32 curr_hash;
+ __u32 curr_minor_hash;
+ __u32 next_hash;
+};
+
+/* calculate the first block number of the group */
+static inline ext4_fsblk_t
+ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
+{
+ return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+}
+
+/*
+ * Special error return code only used by dx_probe() and its callers.
+ */
+#define ERR_BAD_DX_DIR -75000
+
+void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
+ ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
+
+/*
+ * Function prototypes
+ */
+
+/*
+ * Ok, these declarations are also in <linux/kernel.h> but none of the
+ * ext4 source programs needs to include it so they are duplicated here.
+ */
+# define NORET_TYPE /**/
+# define ATTRIB_NORET __attribute__((noreturn))
+# define NORET_AND noreturn,
+
+/* balloc.c */
+extern unsigned int ext4_block_group(struct super_block *sb,
+ ext4_fsblk_t blocknr);
+extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
+ ext4_fsblk_t blocknr);
+extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
+extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
+ ext4_group_t group);
+extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode,
+ ext4_fsblk_t goal, int *errp);
+extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode,
+ ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t goal, unsigned long *count, int *errp);
+extern void ext4_free_blocks (handle_t *handle, struct inode *inode,
+ ext4_fsblk_t block, unsigned long count, int metadata);
+extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb,
+ ext4_fsblk_t block, unsigned long count,
+ unsigned long *pdquot_freed_blocks);
+extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *);
+extern void ext4_check_blocks_bitmap (struct super_block *);
+extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb,
+ ext4_group_t block_group,
+ struct buffer_head ** bh);
+extern int ext4_should_retry_alloc(struct super_block *sb, int *retries);
+extern void ext4_init_block_alloc_info(struct inode *);
+extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv);
+
+/* dir.c */
+extern int ext4_check_dir_entry(const char *, struct inode *,
+ struct ext4_dir_entry_2 *,
+ struct buffer_head *, unsigned long);
+extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
+ __u32 minor_hash,
+ struct ext4_dir_entry_2 *dirent);
+extern void ext4_htree_free_dir_info(struct dir_private_info *p);
+
+/* fsync.c */
+extern int ext4_sync_file (struct file *, struct dentry *, int);
+
+/* hash.c */
+extern int ext4fs_dirhash(const char *name, int len, struct
+ dx_hash_info *hinfo);
+
+/* ialloc.c */
+extern struct inode * ext4_new_inode (handle_t *, struct inode *, int);
+extern void ext4_free_inode (handle_t *, struct inode *);
+extern struct inode * ext4_orphan_get (struct super_block *, unsigned long);
+extern unsigned long ext4_count_free_inodes (struct super_block *);
+extern unsigned long ext4_count_dirs (struct super_block *);
+extern void ext4_check_inodes_bitmap (struct super_block *);
+extern unsigned long ext4_count_free (struct buffer_head *, unsigned);
+
+/* mballoc.c */
+extern long ext4_mb_stats;
+extern long ext4_mb_max_to_scan;
+extern int ext4_mb_init(struct super_block *, int);
+extern int ext4_mb_release(struct super_block *);
+extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
+ struct ext4_allocation_request *, int *);
+extern int ext4_mb_reserve_blocks(struct super_block *, int);
+extern void ext4_mb_discard_inode_preallocations(struct inode *);
+extern int __init init_ext4_mballoc(void);
+extern void exit_ext4_mballoc(void);
+extern void ext4_mb_free_blocks(handle_t *, struct inode *,
+ unsigned long, unsigned long, int, unsigned long *);
+
+
+/* inode.c */
+int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
+ struct buffer_head *bh, ext4_fsblk_t blocknr);
+struct buffer_head *ext4_getblk(handle_t *, struct inode *,
+ ext4_lblk_t, int, int *);
+struct buffer_head *ext4_bread(handle_t *, struct inode *,
+ ext4_lblk_t, int, int *);
+int ext4_get_blocks_handle(handle_t *handle, struct inode *inode,
+ ext4_lblk_t iblock, unsigned long maxblocks,
+ struct buffer_head *bh_result,
+ int create, int extend_disksize);
+
+extern struct inode *ext4_iget(struct super_block *, unsigned long);
+extern int ext4_write_inode (struct inode *, int);
+extern int ext4_setattr (struct dentry *, struct iattr *);
+extern void ext4_delete_inode (struct inode *);
+extern int ext4_sync_inode (handle_t *, struct inode *);
+extern void ext4_discard_reservation (struct inode *);
+extern void ext4_dirty_inode(struct inode *);
+extern int ext4_change_inode_journal_flag(struct inode *, int);
+extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);
+extern void ext4_truncate (struct inode *);
+extern void ext4_set_inode_flags(struct inode *);
+extern void ext4_get_inode_flags(struct ext4_inode_info *);
+extern void ext4_set_aops(struct inode *inode);
+extern int ext4_writepage_trans_blocks(struct inode *);
+extern int ext4_block_truncate_page(handle_t *handle, struct page *page,
+ struct address_space *mapping, loff_t from);
+
+/* ioctl.c */
+extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
+extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long);
+
+/* migrate.c */
+extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int,
+ unsigned long);
+/* namei.c */
+extern int ext4_orphan_add(handle_t *, struct inode *);
+extern int ext4_orphan_del(handle_t *, struct inode *);
+extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
+ __u32 start_minor_hash, __u32 *next_hash);
+
+/* resize.c */
+extern int ext4_group_add(struct super_block *sb,
+ struct ext4_new_group_data *input);
+extern int ext4_group_extend(struct super_block *sb,
+ struct ext4_super_block *es,
+ ext4_fsblk_t n_blocks_count);
+
+/* super.c */
+extern void ext4_error (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern void __ext4_std_error (struct super_block *, const char *, int);
+extern void ext4_abort (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern void ext4_warning (struct super_block *, const char *, const char *, ...)
+ __attribute__ ((format (printf, 3, 4)));
+extern void ext4_update_dynamic_rev (struct super_block *sb);
+extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
+ __u32 compat);
+extern int ext4_update_rocompat_feature(handle_t *handle,
+ struct super_block *sb, __u32 rocompat);
+extern int ext4_update_incompat_feature(handle_t *handle,
+ struct super_block *sb, __u32 incompat);
+extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
+ struct ext4_group_desc *bg);
+extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
+ struct ext4_group_desc *bg);
+extern ext4_fsblk_t ext4_inode_table(struct super_block *sb,
+ struct ext4_group_desc *bg);
+extern void ext4_block_bitmap_set(struct super_block *sb,
+ struct ext4_group_desc *bg, ext4_fsblk_t blk);
+extern void ext4_inode_bitmap_set(struct super_block *sb,
+ struct ext4_group_desc *bg, ext4_fsblk_t blk);
+extern void ext4_inode_table_set(struct super_block *sb,
+ struct ext4_group_desc *bg, ext4_fsblk_t blk);
+
+static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
+{
+ return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
+ le32_to_cpu(es->s_blocks_count_lo);
+}
+
+static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es)
+{
+ return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) |
+ le32_to_cpu(es->s_r_blocks_count_lo);
+}
+
+static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es)
+{
+ return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) |
+ le32_to_cpu(es->s_free_blocks_count_lo);
+}
+
+static inline void ext4_blocks_count_set(struct ext4_super_block *es,
+ ext4_fsblk_t blk)
+{
+ es->s_blocks_count_lo = cpu_to_le32((u32)blk);
+ es->s_blocks_count_hi = cpu_to_le32(blk >> 32);
+}
+
+static inline void ext4_free_blocks_count_set(struct ext4_super_block *es,
+ ext4_fsblk_t blk)
+{
+ es->s_free_blocks_count_lo = cpu_to_le32((u32)blk);
+ es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32);
+}
+
+static inline void ext4_r_blocks_count_set(struct ext4_super_block *es,
+ ext4_fsblk_t blk)
+{
+ es->s_r_blocks_count_lo = cpu_to_le32((u32)blk);
+ es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32);
+}
+
+static inline loff_t ext4_isize(struct ext4_inode *raw_inode)
+{
+ return ((loff_t)le32_to_cpu(raw_inode->i_size_high) << 32) |
+ le32_to_cpu(raw_inode->i_size_lo);
+}
+
+static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size)
+{
+ raw_inode->i_size_lo = cpu_to_le32(i_size);
+ raw_inode->i_size_high = cpu_to_le32(i_size >> 32);
+}
+
+static inline
+struct ext4_group_info *ext4_get_group_info(struct super_block *sb,
+ ext4_group_t group)
+{
+ struct ext4_group_info ***grp_info;
+ long indexv, indexh;
+ grp_info = EXT4_SB(sb)->s_group_info;
+ indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb));
+ indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1);
+ return grp_info[indexv][indexh];
+}
+
+
+#define ext4_std_error(sb, errno) \
+do { \
+ if ((errno)) \
+ __ext4_std_error((sb), __func__, (errno)); \
+} while (0)
+
+/*
+ * Inodes and files operations
+ */
+
+/* dir.c */
+extern const struct file_operations ext4_dir_operations;
+
+/* file.c */
+extern const struct inode_operations ext4_file_inode_operations;
+extern const struct file_operations ext4_file_operations;
+
+/* namei.c */
+extern const struct inode_operations ext4_dir_inode_operations;
+extern const struct inode_operations ext4_special_inode_operations;
+
+/* symlink.c */
+extern const struct inode_operations ext4_symlink_inode_operations;
+extern const struct inode_operations ext4_fast_symlink_inode_operations;
+
+/* extents.c */
+extern int ext4_ext_tree_init(handle_t *handle, struct inode *);
+extern int ext4_ext_writepage_trans_blocks(struct inode *, int);
+extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
+ ext4_lblk_t iblock,
+ unsigned long max_blocks, struct buffer_head *bh_result,
+ int create, int extend_disksize);
+extern void ext4_ext_truncate(struct inode *, struct page *);
+extern void ext4_ext_init(struct super_block *);
+extern void ext4_ext_release(struct super_block *);
+extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+ loff_t len);
+extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
+ sector_t block, unsigned long max_blocks,
+ struct buffer_head *bh, int create,
+ int extend_disksize);
+#endif /* __KERNEL__ */
+
+#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
new file mode 100644
index 00000000000..75333b595fa
--- /dev/null
+++ b/fs/ext4/ext4_extents.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
+ * Written by Alex Tomas <alex@clusterfs.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _EXT4_EXTENTS
+#define _EXT4_EXTENTS
+
+#include "ext4.h"
+
+/*
+ * With AGGRESSIVE_TEST defined, the capacity of index/leaf blocks
+ * becomes very small, so index split, in-depth growing and
+ * other hard changes happen much more often.
+ * This is for debug purposes only.
+ */
+#define AGGRESSIVE_TEST_
+
+/*
+ * With EXTENTS_STATS defined, the number of blocks and extents
+ * are collected in the truncate path. They'll be shown at
+ * umount time.
+ */
+#define EXTENTS_STATS__
+
+/*
+ * If CHECK_BINSEARCH is defined, then the results of the binary search
+ * will also be checked by linear search.
+ */
+#define CHECK_BINSEARCH__
+
+/*
+ * If EXT_DEBUG is defined you can use the 'extdebug' mount option
+ * to get lots of info about what's going on.
+ */
+#define EXT_DEBUG__
+#ifdef EXT_DEBUG
+#define ext_debug(a...) printk(a)
+#else
+#define ext_debug(a...)
+#endif
+
+/*
+ * If EXT_STATS is defined then stats numbers are collected.
+ * These number will be displayed at umount time.
+ */
+#define EXT_STATS_
+
+
+/*
+ * ext4_inode has i_block array (60 bytes total).
+ * The first 12 bytes store ext4_extent_header;
+ * the remainder stores an array of ext4_extent.
+ */
+
+/*
+ * This is the extent on-disk structure.
+ * It's used at the bottom of the tree.
+ */
+struct ext4_extent {
+ __le32 ee_block; /* first logical block extent covers */
+ __le16 ee_len; /* number of blocks covered by extent */
+ __le16 ee_start_hi; /* high 16 bits of physical block */
+ __le32 ee_start_lo; /* low 32 bits of physical block */
+};
+
+/*
+ * This is the on-disk index structure.
+ * It's used at all the levels except the bottom.
+ */
+struct ext4_extent_idx {
+ __le32 ei_block; /* index covers logical blocks from 'block' */
+ __le32 ei_leaf_lo; /* pointer to the physical block of the next *
+ * level. leaf or next index could be there */
+ __le16 ei_leaf_hi; /* high 16 bits of physical block */
+ __u16 ei_unused;
+};
+
+/*
+ * Each block (leaves and indexes), even an inode-stored one, has a header.
+ */
+struct ext4_extent_header {
+ __le16 eh_magic; /* probably will support different formats */
+ __le16 eh_entries; /* number of valid entries */
+ __le16 eh_max; /* capacity of store in entries */
+ __le16 eh_depth; /* has tree real underlying blocks? */
+ __le32 eh_generation; /* generation of the tree */
+};
+
+#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a)
+
+/*
+ * Array of ext4_ext_path contains path to some extent.
+ * Creation/lookup routines use it for traversal/splitting/etc.
+ * Truncate uses it to simulate recursive walking.
+ */
+struct ext4_ext_path {
+ ext4_fsblk_t p_block;
+ __u16 p_depth;
+ struct ext4_extent *p_ext;
+ struct ext4_extent_idx *p_idx;
+ struct ext4_extent_header *p_hdr;
+ struct buffer_head *p_bh;
+};
+
+/*
+ * structure for external API
+ */
+
+#define EXT4_EXT_CACHE_NO 0
+#define EXT4_EXT_CACHE_GAP 1
+#define EXT4_EXT_CACHE_EXTENT 2
+
+
+#define EXT_MAX_BLOCK 0xffffffff
+
+/*
+ * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
+ * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
+ * MSB of ee_len field in the extent datastructure to signify if this
+ * particular extent is an initialized extent or an uninitialized (i.e.
+ * preallocated).
+ * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
+ * uninitialized extent.
+ * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
+ * uninitialized one. In other words, if MSB of ee_len is set, it is an
+ * uninitialized extent with only one special scenario when ee_len = 0x8000.
+ * In this case we can not have an uninitialized extent of zero length and
+ * thus we make it as a special case of initialized extent with 0x8000 length.
+ * This way we get better extent-to-group alignment for initialized extents.
+ * Hence, the maximum number of blocks we can have in an *initialized*
+ * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
+ */
+#define EXT_INIT_MAX_LEN (1UL << 15)
+#define EXT_UNINIT_MAX_LEN (EXT_INIT_MAX_LEN - 1)
+
+
+#define EXT_FIRST_EXTENT(__hdr__) \
+ ((struct ext4_extent *) (((char *) (__hdr__)) + \
+ sizeof(struct ext4_extent_header)))
+#define EXT_FIRST_INDEX(__hdr__) \
+ ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \
+ sizeof(struct ext4_extent_header)))
+#define EXT_HAS_FREE_INDEX(__path__) \
+ (le16_to_cpu((__path__)->p_hdr->eh_entries) \
+ < le16_to_cpu((__path__)->p_hdr->eh_max))
+#define EXT_LAST_EXTENT(__hdr__) \
+ (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
+#define EXT_LAST_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1)
+#define EXT_MAX_EXTENT(__hdr__) \
+ (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+#define EXT_MAX_INDEX(__hdr__) \
+ (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1)
+
+static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode)
+{
+ return (struct ext4_extent_header *) EXT4_I(inode)->i_data;
+}
+
+static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh)
+{
+ return (struct ext4_extent_header *) bh->b_data;
+}
+
+static inline unsigned short ext_depth(struct inode *inode)
+{
+ return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
+}
+
+static inline void ext4_ext_tree_changed(struct inode *inode)
+{
+ EXT4_I(inode)->i_ext_generation++;
+}
+
+static inline void
+ext4_ext_invalidate_cache(struct inode *inode)
+{
+ EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO;
+}
+
+static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
+{
+ /* We can not have an uninitialized extent of zero length! */
+ BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
+ ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
+}
+
+static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
+{
+ /* Extent with ee_len of 0x8000 is treated as an initialized extent */
+ return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
+}
+
+static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
+{
+ return (le16_to_cpu(ext->ee_len) <= EXT_INIT_MAX_LEN ?
+ le16_to_cpu(ext->ee_len) :
+ (le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
+}
+
+extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
+extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
+extern int ext4_extent_tree_init(handle_t *, struct inode *);
+extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *);
+extern int ext4_ext_try_to_merge(struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *);
+extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
+extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *);
+extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
+ struct ext4_ext_path *);
+extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
+ ext4_lblk_t *, ext4_fsblk_t *);
+extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
+ ext4_lblk_t *, ext4_fsblk_t *);
+extern void ext4_ext_drop_refs(struct ext4_ext_path *);
+#endif /* _EXT4_EXTENTS */
+
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
new file mode 100644
index 00000000000..26a4ae255d7
--- /dev/null
+++ b/fs/ext4/ext4_i.h
@@ -0,0 +1,167 @@
+/*
+ * ext4_i.h
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/include/linux/minix_fs_i.h
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#ifndef _EXT4_I
+#define _EXT4_I
+
+#include <linux/rwsem.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/mutex.h>
+
+/* data type for block offset of block group */
+typedef int ext4_grpblk_t;
+
+/* data type for filesystem-wide blocks number */
+typedef unsigned long long ext4_fsblk_t;
+
+/* data type for file logical block number */
+typedef __u32 ext4_lblk_t;
+
+/* data type for block group number */
+typedef unsigned long ext4_group_t;
+
+struct ext4_reserve_window {
+ ext4_fsblk_t _rsv_start; /* First byte reserved */
+ ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */
+};
+
+struct ext4_reserve_window_node {
+ struct rb_node rsv_node;
+ __u32 rsv_goal_size;
+ __u32 rsv_alloc_hit;
+ struct ext4_reserve_window rsv_window;
+};
+
+struct ext4_block_alloc_info {
+ /* information about reservation window */
+ struct ext4_reserve_window_node rsv_window_node;
+ /*
+ * was i_next_alloc_block in ext4_inode_info
+ * is the logical (file-relative) number of the
+ * most-recently-allocated block in this file.
+ * We use this for detecting linearly ascending allocation requests.
+ */
+ ext4_lblk_t last_alloc_logical_block;
+ /*
+ * Was i_next_alloc_goal in ext4_inode_info
+ * is the *physical* companion to i_next_alloc_block.
+ * it is the physical block number of the block which was most-recently
+ * allocated to this file. This gives us the goal (target) for the next
+ * allocation when we detect linearly ascending requests.
+ */
+ ext4_fsblk_t last_alloc_physical_block;
+};
+
+#define rsv_start rsv_window._rsv_start
+#define rsv_end rsv_window._rsv_end
+
+/*
+ * storage for cached extent
+ */
+struct ext4_ext_cache {
+ ext4_fsblk_t ec_start;
+ ext4_lblk_t ec_block;
+ __u32 ec_len; /* must be 32bit to return holes */
+ __u32 ec_type;
+};
+
+/*
+ * fourth extended file system inode data in memory
+ */
+struct ext4_inode_info {
+ __le32 i_data[15]; /* unconverted */
+ __u32 i_flags;
+ ext4_fsblk_t i_file_acl;
+ __u32 i_dtime;
+
+ /*
+ * i_block_group is the number of the block group which contains
+ * this file's inode. Constant across the lifetime of the inode,
+ * it is used for making block allocation decisions - we try to
+ * place a file's data blocks near its inode block, and new inodes
+ * near to their parent directory's inode.
+ */
+ ext4_group_t i_block_group;
+ __u32 i_state; /* Dynamic state flags for ext4 */
+
+ /* block reservation info */
+ struct ext4_block_alloc_info *i_block_alloc_info;
+
+ ext4_lblk_t i_dir_start_lookup;
+#ifdef CONFIG_EXT4DEV_FS_XATTR
+ /*
+ * Extended attributes can be read independently of the main file
+ * data. Taking i_mutex even when reading would cause contention
+ * between readers of EAs and writers of regular file data, so
+ * instead we synchronize on xattr_sem when reading or changing
+ * EAs.
+ */
+ struct rw_semaphore xattr_sem;
+#endif
+#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL
+ struct posix_acl *i_acl;
+ struct posix_acl *i_default_acl;
+#endif
+
+ struct list_head i_orphan; /* unlinked but open inodes */
+
+ /*
+ * i_disksize keeps track of what the inode size is ON DISK, not
+ * in memory. During truncate, i_size is set to the new size by
+ * the VFS prior to calling ext4_truncate(), but the filesystem won't
+ * set i_disksize to 0 until the truncate is actually under way.
+ *
+ * The intent is that i_disksize always represents the blocks which
+ * are used by this file. This allows recovery to restart truncate
+ * on orphans if we crash during truncate. We actually write i_disksize
+ * into the on-disk inode when writing inodes out, instead of i_size.
+ *
+ * The only time when i_disksize and i_size may be different is when
+ * a truncate is in progress. The only things which change i_disksize
+ * are ext4_get_block (growth) and ext4_truncate (shrinkth).
+ */
+ loff_t i_disksize;
+
+ /* on-disk additional length */
+ __u16 i_extra_isize;
+
+ /*
+ * i_data_sem is for serialising ext4_truncate() against
+ * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's
+ * data tree are chopped off during truncate. We can't do that in
+ * ext4 because whenever we perform intermediate commits during
+ * truncate, the inode and all the metadata blocks *must* be in a
+ * consistent state which allows truncation of the orphans to restart
+ * during recovery. Hence we must fix the get_block-vs-truncate race
+ * by other means, so we have i_data_sem.
+ */
+ struct rw_semaphore i_data_sem;
+ struct inode vfs_inode;
+
+ unsigned long i_ext_generation;
+ struct ext4_ext_cache i_cached_extent;
+ /*
+ * File creation time. Its function is same as that of
+ * struct timespec i_{a,c,m}time in the generic inode.
+ */
+ struct timespec i_crtime;
+
+ /* mballoc */
+ struct list_head i_prealloc_list;
+ spinlock_t i_prealloc_lock;
+};
+
+#endif /* _EXT4_I */
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d6afe4e2734..c75384b34f2 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -2,14 +2,14 @@
* Interface between ext4 and JBD
*/
-#include <linux/ext4_jbd2.h>
+#include "ext4_jbd2.h"
int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
struct buffer_head *bh)
{
int err = jbd2_journal_get_undo_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
return err;
}
@@ -18,7 +18,7 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
{
int err = jbd2_journal_get_write_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
return err;
}
@@ -27,7 +27,7 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
{
int err = jbd2_journal_forget(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
return err;
}
@@ -36,7 +36,7 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
{
int err = jbd2_journal_revoke(handle, blocknr, bh);
if (err)
- ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
return err;
}
@@ -45,7 +45,7 @@ int __ext4_journal_get_create_access(const char *where,
{
int err = jbd2_journal_get_create_access(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
return err;
}
@@ -54,6 +54,6 @@ int __ext4_journal_dirty_metadata(const char *where,
{
int err = jbd2_journal_dirty_metadata(handle, bh);
if (err)
- ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err);
+ ext4_journal_abort_handle(where, __func__, bh, handle, err);
return err;
}
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
new file mode 100644
index 00000000000..9255a7d28b2
--- /dev/null
+++ b/fs/ext4/ext4_jbd2.h
@@ -0,0 +1,231 @@
+/*
+ * ext4_jbd2.h
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
+ *
+ * Copyright 1998--1999 Red Hat corp --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Ext4-specific journaling extensions.
+ */
+
+#ifndef _EXT4_JBD2_H
+#define _EXT4_JBD2_H
+
+#include <linux/fs.h>
+#include <linux/jbd2.h>
+#include "ext4.h"
+
+#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal)
+
+/* Define the number of blocks we need to account to a transaction to
+ * modify one block of data.
+ *
+ * We may have to touch one inode, one bitmap buffer, up to three
+ * indirection blocks, the group and superblock summaries, and the data
+ * block to complete the transaction.
+ *
+ * For extents-enabled fs we may have to allocate and modify up to
+ * 5 levels of tree + root which are stored in the inode. */
+
+#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \
+ (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \
+ || test_opt(sb, EXTENTS) ? 27U : 8U)
+
+/* Extended attribute operations touch at most two data buffers,
+ * two bitmap buffers, and two group summaries, in addition to the inode
+ * and the superblock, which are already accounted for. */
+
+#define EXT4_XATTR_TRANS_BLOCKS 6U
+
+/* Define the minimum size for a transaction which modifies data. This
+ * needs to take into account the fact that we may end up modifying two
+ * quota files too (one for the group, one for the user quota). The
+ * superblock only gets updated once, of course, so don't bother
+ * counting that again for the quota updates. */
+
+#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
+ EXT4_XATTR_TRANS_BLOCKS - 2 + \
+ 2*EXT4_QUOTA_TRANS_BLOCKS(sb))
+
+/* Delete operations potentially hit one directory's namespace plus an
+ * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
+ * generous. We can grow the delete transaction later if necessary. */
+
+#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64)
+
+/* Define an arbitrary limit for the amount of data we will anticipate
+ * writing to any given transaction. For unbounded transactions such as
+ * write(2) and truncate(2) we can write more than this, but we always
+ * start off at the maximum transaction size and grow the transaction
+ * optimistically as we go. */
+
+#define EXT4_MAX_TRANS_DATA 64U
+
+/* We break up a large truncate or write transaction once the handle's
+ * buffer credits gets this low, we need either to extend the
+ * transaction or to start a new one. Reserve enough space here for
+ * inode, bitmap, superblock, group and indirection updates for at least
+ * one block, plus two quota updates. Quota allocations are not
+ * needed. */
+
+#define EXT4_RESERVE_TRANS_BLOCKS 12U
+
+#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8
+
+#ifdef CONFIG_QUOTA
+/* Amount of blocks needed for quota update - we know that the structure was
+ * allocated so we need to update only inode+data */
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
+/* Amount of blocks needed for quota insert/delete - we do some block writes
+ * but inode, sb and group updates are done only once */
+#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
+#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+ (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
+#else
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
+#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
+#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
+#endif
+
+int
+ext4_mark_iloc_dirty(handle_t *handle,
+ struct inode *inode,
+ struct ext4_iloc *iloc);
+
+/*
+ * On success, we end up with an outstanding reference count against
+ * iloc->bh. This _must_ be cleaned up later.
+ */
+
+int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
+ struct ext4_iloc *iloc);
+
+int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
+
+/*
+ * Wrapper functions with which ext4 calls into JBD. The intent here is
+ * to allow these to be turned into appropriate stubs so ext4 can control
+ * ext2 filesystems, so ext2+ext4 systems only need one fs. This work hasn't
+ * been done yet.
+ */
+
+static inline void ext4_journal_release_buffer(handle_t *handle,
+ struct buffer_head *bh)
+{
+ jbd2_journal_release_buffer(handle, bh);
+}
+
+void ext4_journal_abort_handle(const char *caller, const char *err_fn,
+ struct buffer_head *bh, handle_t *handle, int err);
+
+int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
+ struct buffer_head *bh);
+
+int __ext4_journal_get_write_access(const char *where, handle_t *handle,
+ struct buffer_head *bh);
+
+int __ext4_journal_forget(const char *where, handle_t *handle,
+ struct buffer_head *bh);
+
+int __ext4_journal_revoke(const char *where, handle_t *handle,
+ ext4_fsblk_t blocknr, struct buffer_head *bh);
+
+int __ext4_journal_get_create_access(const char *where,
+ handle_t *handle, struct buffer_head *bh);
+
+int __ext4_journal_dirty_metadata(const char *where,
+ handle_t *handle, struct buffer_head *bh);
+
+#define ext4_journal_get_undo_access(handle, bh) \
+ __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh))
+#define ext4_journal_get_write_access(handle, bh) \
+ __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh))
+#define ext4_journal_revoke(handle, blocknr, bh) \
+ __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
+#define ext4_journal_get_create_access(handle, bh) \
+ __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh))
+#define ext4_journal_dirty_metadata(handle, bh) \
+ __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
+#define ext4_journal_forget(handle, bh) \
+ __ext4_journal_forget(__FUNCTION__, (handle), (bh))
+
+int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
+
+handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
+int __ext4_journal_stop(const char *where, handle_t *handle);
+
+static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
+{
+ return ext4_journal_start_sb(inode->i_sb, nblocks);
+}
+
+#define ext4_journal_stop(handle) \
+ __ext4_journal_stop(__FUNCTION__, (handle))
+
+static inline handle_t *ext4_journal_current_handle(void)
+{
+ return journal_current_handle();
+}
+
+static inline int ext4_journal_extend(handle_t *handle, int nblocks)
+{
+ return jbd2_journal_extend(handle, nblocks);
+}
+
+static inline int ext4_journal_restart(handle_t *handle, int nblocks)
+{
+ return jbd2_journal_restart(handle, nblocks);
+}
+
+static inline int ext4_journal_blocks_per_page(struct inode *inode)
+{
+ return jbd2_journal_blocks_per_page(inode);
+}
+
+static inline int ext4_journal_force_commit(journal_t *journal)
+{
+ return jbd2_journal_force_commit(journal);
+}
+
+/* super.c */
+int ext4_force_commit(struct super_block *sb);
+
+static inline int ext4_should_journal_data(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return 1;
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ return 1;
+ if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+ return 1;
+ return 0;
+}
+
+static inline int ext4_should_order_data(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+ if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+ return 0;
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ return 1;
+ return 0;
+}
+
+static inline int ext4_should_writeback_data(struct inode *inode)
+{
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+ if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+ return 0;
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+ return 1;
+ return 0;
+}
+
+#endif /* _EXT4_JBD2_H */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
new file mode 100644
index 00000000000..5802e69f219
--- /dev/null
+++ b/fs/ext4/ext4_sb.h
@@ -0,0 +1,148 @@
+/*
+ * ext4_sb.h
+ *
+ * Copyright (C) 1992, 1993, 1994, 1995
+ * Remy Card (card@masi.ibp.fr)
+ * Laboratoire MASI - Institut Blaise Pascal
+ * Universite Pierre et Marie Curie (Paris VI)
+ *
+ * from
+ *
+ * linux/include/linux/minix_fs_sb.h
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#ifndef _EXT4_SB
+#define _EXT4_SB
+
+#ifdef __KERNEL__
+#include <linux/timer.h>
+#include <linux/wait.h>
+#include <linux/blockgroup_lock.h>
+#include <linux/percpu_counter.h>
+#endif
+#include <linux/rbtree.h>
+
+/*
+ * third extended-fs super-block data in memory
+ */
+struct ext4_sb_info {
+ unsigned long s_desc_size; /* Size of a group descriptor in bytes */
+ unsigned long s_inodes_per_block;/* Number of inodes per block */
+ unsigned long s_blocks_per_group;/* Number of blocks in a group */
+ unsigned long s_inodes_per_group;/* Number of inodes in a group */
+ unsigned long s_itb_per_group; /* Number of inode table blocks per group */
+ unsigned long s_gdb_count; /* Number of group descriptor blocks */
+ unsigned long s_desc_per_block; /* Number of group descriptors per block */
+ ext4_group_t s_groups_count; /* Number of groups in the fs */
+ unsigned long s_overhead_last; /* Last calculated overhead */
+ unsigned long s_blocks_last; /* Last seen block count */
+ loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */
+ struct buffer_head * s_sbh; /* Buffer containing the super block */
+ struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */
+ struct buffer_head ** s_group_desc;
+ unsigned long s_mount_opt;
+ ext4_fsblk_t s_sb_block;
+ uid_t s_resuid;
+ gid_t s_resgid;
+ unsigned short s_mount_state;
+ unsigned short s_pad;
+ int s_addr_per_block_bits;
+ int s_desc_per_block_bits;
+ int s_inode_size;
+ int s_first_ino;
+ spinlock_t s_next_gen_lock;
+ u32 s_next_generation;
+ u32 s_hash_seed[4];
+ int s_def_hash_version;
+ struct percpu_counter s_freeblocks_counter;
+ struct percpu_counter s_freeinodes_counter;
+ struct percpu_counter s_dirs_counter;
+ struct blockgroup_lock s_blockgroup_lock;
+
+ /* root of the per fs reservation window tree */
+ spinlock_t s_rsv_window_lock;
+ struct rb_root s_rsv_window_root;
+ struct ext4_reserve_window_node s_rsv_window_head;
+
+ /* Journaling */
+ struct inode * s_journal_inode;
+ struct journal_s * s_journal;
+ struct list_head s_orphan;
+ unsigned long s_commit_interval;
+ struct block_device *journal_bdev;
+#ifdef CONFIG_JBD2_DEBUG
+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
+#endif
+#ifdef CONFIG_QUOTA
+ char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */
+ int s_jquota_fmt; /* Format of quota to use */
+#endif
+ unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
+
+#ifdef EXTENTS_STATS
+ /* ext4 extents stats */
+ unsigned long s_ext_min;
+ unsigned long s_ext_max;
+ unsigned long s_depth_max;
+ spinlock_t s_ext_stats_lock;
+ unsigned long s_ext_blocks;
+ unsigned long s_ext_extents;
+#endif
+
+ /* for buddy allocator */
+ struct ext4_group_info ***s_group_info;
+ struct inode *s_buddy_cache;
+ long s_blocks_reserved;
+ spinlock_t s_reserve_lock;
+ struct list_head s_active_transaction;
+ struct list_head s_closed_transaction;
+ struct list_head s_committed_transaction;
+ spinlock_t s_md_lock;
+ tid_t s_last_transaction;
+ unsigned short *s_mb_offsets, *s_mb_maxs;
+
+ /* tunables */
+ unsigned long s_stripe;
+ unsigned long s_mb_stream_request;
+ unsigned long s_mb_max_to_scan;
+ unsigned long s_mb_min_to_scan;
+ unsigned long s_mb_stats;
+ unsigned long s_mb_order2_reqs;
+ unsigned long s_mb_group_prealloc;
+ /* where last allocation was done - for stream allocation */
+ unsigned long s_mb_last_group;
+ unsigned long s_mb_last_start;
+
+ /* history to debug policy */
+ struct ext4_mb_history *s_mb_history;
+ int s_mb_history_cur;
+ int s_mb_history_max;
+ int s_mb_history_num;
+ struct proc_dir_entry *s_mb_proc;
+ spinlock_t s_mb_history_lock;
+ int s_mb_history_filter;
+
+ /* stats for buddy allocator */
+ spinlock_t s_mb_pa_lock;
+ atomic_t s_bal_reqs; /* number of reqs with len > 1 */
+ atomic_t s_bal_success; /* we found long enough chunks */
+ atomic_t s_bal_allocated; /* in blocks */
+ atomic_t s_bal_ex_scanned; /* total extents scanned */
+ atomic_t s_bal_goals; /* goal hits */
+ atomic_t s_bal_breaks; /* too long searches */
+ atomic_t s_bal_2orders; /* 2^order hits */
+ spinlock_t s_bal_lock;
+ unsigned long s_mb_buddies_generated;
+ unsigned long long s_mb_generation_time;
+ atomic_t s_mb_lost_chunks;
+ atomic_t s_mb_preallocated;
+ atomic_t s_mb_discarded;
+
+ /* locality groups */
+ struct ext4_locality_group *s_locality_groups;
+};
+
+#endif /* _EXT4_SB */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9ae6e67090c..47929c4e3da 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -32,7 +32,6 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
-#include <linux/ext4_jbd2.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
@@ -40,8 +39,9 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/falloc.h>
-#include <linux/ext4_fs_extents.h>
#include <asm/uaccess.h>
+#include "ext4_jbd2.h"
+#include "ext4_extents.h"
/*
@@ -308,7 +308,7 @@ corrupted:
}
#define ext4_ext_check_header(inode, eh, depth) \
- __ext4_ext_check_header(__FUNCTION__, inode, eh, depth)
+ __ext4_ext_check_header(__func__, inode, eh, depth)
#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
@@ -614,7 +614,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
ix->ei_block = cpu_to_le32(logical);
ext4_idx_store_pblock(ix, ptr);
- curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1);
+ le16_add_cpu(&curp->p_hdr->eh_entries, 1);
BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
> le16_to_cpu(curp->p_hdr->eh_max));
@@ -736,7 +736,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
}
if (m) {
memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
- neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m);
+ le16_add_cpu(&neh->eh_entries, m);
}
set_buffer_uptodate(bh);
@@ -753,8 +753,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto cleanup;
- path[depth].p_hdr->eh_entries =
- cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m);
+ le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
err = ext4_ext_dirty(handle, inode, path + depth);
if (err)
goto cleanup;
@@ -817,8 +816,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
if (m) {
memmove(++fidx, path[i].p_idx - m,
sizeof(struct ext4_extent_idx) * m);
- neh->eh_entries =
- cpu_to_le16(le16_to_cpu(neh->eh_entries) + m);
+ le16_add_cpu(&neh->eh_entries, m);
}
set_buffer_uptodate(bh);
unlock_buffer(bh);
@@ -834,7 +832,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path + i);
if (err)
goto cleanup;
- path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m);
+ le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
err = ext4_ext_dirty(handle, inode, path + i);
if (err)
goto cleanup;
@@ -1369,7 +1367,7 @@ int ext4_ext_try_to_merge(struct inode *inode,
* sizeof(struct ext4_extent);
memmove(ex + 1, ex + 2, len);
}
- eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries) - 1);
+ le16_add_cpu(&eh->eh_entries, -1);
merge_done = 1;
WARN_ON(eh->eh_entries == 0);
if (!eh->eh_entries)
@@ -1560,7 +1558,7 @@ has_space:
path[depth].p_ext = nearex;
}
- eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1);
+ le16_add_cpu(&eh->eh_entries, 1);
nearex = path[depth].p_ext;
nearex->ee_block = newext->ee_block;
ext4_ext_store_pblock(nearex, ext_pblock(newext));
@@ -1699,7 +1697,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
err = ext4_ext_get_access(handle, inode, path);
if (err)
return err;
- path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1);
+ le16_add_cpu(&path->p_hdr->eh_entries, -1);
err = ext4_ext_dirty(handle, inode, path);
if (err)
return err;
@@ -1902,7 +1900,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
if (num == 0) {
/* this extent is removed; mark slot entirely unused */
ext4_ext_store_pblock(ex, 0);
- eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1);
+ le16_add_cpu(&eh->eh_entries, -1);
}
ex->ee_block = cpu_to_le32(block);
@@ -1979,7 +1977,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
* We start scanning from right side, freeing all the blocks
* after i_size and walking into the tree depth-wise.
*/
- path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL);
+ path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
if (path == NULL) {
ext4_journal_stop(handle);
return -ENOMEM;
@@ -2138,6 +2136,82 @@ void ext4_ext_release(struct super_block *sb)
#endif
}
+static void bi_complete(struct bio *bio, int error)
+{
+ complete((struct completion *)bio->bi_private);
+}
+
+/* FIXME!! we need to try to merge to left or right after zero-out */
+static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
+{
+ int ret = -EIO;
+ struct bio *bio;
+ int blkbits, blocksize;
+ sector_t ee_pblock;
+ struct completion event;
+ unsigned int ee_len, len, done, offset;
+
+
+ blkbits = inode->i_blkbits;
+ blocksize = inode->i_sb->s_blocksize;
+ ee_len = ext4_ext_get_actual_len(ex);
+ ee_pblock = ext_pblock(ex);
+
+ /* convert ee_pblock to 512 byte sectors */
+ ee_pblock = ee_pblock << (blkbits - 9);
+
+ while (ee_len > 0) {
+
+ if (ee_len > BIO_MAX_PAGES)
+ len = BIO_MAX_PAGES;
+ else
+ len = ee_len;
+
+ bio = bio_alloc(GFP_NOIO, len);
+ if (!bio)
+ return -ENOMEM;
+ bio->bi_sector = ee_pblock;
+ bio->bi_bdev = inode->i_sb->s_bdev;
+
+ done = 0;
+ offset = 0;
+ while (done < len) {
+ ret = bio_add_page(bio, ZERO_PAGE(0),
+ blocksize, offset);
+ if (ret != blocksize) {
+ /*
+ * We can't add any more pages because of
+ * hardware limitations. Start a new bio.
+ */
+ break;
+ }
+ done++;
+ offset += blocksize;
+ if (offset >= PAGE_CACHE_SIZE)
+ offset = 0;
+ }
+
+ init_completion(&event);
+ bio->bi_private = &event;
+ bio->bi_end_io = bi_complete;
+ submit_bio(WRITE, bio);
+ wait_for_completion(&event);
+
+ if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+ ret = 0;
+ else {
+ ret = -EIO;
+ break;
+ }
+ bio_put(bio);
+ ee_len -= done;
+ ee_pblock += done << (blkbits - 9);
+ }
+ return ret;
+}
+
+#define EXT4_EXT_ZERO_LEN 7
+
/*
* This function is called by ext4_ext_get_blocks() if someone tries to write
* to an uninitialized extent. It may result in splitting the uninitialized
@@ -2154,7 +2228,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
ext4_lblk_t iblock,
unsigned long max_blocks)
{
- struct ext4_extent *ex, newex;
+ struct ext4_extent *ex, newex, orig_ex;
struct ext4_extent *ex1 = NULL;
struct ext4_extent *ex2 = NULL;
struct ext4_extent *ex3 = NULL;
@@ -2173,10 +2247,26 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
allocated = ee_len - (iblock - ee_block);
newblock = iblock - ee_block + ext_pblock(ex);
ex2 = ex;
+ orig_ex.ee_block = ex->ee_block;
+ orig_ex.ee_len = cpu_to_le16(ee_len);
+ ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
goto out;
+ /* If the extent has at most 2*EXT4_EXT_ZERO_LEN blocks, zero it out directly */
+ if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ /* zeroed the full extent */
+ return allocated;
+ }
/* ex1: ee_block to iblock - 1 : uninitialized */
if (iblock > ee_block) {
@@ -2195,19 +2285,103 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
/* ex3: to ee_block + ee_len : uninitialised */
if (allocated > max_blocks) {
unsigned int newdepth;
+ /* If at most EXT4_EXT_ZERO_LEN blocks are left in the extent, zero out directly */
+ if (allocated <= EXT4_EXT_ZERO_LEN) {
+ /* Mark first half uninitialized.
+ * Mark second half initialized and zero out the
+ * initialized extent
+ */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = cpu_to_le16(ee_len - allocated);
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+
+ ex3 = &newex;
+ ex3->ee_block = cpu_to_le32(iblock);
+ ext4_ext_store_pblock(ex3, newblock);
+ ex3->ee_len = cpu_to_le16(allocated);
+ err = ext4_ext_insert_extent(handle, inode, path, ex3);
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ /* zeroed the full extent */
+ return allocated;
+
+ } else if (err)
+ goto fix_extent_len;
+
+ /*
+ * We need to zero out the second half because
+ * a fallocate request can update the file size and
+ * converting the second half to initialized extent
+ * implies that we can leak some junk data to user
+ * space.
+ */
+ err = ext4_ext_zeroout(inode, ex3);
+ if (err) {
+ /*
+ * We should actually mark the
+ * second half as uninit and return error
+ * Insert would have changed the extent
+ */
+ depth = ext_depth(inode);
+ ext4_ext_drop_refs(path);
+ path = ext4_ext_find_extent(inode,
+ iblock, path);
+ if (IS_ERR(path)) {
+ err = PTR_ERR(path);
+ return err;
+ }
+ ex = path[depth].p_ext;
+ err = ext4_ext_get_access(handle, inode,
+ path + depth);
+ if (err)
+ return err;
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_dirty(handle, inode, path + depth);
+ return err;
+ }
+
+ /* zeroed the second half */
+ return allocated;
+ }
ex3 = &newex;
ex3->ee_block = cpu_to_le32(iblock + max_blocks);
ext4_ext_store_pblock(ex3, newblock + max_blocks);
ex3->ee_len = cpu_to_le16(allocated - max_blocks);
ext4_ext_mark_uninitialized(ex3);
err = ext4_ext_insert_extent(handle, inode, path, ex3);
- if (err)
- goto out;
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ /* zeroed the full extent */
+ return allocated;
+
+ } else if (err)
+ goto fix_extent_len;
/*
* The depth, and hence eh & ex might change
* as part of the insert above.
*/
newdepth = ext_depth(inode);
+ /*
+ * update the extent length after successful insert of the
+ * split extent
+ */
+ orig_ex.ee_len = cpu_to_le16(ee_len -
+ ext4_ext_get_actual_len(ex3));
if (newdepth != depth) {
depth = newdepth;
ext4_ext_drop_refs(path);
@@ -2226,6 +2400,24 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
}
allocated = max_blocks;
+
+ /* If the extent has at most EXT4_EXT_ZERO_LEN blocks and we are trying
+ * to insert an extent in the middle, zero it out directly;
+ * otherwise give the extent a chance to merge to the left
+ */
+ if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
+ iblock != ee_block) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ /* zero out the first half */
+ return allocated;
+ }
}
/*
* If there was a change of depth as part of the
@@ -2282,8 +2474,29 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
goto out;
insert:
err = ext4_ext_insert_extent(handle, inode, path, &newex);
+ if (err == -ENOSPC) {
+ err = ext4_ext_zeroout(inode, &orig_ex);
+ if (err)
+ goto fix_extent_len;
+ /* update the extent length and mark as initialized */
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_dirty(handle, inode, path + depth);
+ /* zero out the first half */
+ return allocated;
+ } else if (err)
+ goto fix_extent_len;
out:
return err ? err : allocated;
+
+fix_extent_len:
+ ex->ee_block = orig_ex.ee_block;
+ ex->ee_len = orig_ex.ee_len;
+ ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_mark_uninitialized(ex);
+ ext4_ext_dirty(handle, inode, path + depth);
+ return err;
}
/*
@@ -2393,8 +2606,20 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
}
if (create == EXT4_CREATE_UNINITIALIZED_EXT)
goto out;
- if (!create)
+ if (!create) {
+ /*
+ * We have blocks reserved already. We
+ * return allocated blocks so that delalloc
+ * won't do block reservation for us. But
+ * the buffer head will be unmapped so that
+ * a read from the block returns 0s.
+ */
+ if (allocated > max_blocks)
+ allocated = max_blocks;
+ /* mark the buffer unwritten */
+ __set_bit(BH_Unwritten, &bh_result->b_state);
goto out2;
+ }
ret = ext4_ext_convert_to_initialized(handle, inode,
path, iblock,
@@ -2584,6 +2809,8 @@ out_stop:
ext4_orphan_del(handle, inode);
up_write(&EXT4_I(inode)->i_data_sem);
+ inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
+ ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
}
@@ -2608,6 +2835,28 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num)
return needed;
}
+static void ext4_falloc_update_inode(struct inode *inode,
+ int mode, loff_t new_size, int update_ctime)
+{
+ struct timespec now;
+
+ if (update_ctime) {
+ now = current_fs_time(inode->i_sb);
+ if (!timespec_equal(&inode->i_ctime, &now))
+ inode->i_ctime = now;
+ }
+ /*
+ * Update only when preallocation was requested beyond
+ * the file size.
+ */
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ new_size > i_size_read(inode)) {
+ i_size_write(inode, new_size);
+ EXT4_I(inode)->i_disksize = new_size;
+ }
+
+}
+
/*
* preallocate space for a file. This implements ext4's fallocate inode
* operation, which gets called from sys_fallocate system call.
@@ -2619,8 +2868,8 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
{
handle_t *handle;
ext4_lblk_t block;
+ loff_t new_size;
unsigned long max_blocks;
- ext4_fsblk_t nblocks = 0;
int ret = 0;
int ret2 = 0;
int retries = 0;
@@ -2639,9 +2888,12 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
return -ENODEV;
block = offset >> blkbits;
+ /*
+ * We can't just convert len to max_blocks: with blocksize = 4096,
+ * offset = 3072 and len = 2048 the range spans two blocks, not one.
+ */
max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
- - block;
-
+ - block;
/*
* credits to insert 1 extent into extent tree + buffers to be able to
* modify 1 super block, 1 block bitmap and 1 group descriptor.
@@ -2657,7 +2909,6 @@ retry:
ret = PTR_ERR(handle);
break;
}
-
ret = ext4_get_blocks_wrap(handle, inode, block,
max_blocks, &map_bh,
EXT4_CREATE_UNINITIALIZED_EXT, 0);
@@ -2673,61 +2924,24 @@ retry:
ret2 = ext4_journal_stop(handle);
break;
}
- if (ret > 0) {
- /* check wrap through sign-bit/zero here */
- if ((block + ret) < 0 || (block + ret) < block) {
- ret = -EIO;
- ext4_mark_inode_dirty(handle, inode);
- ret2 = ext4_journal_stop(handle);
- break;
- }
- if (buffer_new(&map_bh) && ((block + ret) >
- (EXT4_BLOCK_ALIGN(i_size_read(inode), blkbits)
- >> blkbits)))
- nblocks = nblocks + ret;
- }
-
- /* Update ctime if new blocks get allocated */
- if (nblocks) {
- struct timespec now;
-
- now = current_fs_time(inode->i_sb);
- if (!timespec_equal(&inode->i_ctime, &now))
- inode->i_ctime = now;
- }
+ if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
+ blkbits) >> blkbits))
+ new_size = offset + len;
+ else
+ new_size = (block + ret) << blkbits;
+ ext4_falloc_update_inode(inode, mode, new_size,
+ buffer_new(&map_bh));
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
if (ret2)
break;
}
-
- if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ if (ret == -ENOSPC &&
+ ext4_should_retry_alloc(inode->i_sb, &retries)) {
+ ret = 0;
goto retry;
-
- /*
- * Time to update the file size.
- * Update only when preallocation was requested beyond the file size.
- */
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- (offset + len) > i_size_read(inode)) {
- if (ret > 0) {
- /*
- * if no error, we assume preallocation succeeded
- * completely
- */
- i_size_write(inode, offset + len);
- EXT4_I(inode)->i_disksize = i_size_read(inode);
- } else if (ret < 0 && nblocks) {
- /* Handle partial allocation scenario */
- loff_t newsize;
-
- newsize = (nblocks << blkbits) + i_size_read(inode);
- i_size_write(inode, EXT4_BLOCK_ALIGN(newsize, blkbits));
- EXT4_I(inode)->i_disksize = i_size_read(inode);
- }
}
-
mutex_unlock(&inode->i_mutex);
return ret > 0 ? ret2 : ret;
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ac35ec58db5..4159be6366a 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -21,8 +21,8 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
+#include "ext4.h"
+#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
@@ -129,7 +129,7 @@ const struct file_operations ext4_file_operations = {
.write = do_sync_write,
.aio_read = generic_file_aio_read,
.aio_write = ext4_file_write,
- .ioctl = ext4_ioctl,
+ .unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ext4_compat_ioctl,
#endif
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 8d50879d1c2..1c8ba48d4f8 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -27,8 +27,8 @@
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
+#include "ext4.h"
+#include "ext4_jbd2.h"
/*
* akpm: A new design for ext4_sync_file().
@@ -72,6 +72,9 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync)
goto out;
}
+ if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
+ goto out;
+
/*
* The VFS has written the file data. If the inode is unaltered
* then we need not start a commit.
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index 1555024e3b3..1d6329dbe39 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -11,8 +11,8 @@
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
#include <linux/cryptohash.h>
+#include "ext4.h"
#define DELTA 0x9E3779B9
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 486e46a3918..c6efbab0c80 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -15,8 +15,6 @@
#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
@@ -25,7 +23,8 @@
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <asm/byteorder.h>
-
+#include "ext4.h"
+#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "group.h"
@@ -75,7 +74,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
/* If checksum is bad mark all blocks and inodes use to prevent
* allocation, essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
- ext4_error(sb, __FUNCTION__, "Checksum bad for group %lu\n",
+ ext4_error(sb, __func__, "Checksum bad for group %lu\n",
block_group);
gdp->bg_free_blocks_count = 0;
gdp->bg_free_inodes_count = 0;
@@ -223,11 +222,9 @@ void ext4_free_inode (handle_t *handle, struct inode * inode)
if (gdp) {
spin_lock(sb_bgl_lock(sbi, block_group));
- gdp->bg_free_inodes_count = cpu_to_le16(
- le16_to_cpu(gdp->bg_free_inodes_count) + 1);
+ le16_add_cpu(&gdp->bg_free_inodes_count, 1);
if (is_directory)
- gdp->bg_used_dirs_count = cpu_to_le16(
- le16_to_cpu(gdp->bg_used_dirs_count) - 1);
+ le16_add_cpu(&gdp->bg_used_dirs_count, -1);
gdp->bg_checksum = ext4_group_desc_csum(sbi,
block_group, gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
@@ -588,7 +585,7 @@ got:
ino++;
if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
ino > EXT4_INODES_PER_GROUP(sb)) {
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"reserved inode or inode > inodes count - "
"block_group = %lu, inode=%lu", group,
ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -664,11 +661,9 @@ got:
cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino);
}
- gdp->bg_free_inodes_count =
- cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
+ le16_add_cpu(&gdp->bg_free_inodes_count, -1);
if (S_ISDIR(mode)) {
- gdp->bg_used_dirs_count =
- cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
+ le16_add_cpu(&gdp->bg_used_dirs_count, 1);
}
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
spin_unlock(sb_bgl_lock(sbi, group));
@@ -744,23 +739,24 @@ got:
if (err)
goto fail_free_drop;
- err = ext4_mark_inode_dirty(handle, inode);
- if (err) {
- ext4_std_error(sb, err);
- goto fail_free_drop;
- }
if (test_opt(sb, EXTENTS)) {
- /* set extent flag only for directory and file */
- if (S_ISDIR(mode) || S_ISREG(mode)) {
+ /* set extent flag only for directory, file and normal symlink */
+ if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL;
ext4_ext_tree_init(handle, inode);
err = ext4_update_incompat_feature(handle, sb,
EXT4_FEATURE_INCOMPAT_EXTENTS);
if (err)
- goto fail;
+ goto fail_free_drop;
}
}
+ err = ext4_mark_inode_dirty(handle, inode);
+ if (err) {
+ ext4_std_error(sb, err);
+ goto fail_free_drop;
+ }
+
ext4_debug("allocating inode %lu\n", inode->i_ino);
goto really_out;
fail:
@@ -796,7 +792,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
/* Error cases - e2fsck has already cleaned up for us */
if (ino > max_ino) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"bad orphan ino %lu! e2fsck was run?", ino);
goto error;
}
@@ -805,7 +801,7 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = read_inode_bitmap(sb, block_group);
if (!bitmap_bh) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"inode bitmap error for orphan %lu", ino);
goto error;
}
@@ -830,7 +826,7 @@ iget_failed:
err = PTR_ERR(inode);
inode = NULL;
bad_orphan:
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"bad orphan inode %lu! e2fsck was run?", ino);
printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
bit, (unsigned long long)bitmap_bh->b_blocknr,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8fab233cb05..8d970774641 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -25,7 +25,6 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
-#include <linux/ext4_jbd2.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
@@ -36,6 +35,7 @@
#include <linux/mpage.h>
#include <linux/uio.h>
#include <linux/bio.h>
+#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
@@ -93,7 +93,7 @@ int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
BUFFER_TRACE(bh, "call ext4_journal_revoke");
err = ext4_journal_revoke(handle, blocknr, bh);
if (err)
- ext4_abort(inode->i_sb, __FUNCTION__,
+ ext4_abort(inode->i_sb, __func__,
"error %d when attempting revoke", err);
BUFFER_TRACE(bh, "exit");
return err;
@@ -985,6 +985,16 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
} else {
retval = ext4_get_blocks_handle(handle, inode, block,
max_blocks, bh, create, extend_disksize);
+
+ if (retval > 0 && buffer_new(bh)) {
+ /*
+ * We allocated new blocks which will result in
+ * i_data's format changing. Force the migrate
+ * to fail by clearing migrate flags
+ */
+ EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
+ ~EXT4_EXT_MIGRATE;
+ }
}
up_write((&EXT4_I(inode)->i_data_sem));
return retval;
@@ -1230,7 +1240,7 @@ int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
{
int err = jbd2_journal_dirty_data(handle, bh);
if (err)
- ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__,
+ ext4_journal_abort_handle(__func__, __func__,
bh, handle, err);
return err;
}
@@ -1301,10 +1311,11 @@ static int ext4_ordered_write_end(struct file *file,
new_i_size = pos + copied;
if (new_i_size > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = new_i_size;
- copied = ext4_generic_write_end(file, mapping, pos, len, copied,
+ ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
- if (copied < 0)
- ret = copied;
+ copied = ret2;
+ if (ret2 < 0)
+ ret = ret2;
}
ret2 = ext4_journal_stop(handle);
if (!ret)
@@ -1329,10 +1340,11 @@ static int ext4_writeback_write_end(struct file *file,
if (new_i_size > EXT4_I(inode)->i_disksize)
EXT4_I(inode)->i_disksize = new_i_size;
- copied = ext4_generic_write_end(file, mapping, pos, len, copied,
+ ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
page, fsdata);
- if (copied < 0)
- ret = copied;
+ copied = ret2;
+ if (ret2 < 0)
+ ret = ret2;
ret2 = ext4_journal_stop(handle);
if (!ret)
@@ -2501,12 +2513,10 @@ out_stop:
static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
unsigned long ino, struct ext4_iloc *iloc)
{
- unsigned long desc, group_desc;
ext4_group_t block_group;
unsigned long offset;
ext4_fsblk_t block;
- struct buffer_head *bh;
- struct ext4_group_desc * gdp;
+ struct ext4_group_desc *gdp;
if (!ext4_valid_inum(sb, ino)) {
/*
@@ -2518,22 +2528,10 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb,
}
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
- if (block_group >= EXT4_SB(sb)->s_groups_count) {
- ext4_error(sb,"ext4_get_inode_block","group >= groups count");
+ gdp = ext4_get_group_desc(sb, block_group, NULL);
+ if (!gdp)
return 0;
- }
- smp_rmb();
- group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb);
- desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- bh = EXT4_SB(sb)->s_group_desc[group_desc];
- if (!bh) {
- ext4_error (sb, "ext4_get_inode_block",
- "Descriptor not loaded");
- return 0;
- }
- gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data +
- desc * EXT4_DESC_SIZE(sb));
/*
* Figure out the offset within the block group inode table
*/
@@ -2976,7 +2974,8 @@ static int ext4_do_update_inode(handle_t *handle,
if (ext4_inode_blocks_set(handle, raw_inode, ei))
goto out_brelse;
raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
- raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+ /* clear the migrate flag in the raw_inode */
+ raw_inode->i_flags = cpu_to_le32(ei->i_flags & ~EXT4_EXT_MIGRATE);
if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
cpu_to_le32(EXT4_OS_HURD))
raw_inode->i_file_acl_high =
@@ -3374,7 +3373,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND;
if (mnt_count !=
le16_to_cpu(sbi->s_es->s_mnt_count)) {
- ext4_warning(inode->i_sb, __FUNCTION__,
+ ext4_warning(inode->i_sb, __func__,
"Unable to expand inode %lu. Delete"
" some EAs or run e2fsck.",
inode->i_ino);
@@ -3415,7 +3414,7 @@ void ext4_dirty_inode(struct inode *inode)
current_handle->h_transaction != handle->h_transaction) {
/* This task has a transaction open against a different fs */
printk(KERN_EMERG "%s: transactions do not match!\n",
- __FUNCTION__);
+ __func__);
} else {
jbd_debug(5, "marking dirty. outer handle=%p\n",
current_handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 25b13ede808..7a6c2f1faba 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -10,17 +10,17 @@
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/capability.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
#include <linux/time.h>
#include <linux/compat.h>
#include <linux/smp_lock.h>
#include <linux/mount.h>
#include <asm/uaccess.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
-int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
- unsigned long arg)
+long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
+ struct inode *inode = filp->f_dentry->d_inode;
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int flags;
unsigned short rsv_window_size;
@@ -277,9 +277,6 @@ setversion_out:
#ifdef CONFIG_COMPAT
long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- struct inode *inode = file->f_path.dentry->d_inode;
- int ret;
-
/* These are just misnamed, they actually get/put from/to user an int */
switch (cmd) {
case EXT4_IOC32_GETFLAGS:
@@ -319,9 +316,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
default:
return -ENOIOCTLCMD;
}
- lock_kernel();
- ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
- unlock_kernel();
- return ret;
+ return ext4_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
}
#endif
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 9d57695de74..fbec2ef9379 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -21,21 +21,7 @@
* mballoc.c contains the multiblocks allocation routines
*/
-#include <linux/time.h>
-#include <linux/fs.h>
-#include <linux/namei.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/quotaops.h>
-#include <linux/buffer_head.h>
-#include <linux/module.h>
-#include <linux/swap.h>
-#include <linux/proc_fs.h>
-#include <linux/pagemap.h>
-#include <linux/seq_file.h>
-#include <linux/version.h>
-#include "group.h"
-
+#include "mballoc.h"
/*
* MUSTDO:
* - test ext4_ext_search_left() and ext4_ext_search_right()
@@ -345,288 +331,6 @@
*
*/
-/*
- * with AGGRESSIVE_CHECK allocator runs consistency checks over
- * structures. these checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-
-/*
- * with DOUBLE_CHECK defined mballoc creates persistent in-core
- * bitmaps, maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-
-/*
- */
-#define MB_DEBUG__
-#ifdef MB_DEBUG
-#define mb_debug(fmt, a...) printk(fmt, ##a)
-#else
-#define mb_debug(fmt, a...)
-#endif
-
-/*
- * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
- * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
- */
-#define EXT4_MB_HISTORY
-#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
-#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
-#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
-#define EXT4_MB_HISTORY_FREE 8 /* free */
-
-#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
- EXT4_MB_HISTORY_PREALLOC)
-
-/*
- * How long mballoc can look for a best extent (in found extents)
- */
-#define MB_DEFAULT_MAX_TO_SCAN 200
-
-/*
- * How long mballoc must look for a best extent
- */
-#define MB_DEFAULT_MIN_TO_SCAN 10
-
-/*
- * How many groups mballoc will scan looking for the best chunk
- */
-#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
-
-/*
- * with 'ext4_mb_stats' allocator will collect stats that will be
- * shown at umount. The collecting costs though!
- */
-#define MB_DEFAULT_STATS 1
-
-/*
- * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
- * by the stream allocator, which purpose is to pack requests
- * as close each to other as possible to produce smooth I/O traffic
- * We use locality group prealloc space for stream request.
- * We can tune the same via /proc/fs/ext4/<parition>/stream_req
- */
-#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
-
-/*
- * for which requests use 2^N search using buddies
- */
-#define MB_DEFAULT_ORDER2_REQS 2
-
-/*
- * default group prealloc size 512 blocks
- */
-#define MB_DEFAULT_GROUP_PREALLOC 512
-
-static struct kmem_cache *ext4_pspace_cachep;
-static struct kmem_cache *ext4_ac_cachep;
-
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS 30
-
-struct ext4_free_metadata {
- ext4_group_t group;
- unsigned short num;
- ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
- struct list_head list;
-};
-
-struct ext4_group_info {
- unsigned long bb_state;
- unsigned long bb_tid;
- struct ext4_free_metadata *bb_md_cur;
- unsigned short bb_first_free;
- unsigned short bb_free;
- unsigned short bb_fragments;
- struct list_head bb_prealloc_list;
-#ifdef DOUBLE_CHECK
- void *bb_bitmap;
-#endif
- unsigned short bb_counters[];
-};
-
-#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
-#define EXT4_GROUP_INFO_LOCKED_BIT 1
-
-#define EXT4_MB_GRP_NEED_INIT(grp) \
- (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
-
-
-struct ext4_prealloc_space {
- struct list_head pa_inode_list;
- struct list_head pa_group_list;
- union {
- struct list_head pa_tmp_list;
- struct rcu_head pa_rcu;
- } u;
- spinlock_t pa_lock;
- atomic_t pa_count;
- unsigned pa_deleted;
- ext4_fsblk_t pa_pstart; /* phys. block */
- ext4_lblk_t pa_lstart; /* log. block */
- unsigned short pa_len; /* len of preallocated chunk */
- unsigned short pa_free; /* how many blocks are free */
- unsigned short pa_linear; /* consumed in one direction
- * strictly, for grp prealloc */
- spinlock_t *pa_obj_lock;
- struct inode *pa_inode; /* hack, for history only */
-};
-
-
-struct ext4_free_extent {
- ext4_lblk_t fe_logical;
- ext4_grpblk_t fe_start;
- ext4_group_t fe_group;
- int fe_len;
-};
-
-/*
- * Locality group:
- * we try to group all related changes together
- * so that writeback can flush/allocate them together as well
- */
-struct ext4_locality_group {
- /* for allocator */
- struct mutex lg_mutex; /* to serialize allocates */
- struct list_head lg_prealloc_list;/* list of preallocations */
- spinlock_t lg_prealloc_lock;
-};
-
-struct ext4_allocation_context {
- struct inode *ac_inode;
- struct super_block *ac_sb;
-
- /* original request */
- struct ext4_free_extent ac_o_ex;
-
- /* goal request (after normalization) */
- struct ext4_free_extent ac_g_ex;
-
- /* the best found extent */
- struct ext4_free_extent ac_b_ex;
-
- /* copy of the bext found extent taken before preallocation efforts */
- struct ext4_free_extent ac_f_ex;
-
- /* number of iterations done. we have to track to limit searching */
- unsigned long ac_ex_scanned;
- __u16 ac_groups_scanned;
- __u16 ac_found;
- __u16 ac_tail;
- __u16 ac_buddy;
- __u16 ac_flags; /* allocation hints */
- __u8 ac_status;
- __u8 ac_criteria;
- __u8 ac_repeats;
- __u8 ac_2order; /* if request is to allocate 2^N blocks and
- * N > 0, the field stores N, otherwise 0 */
- __u8 ac_op; /* operation, for history only */
- struct page *ac_bitmap_page;
- struct page *ac_buddy_page;
- struct ext4_prealloc_space *ac_pa;
- struct ext4_locality_group *ac_lg;
-};
-
-#define AC_STATUS_CONTINUE 1
-#define AC_STATUS_FOUND 2
-#define AC_STATUS_BREAK 3
-
-struct ext4_mb_history {
- struct ext4_free_extent orig; /* orig allocation */
- struct ext4_free_extent goal; /* goal allocation */
- struct ext4_free_extent result; /* result allocation */
- unsigned pid;
- unsigned ino;
- __u16 found; /* how many extents have been found */
- __u16 groups; /* how many groups have been scanned */
- __u16 tail; /* what tail broke some buddy */
- __u16 buddy; /* buddy the tail ^^^ broke */
- __u16 flags;
- __u8 cr:3; /* which phase the result extent was found at */
- __u8 op:4;
- __u8 merged:1;
-};
-
-struct ext4_buddy {
- struct page *bd_buddy_page;
- void *bd_buddy;
- struct page *bd_bitmap_page;
- void *bd_bitmap;
- struct ext4_group_info *bd_info;
- struct super_block *bd_sb;
- __u16 bd_blkbits;
- ext4_group_t bd_group;
-};
-#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
-#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
-
-#ifndef EXT4_MB_HISTORY
-static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
-{
- return;
-}
-#else
-static void ext4_mb_store_history(struct ext4_allocation_context *ac);
-#endif
-
-#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
-
-static struct proc_dir_entry *proc_root_ext4;
-struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
-ext4_fsblk_t ext4_new_blocks_old(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, unsigned long *count, int *errp);
-
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
- ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
-static void ext4_mb_return_to_preallocation(struct inode *inode,
- struct ext4_buddy *e4b, sector_t block,
- int count);
-static void ext4_mb_put_pa(struct ext4_allocation_context *,
- struct super_block *, struct ext4_prealloc_space *pa);
-static int ext4_mb_init_per_dev_proc(struct super_block *sb);
-static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
-
-
-static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
-{
- struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
- bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-
-static inline void ext4_unlock_group(struct super_block *sb,
- ext4_group_t group)
-{
- struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
- bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
-}
-
-static inline int ext4_is_group_locked(struct super_block *sb,
- ext4_group_t group)
-{
- struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
-
- return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
- &(grinfo->bb_state));
-}
-
-static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
- struct ext4_free_extent *fex)
-{
- ext4_fsblk_t block;
-
- block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
- + fex->fe_start
- + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
- return block;
-}
-
static inline void *mb_correct_addr_and_bit(int *bit, void *addr)
{
#if BITS_PER_LONG == 64
@@ -736,7 +440,7 @@ static void mb_free_blocks_double(struct inode *inode, struct ext4_buddy *e4b,
blocknr +=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
- ext4_error(sb, __FUNCTION__, "double-free of inode"
+ ext4_error(sb, __func__, "double-free of inode"
" %lu's block %llu(bit %u in group %lu)\n",
inode ? inode->i_ino : 0, blocknr,
first + i, e4b->bd_group);
@@ -898,17 +602,17 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
list_for_each(cur, &grp->bb_prealloc_list) {
ext4_group_t groupnr;
struct ext4_prealloc_space *pa;
- pa = list_entry(cur, struct ext4_prealloc_space, group_list);
- ext4_get_group_no_and_offset(sb, pa->pstart, &groupnr, &k);
+ pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
+ ext4_get_group_no_and_offset(sb, pa->pa_pstart, &groupnr, &k);
MB_CHECK_ASSERT(groupnr == e4b->bd_group);
- for (i = 0; i < pa->len; i++)
+ for (i = 0; i < pa->pa_len; i++)
MB_CHECK_ASSERT(mb_test_bit(k + i, buddy));
}
return 0;
}
#undef MB_CHECK_ASSERT
#define mb_check_buddy(e4b) __mb_check_buddy(e4b, \
- __FILE__, __FUNCTION__, __LINE__)
+ __FILE__, __func__, __LINE__)
#else
#define mb_check_buddy(e4b)
#endif
@@ -982,7 +686,7 @@ static void ext4_mb_generate_buddy(struct super_block *sb,
grp->bb_fragments = fragments;
if (free != grp->bb_free) {
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
group, free, grp->bb_free);
/*
@@ -1168,8 +872,9 @@ out:
return err;
}
-static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- struct ext4_buddy *e4b)
+static noinline_for_stack int
+ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct inode *inode = sbi->s_buddy_cache;
@@ -1367,7 +1072,7 @@ static int mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
blocknr +=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
- ext4_error(sb, __FUNCTION__, "double-free of inode"
+ ext4_error(sb, __func__, "double-free of inode"
" %lu's block %llu(bit %u in group %lu)\n",
inode ? inode->i_ino : 0, blocknr, block,
e4b->bd_group);
@@ -1848,7 +1553,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
* free blocks even though group info says we
* we have free blocks
*/
- ext4_error(sb, __FUNCTION__, "%d free blocks as per "
+ ext4_error(sb, __func__, "%d free blocks as per "
"group info. But bitmap says 0\n",
free);
break;
@@ -1857,7 +1562,7 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex);
BUG_ON(ex.fe_len <= 0);
if (free < ex.fe_len) {
- ext4_error(sb, __FUNCTION__, "%d free blocks as per "
+ ext4_error(sb, __func__, "%d free blocks as per "
"group info. But got %d blocks\n",
free, ex.fe_len);
/*
@@ -1965,7 +1670,8 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
return 0;
}
-static int ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
ext4_group_t group;
ext4_group_t i;
@@ -2465,7 +2171,8 @@ static void ext4_mb_history_init(struct super_block *sb)
/* if we can't allocate history, then we simple won't use it */
}
-static void ext4_mb_store_history(struct ext4_allocation_context *ac)
+static noinline_for_stack void
+ext4_mb_store_history(struct ext4_allocation_context *ac)
{
struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
struct ext4_mb_history h;
@@ -2565,13 +2272,13 @@ static int ext4_mb_init_backend(struct super_block *sb)
meta_group_info[j] = kzalloc(len, GFP_KERNEL);
if (meta_group_info[j] == NULL) {
printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
- i--;
goto err_freebuddy;
}
desc = ext4_get_group_desc(sb, i, NULL);
if (desc == NULL) {
printk(KERN_ERR
"EXT4-fs: can't read descriptor %lu\n", i);
+ i++;
goto err_freebuddy;
}
memset(meta_group_info[j], 0, len);
@@ -2611,13 +2318,11 @@ static int ext4_mb_init_backend(struct super_block *sb)
return 0;
err_freebuddy:
- while (i >= 0) {
+ while (i-- > 0)
kfree(ext4_get_group_info(sb, i));
- i--;
- }
i = num_meta_group_infos;
err_freemeta:
- while (--i >= 0)
+ while (i-- > 0)
kfree(sbi->s_group_info[i]);
iput(sbi->s_buddy_cache);
err_freesgi:
@@ -2801,7 +2506,8 @@ int ext4_mb_release(struct super_block *sb)
return 0;
}
-static void ext4_mb_free_committed_blocks(struct super_block *sb)
+static noinline_for_stack void
+ext4_mb_free_committed_blocks(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
int err;
@@ -3021,7 +2727,8 @@ void exit_ext4_mballoc(void)
* Check quota and mark choosed space (ac->ac_b_ex) non-free in bitmaps
* Returns 0 if success or error code
*/
-static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
+static noinline_for_stack int
+ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
handle_t *handle)
{
struct buffer_head *bitmap_bh = NULL;
@@ -3070,7 +2777,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
in_range(block, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group)) {
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"Allocating block in system zone - block = %llu",
block);
}
@@ -3094,9 +2801,7 @@ static int ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ac->ac_b_ex.fe_group,
gdp));
}
- gdp->bg_free_blocks_count =
- cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
- - ac->ac_b_ex.fe_len);
+ le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
@@ -3130,7 +2835,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
else
ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
- mb_debug("#%u: goal %lu blocks for locality group\n",
+ mb_debug("#%u: goal %u blocks for locality group\n",
current->pid, ac->ac_g_ex.fe_len);
}
@@ -3138,15 +2843,16 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
* Normalization means making request better in terms of
* size and alignment
*/
-static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
+static noinline_for_stack void
+ext4_mb_normalize_request(struct ext4_allocation_context *ac,
struct ext4_allocation_request *ar)
{
int bsbits, max;
ext4_lblk_t end;
- struct list_head *cur;
loff_t size, orig_size, start_off;
ext4_lblk_t start, orig_start;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
+ struct ext4_prealloc_space *pa;
/* do normalize only data requests, metadata requests
do not need preallocation */
@@ -3232,12 +2938,9 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* check we don't cross already preallocated blocks */
rcu_read_lock();
- list_for_each_rcu(cur, &ei->i_prealloc_list) {
- struct ext4_prealloc_space *pa;
+ list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
unsigned long pa_end;
- pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
-
if (pa->pa_deleted)
continue;
spin_lock(&pa->pa_lock);
@@ -3279,10 +2982,8 @@ static void ext4_mb_normalize_request(struct ext4_allocation_context *ac,
/* XXX: extra loop to check we really don't overlap preallocations */
rcu_read_lock();
- list_for_each_rcu(cur, &ei->i_prealloc_list) {
- struct ext4_prealloc_space *pa;
+ list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
unsigned long pa_end;
- pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
spin_lock(&pa->pa_lock);
if (pa->pa_deleted == 0) {
pa_end = pa->pa_lstart + pa->pa_len;
@@ -3374,7 +3075,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
BUG_ON(pa->pa_free < len);
pa->pa_free -= len;
- mb_debug("use %llu/%lu from inode pa %p\n", start, len, pa);
+ mb_debug("use %llu/%u from inode pa %p\n", start, len, pa);
}
/*
@@ -3404,12 +3105,12 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
/*
* search goal blocks in preallocated space
*/
-static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
{
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
struct ext4_prealloc_space *pa;
- struct list_head *cur;
/* only data can be preallocated */
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
@@ -3417,8 +3118,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
/* first, try per-file preallocation */
rcu_read_lock();
- list_for_each_rcu(cur, &ei->i_prealloc_list) {
- pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
+ list_for_each_entry_rcu(pa, &ei->i_prealloc_list, pa_inode_list) {
/* all fields in this condition don't change,
* so we can skip locking for them */
@@ -3450,8 +3150,7 @@ static int ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
return 0;
rcu_read_lock();
- list_for_each_rcu(cur, &lg->lg_prealloc_list) {
- pa = list_entry(cur, struct ext4_prealloc_space, pa_inode_list);
+ list_for_each_entry_rcu(pa, &lg->lg_prealloc_list, pa_inode_list) {
spin_lock(&pa->pa_lock);
if (pa->pa_deleted == 0 && pa->pa_free >= ac->ac_o_ex.fe_len) {
atomic_inc(&pa->pa_count);
@@ -3571,7 +3270,8 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
/*
* creates new preallocated space for given inode
*/
-static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;
struct ext4_prealloc_space *pa;
@@ -3658,7 +3358,8 @@ static int ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
/*
* creates new preallocated space for locality group inodes belongs to
*/
-static int ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
+static noinline_for_stack int
+ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
{
struct super_block *sb = ac->ac_sb;
struct ext4_locality_group *lg;
@@ -3731,11 +3432,11 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
* the caller MUST hold group/inode locks.
* TODO: optimize the case when there are no in-core structures yet
*/
-static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
- struct buffer_head *bitmap_bh,
- struct ext4_prealloc_space *pa)
+static noinline_for_stack int
+ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
+ struct ext4_prealloc_space *pa,
+ struct ext4_allocation_context *ac)
{
- struct ext4_allocation_context *ac;
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned long end;
@@ -3751,8 +3452,6 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
end = bit + pa->pa_len;
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-
if (ac) {
ac->ac_sb = sb;
ac->ac_inode = pa->pa_inode;
@@ -3789,7 +3488,7 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
pa, (unsigned long) pa->pa_lstart,
(unsigned long) pa->pa_pstart,
(unsigned long) pa->pa_len);
- ext4_error(sb, __FUNCTION__, "free %u, pa_free %u\n",
+ ext4_error(sb, __func__, "free %u, pa_free %u\n",
free, pa->pa_free);
/*
* pa is already deleted so we use the value obtained
@@ -3797,22 +3496,19 @@ static int ext4_mb_release_inode_pa(struct ext4_buddy *e4b,
*/
}
atomic_add(free, &sbi->s_mb_discarded);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
return err;
}
-static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
- struct ext4_prealloc_space *pa)
+static noinline_for_stack int
+ext4_mb_release_group_pa(struct ext4_buddy *e4b,
+ struct ext4_prealloc_space *pa,
+ struct ext4_allocation_context *ac)
{
- struct ext4_allocation_context *ac;
struct super_block *sb = e4b->bd_sb;
ext4_group_t group;
ext4_grpblk_t bit;
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-
if (ac)
ac->ac_op = EXT4_MB_HISTORY_DISCARD;
@@ -3830,7 +3526,6 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
ac->ac_b_ex.fe_len = pa->pa_len;
ac->ac_b_ex.fe_logical = 0;
ext4_mb_store_history(ac);
- kmem_cache_free(ext4_ac_cachep, ac);
}
return 0;
@@ -3845,12 +3540,14 @@ static int ext4_mb_release_group_pa(struct ext4_buddy *e4b,
* - how many do we discard
* 1) how many requested
*/
-static int ext4_mb_discard_group_preallocations(struct super_block *sb,
+static noinline_for_stack int
+ext4_mb_discard_group_preallocations(struct super_block *sb,
ext4_group_t group, int needed)
{
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
+ struct ext4_allocation_context *ac;
struct list_head list;
struct ext4_buddy e4b;
int err;
@@ -3878,6 +3575,7 @@ static int ext4_mb_discard_group_preallocations(struct super_block *sb,
grp = ext4_get_group_info(sb, group);
INIT_LIST_HEAD(&list);
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
repeat:
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
@@ -3932,9 +3630,9 @@ repeat:
spin_unlock(pa->pa_obj_lock);
if (pa->pa_linear)
- ext4_mb_release_group_pa(&e4b, pa);
+ ext4_mb_release_group_pa(&e4b, pa, ac);
else
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3942,6 +3640,8 @@ repeat:
out:
ext4_unlock_group(sb, group);
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
ext4_mb_release_desc(&e4b);
put_bh(bitmap_bh);
return free;
@@ -3962,6 +3662,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
struct super_block *sb = inode->i_sb;
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
+ struct ext4_allocation_context *ac;
ext4_group_t group = 0;
struct list_head list;
struct ext4_buddy e4b;
@@ -3976,6 +3677,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode)
INIT_LIST_HEAD(&list);
+ ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
@@ -4040,7 +3742,7 @@ repeat:
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
ext4_unlock_group(sb, group);
ext4_mb_release_desc(&e4b);
@@ -4049,6 +3751,8 @@ repeat:
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
+ if (ac)
+ kmem_cache_free(ext4_ac_cachep, ac);
}
/*
@@ -4108,7 +3812,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
printk(KERN_ERR "PA:%lu:%d:%u \n", i,
start, pa->pa_len);
}
- ext4_lock_group(sb, i);
+ ext4_unlock_group(sb, i);
if (grp->bb_free == 0)
continue;
@@ -4167,7 +3871,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
mutex_lock(&ac->ac_lg->lg_mutex);
}
-static int ext4_mb_initialize_context(struct ext4_allocation_context *ac,
+static noinline_for_stack int
+ext4_mb_initialize_context(struct ext4_allocation_context *ac,
struct ext4_allocation_request *ar)
{
struct super_block *sb = ar->inode->i_sb;
@@ -4398,7 +4103,8 @@ static void ext4_mb_poll_new_transaction(struct super_block *sb,
ext4_mb_free_committed_blocks(sb);
}
-static int ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
+static noinline_for_stack int
+ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
ext4_group_t group, ext4_grpblk_t block, int count)
{
struct ext4_group_info *db = e4b->bd_info;
@@ -4489,7 +4195,7 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
if (block < le32_to_cpu(es->s_first_data_block) ||
block + count < block ||
block + count > ext4_blocks_count(es)) {
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"Freeing blocks not in datazone - "
"block = %lu, count = %lu", block, count);
goto error_return;
@@ -4530,7 +4236,7 @@ do_more:
in_range(block + count - 1, ext4_inode_table(sb, gdp),
EXT4_SB(sb)->s_itb_per_group)) {
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"Freeing blocks in system zone - "
"Block = %lu, count = %lu", block, count);
}
@@ -4588,8 +4294,7 @@ do_more:
}
spin_lock(sb_bgl_lock(sbi, block_group));
- gdp->bg_free_blocks_count =
- cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
+ le16_add_cpu(&gdp->bg_free_blocks_count, count);
gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
spin_unlock(sb_bgl_lock(sbi, block_group));
percpu_counter_add(&sbi->s_freeblocks_counter, count);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
new file mode 100644
index 00000000000..bfe6add46bc
--- /dev/null
+++ b/fs/ext4/mballoc.h
@@ -0,0 +1,304 @@
+/*
+ * fs/ext4/mballoc.h
+ *
+ * Written by: Alex Tomas <alex@clusterfs.com>
+ *
+ */
+#ifndef _EXT4_MBALLOC_H
+#define _EXT4_MBALLOC_H
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/quotaops.h>
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/proc_fs.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/version.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
+#include "group.h"
+
+/*
+ * with AGGRESSIVE_CHECK allocator runs consistency checks over
+ * structures. these checks slow things down a lot
+ */
+#define AGGRESSIVE_CHECK__
+
+/*
+ * with DOUBLE_CHECK defined mballoc creates persistent in-core
+ * bitmaps, maintains and uses them to check for double allocations
+ */
+#define DOUBLE_CHECK__
+
+/* with MB_DEBUG defined mb_debug() passes its arguments to printk,
+ * otherwise it expands to nothing */
+#define MB_DEBUG__
+#ifdef MB_DEBUG
+#define mb_debug(fmt, a...) printk(fmt, ##a)
+#else
+#define mb_debug(fmt, a...)
+#endif
+
+/*
+ * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
+ * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
+ */
+#define EXT4_MB_HISTORY
+#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
+#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
+#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
+#define EXT4_MB_HISTORY_FREE 8 /* free */
+
+#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
+ EXT4_MB_HISTORY_PREALLOC)
+
+/*
+ * How long mballoc can look for a best extent (in found extents)
+ */
+#define MB_DEFAULT_MAX_TO_SCAN 200
+
+/*
+ * How long mballoc must look for a best extent
+ */
+#define MB_DEFAULT_MIN_TO_SCAN 10
+
+/*
+ * How many groups mballoc will scan looking for the best chunk
+ */
+#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
+
+/*
+ * with 'ext4_mb_stats' allocator will collect stats that will be
+ * shown at umount. The collecting costs though!
+ */
+#define MB_DEFAULT_STATS 1
+
+/*
+ * files smaller than MB_DEFAULT_STREAM_THRESHOLD are served
+ * by the stream allocator, whose purpose is to pack requests
+ * as close to each other as possible to produce smooth I/O traffic.
+ * We use locality group prealloc space for stream requests.
+ * We can tune the same via /proc/fs/ext4/<partition>/stream_req
+ */
+#define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */
+
+/*
+ * for which requests use 2^N search using buddies
+ */
+#define MB_DEFAULT_ORDER2_REQS 2
+
+/*
+ * default group prealloc size 512 blocks
+ */
+#define MB_DEFAULT_GROUP_PREALLOC 512
+
+static struct kmem_cache *ext4_pspace_cachep;
+static struct kmem_cache *ext4_ac_cachep;
+
+#ifdef EXT4_BB_MAX_BLOCKS
+#undef EXT4_BB_MAX_BLOCKS
+#endif
+#define EXT4_BB_MAX_BLOCKS 30
+
+struct ext4_free_metadata {
+ ext4_group_t group;
+ unsigned short num;
+ ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
+ struct list_head list;
+};
+
+struct ext4_group_info {
+ unsigned long bb_state;
+ unsigned long bb_tid;
+ struct ext4_free_metadata *bb_md_cur;
+ unsigned short bb_first_free;
+ unsigned short bb_free;
+ unsigned short bb_fragments;
+ struct list_head bb_prealloc_list;
+#ifdef DOUBLE_CHECK
+ void *bb_bitmap;
+#endif
+ unsigned short bb_counters[];
+};
+
+#define EXT4_GROUP_INFO_NEED_INIT_BIT 0
+#define EXT4_GROUP_INFO_LOCKED_BIT 1
+
+#define EXT4_MB_GRP_NEED_INIT(grp) \
+ (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
+
+
+struct ext4_prealloc_space {
+ struct list_head pa_inode_list;
+ struct list_head pa_group_list;
+ union {
+ struct list_head pa_tmp_list;
+ struct rcu_head pa_rcu;
+ } u;
+ spinlock_t pa_lock;
+ atomic_t pa_count;
+ unsigned pa_deleted;
+ ext4_fsblk_t pa_pstart; /* phys. block */
+ ext4_lblk_t pa_lstart; /* log. block */
+ unsigned short pa_len; /* len of preallocated chunk */
+ unsigned short pa_free; /* how many blocks are free */
+ unsigned short pa_linear; /* consumed in one direction
+ * strictly, for grp prealloc */
+ spinlock_t *pa_obj_lock;
+ struct inode *pa_inode; /* hack, for history only */
+};
+
+
+struct ext4_free_extent {
+ ext4_lblk_t fe_logical;
+ ext4_grpblk_t fe_start;
+ ext4_group_t fe_group;
+ int fe_len;
+};
+
+/*
+ * Locality group:
+ * we try to group all related changes together
+ * so that writeback can flush/allocate them together as well
+ */
+struct ext4_locality_group {
+ /* for allocator */
+ struct mutex lg_mutex; /* to serialize allocates */
+ struct list_head lg_prealloc_list;/* list of preallocations */
+ spinlock_t lg_prealloc_lock;
+};
+
+struct ext4_allocation_context {
+ struct inode *ac_inode;
+ struct super_block *ac_sb;
+
+ /* original request */
+ struct ext4_free_extent ac_o_ex;
+
+ /* goal request (after normalization) */
+ struct ext4_free_extent ac_g_ex;
+
+ /* the best found extent */
+ struct ext4_free_extent ac_b_ex;
+
+ /* copy of the best found extent taken before preallocation efforts */
+ struct ext4_free_extent ac_f_ex;
+
+ /* number of iterations done. we have to track to limit searching */
+ unsigned long ac_ex_scanned;
+ __u16 ac_groups_scanned;
+ __u16 ac_found;
+ __u16 ac_tail;
+ __u16 ac_buddy;
+ __u16 ac_flags; /* allocation hints */
+ __u8 ac_status;
+ __u8 ac_criteria;
+ __u8 ac_repeats;
+ __u8 ac_2order; /* if request is to allocate 2^N blocks and
+ * N > 0, the field stores N, otherwise 0 */
+ __u8 ac_op; /* operation, for history only */
+ struct page *ac_bitmap_page;
+ struct page *ac_buddy_page;
+ struct ext4_prealloc_space *ac_pa;
+ struct ext4_locality_group *ac_lg;
+};
+
+#define AC_STATUS_CONTINUE 1
+#define AC_STATUS_FOUND 2
+#define AC_STATUS_BREAK 3
+
+struct ext4_mb_history {
+ struct ext4_free_extent orig; /* orig allocation */
+ struct ext4_free_extent goal; /* goal allocation */
+ struct ext4_free_extent result; /* result allocation */
+ unsigned pid;
+ unsigned ino;
+ __u16 found; /* how many extents have been found */
+ __u16 groups; /* how many groups have been scanned */
+ __u16 tail; /* what tail broke some buddy */
+ __u16 buddy; /* buddy the tail ^^^ broke */
+ __u16 flags;
+ __u8 cr:3; /* which phase the result extent was found at */
+ __u8 op:4;
+ __u8 merged:1;
+};
+
+struct ext4_buddy {
+ struct page *bd_buddy_page;
+ void *bd_buddy;
+ struct page *bd_bitmap_page;
+ void *bd_bitmap;
+ struct ext4_group_info *bd_info;
+ struct super_block *bd_sb;
+ __u16 bd_blkbits;
+ ext4_group_t bd_group;
+};
+#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
+#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
+
+#ifndef EXT4_MB_HISTORY
+static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
+{
+ return;
+}
+#else
+static void ext4_mb_store_history(struct ext4_allocation_context *ac);
+#endif
+
+#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+
+static struct proc_dir_entry *proc_root_ext4;
+struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
+
+static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+ ext4_group_t group);
+static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
+static void ext4_mb_free_committed_blocks(struct super_block *);
+static void ext4_mb_return_to_preallocation(struct inode *inode,
+ struct ext4_buddy *e4b, sector_t block,
+ int count);
+static void ext4_mb_put_pa(struct ext4_allocation_context *,
+ struct super_block *, struct ext4_prealloc_space *pa);
+static int ext4_mb_init_per_dev_proc(struct super_block *sb);
+static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+
+
+static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
+{
+ struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+ bit_spin_lock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+}
+
+static inline void ext4_unlock_group(struct super_block *sb,
+ ext4_group_t group)
+{
+ struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+ bit_spin_unlock(EXT4_GROUP_INFO_LOCKED_BIT, &(grinfo->bb_state));
+}
+
+static inline int ext4_is_group_locked(struct super_block *sb,
+ ext4_group_t group)
+{
+ struct ext4_group_info *grinfo = ext4_get_group_info(sb, group);
+
+ return bit_spin_is_locked(EXT4_GROUP_INFO_LOCKED_BIT,
+ &(grinfo->bb_state));
+}
+
+static ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
+ struct ext4_free_extent *fex)
+{
+ ext4_fsblk_t block;
+
+ block = (ext4_fsblk_t) fex->fe_group * EXT4_BLOCKS_PER_GROUP(sb)
+ + fex->fe_start
+ + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+ return block;
+}
+#endif
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 5c1e27de775..b9e077ba07e 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -13,8 +13,8 @@
*/
#include <linux/module.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs_extents.h>
+#include "ext4_jbd2.h"
+#include "ext4_extents.h"
/*
* The contiguous blocks details which can be
@@ -327,7 +327,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
}
static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
- struct inode *tmp_inode)
+ struct inode *tmp_inode)
{
int retval;
__le32 i_data[3];
@@ -339,7 +339,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
* i_data field of the original inode
*/
retval = ext4_journal_extend(handle, 1);
- if (retval != 0) {
+ if (retval) {
retval = ext4_journal_restart(handle, 1);
if (retval)
goto err_out;
@@ -351,6 +351,18 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
down_write(&EXT4_I(inode)->i_data_sem);
/*
+ * if EXT4_EXT_MIGRATE is cleared, a block allocation
+ * happened after we started the migrate. We need to
+ * fail the migrate.
+ */
+ if (!(EXT4_I(inode)->i_flags & EXT4_EXT_MIGRATE)) {
+ retval = -EAGAIN;
+ up_write(&EXT4_I(inode)->i_data_sem);
+ goto err_out;
+ } else
+ EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags &
+ ~EXT4_EXT_MIGRATE;
+ /*
* We have the extent map build with the tmp inode.
* Now copy the i_data across
*/
@@ -508,6 +520,17 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp,
* switch the inode format to prevent read.
*/
mutex_lock(&(inode->i_mutex));
+ /*
+ * Even though we take i_mutex we can still cause block allocation
+ * via mmap write to holes. If we have allocated new blocks we fail
+ * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag.
+ * The flag is updated with i_data_sem held to prevent racing with
+ * block allocation.
+ */
+ down_read((&EXT4_I(inode)->i_data_sem));
+ EXT4_I(inode)->i_flags = EXT4_I(inode)->i_flags | EXT4_EXT_MIGRATE;
+ up_read((&EXT4_I(inode)->i_data_sem));
+
handle = ext4_journal_start(inode, 1);
ei = EXT4_I(inode);
@@ -559,9 +582,15 @@ err_out:
* tmp_inode
*/
free_ext_block(handle, tmp_inode);
- else
- retval = ext4_ext_swap_inode_data(handle, inode,
- tmp_inode);
+ else {
+ retval = ext4_ext_swap_inode_data(handle, inode, tmp_inode);
+ if (retval)
+ /*
+ * if we fail to swap inode data free the extent
+ * details of the tmp inode
+ */
+ free_ext_block(handle, tmp_inode);
+ }
/* We mark the tmp_inode dirty via ext4_ext_tree_init. */
if (ext4_journal_extend(handle, 1) != 0)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 28aa2ed4297..ab16beaa830 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -28,14 +28,14 @@
#include <linux/pagemap.h>
#include <linux/jbd2.h>
#include <linux/time.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
+#include "ext4.h"
+#include "ext4_jbd2.h"
#include "namei.h"
#include "xattr.h"
@@ -57,10 +57,15 @@ static struct buffer_head *ext4_append(handle_t *handle,
*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
- if ((bh = ext4_bread(handle, inode, *block, 1, err))) {
+ bh = ext4_bread(handle, inode, *block, 1, err);
+ if (bh) {
inode->i_size += inode->i_sb->s_blocksize;
EXT4_I(inode)->i_disksize = inode->i_size;
- ext4_journal_get_write_access(handle,bh);
+ *err = ext4_journal_get_write_access(handle, bh);
+ if (*err) {
+ brelse(bh);
+ bh = NULL;
+ }
}
return bh;
}
@@ -348,7 +353,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
if (root->info.hash_version != DX_HASH_TEA &&
root->info.hash_version != DX_HASH_HALF_MD4 &&
root->info.hash_version != DX_HASH_LEGACY) {
- ext4_warning(dir->i_sb, __FUNCTION__,
+ ext4_warning(dir->i_sb, __func__,
"Unrecognised inode hash code %d",
root->info.hash_version);
brelse(bh);
@@ -362,7 +367,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
hash = hinfo->hash;
if (root->info.unused_flags & 1) {
- ext4_warning(dir->i_sb, __FUNCTION__,
+ ext4_warning(dir->i_sb, __func__,
"Unimplemented inode hash flags: %#06x",
root->info.unused_flags);
brelse(bh);
@@ -371,7 +376,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
}
if ((indirect = root->info.indirect_levels) > 1) {
- ext4_warning(dir->i_sb, __FUNCTION__,
+ ext4_warning(dir->i_sb, __func__,
"Unimplemented inode hash depth: %#06x",
root->info.indirect_levels);
brelse(bh);
@@ -384,7 +389,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
if (dx_get_limit(entries) != dx_root_limit(dir,
root->info.info_length)) {
- ext4_warning(dir->i_sb, __FUNCTION__,
+ ext4_warning(dir->i_sb, __func__,
"dx entry: limit != root limit");
brelse(bh);
*err = ERR_BAD_DX_DIR;
@@ -396,7 +401,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
{
count = dx_get_count(entries);
if (!count || count > dx_get_limit(entries)) {
- ext4_warning(dir->i_sb, __FUNCTION__,
+ ext4_warning(dir->i_sb, __func__,
"dx entry: no count or count > limit");
brelse(bh);
*err = ERR_BAD_DX_DIR;
@@ -441,7 +446,7 @@ dx_probe(struct dentry *dentry, struct inode *dir,
goto fail2;
at = entries = ((struct dx_node *) bh->b_data)->entries;
if (dx_get_limit(entries) != dx_node_limit (dir)) {
- ext4_warning(dir->i_sb, __FUNCTION__,
+ ext4_warning(dir->i_sb, __func__,
"dx entry: limit != node limit");
brelse(bh);
*err = ERR_BAD_DX_DIR;
@@ -457,7 +462,7 @@ fail2:
}
fail:
if (*err == ERR_BAD_DX_DIR)
- ext4_warning(dir->i_sb, __FUNCTION__,
+ ext4_warning(dir->i_sb, __func__,
"Corrupt dir inode %ld, running e2fsck is "
"recommended.", dir->i_ino);
return NULL;
@@ -914,7 +919,7 @@ restart:
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* read error, skip block & hope for the best */
- ext4_error(sb, __FUNCTION__, "reading directory #%lu "
+ ext4_error(sb, __func__, "reading directory #%lu "
"offset %lu", dir->i_ino,
(unsigned long)block);
brelse(bh);
@@ -1007,7 +1012,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry,
retval = ext4_htree_next_block(dir, hash, frame,
frames, NULL);
if (retval < 0) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"error reading index page in directory #%lu",
dir->i_ino);
*err = retval;
@@ -1532,7 +1537,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
if (levels && (dx_get_count(frames->entries) ==
dx_get_limit(frames->entries))) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Directory index full!");
err = -ENOSPC;
goto cleanup;
@@ -1860,11 +1865,11 @@ static int empty_dir (struct inode * inode)
if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
!(bh = ext4_bread (NULL, inode, 0, 0, &err))) {
if (err)
- ext4_error(inode->i_sb, __FUNCTION__,
+ ext4_error(inode->i_sb, __func__,
"error %d reading directory #%lu offset 0",
err, inode->i_ino);
else
- ext4_warning(inode->i_sb, __FUNCTION__,
+ ext4_warning(inode->i_sb, __func__,
"bad directory (dir #%lu) - no data block",
inode->i_ino);
return 1;
@@ -1893,7 +1898,7 @@ static int empty_dir (struct inode * inode)
offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err);
if (!bh) {
if (err)
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"error %d reading directory"
" #%lu offset %lu",
err, inode->i_ino, offset);
@@ -2217,6 +2222,8 @@ retry:
goto out_stop;
}
} else {
+ /* clear the extent format for fast symlink */
+ EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL;
inode->i_op = &ext4_fast_symlink_inode_operations;
memcpy((char*)&EXT4_I(inode)->i_data,symname,l);
inode->i_size = l-1;
@@ -2347,6 +2354,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry,
EXT4_FEATURE_INCOMPAT_FILETYPE))
new_de->file_type = old_de->file_type;
new_dir->i_version++;
+ new_dir->i_ctime = new_dir->i_mtime =
+ ext4_current_time(new_dir);
+ ext4_mark_inode_dirty(handle, new_dir);
BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata");
ext4_journal_dirty_metadata(handle, new_bh);
brelse(new_bh);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index e29efa0f9d6..9f086a6a472 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -11,11 +11,10 @@
#define EXT4FS_DEBUG
-#include <linux/ext4_jbd2.h>
-
#include <linux/errno.h>
#include <linux/slab.h>
+#include "ext4_jbd2.h"
#include "group.h"
#define outside(b, first, last) ((b) < (first) || (b) >= (last))
@@ -50,63 +49,63 @@ static int verify_group_input(struct super_block *sb,
ext4_get_group_no_and_offset(sb, start, NULL, &offset);
if (group != sbi->s_groups_count)
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Cannot add at group %u (only %lu groups)",
input->group, sbi->s_groups_count);
else if (offset != 0)
- ext4_warning(sb, __FUNCTION__, "Last group not full");
+ ext4_warning(sb, __func__, "Last group not full");
else if (input->reserved_blocks > input->blocks_count / 5)
- ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)",
+ ext4_warning(sb, __func__, "Reserved blocks too high (%u)",
input->reserved_blocks);
else if (free_blocks_count < 0)
- ext4_warning(sb, __FUNCTION__, "Bad blocks count %u",
+ ext4_warning(sb, __func__, "Bad blocks count %u",
input->blocks_count);
else if (!(bh = sb_bread(sb, end - 1)))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Cannot read last block (%llu)",
end - 1);
else if (outside(input->block_bitmap, start, end))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Block bitmap not in group (block %llu)",
(unsigned long long)input->block_bitmap);
else if (outside(input->inode_bitmap, start, end))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Inode bitmap not in group (block %llu)",
(unsigned long long)input->inode_bitmap);
else if (outside(input->inode_table, start, end) ||
outside(itend - 1, start, end))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Inode table not in group (blocks %llu-%llu)",
(unsigned long long)input->inode_table, itend - 1);
else if (input->inode_bitmap == input->block_bitmap)
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Block bitmap same as inode bitmap (%llu)",
(unsigned long long)input->block_bitmap);
else if (inside(input->block_bitmap, input->inode_table, itend))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Block bitmap (%llu) in inode table (%llu-%llu)",
(unsigned long long)input->block_bitmap,
(unsigned long long)input->inode_table, itend - 1);
else if (inside(input->inode_bitmap, input->inode_table, itend))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Inode bitmap (%llu) in inode table (%llu-%llu)",
(unsigned long long)input->inode_bitmap,
(unsigned long long)input->inode_table, itend - 1);
else if (inside(input->block_bitmap, start, metaend))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Block bitmap (%llu) in GDT table"
" (%llu-%llu)",
(unsigned long long)input->block_bitmap,
start, metaend - 1);
else if (inside(input->inode_bitmap, start, metaend))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Inode bitmap (%llu) in GDT table"
" (%llu-%llu)",
(unsigned long long)input->inode_bitmap,
start, metaend - 1);
else if (inside(input->inode_table, start, metaend) ||
inside(itend - 1, start, metaend))
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Inode table (%llu-%llu) overlaps"
"GDT table (%llu-%llu)",
(unsigned long long)input->inode_table,
@@ -368,7 +367,7 @@ static int verify_reserved_gdb(struct super_block *sb,
while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) {
if (le32_to_cpu(*p++) !=
grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"reserved GDT %llu"
" missing grp %d (%llu)",
blk, grp,
@@ -424,7 +423,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
*/
if (EXT4_SB(sb)->s_sbh->b_blocknr !=
le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"won't resize using backup superblock at %llu",
(unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr);
return -EPERM;
@@ -448,7 +447,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
data = (__le32 *)dind->b_data;
if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"new group %u GDT block %llu not reserved",
input->group, gdblock);
err = -EINVAL;
@@ -469,10 +468,10 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
goto exit_dindj;
n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *),
- GFP_KERNEL);
+ GFP_NOFS);
if (!n_group_desc) {
err = -ENOMEM;
- ext4_warning (sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"not enough memory for %lu groups", gdb_num + 1);
goto exit_inode;
}
@@ -502,8 +501,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
EXT4_SB(sb)->s_gdb_count++;
kfree(o_group_desc);
- es->s_reserved_gdt_blocks =
- cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1);
+ le16_add_cpu(&es->s_reserved_gdt_blocks, -1);
ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh);
return 0;
@@ -553,7 +551,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
int res, i;
int err;
- primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL);
+ primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_NOFS);
if (!primary)
return -ENOMEM;
@@ -571,7 +569,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
/* Get each reserved primary GDT block and verify it holds backups */
for (res = 0; res < reserved_gdb; res++, blk++) {
if (le32_to_cpu(*data) != blk) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"reserved block %llu"
" not at offset %ld",
blk,
@@ -715,7 +713,7 @@ static void update_backups(struct super_block *sb,
*/
exit_err:
if (err) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"can't update backup for group %lu (err %d), "
"forcing fsck on next reboot", group, err);
sbi->s_mount_state &= ~EXT4_VALID_FS;
@@ -755,33 +753,33 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Can't resize non-sparse filesystem further");
return -EPERM;
}
if (ext4_blocks_count(es) + input->blocks_count <
ext4_blocks_count(es)) {
- ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n");
+ ext4_warning(sb, __func__, "blocks_count overflow\n");
return -EINVAL;
}
if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) <
le32_to_cpu(es->s_inodes_count)) {
- ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n");
+ ext4_warning(sb, __func__, "inodes_count overflow\n");
return -EINVAL;
}
if (reserved_gdb || gdb_off == 0) {
if (!EXT4_HAS_COMPAT_FEATURE(sb,
EXT4_FEATURE_COMPAT_RESIZE_INODE)){
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"No reserved GDT blocks, can't resize");
return -EPERM;
}
inode = ext4_iget(sb, EXT4_RESIZE_INO);
if (IS_ERR(inode)) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"Error opening resize inode");
return PTR_ERR(inode);
}
@@ -810,7 +808,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
lock_super(sb);
if (input->group != sbi->s_groups_count) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"multiple resizers run on filesystem!");
err = -EBUSY;
goto exit_journal;
@@ -877,8 +875,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
*/
ext4_blocks_count_set(es, ext4_blocks_count(es) +
input->blocks_count);
- es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) +
- EXT4_INODES_PER_GROUP(sb));
+ le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
/*
* We need to protect s_groups_count against other CPUs seeing
@@ -977,13 +974,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
" too large to resize to %llu blocks safely\n",
sb->s_id, n_blocks_count);
if (sizeof(sector_t) < 8)
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"CONFIG_LBD not enabled\n");
return -EINVAL;
}
if (n_blocks_count < o_blocks_count) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"can't shrink FS - resize aborted");
return -EBUSY;
}
@@ -992,7 +989,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last);
if (last == 0) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"need to use ext2online to resize further");
return -EPERM;
}
@@ -1000,7 +997,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
add = EXT4_BLOCKS_PER_GROUP(sb) - last;
if (o_blocks_count + add < o_blocks_count) {
- ext4_warning(sb, __FUNCTION__, "blocks_count overflow");
+ ext4_warning(sb, __func__, "blocks_count overflow");
return -EINVAL;
}
@@ -1008,7 +1005,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
add = n_blocks_count - o_blocks_count;
if (o_blocks_count + add < n_blocks_count)
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"will only finish group (%llu"
" blocks, %u new)",
o_blocks_count + add, add);
@@ -1016,7 +1013,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
/* See if the device is actually as big as what was requested */
bh = sb_bread(sb, o_blocks_count + add -1);
if (!bh) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"can't read last block, resize aborted");
return -ENOSPC;
}
@@ -1028,13 +1025,13 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
handle = ext4_journal_start_sb(sb, 3);
if (IS_ERR(handle)) {
err = PTR_ERR(handle);
- ext4_warning(sb, __FUNCTION__, "error %d on journal start",err);
+ ext4_warning(sb, __func__, "error %d on journal start", err);
goto exit_put;
}
lock_super(sb);
if (o_blocks_count != ext4_blocks_count(es)) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"multiple resizers run on filesystem!");
unlock_super(sb);
ext4_journal_stop(handle);
@@ -1044,7 +1041,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
if ((err = ext4_journal_get_write_access(handle,
EXT4_SB(sb)->s_sbh))) {
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"error %d on journal write access", err);
unlock_super(sb);
ext4_journal_stop(handle);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c81a8e759ba..52dd0679a4e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -21,8 +21,6 @@
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
-#include <linux/ext4_jbd2.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
@@ -38,9 +36,10 @@
#include <linux/seq_file.h>
#include <linux/log2.h>
#include <linux/crc16.h>
-
#include <asm/uaccess.h>
+#include "ext4.h"
+#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "namei.h"
@@ -135,7 +134,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
* take the FS itself readonly cleanly. */
journal = EXT4_SB(sb)->s_journal;
if (is_journal_aborted(journal)) {
- ext4_abort(sb, __FUNCTION__,
+ ext4_abort(sb, __func__,
"Detected aborted journal");
return ERR_PTR(-EROFS);
}
@@ -355,7 +354,7 @@ void ext4_update_dynamic_rev(struct super_block *sb)
if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
return;
- ext4_warning(sb, __FUNCTION__,
+ ext4_warning(sb, __func__,
"updating to rev %d because of new feature flag, "
"running e2fsck is recommended",
EXT4_DYNAMIC_REV);
@@ -945,8 +944,8 @@ static match_table_t tokens = {
{Opt_mballoc, "mballoc"},
{Opt_nomballoc, "nomballoc"},
{Opt_stripe, "stripe=%u"},
- {Opt_err, NULL},
{Opt_resize, "resize"},
+ {Opt_err, NULL},
};
static ext4_fsblk_t get_sb_block(void **data)
@@ -1388,11 +1387,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
* a plain journaled filesystem we can keep it set as
* valid forever! :)
*/
- es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS);
+ es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
#endif
if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
- es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1);
+ le16_add_cpu(&es->s_mnt_count, 1);
es->s_mtime = cpu_to_le32(get_seconds());
ext4_update_dynamic_rev(sb);
EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
@@ -1485,36 +1484,33 @@ static int ext4_check_descriptors(struct super_block *sb)
block_bitmap = ext4_block_bitmap(sb, gdp);
if (block_bitmap < first_block || block_bitmap > last_block)
{
- ext4_error (sb, "ext4_check_descriptors",
- "Block bitmap for group %lu"
- " not in group (block %llu)!",
- i, block_bitmap);
+ printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+ "Block bitmap for group %lu not in group "
+ "(block %llu)!", i, block_bitmap);
return 0;
}
inode_bitmap = ext4_inode_bitmap(sb, gdp);
if (inode_bitmap < first_block || inode_bitmap > last_block)
{
- ext4_error (sb, "ext4_check_descriptors",
- "Inode bitmap for group %lu"
- " not in group (block %llu)!",
- i, inode_bitmap);
+ printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+ "Inode bitmap for group %lu not in group "
+ "(block %llu)!", i, inode_bitmap);
return 0;
}
inode_table = ext4_inode_table(sb, gdp);
if (inode_table < first_block ||
inode_table + sbi->s_itb_per_group - 1 > last_block)
{
- ext4_error (sb, "ext4_check_descriptors",
- "Inode table for group %lu"
- " not in group (block %llu)!",
- i, inode_table);
+ printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+ "Inode table for group %lu not in group "
+ "(block %llu)!", i, inode_table);
return 0;
}
if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
- ext4_error(sb, __FUNCTION__,
- "Checksum for group %lu failed (%u!=%u)\n",
- i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
- gdp)), le16_to_cpu(gdp->bg_checksum));
+ printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
+ "Checksum for group %lu failed (%u!=%u)\n",
+ i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
+ gdp)), le16_to_cpu(gdp->bg_checksum));
return 0;
}
if (!flexbg_flag)
@@ -1594,8 +1590,8 @@ static void ext4_orphan_cleanup (struct super_block * sb,
while (es->s_last_orphan) {
struct inode *inode;
- if (!(inode =
- ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) {
+ inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
+ if (IS_ERR(inode)) {
es->s_last_orphan = 0;
break;
}
@@ -1605,7 +1601,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
if (inode->i_nlink) {
printk(KERN_DEBUG
"%s: truncating inode %lu to %Ld bytes\n",
- __FUNCTION__, inode->i_ino, inode->i_size);
+ __func__, inode->i_ino, inode->i_size);
jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
inode->i_ino, inode->i_size);
ext4_truncate(inode);
@@ -1613,7 +1609,7 @@ static void ext4_orphan_cleanup (struct super_block * sb,
} else {
printk(KERN_DEBUG
"%s: deleting unreferenced inode %lu\n",
- __FUNCTION__, inode->i_ino);
+ __func__, inode->i_ino);
jbd_debug(2, "deleting unreferenced inode %lu\n",
inode->i_ino);
nr_orphans++;
@@ -2699,9 +2695,9 @@ static void ext4_clear_journal_err(struct super_block * sb,
char nbuf[16];
errstr = ext4_decode_error(sb, j_errno, nbuf);
- ext4_warning(sb, __FUNCTION__, "Filesystem error recorded "
+ ext4_warning(sb, __func__, "Filesystem error recorded "
"from previous mount: %s", errstr);
- ext4_warning(sb, __FUNCTION__, "Marking fs in need of "
+ ext4_warning(sb, __func__, "Marking fs in need of "
"filesystem check.");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
@@ -2828,7 +2824,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data)
}
if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
- ext4_abort(sb, __FUNCTION__, "Abort forced by user");
+ ext4_abort(sb, __func__, "Abort forced by user");
sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
@@ -3040,8 +3036,14 @@ static int ext4_dquot_drop(struct inode *inode)
/* We may delete quota structure so we need to reserve enough blocks */
handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
- if (IS_ERR(handle))
+ if (IS_ERR(handle)) {
+ /*
+ * We call dquot_drop() anyway to at least release references
+ * to quota structures so that umount does not hang.
+ */
+ dquot_drop(inode);
return PTR_ERR(handle);
+ }
ret = dquot_drop(inode);
err = ext4_journal_stop(handle);
if (!ret)
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index e6f9da4287c..e9178643dc0 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -19,8 +19,8 @@
#include <linux/fs.h>
#include <linux/jbd2.h>
-#include <linux/ext4_fs.h>
#include <linux/namei.h>
+#include "ext4.h"
#include "xattr.h"
static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e9054c1c7d9..3fbc2c6c3d0 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -53,11 +53,11 @@
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
#include <linux/mbcache.h>
#include <linux/quotaops.h>
#include <linux/rwsem.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
#include "xattr.h"
#include "acl.h"
@@ -92,6 +92,8 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
struct mb_cache_entry **);
static void ext4_xattr_rehash(struct ext4_xattr_header *,
struct ext4_xattr_entry *);
+static int ext4_xattr_list(struct inode *inode, char *buffer,
+ size_t buffer_size);
static struct mb_cache *ext4_xattr_cache;
@@ -225,7 +227,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext4_xattr_check_block(bh)) {
-bad_block: ext4_error(inode->i_sb, __FUNCTION__,
+bad_block: ext4_error(inode->i_sb, __func__,
"inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl);
error = -EIO;
@@ -367,7 +369,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
ea_bdebug(bh, "b_count=%d, refcount=%d",
atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
if (ext4_xattr_check_block(bh)) {
- ext4_error(inode->i_sb, __FUNCTION__,
+ ext4_error(inode->i_sb, __func__,
"inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl);
error = -EIO;
@@ -420,7 +422,7 @@ cleanup:
* Returns a negative error number on failure, or the number of bytes
* used / required on success.
*/
-int
+static int
ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
{
int i_error, b_error;
@@ -484,8 +486,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
get_bh(bh);
ext4_forget(handle, 1, inode, bh, bh->b_blocknr);
} else {
- BHDR(bh)->h_refcount = cpu_to_le32(
- le32_to_cpu(BHDR(bh)->h_refcount) - 1);
+ le32_add_cpu(&BHDR(bh)->h_refcount, -1);
error = ext4_journal_dirty_metadata(handle, bh);
if (IS_SYNC(inode))
handle->h_sync = 1;
@@ -660,7 +661,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
atomic_read(&(bs->bh->b_count)),
le32_to_cpu(BHDR(bs->bh)->h_refcount));
if (ext4_xattr_check_block(bs->bh)) {
- ext4_error(sb, __FUNCTION__,
+ ext4_error(sb, __func__,
"inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl);
error = -EIO;
@@ -738,7 +739,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
ce = NULL;
}
ea_bdebug(bs->bh, "cloning");
- s->base = kmalloc(bs->bh->b_size, GFP_KERNEL);
+ s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
error = -ENOMEM;
if (s->base == NULL)
goto cleanup;
@@ -750,7 +751,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
}
} else {
/* Allocate a buffer where we construct the new block. */
- s->base = kzalloc(sb->s_blocksize, GFP_KERNEL);
+ s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
/* assert(header == s->base) */
error = -ENOMEM;
if (s->base == NULL)
@@ -789,8 +790,7 @@ inserted:
if (error)
goto cleanup_dquot;
lock_buffer(new_bh);
- BHDR(new_bh)->h_refcount = cpu_to_le32(1 +
- le32_to_cpu(BHDR(new_bh)->h_refcount));
+ le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
ea_bdebug(new_bh, "reusing; refcount now=%d",
le32_to_cpu(BHDR(new_bh)->h_refcount));
unlock_buffer(new_bh);
@@ -808,10 +808,8 @@ inserted:
get_bh(new_bh);
} else {
/* We need to allocate a new block */
- ext4_fsblk_t goal = le32_to_cpu(
- EXT4_SB(sb)->s_es->s_first_data_block) +
- (ext4_fsblk_t)EXT4_I(inode)->i_block_group *
- EXT4_BLOCKS_PER_GROUP(sb);
+ ext4_fsblk_t goal = ext4_group_first_block_no(sb,
+ EXT4_I(inode)->i_block_group);
ext4_fsblk_t block = ext4_new_block(handle, inode,
goal, &error);
if (error)
@@ -863,7 +861,7 @@ cleanup_dquot:
goto cleanup;
bad_block:
- ext4_error(inode->i_sb, __FUNCTION__,
+ ext4_error(inode->i_sb, __func__,
"inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl);
goto cleanup;
@@ -1166,7 +1164,7 @@ retry:
if (!bh)
goto cleanup;
if (ext4_xattr_check_block(bh)) {
- ext4_error(inode->i_sb, __FUNCTION__,
+ ext4_error(inode->i_sb, __func__,
"inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl);
error = -EIO;
@@ -1341,14 +1339,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
goto cleanup;
bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
if (!bh) {
- ext4_error(inode->i_sb, __FUNCTION__,
+ ext4_error(inode->i_sb, __func__,
"inode %lu: block %llu read error", inode->i_ino,
EXT4_I(inode)->i_file_acl);
goto cleanup;
}
if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
BHDR(bh)->h_blocks != cpu_to_le32(1)) {
- ext4_error(inode->i_sb, __FUNCTION__,
+ ext4_error(inode->i_sb, __func__,
"inode %lu: bad block %llu", inode->i_ino,
EXT4_I(inode)->i_file_acl);
goto cleanup;
@@ -1475,7 +1473,7 @@ again:
}
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
- ext4_error(inode->i_sb, __FUNCTION__,
+ ext4_error(inode->i_sb, __func__,
"inode %lu: block %lu read error",
inode->i_ino, (unsigned long) ce->e_block);
} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index d7f5d6a1265..5992fe979bb 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -74,7 +74,6 @@ extern struct xattr_handler ext4_xattr_security_handler;
extern ssize_t ext4_listxattr(struct dentry *, char *, size_t);
extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t);
-extern int ext4_xattr_list(struct inode *, char *, size_t);
extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
@@ -99,12 +98,6 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
}
static inline int
-ext4_xattr_list(struct inode *inode, void *buffer, size_t size)
-{
- return -EOPNOTSUPP;
-}
-
-static inline int
ext4_xattr_set(struct inode *inode, int name_index, const char *name,
const void *value, size_t size, int flags)
{
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index f17eaf2321b..ca5f89fc6ca 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -6,9 +6,9 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
#include <linux/security.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
#include "xattr.h"
static size_t
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index e0f05acdafe..fff33382cad 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -9,8 +9,8 @@
#include <linux/string.h>
#include <linux/capability.h>
#include <linux/fs.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
#include "xattr.h"
#define XATTR_TRUSTED_PREFIX "trusted."
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index 7ed3d8ebf09..67be723fcc4 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -8,8 +8,8 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
-#include <linux/ext4_jbd2.h>
-#include <linux/ext4_fs.h>
+#include "ext4_jbd2.h"
+#include "ext4.h"
#include "xattr.h"
#define XATTR_USER_PREFIX "user."
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 639b3b4f86d..fda25479af2 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -242,7 +242,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
/* prevent the infinite loop of cluster chain */
if (*fclus > limit) {
fat_fs_panic(sb, "%s: detected the cluster chain loop"
- " (i_pos %lld)", __FUNCTION__,
+ " (i_pos %lld)", __func__,
MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
@@ -253,7 +253,7 @@ int fat_get_cluster(struct inode *inode, int cluster, int *fclus, int *dclus)
goto out;
else if (nr == FAT_ENT_FREE) {
fat_fs_panic(sb, "%s: invalid cluster chain"
- " (i_pos %lld)", __FUNCTION__,
+ " (i_pos %lld)", __func__,
MSDOS_I(inode)->i_pos);
nr = -EIO;
goto out;
@@ -286,7 +286,7 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
return ret;
else if (ret == FAT_ENT_EOF) {
fat_fs_panic(sb, "%s: request beyond EOF (i_pos %lld)",
- __FUNCTION__, MSDOS_I(inode)->i_pos);
+ __func__, MSDOS_I(inode)->i_pos);
return -EIO;
}
return dclus;
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index 13ab763cc51..302e95c4af7 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -546,7 +546,7 @@ int fat_free_clusters(struct inode *inode, int cluster)
goto error;
} else if (cluster == FAT_ENT_FREE) {
fat_fs_panic(sb, "%s: deleting FAT entry beyond EOF",
- __FUNCTION__);
+ __func__);
err = -EIO;
goto error;
}
diff --git a/fs/fat/file.c b/fs/fat/file.c
index d604bb13242..27cc1164ec3 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -208,7 +208,7 @@ static int fat_free(struct inode *inode, int skip)
} else if (ret == FAT_ENT_FREE) {
fat_fs_panic(sb,
"%s: invalid cluster chain (i_pos %lld)",
- __FUNCTION__, MSDOS_I(inode)->i_pos);
+ __func__, MSDOS_I(inode)->i_pos);
ret = -EIO;
} else if (ret > 0) {
err = fat_ent_write(inode, &fatent, FAT_ENT_EOF, wait);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3f3ac630ccd..bfd776509a7 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/capability.h>
#include <linux/dnotify.h>
#include <linux/smp_lock.h>
diff --git a/fs/file.c b/fs/file.c
index 5110acb1c9e..4c6f0ea12c4 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/bitops.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
@@ -149,8 +150,16 @@ static struct fdtable * alloc_fdtable(unsigned int nr)
nr /= (1024 / sizeof(struct file *));
nr = roundup_pow_of_two(nr + 1);
nr *= (1024 / sizeof(struct file *));
- if (nr > sysctl_nr_open)
- nr = sysctl_nr_open;
+ /*
+ * Note that this can drive nr *below* what we had passed if sysctl_nr_open
+ * had been set lower between the check in expand_files() and here. Deal
+ * with that in caller, it's cheaper that way.
+ *
+ * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
+ * bitmaps handling below becomes unpleasant, to put it mildly...
+ */
+ if (unlikely(nr > sysctl_nr_open))
+ nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
if (!fdt)
@@ -199,6 +208,16 @@ static int expand_fdtable(struct files_struct *files, int nr)
if (!new_fdt)
return -ENOMEM;
/*
+ * extremely unlikely race - sysctl_nr_open decreased between the check in
+ * caller and alloc_fdtable(). Cheaper to catch it here...
+ */
+ if (unlikely(new_fdt->max_fds <= nr)) {
+ free_fdarr(new_fdt);
+ free_fdset(new_fdt);
+ kfree(new_fdt);
+ return -EMFILE;
+ }
+ /*
* Check again since another task may have expanded the fd table while
* we dropped the lock
*/
diff --git a/fs/file_table.c b/fs/file_table.c
index 7a0a9b87225..83084225b4c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -8,6 +8,7 @@
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 105d4a271e0..4f3cab32141 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -117,7 +117,7 @@ int fuse_ctl_add_conn(struct fuse_conn *fc)
parent = fuse_control_sb->s_root;
inc_nlink(parent->d_inode);
- sprintf(name, "%llu", (unsigned long long) fc->id);
+ sprintf(name, "%u", fc->dev);
parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2,
&simple_dir_inode_operations,
&simple_dir_operations);
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index af639807524..87250b6a868 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -47,6 +47,14 @@ struct fuse_req *fuse_request_alloc(void)
return req;
}
+struct fuse_req *fuse_request_alloc_nofs(void)
+{
+ struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
+ if (req)
+ fuse_request_init(req);
+ return req;
+}
+
void fuse_request_free(struct fuse_req *req)
{
kmem_cache_free(fuse_req_cachep, req);
@@ -291,6 +299,7 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
static void wait_answer_interruptible(struct fuse_conn *fc,
struct fuse_req *req)
+ __releases(fc->lock) __acquires(fc->lock)
{
if (signal_pending(current))
return;
@@ -307,8 +316,8 @@ static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}
-/* Called with fc->lock held. Releases, and then reacquires it. */
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
+ __releases(fc->lock) __acquires(fc->lock)
{
if (!fc->no_interrupt) {
/* Any signal may interrupt this */
@@ -430,6 +439,17 @@ void request_send_background(struct fuse_conn *fc, struct fuse_req *req)
}
/*
+ * Called under fc->lock
+ *
+ * fc->connected must have been checked previously
+ */
+void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req)
+{
+ req->isreply = 1;
+ request_send_nowait_locked(fc, req);
+}
+
+/*
* Lock the request. Up to the next unlock_request() there mustn't be
* anything that could cause a page-fault. If the request was already
* aborted bail out.
@@ -968,6 +988,7 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
* locked).
*/
static void end_io_requests(struct fuse_conn *fc)
+ __releases(fc->lock) __acquires(fc->lock)
{
while (!list_empty(&fc->io)) {
struct fuse_req *req =
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index c4807b3fc8a..2060bf06b90 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -132,7 +132,7 @@ static void fuse_lookup_init(struct fuse_req *req, struct inode *dir,
req->out.args[0].value = outarg;
}
-static u64 fuse_get_attr_version(struct fuse_conn *fc)
+u64 fuse_get_attr_version(struct fuse_conn *fc)
{
u64 curr_version;
@@ -1107,6 +1107,50 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg)
}
/*
+ * Prevent concurrent writepages on inode
+ *
+ * This is done by adding a negative bias to the inode write counter
+ * and waiting for all pending writes to finish.
+ */
+void fuse_set_nowrite(struct inode *inode)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ BUG_ON(!mutex_is_locked(&inode->i_mutex));
+
+ spin_lock(&fc->lock);
+ BUG_ON(fi->writectr < 0);
+ fi->writectr += FUSE_NOWRITE;
+ spin_unlock(&fc->lock);
+ wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
+}
+
+/*
+ * Allow writepages on inode
+ *
+ * Remove the bias from the writecounter and send any queued
+ * writepages.
+ */
+static void __fuse_release_nowrite(struct inode *inode)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ BUG_ON(fi->writectr != FUSE_NOWRITE);
+ fi->writectr = 0;
+ fuse_flush_writepages(inode);
+}
+
+void fuse_release_nowrite(struct inode *inode)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+
+ spin_lock(&fc->lock);
+ __fuse_release_nowrite(inode);
+ spin_unlock(&fc->lock);
+}
+
+/*
* Set attributes, and at the same time refresh them.
*
* Truncation is slightly complicated, because the 'truncate' request
@@ -1122,6 +1166,8 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
struct fuse_req *req;
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
+ bool is_truncate = false;
+ loff_t oldsize;
int err;
if (!fuse_allow_task(fc, current))
@@ -1145,12 +1191,16 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
send_sig(SIGXFSZ, current, 0);
return -EFBIG;
}
+ is_truncate = true;
}
req = fuse_get_req(fc);
if (IS_ERR(req))
return PTR_ERR(req);
+ if (is_truncate)
+ fuse_set_nowrite(inode);
+
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
iattr_to_fattr(attr, &inarg);
@@ -1181,16 +1231,44 @@ static int fuse_do_setattr(struct dentry *entry, struct iattr *attr,
if (err) {
if (err == -EINTR)
fuse_invalidate_attr(inode);
- return err;
+ goto error;
}
if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
- return -EIO;
+ err = -EIO;
+ goto error;
+ }
+
+ spin_lock(&fc->lock);
+ fuse_change_attributes_common(inode, &outarg.attr,
+ attr_timeout(&outarg));
+ oldsize = inode->i_size;
+ i_size_write(inode, outarg.attr.size);
+
+ if (is_truncate) {
+ /* NOTE: this may release/reacquire fc->lock */
+ __fuse_release_nowrite(inode);
+ }
+ spin_unlock(&fc->lock);
+
+ /*
+ * Only call invalidate_inode_pages2() after removing
+ * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
+ */
+ if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
+ if (outarg.attr.size < oldsize)
+ fuse_truncate(inode->i_mapping, outarg.attr.size);
+ invalidate_inode_pages2(inode->i_mapping);
}
- fuse_change_attributes(inode, &outarg.attr, attr_timeout(&outarg), 0);
return 0;
+
+error:
+ if (is_truncate)
+ fuse_release_nowrite(inode);
+
+ return err;
}
static int fuse_setattr(struct dentry *entry, struct iattr *attr)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 676b0bc8a86..f28cf8b46f8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -210,6 +210,49 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
return (u64) v0 + ((u64) v1 << 32);
}
+/*
+ * Check if page is under writeback
+ *
+ * This is currently done by walking the list of writepage requests
+ * for the inode, which can be pretty inefficient.
+ */
+static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_req *req;
+ bool found = false;
+
+ spin_lock(&fc->lock);
+ list_for_each_entry(req, &fi->writepages, writepages_entry) {
+ pgoff_t curr_index;
+
+ BUG_ON(req->inode != inode);
+ curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ if (curr_index == index) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&fc->lock);
+
+ return found;
+}
+
+/*
+ * Wait for page writeback to be completed.
+ *
+ * Since fuse doesn't rely on the VM writeback tracking, this has to
+ * use some other means.
+ */
+static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index)
+{
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ wait_event(fi->page_waitq, !fuse_page_is_writeback(inode, index));
+ return 0;
+}
+
static int fuse_flush(struct file *file, fl_owner_t id)
{
struct inode *inode = file->f_path.dentry->d_inode;
@@ -245,6 +288,21 @@ static int fuse_flush(struct file *file, fl_owner_t id)
return err;
}
+/*
+ * Wait for all pending writepages on the inode to finish.
+ *
+ * This is currently done by blocking further writes with FUSE_NOWRITE
+ * and waiting for all sent writes to complete.
+ *
+ * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage
+ * could conflict with truncation.
+ */
+static void fuse_sync_writes(struct inode *inode)
+{
+ fuse_set_nowrite(inode);
+ fuse_release_nowrite(inode);
+}
+
int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
int isdir)
{
@@ -261,6 +319,17 @@ int fuse_fsync_common(struct file *file, struct dentry *de, int datasync,
if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
return 0;
+ /*
+ * Start writeback against all dirty pages of the inode, then
+ * wait for all outstanding writes, before sending the FSYNC
+ * request.
+ */
+ err = write_inode_now(inode, 0);
+ if (err)
+ return err;
+
+ fuse_sync_writes(inode);
+
req = fuse_get_req(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -294,7 +363,7 @@ static int fuse_fsync(struct file *file, struct dentry *de, int datasync)
void fuse_read_fill(struct fuse_req *req, struct file *file,
struct inode *inode, loff_t pos, size_t count, int opcode)
{
- struct fuse_read_in *inarg = &req->misc.read_in;
+ struct fuse_read_in *inarg = &req->misc.read.in;
struct fuse_file *ff = file->private_data;
inarg->fh = ff->fh;
@@ -320,7 +389,7 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
if (owner != NULL) {
- struct fuse_read_in *inarg = &req->misc.read_in;
+ struct fuse_read_in *inarg = &req->misc.read.in;
inarg->read_flags |= FUSE_READ_LOCKOWNER;
inarg->lock_owner = fuse_lock_owner_id(fc, owner);
@@ -329,31 +398,66 @@ static size_t fuse_send_read(struct fuse_req *req, struct file *file,
return req->out.args[0].size;
}
+static void fuse_read_update_size(struct inode *inode, loff_t size,
+ u64 attr_ver)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ if (attr_ver == fi->attr_version && size < inode->i_size) {
+ fi->attr_version = ++fc->attr_version;
+ i_size_write(inode, size);
+ }
+ spin_unlock(&fc->lock);
+}
+
static int fuse_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
+ size_t num_read;
+ loff_t pos = page_offset(page);
+ size_t count = PAGE_CACHE_SIZE;
+ u64 attr_ver;
int err;
err = -EIO;
if (is_bad_inode(inode))
goto out;
+ /*
+ * Page writeback can extend beyond the lifetime of the
+ * page-cache page, so make sure we read a properly synced
+ * page.
+ */
+ fuse_wait_on_page_writeback(inode, page->index);
+
req = fuse_get_req(fc);
err = PTR_ERR(req);
if (IS_ERR(req))
goto out;
+ attr_ver = fuse_get_attr_version(fc);
+
req->out.page_zeroing = 1;
req->num_pages = 1;
req->pages[0] = page;
- fuse_send_read(req, file, inode, page_offset(page), PAGE_CACHE_SIZE,
- NULL);
+ num_read = fuse_send_read(req, file, inode, pos, count, NULL);
err = req->out.h.error;
fuse_put_request(fc, req);
- if (!err)
+
+ if (!err) {
+ /*
+ * Short read means EOF. If file size is larger, truncate it
+ */
+ if (num_read < count)
+ fuse_read_update_size(inode, pos + num_read, attr_ver);
+
SetPageUptodate(page);
+ }
+
fuse_invalidate_attr(inode); /* atime changed */
out:
unlock_page(page);
@@ -363,8 +467,19 @@ static int fuse_readpage(struct file *file, struct page *page)
static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
{
int i;
+ size_t count = req->misc.read.in.size;
+ size_t num_read = req->out.args[0].size;
+ struct inode *inode = req->pages[0]->mapping->host;
+
+ /*
+ * Short read means EOF. If file size is larger, truncate it
+ */
+ if (!req->out.h.error && num_read < count) {
+ loff_t pos = page_offset(req->pages[0]) + num_read;
+ fuse_read_update_size(inode, pos, req->misc.read.attr_ver);
+ }
- fuse_invalidate_attr(req->pages[0]->mapping->host); /* atime changed */
+ fuse_invalidate_attr(inode); /* atime changed */
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
@@ -387,6 +502,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file,
size_t count = req->num_pages << PAGE_CACHE_SHIFT;
req->out.page_zeroing = 1;
fuse_read_fill(req, file, inode, pos, count, FUSE_READ);
+ req->misc.read.attr_ver = fuse_get_attr_version(fc);
if (fc->async_read) {
struct fuse_file *ff = file->private_data;
req->ff = fuse_file_get(ff);
@@ -411,6 +527,8 @@ static int fuse_readpages_fill(void *_data, struct page *page)
struct inode *inode = data->inode;
struct fuse_conn *fc = get_fuse_conn(inode);
+ fuse_wait_on_page_writeback(inode, page->index);
+
if (req->num_pages &&
(req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
(req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
@@ -477,11 +595,10 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
}
static void fuse_write_fill(struct fuse_req *req, struct file *file,
- struct inode *inode, loff_t pos, size_t count,
- int writepage)
+ struct fuse_file *ff, struct inode *inode,
+ loff_t pos, size_t count, int writepage)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_file *ff = file->private_data;
struct fuse_write_in *inarg = &req->misc.write.in;
struct fuse_write_out *outarg = &req->misc.write.out;
@@ -490,7 +607,7 @@ static void fuse_write_fill(struct fuse_req *req, struct file *file,
inarg->offset = pos;
inarg->size = count;
inarg->write_flags = writepage ? FUSE_WRITE_CACHE : 0;
- inarg->flags = file->f_flags;
+ inarg->flags = file ? file->f_flags : 0;
req->in.h.opcode = FUSE_WRITE;
req->in.h.nodeid = get_node_id(inode);
req->in.argpages = 1;
@@ -511,7 +628,7 @@ static size_t fuse_send_write(struct fuse_req *req, struct file *file,
fl_owner_t owner)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- fuse_write_fill(req, file, inode, pos, count, 0);
+ fuse_write_fill(req, file, file->private_data, inode, pos, count, 0);
if (owner != NULL) {
struct fuse_write_in *inarg = &req->misc.write.in;
inarg->write_flags |= FUSE_WRITE_LOCKOWNER;
@@ -533,19 +650,36 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
return 0;
}
+static void fuse_write_update_size(struct inode *inode, loff_t pos)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ spin_lock(&fc->lock);
+ fi->attr_version = ++fc->attr_version;
+ if (pos > inode->i_size)
+ i_size_write(inode, pos);
+ spin_unlock(&fc->lock);
+}
+
static int fuse_buffered_write(struct file *file, struct inode *inode,
loff_t pos, unsigned count, struct page *page)
{
int err;
size_t nres;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_inode *fi = get_fuse_inode(inode);
unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
struct fuse_req *req;
if (is_bad_inode(inode))
return -EIO;
+ /*
+ * Make sure writepages on the same page are not mixed up with
+ * plain writes.
+ */
+ fuse_wait_on_page_writeback(inode, page->index);
+
req = fuse_get_req(fc);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -560,12 +694,7 @@ static int fuse_buffered_write(struct file *file, struct inode *inode,
err = -EIO;
if (!err) {
pos += nres;
- spin_lock(&fc->lock);
- fi->attr_version = ++fc->attr_version;
- if (pos > inode->i_size)
- i_size_write(inode, pos);
- spin_unlock(&fc->lock);
-
+ fuse_write_update_size(inode, pos);
if (count == PAGE_CACHE_SIZE)
SetPageUptodate(page);
}
@@ -588,6 +717,198 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
return res;
}
+static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
+ struct inode *inode, loff_t pos,
+ size_t count)
+{
+ size_t res;
+ unsigned offset;
+ unsigned i;
+
+ for (i = 0; i < req->num_pages; i++)
+ fuse_wait_on_page_writeback(inode, req->pages[i]->index);
+
+ res = fuse_send_write(req, file, inode, pos, count, NULL);
+
+ offset = req->page_offset;
+ count = res;
+ for (i = 0; i < req->num_pages; i++) {
+ struct page *page = req->pages[i];
+
+ if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
+ SetPageUptodate(page);
+
+ if (count > PAGE_CACHE_SIZE - offset)
+ count -= PAGE_CACHE_SIZE - offset;
+ else
+ count = 0;
+ offset = 0;
+
+ unlock_page(page);
+ page_cache_release(page);
+ }
+
+ return res;
+}
+
+static ssize_t fuse_fill_write_pages(struct fuse_req *req,
+ struct address_space *mapping,
+ struct iov_iter *ii, loff_t pos)
+{
+ struct fuse_conn *fc = get_fuse_conn(mapping->host);
+ unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ size_t count = 0;
+ int err;
+
+ req->page_offset = offset;
+
+ do {
+ size_t tmp;
+ struct page *page;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
+ iov_iter_count(ii));
+
+ bytes = min_t(size_t, bytes, fc->max_write - count);
+
+ again:
+ err = -EFAULT;
+ if (iov_iter_fault_in_readable(ii, bytes))
+ break;
+
+ err = -ENOMEM;
+ page = __grab_cache_page(mapping, index);
+ if (!page)
+ break;
+
+ pagefault_disable();
+ tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
+ pagefault_enable();
+ flush_dcache_page(page);
+
+ if (!tmp) {
+ unlock_page(page);
+ page_cache_release(page);
+ bytes = min(bytes, iov_iter_single_seg_count(ii));
+ goto again;
+ }
+
+ err = 0;
+ req->pages[req->num_pages] = page;
+ req->num_pages++;
+
+ iov_iter_advance(ii, tmp);
+ count += tmp;
+ pos += tmp;
+ offset += tmp;
+ if (offset == PAGE_CACHE_SIZE)
+ offset = 0;
+
+ } while (iov_iter_count(ii) && count < fc->max_write &&
+ req->num_pages < FUSE_MAX_PAGES_PER_REQ && offset == 0);
+
+ return count > 0 ? count : err;
+}
+
+static ssize_t fuse_perform_write(struct file *file,
+ struct address_space *mapping,
+ struct iov_iter *ii, loff_t pos)
+{
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ int err = 0;
+ ssize_t res = 0;
+
+ if (is_bad_inode(inode))
+ return -EIO;
+
+ do {
+ struct fuse_req *req;
+ ssize_t count;
+
+ req = fuse_get_req(fc);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ break;
+ }
+
+ count = fuse_fill_write_pages(req, mapping, ii, pos);
+ if (count <= 0) {
+ err = count;
+ } else {
+ size_t num_written;
+
+ num_written = fuse_send_write_pages(req, file, inode,
+ pos, count);
+ err = req->out.h.error;
+ if (!err) {
+ res += num_written;
+ pos += num_written;
+
+ /* break out of the loop on short write */
+ if (num_written != count)
+ err = -EIO;
+ }
+ }
+ fuse_put_request(fc, req);
+ } while (!err && iov_iter_count(ii));
+
+ if (res > 0)
+ fuse_write_update_size(inode, pos);
+
+ fuse_invalidate_attr(inode);
+
+ return res > 0 ? res : err;
+}
+
+static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ size_t count = 0;
+ ssize_t written = 0;
+ struct inode *inode = mapping->host;
+ ssize_t err;
+ struct iov_iter i;
+
+ WARN_ON(iocb->ki_pos != pos);
+
+ err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
+ /* We can write back this queue in page reclaim */
+ current->backing_dev_info = mapping->backing_dev_info;
+
+ err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
+ if (err)
+ goto out;
+
+ if (count == 0)
+ goto out;
+
+ err = remove_suid(file->f_path.dentry);
+ if (err)
+ goto out;
+
+ file_update_time(file);
+
+ iov_iter_init(&i, iov, nr_segs, count, 0);
+ written = fuse_perform_write(file, mapping, &i, pos);
+ if (written >= 0)
+ iocb->ki_pos = pos + written;
+
+out:
+ current->backing_dev_info = NULL;
+ mutex_unlock(&inode->i_mutex);
+
+ return written ? written : err;
+}
+
static void fuse_release_user_pages(struct fuse_req *req, int write)
{
unsigned i;
@@ -613,7 +934,7 @@ static int fuse_get_user_pages(struct fuse_req *req, const char __user *buf,
nbytes = min(nbytes, (unsigned) FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT);
npages = (nbytes + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
- npages = min(max(npages, 1), FUSE_MAX_PAGES_PER_REQ);
+ npages = clamp(npages, 1, FUSE_MAX_PAGES_PER_REQ);
down_read(&current->mm->mmap_sem);
npages = get_user_pages(current, current->mm, user_addr, npages, write,
0, req->pages, NULL);
@@ -645,14 +966,15 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
while (count) {
size_t nres;
- size_t nbytes = min(count, nmax);
- int err = fuse_get_user_pages(req, buf, nbytes, !write);
+ size_t nbytes_limit = min(count, nmax);
+ size_t nbytes;
+ int err = fuse_get_user_pages(req, buf, nbytes_limit, !write);
if (err) {
res = err;
break;
}
nbytes = (req->num_pages << PAGE_SHIFT) - req->page_offset;
- nbytes = min(count, nbytes);
+ nbytes = min(nbytes_limit, nbytes);
if (write)
nres = fuse_send_write(req, file, inode, pos, nbytes,
current->files);
@@ -683,12 +1005,8 @@ static ssize_t fuse_direct_io(struct file *file, const char __user *buf,
}
fuse_put_request(fc, req);
if (res > 0) {
- if (write) {
- spin_lock(&fc->lock);
- if (pos > inode->i_size)
- i_size_write(inode, pos);
- spin_unlock(&fc->lock);
- }
+ if (write)
+ fuse_write_update_size(inode, pos);
*ppos = pos;
}
fuse_invalidate_attr(inode);
@@ -716,21 +1034,225 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf,
return res;
}
-static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
+static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req)
{
- if ((vma->vm_flags & VM_SHARED)) {
- if ((vma->vm_flags & VM_WRITE))
- return -ENODEV;
- else
- vma->vm_flags &= ~VM_MAYWRITE;
+ __free_page(req->pages[0]);
+ fuse_file_put(req->ff);
+ fuse_put_request(fc, req);
+}
+
+static void fuse_writepage_finish(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct inode *inode = req->inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct backing_dev_info *bdi = inode->i_mapping->backing_dev_info;
+
+ list_del(&req->writepages_entry);
+ dec_bdi_stat(bdi, BDI_WRITEBACK);
+ dec_zone_page_state(req->pages[0], NR_WRITEBACK_TEMP);
+ bdi_writeout_inc(bdi);
+ wake_up(&fi->page_waitq);
+}
+
+/* Called under fc->lock, may release and reacquire it */
+static void fuse_send_writepage(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct fuse_inode *fi = get_fuse_inode(req->inode);
+ loff_t size = i_size_read(req->inode);
+ struct fuse_write_in *inarg = &req->misc.write.in;
+
+ if (!fc->connected)
+ goto out_free;
+
+ if (inarg->offset + PAGE_CACHE_SIZE <= size) {
+ inarg->size = PAGE_CACHE_SIZE;
+ } else if (inarg->offset < size) {
+ inarg->size = size & (PAGE_CACHE_SIZE - 1);
+ } else {
+ /* Got truncated off completely */
+ goto out_free;
+ }
+
+ req->in.args[1].size = inarg->size;
+ fi->writectr++;
+ request_send_background_locked(fc, req);
+ return;
+
+ out_free:
+ fuse_writepage_finish(fc, req);
+ spin_unlock(&fc->lock);
+ fuse_writepage_free(fc, req);
+ spin_lock(&fc->lock);
+}
+
+/*
+ * If fi->writectr is non-negative (no truncate or fsync going on) send
+ * all queued writepage requests.
+ *
+ * Called with fc->lock
+ */
+void fuse_flush_writepages(struct inode *inode)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_req *req;
+
+ while (fi->writectr >= 0 && !list_empty(&fi->queued_writes)) {
+ req = list_entry(fi->queued_writes.next, struct fuse_req, list);
+ list_del_init(&req->list);
+ fuse_send_writepage(fc, req);
+ }
+}
+
+static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ struct inode *inode = req->inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
+
+ mapping_set_error(inode->i_mapping, req->out.h.error);
+ spin_lock(&fc->lock);
+ fi->writectr--;
+ fuse_writepage_finish(fc, req);
+ spin_unlock(&fc->lock);
+ fuse_writepage_free(fc, req);
+}
+
+static int fuse_writepage_locked(struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_req *req;
+ struct fuse_file *ff;
+ struct page *tmp_page;
+
+ set_page_writeback(page);
+
+ req = fuse_request_alloc_nofs();
+ if (!req)
+ goto err;
+
+ tmp_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+ if (!tmp_page)
+ goto err_free;
+
+ spin_lock(&fc->lock);
+ BUG_ON(list_empty(&fi->write_files));
+ ff = list_entry(fi->write_files.next, struct fuse_file, write_entry);
+ req->ff = fuse_file_get(ff);
+ spin_unlock(&fc->lock);
+
+ fuse_write_fill(req, NULL, ff, inode, page_offset(page), 0, 1);
+
+ copy_highpage(tmp_page, page);
+ req->num_pages = 1;
+ req->pages[0] = tmp_page;
+ req->page_offset = 0;
+ req->end = fuse_writepage_end;
+ req->inode = inode;
+
+ inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
+ inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
+ end_page_writeback(page);
+
+ spin_lock(&fc->lock);
+ list_add(&req->writepages_entry, &fi->writepages);
+ list_add_tail(&req->list, &fi->queued_writes);
+ fuse_flush_writepages(inode);
+ spin_unlock(&fc->lock);
+
+ return 0;
+
+err_free:
+ fuse_request_free(req);
+err:
+ end_page_writeback(page);
+ return -ENOMEM;
+}
+
+static int fuse_writepage(struct page *page, struct writeback_control *wbc)
+{
+ int err;
+
+ err = fuse_writepage_locked(page);
+ unlock_page(page);
+
+ return err;
+}
+
+static int fuse_launder_page(struct page *page)
+{
+ int err = 0;
+ if (clear_page_dirty_for_io(page)) {
+ struct inode *inode = page->mapping->host;
+ err = fuse_writepage_locked(page);
+ if (!err)
+ fuse_wait_on_page_writeback(inode, page->index);
}
- return generic_file_mmap(file, vma);
+ return err;
}
-static int fuse_set_page_dirty(struct page *page)
+/*
+ * Write back dirty pages now, because there may not be any suitable
+ * open files later
+ */
+static void fuse_vma_close(struct vm_area_struct *vma)
{
- printk("fuse_set_page_dirty: should not happen\n");
- dump_stack();
+ filemap_write_and_wait(vma->vm_file->f_mapping);
+}
+
+/*
+ * Wait for writeback against this page to complete before allowing it
+ * to be marked dirty again, and hence written back again, possibly
+ * before the previous writepage completed.
+ *
+ * Block here, instead of in ->writepage(), so that the userspace fs
+ * can only block processes actually operating on the filesystem.
+ *
+ * Otherwise unprivileged userspace fs would be able to block
+ * unrelated:
+ *
+ * - page migration
+ * - sync(2)
+ * - try_to_free_pages() with order > PAGE_ALLOC_COSTLY_ORDER
+ */
+static int fuse_page_mkwrite(struct vm_area_struct *vma, struct page *page)
+{
+ /*
+ * Don't use page->mapping as it may become NULL from a
+ * concurrent truncate.
+ */
+ struct inode *inode = vma->vm_file->f_mapping->host;
+
+ fuse_wait_on_page_writeback(inode, page->index);
+ return 0;
+}
+
+static struct vm_operations_struct fuse_file_vm_ops = {
+ .close = fuse_vma_close,
+ .fault = filemap_fault,
+ .page_mkwrite = fuse_page_mkwrite,
+};
+
+static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) {
+ struct inode *inode = file->f_dentry->d_inode;
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ struct fuse_file *ff = file->private_data;
+ /*
+ * file may be written through mmap, so chain it onto the
+ * inode's write_files list
+ */
+ spin_lock(&fc->lock);
+ if (list_empty(&ff->write_entry))
+ list_add(&ff->write_entry, &fi->write_files);
+ spin_unlock(&fc->lock);
+ }
+ file_accessed(file);
+ vma->vm_ops = &fuse_file_vm_ops;
return 0;
}
@@ -909,12 +1431,37 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
return err ? 0 : outarg.block;
}
+static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ loff_t retval;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ mutex_lock(&inode->i_mutex);
+ switch (origin) {
+ case SEEK_END:
+ offset += i_size_read(inode);
+ break;
+ case SEEK_CUR:
+ offset += file->f_pos;
+ }
+ retval = -EINVAL;
+ if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_version = 0;
+ }
+ retval = offset;
+ }
+ mutex_unlock(&inode->i_mutex);
+ return retval;
+}
+
static const struct file_operations fuse_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = fuse_file_llseek,
.read = do_sync_read,
.aio_read = fuse_file_aio_read,
.write = do_sync_write,
- .aio_write = generic_file_aio_write,
+ .aio_write = fuse_file_aio_write,
.mmap = fuse_file_mmap,
.open = fuse_open,
.flush = fuse_flush,
@@ -926,7 +1473,7 @@ static const struct file_operations fuse_file_operations = {
};
static const struct file_operations fuse_direct_io_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = fuse_file_llseek,
.read = fuse_direct_read,
.write = fuse_direct_write,
.open = fuse_open,
@@ -940,10 +1487,12 @@ static const struct file_operations fuse_direct_io_file_operations = {
static const struct address_space_operations fuse_file_aops = {
.readpage = fuse_readpage,
+ .writepage = fuse_writepage,
+ .launder_page = fuse_launder_page,
.write_begin = fuse_write_begin,
.write_end = fuse_write_end,
.readpages = fuse_readpages,
- .set_page_dirty = fuse_set_page_dirty,
+ .set_page_dirty = __set_page_dirty_nobuffers,
.bmap = fuse_bmap,
};
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 67aaf6ee38e..dadffa21a20 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/backing-dev.h>
#include <linux/mutex.h>
+#include <linux/rwsem.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -25,6 +26,9 @@
/** Congestion starts at 75% of maximum */
#define FUSE_CONGESTION_THRESHOLD (FUSE_MAX_BACKGROUND * 75 / 100)
+/** Bias for fi->writectr, meaning new writepages must not be sent */
+#define FUSE_NOWRITE INT_MIN
+
/** It could be as large as PATH_MAX, but would that have any uses? */
#define FUSE_NAME_MAX 1024
@@ -73,6 +77,19 @@ struct fuse_inode {
/** Files usable in writepage. Protected by fc->lock */
struct list_head write_files;
+
+ /** Writepages pending on truncate or fsync */
+ struct list_head queued_writes;
+
+ /** Number of sent writes, a negative bias (FUSE_NOWRITE)
+ * means more writes are blocked */
+ int writectr;
+
+ /** Waitq for writepage completion */
+ wait_queue_head_t page_waitq;
+
+ /** List of writepage requests (pending or sent) */
+ struct list_head writepages;
};
/** FUSE specific file data */
@@ -222,7 +239,10 @@ struct fuse_req {
} release;
struct fuse_init_in init_in;
struct fuse_init_out init_out;
- struct fuse_read_in read_in;
+ struct {
+ struct fuse_read_in in;
+ u64 attr_ver;
+ } read;
struct {
struct fuse_write_in in;
struct fuse_write_out out;
@@ -242,6 +262,12 @@ struct fuse_req {
/** File used in the request (or NULL) */
struct fuse_file *ff;
+ /** Inode used in the request or NULL */
+ struct inode *inode;
+
+ /** Link on fi->writepages */
+ struct list_head writepages_entry;
+
/** Request completion callback */
void (*end)(struct fuse_conn *, struct fuse_req *);
@@ -390,8 +416,8 @@ struct fuse_conn {
/** Entry on the fuse_conn_list */
struct list_head entry;
- /** Unique ID */
- u64 id;
+ /** Device ID from super block */
+ dev_t dev;
/** Dentries in the control filesystem */
struct dentry *ctl_dentry[FUSE_CTL_NUM_DENTRIES];
@@ -438,7 +464,7 @@ extern const struct file_operations fuse_dev_operations;
/**
* Get a filled in inode
*/
-struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version);
@@ -446,7 +472,7 @@ struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
* Send FORGET command
*/
void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
- unsigned long nodeid, u64 nlookup);
+ u64 nodeid, u64 nlookup);
/**
* Initialize READ or READDIR request
@@ -504,6 +530,11 @@ void fuse_init_symlink(struct inode *inode);
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version);
+void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid);
+
+void fuse_truncate(struct address_space *mapping, loff_t offset);
+
/**
* Initialize the client device
*/
@@ -522,6 +553,8 @@ void fuse_ctl_cleanup(void);
*/
struct fuse_req *fuse_request_alloc(void);
+struct fuse_req *fuse_request_alloc_nofs(void);
+
/**
* Free a request
*/
@@ -558,6 +591,8 @@ void request_send_noreply(struct fuse_conn *fc, struct fuse_req *req);
*/
void request_send_background(struct fuse_conn *fc, struct fuse_req *req);
+void request_send_background_locked(struct fuse_conn *fc, struct fuse_req *req);
+
/* Abort all requests */
void fuse_abort_conn(struct fuse_conn *fc);
@@ -600,3 +635,10 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id);
int fuse_update_attributes(struct inode *inode, struct kstat *stat,
struct file *file, bool *refreshed);
+
+void fuse_flush_writepages(struct inode *inode);
+
+void fuse_set_nowrite(struct inode *inode);
+void fuse_release_nowrite(struct inode *inode);
+
+u64 fuse_get_attr_version(struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4df34da2284..79b61587383 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -59,7 +59,11 @@ static struct inode *fuse_alloc_inode(struct super_block *sb)
fi->nodeid = 0;
fi->nlookup = 0;
fi->attr_version = 0;
+ fi->writectr = 0;
INIT_LIST_HEAD(&fi->write_files);
+ INIT_LIST_HEAD(&fi->queued_writes);
+ INIT_LIST_HEAD(&fi->writepages);
+ init_waitqueue_head(&fi->page_waitq);
fi->forget_req = fuse_request_alloc();
if (!fi->forget_req) {
kmem_cache_free(fuse_inode_cachep, inode);
@@ -73,13 +77,14 @@ static void fuse_destroy_inode(struct inode *inode)
{
struct fuse_inode *fi = get_fuse_inode(inode);
BUG_ON(!list_empty(&fi->write_files));
+ BUG_ON(!list_empty(&fi->queued_writes));
if (fi->forget_req)
fuse_request_free(fi->forget_req);
kmem_cache_free(fuse_inode_cachep, inode);
}
void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
- unsigned long nodeid, u64 nlookup)
+ u64 nodeid, u64 nlookup)
{
struct fuse_forget_in *inarg = &req->misc.forget_in;
inarg->nlookup = nlookup;
@@ -109,7 +114,7 @@ static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
return 0;
}
-static void fuse_truncate(struct address_space *mapping, loff_t offset)
+void fuse_truncate(struct address_space *mapping, loff_t offset)
{
/* See vmtruncate() */
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
@@ -117,19 +122,12 @@ static void fuse_truncate(struct address_space *mapping, loff_t offset)
unmap_mapping_range(mapping, offset + PAGE_SIZE - 1, 0, 1);
}
-
-void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
- u64 attr_valid, u64 attr_version)
+void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- loff_t oldsize;
- spin_lock(&fc->lock);
- if (attr_version != 0 && fi->attr_version > attr_version) {
- spin_unlock(&fc->lock);
- return;
- }
fi->attr_version = ++fc->attr_version;
fi->i_time = attr_valid;
@@ -159,6 +157,22 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
fi->orig_i_mode = inode->i_mode;
if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
inode->i_mode &= ~S_ISVTX;
+}
+
+void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
+ u64 attr_valid, u64 attr_version)
+{
+ struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
+ loff_t oldsize;
+
+ spin_lock(&fc->lock);
+ if (attr_version != 0 && fi->attr_version > attr_version) {
+ spin_unlock(&fc->lock);
+ return;
+ }
+
+ fuse_change_attributes_common(inode, attr, attr_valid);
oldsize = inode->i_size;
i_size_write(inode, attr->size);
@@ -193,7 +207,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
{
- unsigned long nodeid = *(unsigned long *) _nodeidp;
+ u64 nodeid = *(u64 *) _nodeidp;
if (get_node_id(inode) == nodeid)
return 1;
else
@@ -202,12 +216,12 @@ static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
- unsigned long nodeid = *(unsigned long *) _nodeidp;
+ u64 nodeid = *(u64 *) _nodeidp;
get_fuse_inode(inode)->nodeid = nodeid;
return 0;
}
-struct inode *fuse_iget(struct super_block *sb, unsigned long nodeid,
+struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
int generation, struct fuse_attr *attr,
u64 attr_valid, u64 attr_version)
{
@@ -447,7 +461,7 @@ static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
return 0;
}
-static struct fuse_conn *new_conn(void)
+static struct fuse_conn *new_conn(struct super_block *sb)
{
struct fuse_conn *fc;
int err;
@@ -468,19 +482,41 @@ static struct fuse_conn *new_conn(void)
atomic_set(&fc->num_waiting, 0);
fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
fc->bdi.unplug_io_fn = default_unplug_io_fn;
+ /* fuse does its own writeback accounting */
+ fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+ fc->dev = sb->s_dev;
err = bdi_init(&fc->bdi);
- if (err) {
- kfree(fc);
- fc = NULL;
- goto out;
- }
+ if (err)
+ goto error_kfree;
+ err = bdi_register_dev(&fc->bdi, fc->dev);
+ if (err)
+ goto error_bdi_destroy;
+ /*
+ * For a single fuse filesystem use max 1% of dirty +
+ * writeback threshold.
+ *
+ * This gives about 1M of write buffer for memory maps on a
+ * machine with 1G and 10% dirty_ratio, which should be more
+ * than enough.
+ *
+ * Privileged users can raise it by writing to
+ *
+ * /sys/class/bdi/<bdi>/max_ratio
+ */
+ bdi_set_max_ratio(&fc->bdi, 1);
fc->reqctr = 0;
fc->blocked = 1;
fc->attr_version = 1;
get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
}
-out:
return fc;
+
+error_bdi_destroy:
+ bdi_destroy(&fc->bdi);
+error_kfree:
+ mutex_destroy(&fc->inst_mutex);
+ kfree(fc);
+ return NULL;
}
void fuse_conn_put(struct fuse_conn *fc)
@@ -548,6 +584,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
fc->minor = arg->minor;
fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
+ fc->max_write = max_t(unsigned, 4096, fc->max_write); /* enforce a 4096-byte minimum */
fc->conn_init = 1;
}
fuse_put_request(fc, req);
@@ -578,12 +615,6 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
request_send_background(fc, req);
}
-static u64 conn_id(void)
-{
- static u64 ctr = 1;
- return ctr++;
-}
-
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
struct fuse_conn *fc;
@@ -621,14 +652,14 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (file->f_op != &fuse_dev_operations)
return -EINVAL;
- fc = new_conn();
+ fc = new_conn(sb);
if (!fc)
return -ENOMEM;
fc->flags = d.flags;
fc->user_id = d.user_id;
fc->group_id = d.group_id;
- fc->max_read = d.max_read;
+ fc->max_read = max_t(unsigned, 4096, d.max_read); /* enforce a 4096-byte minimum */
/* Used by get_root_inode() */
sb->s_fs_info = fc;
@@ -659,7 +690,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
if (file->private_data)
goto err_unlock;
- fc->id = conn_id();
err = fuse_ctl_add_conn(fc);
if (err)
goto err_unlock;
diff --git a/fs/gfs2/locking/dlm/sysfs.c b/fs/gfs2/locking/dlm/sysfs.c
index 8479da47049..a4ff271df9e 100644
--- a/fs/gfs2/locking/dlm/sysfs.c
+++ b/fs/gfs2/locking/dlm/sysfs.c
@@ -212,7 +212,7 @@ int gdlm_sysfs_init(void)
{
gdlm_kset = kset_create_and_add("lock_dlm", NULL, kernel_kobj);
if (!gdlm_kset) {
- printk(KERN_WARNING "%s: can not create kset\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: can not create kset\n", __func__);
return -ENOMEM;
}
return 0;
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index 509c5d60bd8..7f48576289c 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -41,7 +41,7 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
#define gfs2_assert_withdraw(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
- __FUNCTION__, __FILE__, __LINE__))
+ __func__, __FILE__, __LINE__))
int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
@@ -49,28 +49,28 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
#define gfs2_assert_warn(sdp, assertion) \
((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
- __FUNCTION__, __FILE__, __LINE__))
+ __func__, __FILE__, __LINE__))
int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist(sdp) \
-gfs2_consist_i((sdp), 0, __FUNCTION__, __FILE__, __LINE__)
+gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist_inode(ip) \
-gfs2_consist_inode_i((ip), 0, __FUNCTION__, __FILE__, __LINE__)
+gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
const char *function, char *file, unsigned int line);
#define gfs2_consist_rgrpd(rgd) \
-gfs2_consist_rgrpd_i((rgd), 0, __FUNCTION__, __FILE__, __LINE__)
+gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -91,7 +91,7 @@ static inline int gfs2_meta_check_i(struct gfs2_sbd *sdp,
}
#define gfs2_meta_check(sdp, bh) \
-gfs2_meta_check_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__)
+gfs2_meta_check_i((sdp), (bh), __func__, __FILE__, __LINE__)
int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -118,7 +118,7 @@ static inline int gfs2_metatype_check_i(struct gfs2_sbd *sdp,
}
#define gfs2_metatype_check(sdp, bh, type) \
-gfs2_metatype_check_i((sdp), (bh), (type), __FUNCTION__, __FILE__, __LINE__)
+gfs2_metatype_check_i((sdp), (bh), (type), __func__, __FILE__, __LINE__)
static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
u16 format)
@@ -134,14 +134,14 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
char *file, unsigned int line);
#define gfs2_io_error(sdp) \
-gfs2_io_error_i((sdp), __FUNCTION__, __FILE__, __LINE__);
+gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *function, char *file, unsigned int line);
#define gfs2_io_error_bh(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __FUNCTION__, __FILE__, __LINE__);
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__);
extern struct kmem_cache *gfs2_glock_cachep;
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 24cf6fc4302..f6621a78520 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -208,7 +208,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
struct hfs_bnode *node, *next_node;
struct page **pagep;
u32 nidx, idx;
- u16 off, len;
+ unsigned off;
+ u16 off16;
+ u16 len;
u8 *data, byte, m;
int i;
@@ -235,7 +237,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
node = hfs_bnode_find(tree, nidx);
if (IS_ERR(node))
return node;
- len = hfs_brec_lenoff(node, 2, &off);
+ len = hfs_brec_lenoff(node, 2, &off16);
+ off = off16;
off += node->page_offset;
pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -280,7 +283,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
return next_node;
node = next_node;
- len = hfs_brec_lenoff(node, 0, &off);
+ len = hfs_brec_lenoff(node, 0, &off16);
+ off = off16;
off += node->page_offset;
pagep = node->page + (off >> PAGE_CACHE_SHIFT);
data = kmap(*pagep);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index b4651e128d7..36ca2e1a4fa 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -215,7 +215,7 @@ int hfs_mdb_get(struct super_block *sb)
attrib &= cpu_to_be16(~HFS_SB_ATTRIB_UNMNT);
attrib |= cpu_to_be16(HFS_SB_ATTRIB_INCNSTNT);
mdb->drAtrb = attrib;
- mdb->drWrCnt = cpu_to_be32(be32_to_cpu(mdb->drWrCnt) + 1);
+ be32_add_cpu(&mdb->drWrCnt, 1);
mdb->drLsMod = hfs_mtime();
mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index bb5433608a4..e49fcee1e29 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -184,7 +184,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
struct hfs_bnode *node, *next_node;
struct page **pagep;
u32 nidx, idx;
- u16 off, len;
+ unsigned off;
+ u16 off16;
+ u16 len;
u8 *data, byte, m;
int i;
@@ -211,7 +213,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
node = hfs_bnode_find(tree, nidx);
if (IS_ERR(node))
return node;
- len = hfs_brec_lenoff(node, 2, &off);
+ len = hfs_brec_lenoff(node, 2, &off16);
+ off = off16;
off += node->page_offset;
pagep = node->page + (off >> PAGE_CACHE_SHIFT);
@@ -256,7 +259,8 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
return next_node;
node = next_node;
- len = hfs_brec_lenoff(node, 0, &off);
+ len = hfs_brec_lenoff(node, 0, &off16);
+ off = off16;
off += node->page_offset;
pagep = node->page + (off >> PAGE_CACHE_SHIFT);
data = kmap(*pagep);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 946466cd9f2..ce97a54518d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
*/
vhdr->last_mount_vers = cpu_to_be32(HFSP_MOUNT_VERSION);
vhdr->modify_date = hfsp_now2mt();
- vhdr->write_count = cpu_to_be32(be32_to_cpu(vhdr->write_count) + 1);
+ be32_add_cpu(&vhdr->write_count, 1);
vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT);
vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT);
mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9783723e8ff..aeabf80f81a 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -45,7 +45,7 @@ static const struct inode_operations hugetlbfs_inode_operations;
static struct backing_dev_info hugetlbfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
int sysctl_hugetlb_shm_group;
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c
index 1ba407c64df..2f0dc5a1463 100644
--- a/fs/isofs/dir.c
+++ b/fs/isofs/dir.c
@@ -145,6 +145,14 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp,
}
de = tmpde;
}
+ /* Basic sanity check, whether name doesn't exceed dir entry */
+ if (de_len < de->name_len[0] +
+ sizeof(struct iso_directory_record)) {
+ printk(KERN_NOTICE "iso9660: Corrupted directory entry"
+ " in block %lu of inode %lu\n", block,
+ inode->i_ino);
+ return -EIO;
+ }
if (first_de) {
isofs_normalize_block_and_offset(de,
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c
index 344b247bc29..8299889a835 100644
--- a/fs/isofs/namei.c
+++ b/fs/isofs/namei.c
@@ -111,6 +111,13 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry,
dlen = de->name_len[0];
dpnt = de->name;
+ /* Basic sanity check, whether name doesn't exceed dir entry */
+ if (de_len < dlen + sizeof(struct iso_directory_record)) {
+ printk(KERN_NOTICE "iso9660: Corrupted directory entry"
+ " in block %lu of inode %lu\n", block,
+ dir->i_ino);
+ return 0;
+ }
if (sbi->s_rock &&
((i = get_rock_ridge_filename(de, tmpname, dir)))) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index a8173081f83..e0139786f71 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -520,22 +520,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug (3, "JBD: commit phase 2\n");
/*
- * First, drop modified flag: all accesses to the buffers
- * will be tracked for a new trasaction only -bzzz
- */
- spin_lock(&journal->j_list_lock);
- if (commit_transaction->t_buffers) {
- new_jh = jh = commit_transaction->t_buffers->b_tnext;
- do {
- J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
- new_jh->b_modified == 0);
- new_jh->b_modified = 0;
- new_jh = new_jh->b_tnext;
- } while (new_jh != jh);
- }
- spin_unlock(&journal->j_list_lock);
-
- /*
* Now start flushing things to disk, in the order they appear
* on the transaction lists. Data blocks go first.
*/
@@ -584,6 +568,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
stats.u.run.rs_blocks = commit_transaction->t_outstanding_credits;
stats.u.run.rs_blocks_logged = 0;
+ J_ASSERT(commit_transaction->t_nr_buffers <=
+ commit_transaction->t_outstanding_credits);
+
descriptor = NULL;
bufs = 0;
while (commit_transaction->t_buffers) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index eb7eb6c27bc..53632e3e845 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -534,7 +534,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
if (!tid_geq(journal->j_commit_request, tid)) {
printk(KERN_EMERG
"%s: error: j_commit_request=%d, tid=%d\n",
- __FUNCTION__, journal->j_commit_request, tid);
+ __func__, journal->j_commit_request, tid);
}
spin_unlock(&journal->j_state_lock);
#endif
@@ -599,7 +599,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
printk(KERN_ALERT "%s: journal block not found "
"at offset %lu on %s\n",
- __FUNCTION__,
+ __func__,
blocknr,
bdevname(journal->j_dev, b));
err = -EIO;
@@ -997,13 +997,14 @@ fail:
*/
/**
- * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure
+ * journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
* @bdev: Block device on which to create the journal
* @fs_dev: Device which hold journalled filesystem for this journal.
* @start: Block nr Start of journal.
* @len: Length of the journal in blocks.
* @blocksize: blocksize of journalling device
- * @returns: a newly created journal_t *
+ *
+ * Returns: a newly created journal_t *
*
* jbd2_journal_init_dev creates a journal which maps a fixed contiguous
* range of blocks on an arbitrary block device.
@@ -1027,7 +1028,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
if (!journal->j_wbuf) {
printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
- __FUNCTION__);
+ __func__);
kfree(journal);
journal = NULL;
goto out;
@@ -1083,7 +1084,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
if (!journal->j_wbuf) {
printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
- __FUNCTION__);
+ __func__);
kfree(journal);
return NULL;
}
@@ -1092,7 +1093,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode)
/* If that failed, give up */
if (err) {
printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
- __FUNCTION__);
+ __func__);
kfree(journal);
return NULL;
}
@@ -1178,7 +1179,7 @@ int jbd2_journal_create(journal_t *journal)
*/
printk(KERN_EMERG
"%s: creation of journal on external device!\n",
- __FUNCTION__);
+ __func__);
BUG();
}
@@ -1976,9 +1977,10 @@ static int journal_init_jbd2_journal_head_cache(void)
static void jbd2_journal_destroy_jbd2_journal_head_cache(void)
{
- J_ASSERT(jbd2_journal_head_cache != NULL);
- kmem_cache_destroy(jbd2_journal_head_cache);
- jbd2_journal_head_cache = NULL;
+ if (jbd2_journal_head_cache) {
+ kmem_cache_destroy(jbd2_journal_head_cache);
+ jbd2_journal_head_cache = NULL;
+ }
}
/*
@@ -1997,7 +1999,7 @@ static struct journal_head *journal_alloc_journal_head(void)
jbd_debug(1, "out of memory for journal_head\n");
if (time_after(jiffies, last_warning + 5*HZ)) {
printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
- __FUNCTION__);
+ __func__);
last_warning = jiffies;
}
while (!ret) {
@@ -2134,13 +2136,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
if (jh->b_frozen_data) {
printk(KERN_WARNING "%s: freeing "
"b_frozen_data\n",
- __FUNCTION__);
+ __func__);
jbd2_free(jh->b_frozen_data, bh->b_size);
}
if (jh->b_committed_data) {
printk(KERN_WARNING "%s: freeing "
"b_committed_data\n",
- __FUNCTION__);
+ __func__);
jbd2_free(jh->b_committed_data, bh->b_size);
}
bh->b_private = NULL;
@@ -2305,10 +2307,12 @@ static int __init journal_init(void)
BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024);
ret = journal_init_caches();
- if (ret != 0)
+ if (ret == 0) {
+ jbd2_create_debugfs_entry();
+ jbd2_create_jbd_stats_proc_entry();
+ } else {
jbd2_journal_destroy_caches();
- jbd2_create_debugfs_entry();
- jbd2_create_jbd_stats_proc_entry();
+ }
return ret;
}
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 2e1453a5e99..257ff262576 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -139,7 +139,7 @@ repeat:
oom:
if (!journal_oom_retry)
return -ENOMEM;
- jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__);
+ jbd_debug(1, "ENOMEM in %s, retrying\n", __func__);
yield();
goto repeat;
}
@@ -167,138 +167,121 @@ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal,
return NULL;
}
+void jbd2_journal_destroy_revoke_caches(void)
+{
+ if (jbd2_revoke_record_cache) {
+ kmem_cache_destroy(jbd2_revoke_record_cache);
+ jbd2_revoke_record_cache = NULL;
+ }
+ if (jbd2_revoke_table_cache) {
+ kmem_cache_destroy(jbd2_revoke_table_cache);
+ jbd2_revoke_table_cache = NULL;
+ }
+}
+
int __init jbd2_journal_init_revoke_caches(void)
{
+ J_ASSERT(!jbd2_revoke_record_cache);
+ J_ASSERT(!jbd2_revoke_table_cache);
+
jbd2_revoke_record_cache = kmem_cache_create("jbd2_revoke_record",
sizeof(struct jbd2_revoke_record_s),
0,
SLAB_HWCACHE_ALIGN|SLAB_TEMPORARY,
NULL);
if (!jbd2_revoke_record_cache)
- return -ENOMEM;
+ goto record_cache_failure;
jbd2_revoke_table_cache = kmem_cache_create("jbd2_revoke_table",
sizeof(struct jbd2_revoke_table_s),
0, SLAB_TEMPORARY, NULL);
- if (!jbd2_revoke_table_cache) {
- kmem_cache_destroy(jbd2_revoke_record_cache);
- jbd2_revoke_record_cache = NULL;
- return -ENOMEM;
- }
+ if (!jbd2_revoke_table_cache)
+ goto table_cache_failure;
return 0;
+table_cache_failure:
+ jbd2_journal_destroy_revoke_caches();
+record_cache_failure:
+ return -ENOMEM;
}
-void jbd2_journal_destroy_revoke_caches(void)
+static struct jbd2_revoke_table_s *jbd2_journal_init_revoke_table(int hash_size)
{
- kmem_cache_destroy(jbd2_revoke_record_cache);
- jbd2_revoke_record_cache = NULL;
- kmem_cache_destroy(jbd2_revoke_table_cache);
- jbd2_revoke_table_cache = NULL;
-}
-
-/* Initialise the revoke table for a given journal to a given size. */
-
-int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
-{
- int shift, tmp;
+ int shift = 0;
+ int tmp = hash_size;
+ struct jbd2_revoke_table_s *table;
- J_ASSERT (journal->j_revoke_table[0] == NULL);
+ table = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
+ if (!table)
+ goto out;
- shift = 0;
- tmp = hash_size;
while((tmp >>= 1UL) != 0UL)
shift++;
- journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[0])
- return -ENOMEM;
- journal->j_revoke = journal->j_revoke_table[0];
-
- /* Check that the hash_size is a power of two */
- J_ASSERT(is_power_of_2(hash_size));
-
- journal->j_revoke->hash_size = hash_size;
-
- journal->j_revoke->hash_shift = shift;
-
- journal->j_revoke->hash_table =
+ table->hash_size = hash_size;
+ table->hash_shift = shift;
+ table->hash_table =
kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
- journal->j_revoke = NULL;
- return -ENOMEM;
+ if (!table->hash_table) {
+ kmem_cache_free(jbd2_revoke_table_cache, table);
+ table = NULL;
+ goto out;
}
for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ INIT_LIST_HEAD(&table->hash_table[tmp]);
- journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL);
- if (!journal->j_revoke_table[1]) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
- return -ENOMEM;
+out:
+ return table;
+}
+
+static void jbd2_journal_destroy_revoke_table(struct jbd2_revoke_table_s *table)
+{
+ int i;
+ struct list_head *hash_list;
+
+ for (i = 0; i < table->hash_size; i++) {
+ hash_list = &table->hash_table[i];
+ J_ASSERT(list_empty(hash_list));
}
- journal->j_revoke = journal->j_revoke_table[1];
+ kfree(table->hash_table);
+ kmem_cache_free(jbd2_revoke_table_cache, table);
+}
- /* Check that the hash_size is a power of two */
+/* Initialise the revoke table for a given journal to a given size. */
+int jbd2_journal_init_revoke(journal_t *journal, int hash_size)
+{
+ J_ASSERT(journal->j_revoke_table[0] == NULL);
J_ASSERT(is_power_of_2(hash_size));
- journal->j_revoke->hash_size = hash_size;
-
- journal->j_revoke->hash_shift = shift;
+ journal->j_revoke_table[0] = jbd2_journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[0])
+ goto fail0;
- journal->j_revoke->hash_table =
- kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
- if (!journal->j_revoke->hash_table) {
- kfree(journal->j_revoke_table[0]->hash_table);
- kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]);
- kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]);
- journal->j_revoke = NULL;
- return -ENOMEM;
- }
+ journal->j_revoke_table[1] = jbd2_journal_init_revoke_table(hash_size);
+ if (!journal->j_revoke_table[1])
+ goto fail1;
- for (tmp = 0; tmp < hash_size; tmp++)
- INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+ journal->j_revoke = journal->j_revoke_table[1];
spin_lock_init(&journal->j_revoke_lock);
return 0;
-}
-/* Destoy a journal's revoke table. The table must already be empty! */
+fail1:
+ jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
+fail0:
+ return -ENOMEM;
+}
+/* Destroy a journal's revoke table. The table must already be empty! */
void jbd2_journal_destroy_revoke(journal_t *journal)
{
- struct jbd2_revoke_table_s *table;
- struct list_head *hash_list;
- int i;
-
- table = journal->j_revoke_table[0];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(jbd2_revoke_table_cache, table);
- journal->j_revoke = NULL;
-
- table = journal->j_revoke_table[1];
- if (!table)
- return;
-
- for (i=0; i<table->hash_size; i++) {
- hash_list = &table->hash_table[i];
- J_ASSERT (list_empty(hash_list));
- }
-
- kfree(table->hash_table);
- kmem_cache_free(jbd2_revoke_table_cache, table);
journal->j_revoke = NULL;
+ if (journal->j_revoke_table[0])
+ jbd2_journal_destroy_revoke_table(journal->j_revoke_table[0]);
+ if (journal->j_revoke_table[1])
+ jbd2_journal_destroy_revoke_table(journal->j_revoke_table[1]);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index b9b0b6f899b..d6e006e6780 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -618,6 +618,12 @@ repeat:
goto done;
/*
+ * this is the first time this transaction is touching this buffer,
+ * reset the modified flag
+ */
+ jh->b_modified = 0;
+
+ /*
* If there is already a copy-out version of this buffer, then we don't
* need to make another one
*/
@@ -690,7 +696,7 @@ repeat:
if (!frozen_buffer) {
printk(KERN_EMERG
"%s: OOM for frozen_buffer\n",
- __FUNCTION__);
+ __func__);
JBUFFER_TRACE(jh, "oom!");
error = -ENOMEM;
jbd_lock_bh_state(bh);
@@ -829,9 +835,16 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
if (jh->b_transaction == NULL) {
jh->b_transaction = transaction;
+
+ /* first access by this transaction */
+ jh->b_modified = 0;
+
JBUFFER_TRACE(jh, "file as BJ_Reserved");
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
} else if (jh->b_transaction == journal->j_committing_transaction) {
+ /* first access by this transaction */
+ jh->b_modified = 0;
+
JBUFFER_TRACE(jh, "set next transaction");
jh->b_next_transaction = transaction;
}
@@ -901,7 +914,7 @@ repeat:
committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
if (!committed_data) {
printk(KERN_EMERG "%s: No memory for committed data\n",
- __FUNCTION__);
+ __func__);
err = -ENOMEM;
goto out;
}
@@ -1230,6 +1243,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
int drop_reserve = 0;
int err = 0;
+ int was_modified = 0;
BUFFER_TRACE(bh, "entry");
@@ -1248,6 +1262,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
goto not_jbd;
}
+ /* keep track of whether or not this transaction modified us */
+ was_modified = jh->b_modified;
+
/*
* The buffer's going from the transaction, we must drop
* all references -bzzz
@@ -1265,7 +1282,12 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
- drop_reserve = 1;
+ /*
+ * we only want to drop a reference if this transaction
+ * modified the buffer
+ */
+ if (was_modified)
+ drop_reserve = 1;
/*
* We are no longer going to journal this buffer.
@@ -1305,7 +1327,13 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
if (jh->b_next_transaction) {
J_ASSERT(jh->b_next_transaction == transaction);
jh->b_next_transaction = NULL;
- drop_reserve = 1;
+
+ /*
+ * only drop a reference if this transaction modified
+ * the buffer
+ */
+ if (was_modified)
+ drop_reserve = 1;
}
}
@@ -1434,7 +1462,8 @@ int jbd2_journal_stop(handle_t *handle)
return err;
}
-/**int jbd2_journal_force_commit() - force any uncommitted transactions
+/**
+ * int jbd2_journal_force_commit() - force any uncommitted transactions
* @journal: journal to force
*
* For synchronous operations: force any uncommitted transactions
@@ -2077,7 +2106,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
__jbd2_journal_file_buffer(jh, jh->b_transaction,
- was_dirty ? BJ_Metadata : BJ_Reserved);
+ jh->b_modified ? BJ_Metadata : BJ_Reserved);
J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
if (was_dirty)
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index d58f845ccb8..c5e1450d79f 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -46,7 +46,7 @@ next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
- struct jffs2_inode_cache *ic)
+ struct jffs2_inode_cache *ic)
{
struct jffs2_full_dirent *fd;
@@ -68,11 +68,17 @@ static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
continue;
}
- if (child_ic->nlink++ && fd->type == DT_DIR) {
- JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
- fd->name, fd->ino, ic->ino);
- /* TODO: What do we do about it? */
- }
+ if (fd->type == DT_DIR) {
+ if (child_ic->pino_nlink) {
+ JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u appears to be a hard link\n",
+ fd->name, fd->ino, ic->ino);
+ /* TODO: What do we do about it? */
+ } else {
+ child_ic->pino_nlink = ic->ino;
+ }
+ } else
+ child_ic->pino_nlink++;
+
dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
/* Can't free scan_dents so far. We might need them in pass 2 */
}
@@ -125,7 +131,7 @@ static int jffs2_build_filesystem(struct jffs2_sb_info *c)
dbg_fsbuild("pass 2 starting\n");
for_each_inode(i, c, ic) {
- if (ic->nlink)
+ if (ic->pino_nlink)
continue;
jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
@@ -232,16 +238,19 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
/* Reduce nlink of the child. If it's now zero, stick it on the
dead_fds list to be cleaned up later. Else just free the fd */
- child_ic->nlink--;
+ if (fd->type == DT_DIR)
+ child_ic->pino_nlink = 0;
+ else
+ child_ic->pino_nlink--;
- if (!child_ic->nlink) {
- dbg_fsbuild("inode #%u (\"%s\") has now got zero nlink, adding to dead_fds list.\n",
+ if (!child_ic->pino_nlink) {
+ dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
fd->ino, fd->name);
fd->next = *dead_fds;
*dead_fds = fd;
} else {
dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n",
- fd->ino, fd->name, child_ic->nlink);
+ fd->ino, fd->name, child_ic->pino_nlink);
jffs2_free_full_dirent(fd);
}
}
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 9645275023e..a113ecc3baf 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -82,28 +82,28 @@
do { \
printk(JFFS2_ERR_MSG_PREFIX \
" (%d) %s: " fmt, task_pid_nr(current), \
- __FUNCTION__ , ##__VA_ARGS__); \
+ __func__ , ##__VA_ARGS__); \
} while(0)
#define JFFS2_WARNING(fmt, ...) \
do { \
printk(JFFS2_WARN_MSG_PREFIX \
" (%d) %s: " fmt, task_pid_nr(current), \
- __FUNCTION__ , ##__VA_ARGS__); \
+ __func__ , ##__VA_ARGS__); \
} while(0)
#define JFFS2_NOTICE(fmt, ...) \
do { \
printk(JFFS2_NOTICE_MSG_PREFIX \
" (%d) %s: " fmt, task_pid_nr(current), \
- __FUNCTION__ , ##__VA_ARGS__); \
+ __func__ , ##__VA_ARGS__); \
} while(0)
#define JFFS2_DEBUG(fmt, ...) \
do { \
printk(JFFS2_DBG_MSG_PREFIX \
" (%d) %s: " fmt, task_pid_nr(current), \
- __FUNCTION__ , ##__VA_ARGS__); \
+ __func__ , ##__VA_ARGS__); \
} while(0)
/*
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index c63e7a96af0..c0c141f6fde 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -208,6 +208,13 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
f = JFFS2_INODE_INFO(inode);
dir_f = JFFS2_INODE_INFO(dir_i);
+ /* jffs2_do_create() will want to lock it, _after_ reserving
+ space and taking c->alloc_sem. If we keep it locked here,
+ lockdep gets unhappy (although it's a false positive;
+ nothing else will be looking at this inode yet so there's
+ no chance of AB-BA deadlock involving its f->sem). */
+ mutex_unlock(&f->sem);
+
ret = jffs2_do_create(c, dir_f, f, ri,
dentry->d_name.name, dentry->d_name.len);
if (ret)
@@ -219,7 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, int mode,
d_instantiate(dentry, inode);
D1(printk(KERN_DEBUG "jffs2_create: Created ino #%lu with mode %o, nlink %d(%d). nrpages %ld\n",
- inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->nlink, inode->i_mapping->nrpages));
+ inode->i_ino, inode->i_mode, inode->i_nlink,
+ f->inocache->pino_nlink, inode->i_mapping->nrpages));
return 0;
fail:
@@ -243,7 +251,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
dentry->d_name.len, dead_f, now);
if (dead_f->inocache)
- dentry->d_inode->i_nlink = dead_f->inocache->nlink;
+ dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink;
if (!ret)
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
return ret;
@@ -276,7 +284,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
if (!ret) {
mutex_lock(&f->sem);
- old_dentry->d_inode->i_nlink = ++f->inocache->nlink;
+ old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink;
mutex_unlock(&f->sem);
d_instantiate(dentry, old_dentry->d_inode);
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -493,11 +501,14 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
inode->i_op = &jffs2_dir_inode_operations;
inode->i_fop = &jffs2_dir_operations;
- /* Directories get nlink 2 at start */
- inode->i_nlink = 2;
f = JFFS2_INODE_INFO(inode);
+ /* Directories get nlink 2 at start */
+ inode->i_nlink = 2;
+ /* but ic->pino_nlink is the parent ino# */
+ f->inocache->pino_nlink = dir_i->i_ino;
+
ri->data_crc = cpu_to_je32(0);
ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
@@ -594,17 +605,25 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
{
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(dir_i->i_sb);
+ struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i);
struct jffs2_inode_info *f = JFFS2_INODE_INFO(dentry->d_inode);
struct jffs2_full_dirent *fd;
int ret;
+ uint32_t now = get_seconds();
for (fd = f->dents ; fd; fd = fd->next) {
if (fd->ino)
return -ENOTEMPTY;
}
- ret = jffs2_unlink(dir_i, dentry);
- if (!ret)
+
+ ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
+ dentry->d_name.len, f, now);
+ if (!ret) {
+ dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+ clear_nlink(dentry->d_inode);
drop_nlink(dir_i);
+ }
return ret;
}
@@ -817,7 +836,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
inode which didn't exist. */
if (victim_f->inocache) {
mutex_lock(&victim_f->sem);
- victim_f->inocache->nlink--;
+ if (S_ISDIR(new_dentry->d_inode->i_mode))
+ victim_f->inocache->pino_nlink = 0;
+ else
+ victim_f->inocache->pino_nlink--;
mutex_unlock(&victim_f->sem);
}
}
@@ -838,8 +860,8 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
struct jffs2_inode_info *f = JFFS2_INODE_INFO(old_dentry->d_inode);
mutex_lock(&f->sem);
inc_nlink(old_dentry->d_inode);
- if (f->inocache)
- f->inocache->nlink++;
+ if (f->inocache && !S_ISDIR(old_dentry->d_inode->i_mode))
+ f->inocache->pino_nlink++;
mutex_unlock(&f->sem);
printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index 25a640e566d..dddb2a6c9e2 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -294,7 +294,7 @@ static inline void jffs2_remove_node_refs_from_ino_list(struct jffs2_sb_info *c,
break;
#endif
default:
- if (ic->nodes == (void *)ic && ic->nlink == 0)
+ if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
jffs2_del_ino_cache(c, ic);
}
}
@@ -332,7 +332,8 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
if (c->mtd->point) {
unsigned long *wordebuf;
- ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size, &retlen, (unsigned char **)&ebuf);
+ ret = c->mtd->point(c->mtd, jeb->offset, c->sector_size,
+ &retlen, &ebuf, NULL);
if (ret) {
D1(printk(KERN_DEBUG "MTD point failed %d\n", ret));
goto do_flash_read;
@@ -340,7 +341,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
if (retlen < c->sector_size) {
/* Don't muck about if it won't let us point to the whole erase sector */
D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", retlen));
- c->mtd->unpoint(c->mtd, ebuf, jeb->offset, retlen);
+ c->mtd->unpoint(c->mtd, jeb->offset, retlen);
goto do_flash_read;
}
wordebuf = ebuf-sizeof(*wordebuf);
@@ -349,7 +350,7 @@ static int jffs2_block_check_erase(struct jffs2_sb_info *c, struct jffs2_erasebl
if (*++wordebuf != ~0)
break;
} while(--retlen);
- c->mtd->unpoint(c->mtd, ebuf, jeb->offset, c->sector_size);
+ c->mtd->unpoint(c->mtd, jeb->offset, c->sector_size);
if (retlen) {
printk(KERN_WARNING "Newly-erased block contained word 0x%lx at offset 0x%08tx\n",
*wordebuf, jeb->offset + c->sector_size-retlen*sizeof(*wordebuf));
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3eb1c84b0a3..086c4383022 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -273,7 +273,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
- inode->i_nlink = f->inocache->nlink;
+ inode->i_nlink = f->inocache->pino_nlink;
inode->i_blocks = (inode->i_size + 511) >> 9;
@@ -286,13 +286,12 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
case S_IFDIR:
{
struct jffs2_full_dirent *fd;
+ inode->i_nlink = 2; /* parent and '.' */
for (fd=f->dents; fd; fd = fd->next) {
if (fd->type == DT_DIR && fd->ino)
inc_nlink(inode);
}
- /* and '..' */
- inc_nlink(inode);
/* Root dir gets i_nlink 3 for some reason */
if (inode->i_ino == 1)
inc_nlink(inode);
@@ -586,11 +585,12 @@ void jffs2_gc_release_inode(struct jffs2_sb_info *c,
}
struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
- int inum, int nlink)
+ int inum, int unlinked)
{
struct inode *inode;
struct jffs2_inode_cache *ic;
- if (!nlink) {
+
+ if (unlinked) {
/* The inode has zero nlink but its nodes weren't yet marked
obsolete. This has to be because we're still waiting for
the final (close() and) iput() to happen.
@@ -638,8 +638,8 @@ struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
return ERR_CAST(inode);
}
if (is_bad_inode(inode)) {
- printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. nlink %d\n",
- inum, nlink);
+ printk(KERN_NOTICE "Eep. read_inode() failed for ino #%u. unlinked %d\n",
+ inum, unlinked);
/* NB. This will happen again. We need to do something appropriate here. */
iput(inode);
return ERR_PTR(-EIO);
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index bad005664e3..090c556ffed 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -161,8 +161,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
continue;
}
- if (!ic->nlink) {
- D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
+ if (!ic->pino_nlink) {
+ D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
ic->ino));
spin_unlock(&c->inocache_lock);
jffs2_xattr_delete_inode(c, ic);
@@ -398,10 +398,10 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
it's vaguely possible. */
inum = ic->ino;
- nlink = ic->nlink;
+ nlink = ic->pino_nlink;
spin_unlock(&c->inocache_lock);
- f = jffs2_gc_fetch_inode(c, inum, nlink);
+ f = jffs2_gc_fetch_inode(c, inum, !nlink);
if (IS_ERR(f)) {
ret = PTR_ERR(f);
goto release_sem;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 8219df6eb6d..1750445556c 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -177,7 +177,10 @@ struct jffs2_inode_cache {
#ifdef CONFIG_JFFS2_FS_XATTR
struct jffs2_xattr_ref *xref;
#endif
- int nlink;
+ uint32_t pino_nlink; /* Directories store parent inode
+ here; other inodes store nlink.
+ Zero always means that it's
+ completely unlinked. */
};
/* Inode states for 'state' above. We need the 'GC' state to prevent
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 9df8f3ef20d..a9bf9603c1b 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -709,7 +709,7 @@ void jffs2_mark_node_obsolete(struct jffs2_sb_info *c, struct jffs2_raw_node_ref
break;
#endif
default:
- if (ic->nodes == (void *)ic && ic->nlink == 0)
+ if (ic->nodes == (void *)ic && ic->pino_nlink == 0)
jffs2_del_ino_cache(c, ic);
break;
}
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 1b10d259409..2cc866cf134 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -187,7 +187,7 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
void jffs2_gc_release_inode(struct jffs2_sb_info *c,
struct jffs2_inode_info *f);
struct jffs2_inode_info *jffs2_gc_fetch_inode(struct jffs2_sb_info *c,
- int inum, int nlink);
+ int inum, int unlinked);
unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
struct jffs2_inode_info *f,
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 4cb4d76de07..6ca08ad887c 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -63,10 +63,11 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
/* TODO: instead, incapsulate point() stuff to jffs2_flash_read(),
* adding and jffs2_flash_read_end() interface. */
if (c->mtd->point) {
- err = c->mtd->point(c->mtd, ofs, len, &retlen, &buffer);
+ err = c->mtd->point(c->mtd, ofs, len, &retlen,
+ (void **)&buffer, NULL);
if (!err && retlen < len) {
JFFS2_WARNING("MTD point returned len too short: %zu instead of %u.\n", retlen, tn->csize);
- c->mtd->unpoint(c->mtd, buffer, ofs, retlen);
+ c->mtd->unpoint(c->mtd, ofs, retlen);
} else if (err)
JFFS2_WARNING("MTD point failed: error code %d.\n", err);
else
@@ -100,7 +101,7 @@ static int check_node_data(struct jffs2_sb_info *c, struct jffs2_tmp_dnode_info
kfree(buffer);
#ifndef __ECOS
else
- c->mtd->unpoint(c->mtd, buffer, ofs, len);
+ c->mtd->unpoint(c->mtd, ofs, len);
#endif
if (crc != tn->data_crc) {
@@ -136,7 +137,7 @@ free_out:
kfree(buffer);
#ifndef __ECOS
else
- c->mtd->unpoint(c->mtd, buffer, ofs, len);
+ c->mtd->unpoint(c->mtd, ofs, len);
#endif
return err;
}
@@ -1123,7 +1124,8 @@ static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
size_t retlen;
int ret;
- dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
+ dbg_readinode("ino #%u pino/nlink is %d\n", f->inocache->ino,
+ f->inocache->pino_nlink);
memset(&rii, 0, sizeof(rii));
@@ -1358,7 +1360,7 @@ int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
}
dbg_readinode("creating inocache for root inode\n");
memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
- f->inocache->ino = f->inocache->nlink = 1;
+ f->inocache->ino = f->inocache->pino_nlink = 1;
f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
f->inocache->state = INO_STATE_READING;
jffs2_add_ino_cache(c, f->inocache);
@@ -1401,7 +1403,7 @@ void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
jffs2_clear_acl(f);
jffs2_xattr_delete_inode(c, f->inocache);
mutex_lock(&f->sem);
- deleted = f->inocache && !f->inocache->nlink;
+ deleted = f->inocache && !f->inocache->pino_nlink;
if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 272872d27fd..1d437de1e9a 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -97,11 +97,12 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
size_t pointlen;
if (c->mtd->point) {
- ret = c->mtd->point (c->mtd, 0, c->mtd->size, &pointlen, &flashbuf);
+ ret = c->mtd->point(c->mtd, 0, c->mtd->size, &pointlen,
+ (void **)&flashbuf, NULL);
if (!ret && pointlen < c->mtd->size) {
/* Don't muck about if it won't let us point to the whole flash */
D1(printk(KERN_DEBUG "MTD point returned len too short: 0x%zx\n", pointlen));
- c->mtd->unpoint(c->mtd, flashbuf, 0, pointlen);
+ c->mtd->unpoint(c->mtd, 0, pointlen);
flashbuf = NULL;
}
if (ret)
@@ -267,7 +268,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
kfree(flashbuf);
#ifndef __ECOS
else
- c->mtd->unpoint(c->mtd, flashbuf, 0, c->mtd->size);
+ c->mtd->unpoint(c->mtd, 0, c->mtd->size);
#endif
if (s)
kfree(s);
@@ -940,7 +941,7 @@ struct jffs2_inode_cache *jffs2_scan_make_ino_cache(struct jffs2_sb_info *c, uin
ic->nodes = (void *)ic;
jffs2_add_ino_cache(c, ic);
if (ino == 1)
- ic->nlink = 1;
+ ic->pino_nlink = 1;
return ic;
}
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index f3353df178e..7da69eae49e 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -31,11 +31,12 @@ static struct kmem_cache *jffs2_inode_cachep;
static struct inode *jffs2_alloc_inode(struct super_block *sb)
{
- struct jffs2_inode_info *ei;
- ei = (struct jffs2_inode_info *)kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
- if (!ei)
+ struct jffs2_inode_info *f;
+
+ f = kmem_cache_alloc(jffs2_inode_cachep, GFP_KERNEL);
+ if (!f)
return NULL;
- return &ei->vfs_inode;
+ return &f->vfs_inode;
}
static void jffs2_destroy_inode(struct inode *inode)
@@ -45,10 +46,10 @@ static void jffs2_destroy_inode(struct inode *inode)
static void jffs2_i_init_once(struct kmem_cache *cachep, void *foo)
{
- struct jffs2_inode_info *ei = (struct jffs2_inode_info *) foo;
+ struct jffs2_inode_info *f = foo;
- mutex_init(&ei->sem);
- inode_init_once(&ei->vfs_inode);
+ mutex_init(&f->sem);
+ inode_init_once(&f->vfs_inode);
}
static int jffs2_sync_fs(struct super_block *sb, int wait)
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 8de52b60767..0e78b00035e 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -494,7 +494,7 @@ static void jffs2_wbuf_recover(struct jffs2_sb_info *c)
/* If it's an in-core inode, then we have to adjust any
full_dirent or full_dnode structure to point to the
new version instead of the old */
- f = jffs2_gc_fetch_inode(c, ic->ino, ic->nlink);
+ f = jffs2_gc_fetch_inode(c, ic->ino, !ic->pino_nlink);
if (IS_ERR(f)) {
/* Should never happen; it _must_ be present */
JFFS2_ERROR("Failed to iget() ino #%u, err %ld\n",
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index 665fce9797d..ca29440e943 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -19,7 +19,8 @@
#include "compr.h"
-int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint32_t mode, struct jffs2_raw_inode *ri)
+int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
+ uint32_t mode, struct jffs2_raw_inode *ri)
{
struct jffs2_inode_cache *ic;
@@ -31,7 +32,7 @@ int jffs2_do_new_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f, uint
memset(ic, 0, sizeof(*ic));
f->inocache = ic;
- f->inocache->nlink = 1;
+ f->inocache->pino_nlink = 1; /* Will be overwritten shortly for directories */
f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
f->inocache->state = INO_STATE_PRESENT;
@@ -438,10 +439,10 @@ int jffs2_do_create(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f, str
ret = jffs2_reserve_space(c, sizeof(*ri), &alloclen, ALLOC_NORMAL,
JFFS2_SUMMARY_INODE_SIZE);
D1(printk(KERN_DEBUG "jffs2_do_create(): reserved 0x%x bytes\n", alloclen));
- if (ret) {
- mutex_unlock(&f->sem);
+ if (ret)
return ret;
- }
+
+ mutex_lock(&f->sem);
ri->data_crc = cpu_to_je32(0);
ri->node_crc = cpu_to_je32(crc32(0, ri, sizeof(*ri)-8));
@@ -635,9 +636,9 @@ int jffs2_do_unlink(struct jffs2_sb_info *c, struct jffs2_inode_info *dir_f,
jffs2_mark_node_obsolete(c, fd->raw);
jffs2_free_full_dirent(fd);
}
- }
-
- dead_f->inocache->nlink--;
+ dead_f->inocache->pino_nlink = 0;
+ } else
+ dead_f->inocache->pino_nlink--;
/* NB: Caller must set inode nlink if appropriate */
mutex_unlock(&dead_f->sem);
}
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index e48665984cb..082e844ab2d 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -82,7 +82,7 @@ static int is_xattr_datum_unchecked(struct jffs2_sb_info *c, struct jffs2_xattr_
static void unload_xattr_datum(struct jffs2_sb_info *c, struct jffs2_xattr_datum *xd)
{
/* must be called under down_write(xattr_sem) */
- D1(dbg_xattr("%s: xid=%u, version=%u\n", __FUNCTION__, xd->xid, xd->version));
+ D1(dbg_xattr("%s: xid=%u, version=%u\n", __func__, xd->xid, xd->version));
if (xd->xname) {
c->xdatum_mem_usage -= (xd->name_len + 1 + xd->value_len);
kfree(xd->xname);
@@ -592,7 +592,7 @@ void jffs2_xattr_delete_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache
When an inode with XATTR is removed, those XATTRs must be removed. */
struct jffs2_xattr_ref *ref, *_ref;
- if (!ic || ic->nlink > 0)
+ if (!ic || ic->pino_nlink > 0)
return;
down_write(&c->xattr_sem);
@@ -829,7 +829,7 @@ void jffs2_build_xattr_subsystem(struct jffs2_sb_info *c)
ref->xd and ref->ic are not valid yet. */
xd = jffs2_find_xattr_datum(c, ref->xid);
ic = jffs2_get_ino_cache(c, ref->ino);
- if (!xd || !ic || !ic->nlink) {
+ if (!xd || !ic || !ic->pino_nlink) {
dbg_xattr("xref(ino=%u, xid=%u, xseqno=%u) is orphan.\n",
ref->ino, ref->xid, ref->xseqno);
ref->xseqno |= XREF_DELETE_MARKER;
@@ -1252,7 +1252,7 @@ int jffs2_garbage_collect_xattr_ref(struct jffs2_sb_info *c, struct jffs2_xattr_
rc = jffs2_reserve_space_gc(c, totlen, &length, JFFS2_SUMMARY_XREF_SIZE);
if (rc) {
JFFS2_WARNING("%s: jffs2_reserve_space_gc() = %d, request = %u\n",
- __FUNCTION__, rc, totlen);
+ __func__, rc, totlen);
rc = rc ? rc : -EBADFD;
goto out;
}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 40b16f23e49..5df517b81f3 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -573,7 +573,7 @@ again:
/* Ensure the resulting lock will get added to granted list */
fl->fl_flags |= FL_SLEEP;
if (do_vfs_lock(fl) < 0)
- printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __func__);
up_read(&host->h_rwsem);
fl->fl_flags = fl_flags;
status = 0;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 4d81553d294..81aca859bfd 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -752,7 +752,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
return;
default:
printk(KERN_WARNING "lockd: unexpected error %d in %s!\n",
- -error, __FUNCTION__);
+ -error, __func__);
nlmsvc_insert_block(block, 10 * HZ);
nlmsvc_release_block(block);
return;
diff --git a/fs/locks.c b/fs/locks.c
index 44d9a6a7ec5..663c069b59b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -116,6 +116,7 @@
#include <linux/capability.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c
index 2d4358c59f6..05ff4f1d702 100644
--- a/fs/msdos/namei.c
+++ b/fs/msdos/namei.c
@@ -609,7 +609,7 @@ error_inode:
if (corrupt < 0) {
fat_fs_panic(new_dir->i_sb,
"%s: Filesystem corrupted (i_pos %lld)",
- __FUNCTION__, sinfo.i_pos);
+ __func__, sinfo.i_pos);
}
goto out;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 061e5edb4d2..4fc302c2a0e 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2329,10 +2329,10 @@ void __init mnt_init(void)
err = sysfs_init();
if (err)
printk(KERN_WARNING "%s: sysfs_init error: %d\n",
- __FUNCTION__, err);
+ __func__, err);
fs_kobj = kobject_create_and_add("fs", NULL);
if (!fs_kobj)
- printk(KERN_WARNING "%s: kobj create error\n", __FUNCTION__);
+ printk(KERN_WARNING "%s: kobj create error\n", __func__);
init_rootfs();
init_mount_tree();
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fa220dc7460..7226a506f3c 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1575,6 +1575,11 @@ static int nfs_compare_super(struct super_block *sb, void *data)
return nfs_compare_mount_options(sb, server, mntflags);
}
+static int nfs_bdi_register(struct nfs_server *server)
+{
+ return bdi_register_dev(&server->backing_dev_info, server->s_dev);
+}
+
static int nfs_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
{
@@ -1617,6 +1622,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
if (s->s_fs_info != server) {
nfs_free_server(server);
server = NULL;
+ } else {
+ error = nfs_bdi_register(server);
+ if (error)
+ goto error_splat_super;
}
if (!s->s_root) {
@@ -1664,6 +1673,7 @@ static void nfs_kill_super(struct super_block *s)
{
struct nfs_server *server = NFS_SB(s);
+ bdi_unregister(&server->backing_dev_info);
kill_anon_super(s);
nfs_free_server(server);
}
@@ -1708,6 +1718,10 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
if (s->s_fs_info != server) {
nfs_free_server(server);
server = NULL;
+ } else {
+ error = nfs_bdi_register(server);
+ if (error)
+ goto error_splat_super;
}
if (!s->s_root) {
@@ -1984,6 +1998,10 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
if (s->s_fs_info != server) {
nfs_free_server(server);
server = NULL;
+ } else {
+ error = nfs_bdi_register(server);
+ if (error)
+ goto error_splat_super;
}
if (!s->s_root) {
@@ -2070,6 +2088,10 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
if (s->s_fs_info != server) {
nfs_free_server(server);
server = NULL;
+ } else {
+ error = nfs_bdi_register(server);
+ if (error)
+ goto error_splat_super;
}
if (!s->s_root) {
@@ -2149,6 +2171,10 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags,
if (s->s_fs_info != server) {
nfs_free_server(server);
server = NULL;
+ } else {
+ error = nfs_bdi_register(server);
+ if (error)
+ goto error_splat_super;
}
if (!s->s_root) {
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 562abf3380d..0b3ffa9840c 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -104,7 +104,7 @@ xdr_writemem(__be32 *p, const void *ptr, int nbytes)
} while (0)
#define RESERVE_SPACE(nbytes) do { \
p = xdr_reserve_space(xdr, nbytes); \
- if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \
+ if (!p) dprintk("NFSD: RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __func__); \
BUG_ON(!p); \
} while (0)
@@ -134,7 +134,7 @@ xdr_error: \
p = xdr_inline_decode(xdr, nbytes); \
if (!p) { \
dprintk("NFSD: %s: reply buffer overflowed in line %d.\n", \
- __FUNCTION__, __LINE__); \
+ __func__, __LINE__); \
return -EIO; \
} \
} while (0)
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 8ac37c33d12..5e6724c1afd 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -45,7 +45,7 @@ static void ntfs_debug(const char *f, ...);
extern void __ntfs_debug (const char *file, int line, const char *function,
const char *format, ...) __attribute__ ((format (printf, 4, 5)));
#define ntfs_debug(f, a...) \
- __ntfs_debug(__FILE__, __LINE__, __FUNCTION__, f, ##a)
+ __ntfs_debug(__FILE__, __LINE__, __func__, f, ##a)
extern void ntfs_debug_dump_runlist(const runlist_element *rl);
@@ -58,10 +58,10 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
extern void __ntfs_warning(const char *function, const struct super_block *sb,
const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
-#define ntfs_warning(sb, f, a...) __ntfs_warning(__FUNCTION__, sb, f, ##a)
+#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
extern void __ntfs_error(const char *function, const struct super_block *sb,
const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
-#define ntfs_error(sb, f, a...) __ntfs_error(__FUNCTION__, sb, f, ##a)
+#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
#endif /* _LINUX_NTFS_DEBUG_H */
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c
index 98429fd6849..bc702dab5d1 100644
--- a/fs/ocfs2/cluster/sys.c
+++ b/fs/ocfs2/cluster/sys.c
@@ -65,7 +65,7 @@ int o2cb_sys_init(void)
{
int ret;
- o2cb_kset = kset_create_and_add("o2cb", NULL, NULL);
+ o2cb_kset = kset_create_and_add("o2cb", NULL, fs_kobj);
if (!o2cb_kset)
return -ENOMEM;
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 5f6d858770a..1b81dcba175 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -44,7 +44,8 @@
#define MLOG_MASK_PREFIX ML_DLM
#include "cluster/masklog.h"
-int stringify_lockname(const char *lockname, int locklen, char *buf, int len);
+static int stringify_lockname(const char *lockname, int locklen, char *buf,
+ int len);
void dlm_print_one_lock_resource(struct dlm_lock_resource *res)
{
@@ -251,7 +252,8 @@ EXPORT_SYMBOL_GPL(dlm_errname);
*
* For more on lockname formats, please refer to dlmglue.c and ocfs2_lockid.h.
*/
-int stringify_lockname(const char *lockname, int locklen, char *buf, int len)
+static int stringify_lockname(const char *lockname, int locklen, char *buf,
+ int len)
{
int out = 0;
__be64 inode_blkno_be;
@@ -368,7 +370,7 @@ static void dlm_debug_free(struct kref *kref)
kfree(dc);
}
-void dlm_debug_put(struct dlm_debug_ctxt *dc)
+static void dlm_debug_put(struct dlm_debug_ctxt *dc)
{
if (dc)
kref_put(&dc->debug_refcnt, dlm_debug_free);
diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c
index 61a000f8524..e48aba698b7 100644
--- a/fs/ocfs2/dlm/dlmfs.c
+++ b/fs/ocfs2/dlm/dlmfs.c
@@ -327,7 +327,7 @@ clear_fields:
static struct backing_dev_info dlmfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
static struct inode *dlmfs_get_root_inode(struct super_block *sb)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 9154c82d325..57e0d30cde9 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1048,6 +1048,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
mlog_entry("(0x%p, '%.*s')\n", dentry,
dentry->d_name.len, dentry->d_name.name);
+ /* ensuring we don't even attempt to truncate a symlink */
+ if (S_ISLNK(inode->i_mode))
+ attr->ia_valid &= ~ATTR_SIZE;
+
if (attr->ia_valid & ATTR_MODE)
mlog(0, "mode change: %d\n", attr->ia_mode);
if (attr->ia_valid & ATTR_UID)
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index ce0dc147602..be774bdc8b3 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -260,7 +260,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
bh = osb->local_alloc_bh;
alloc = (struct ocfs2_dinode *) bh->b_data;
- alloc_copy = kmalloc(bh->b_size, GFP_KERNEL);
+ alloc_copy = kmalloc(bh->b_size, GFP_NOFS);
if (!alloc_copy) {
status = -ENOMEM;
goto out_commit;
@@ -931,7 +931,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
* local alloc shutdown won't try to double free main bitmap
* bits. Make a copy so the sync function knows which bits to
* free. */
- alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_KERNEL);
+ alloc_copy = kmalloc(osb->local_alloc_bh->b_size, GFP_NOFS);
if (!alloc_copy) {
status = -ENOMEM;
mlog_errno(status);
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index ac1d74c63bf..bbd1667aa7d 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -385,7 +385,7 @@ static int o2cb_cluster_this_node(unsigned int *node)
return 0;
}
-struct ocfs2_stack_operations o2cb_stack_ops = {
+static struct ocfs2_stack_operations o2cb_stack_ops = {
.connect = o2cb_cluster_connect,
.disconnect = o2cb_cluster_disconnect,
.hangup = o2cb_cluster_hangup,
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 7428663f9cb..b503772cd0e 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -635,7 +635,7 @@ static const struct file_operations ocfs2_control_fops = {
.owner = THIS_MODULE,
};
-struct miscdevice ocfs2_control_device = {
+static struct miscdevice ocfs2_control_device = {
.minor = MISC_DYNAMIC_MINOR,
.name = "ocfs2_control",
.fops = &ocfs2_control_fops,
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index 7134007ba22..ba9dbb51d25 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -167,9 +167,11 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
.readlink = page_readlink,
.follow_link = ocfs2_follow_link,
.getattr = ocfs2_getattr,
+ .setattr = ocfs2_setattr,
};
const struct inode_operations ocfs2_fast_symlink_inode_operations = {
.readlink = ocfs2_readlink,
.follow_link = ocfs2_follow_link,
.getattr = ocfs2_getattr,
+ .setattr = ocfs2_setattr,
};
diff --git a/fs/open.c b/fs/open.c
index 7af1f05d597..a1450086e92 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -7,6 +7,7 @@
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/quotaops.h>
#include <linux/fsnotify.h>
#include <linux/module.h>
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index e7dd1d4e347..0fdda2e8a4c 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -41,12 +41,12 @@
#ifndef CONFIG_LDM_DEBUG
#define ldm_debug(...) do {} while (0)
#else
-#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __FUNCTION__, f, ##a)
+#define ldm_debug(f, a...) _ldm_printk (KERN_DEBUG, __func__, f, ##a)
#endif
-#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __FUNCTION__, f, ##a)
-#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __FUNCTION__, f, ##a)
-#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __FUNCTION__, f, ##a)
+#define ldm_crit(f, a...) _ldm_printk (KERN_CRIT, __func__, f, ##a)
+#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
+#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
__attribute__ ((format (printf, 3, 4)))
static void _ldm_printk (const char *level, const char *function,
diff --git a/fs/pipe.c b/fs/pipe.c
index f73492b6817..3499f9ff631 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1076,6 +1076,23 @@ int do_pipe(int *fd)
}
/*
+ * sys_pipe() is the normal C calling standard for creating
+ * a pipe. It's not the way Unix traditionally does this, though.
+ */
+asmlinkage long __weak sys_pipe(int __user *fildes)
+{
+ int fd[2];
+ int error;
+
+ error = do_pipe(fd);
+ if (!error) {
+ if (copy_to_user(fildes, fd, sizeof(fd)))
+ error = -EFAULT;
+ }
+ return error;
+}
+
+/*
* pipefs should _never_ be mounted by userland - too much of security hassle,
* no real gain from having the whole whorehouse mounted. So we don't need
* any operations on the root directory. However, we need a non-trivial
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 07d6c4853fe..dca997a93bf 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -73,6 +73,7 @@
#include <linux/signal.h>
#include <linux/highmem.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/times.h>
#include <linux/cpuset.h>
#include <linux/rcupdate.h>
@@ -425,12 +426,13 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
cutime = cstime = utime = stime = cputime_zero;
cgtime = gtime = cputime_zero;
- rcu_read_lock();
if (lock_task_sighand(task, &flags)) {
struct signal_struct *sig = task->signal;
if (sig->tty) {
- tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns);
+ struct pid *pgrp = tty_get_pgrp(sig->tty);
+ tty_pgrp = pid_nr_ns(pgrp, ns);
+ put_pid(pgrp);
tty_nr = new_encode_dev(tty_devnum(sig->tty));
}
@@ -469,7 +471,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
unlock_task_sighand(task, &flags);
}
- rcu_read_unlock();
if (!whole || num_threads < 2)
wchan = get_wchan(task);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fcf02f2deeb..808cbdc193d 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -56,6 +56,7 @@
#include <linux/init.h>
#include <linux/capability.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/string.h>
#include <linux/seq_file.h>
#include <linux/namei.h>
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 9d53b39a9cf..43e54e86cef 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -641,6 +641,23 @@ struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
return ent;
}
+struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
+ struct proc_dir_entry *parent)
+{
+ struct proc_dir_entry *ent;
+
+ ent = __proc_create(&parent, name, S_IFDIR | S_IRUGO | S_IXUGO, 2);
+ if (ent) {
+ ent->data = net;
+ if (proc_register(parent, ent) < 0) {
+ kfree(ent);
+ ent = NULL;
+ }
+ }
+ return ent;
+}
+EXPORT_SYMBOL_GPL(proc_net_mkdir);
+
struct proc_dir_entry *proc_mkdir(const char *name,
struct proc_dir_entry *parent)
{
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
index 48bcf20cec2..74a323d2b85 100644
--- a/fs/proc/proc_misc.c
+++ b/fs/proc/proc_misc.c
@@ -179,6 +179,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"PageTables: %8lu kB\n"
"NFS_Unstable: %8lu kB\n"
"Bounce: %8lu kB\n"
+ "WritebackTmp: %8lu kB\n"
"CommitLimit: %8lu kB\n"
"Committed_AS: %8lu kB\n"
"VmallocTotal: %8lu kB\n"
@@ -210,6 +211,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
K(global_page_state(NR_PAGETABLE)),
K(global_page_state(NR_UNSTABLE_NFS)),
K(global_page_state(NR_BOUNCE)),
+ K(global_page_state(NR_WRITEBACK_TEMP)),
K(allowed),
K(committed),
(unsigned long)VMALLOC_TOTAL >> 10,
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 13cd7835d0d..83f357b30d7 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -159,17 +159,6 @@ struct net *get_proc_net(const struct inode *inode)
}
EXPORT_SYMBOL_GPL(get_proc_net);
-struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
- struct proc_dir_entry *parent)
-{
- struct proc_dir_entry *pde;
- pde = proc_mkdir_mode(name, S_IRUGO | S_IXUGO, parent);
- if (pde != NULL)
- pde->data = net;
- return pde;
-}
-EXPORT_SYMBOL_GPL(proc_net_mkdir);
-
static __net_init int proc_net_ns_init(struct net *net)
{
struct proc_dir_entry *netd, *net_statd;
diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c
index ac26ccc25f4..21f490f5d65 100644
--- a/fs/proc/proc_tty.c
+++ b/fs/proc/proc_tty.c
@@ -192,16 +192,14 @@ void proc_tty_register_driver(struct tty_driver *driver)
{
struct proc_dir_entry *ent;
- if ((!driver->read_proc && !driver->write_proc) ||
- !driver->driver_name ||
+ if (!driver->ops->read_proc || !driver->driver_name ||
driver->proc_entry)
return;
ent = create_proc_entry(driver->driver_name, 0, proc_tty_driver);
if (!ent)
return;
- ent->read_proc = driver->read_proc;
- ent->write_proc = driver->write_proc;
+ ent->read_proc = driver->ops->read_proc;
ent->owner = driver->owner;
ent->data = driver;
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index 4b733f10845..4b4f9cc2f18 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -1,6 +1,7 @@
#include <linux/mm.h>
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/mount.h>
#include <linux/ptrace.h>
#include <linux/seq_file.h>
diff --git a/fs/quota_v2.c b/fs/quota_v2.c
index 23b647f25d0..234ada90363 100644
--- a/fs/quota_v2.c
+++ b/fs/quota_v2.c
@@ -306,7 +306,7 @@ static uint find_free_dqentry(struct dquot *dquot, int *err)
printk(KERN_ERR "VFS: find_free_dqentry(): Can't remove block (%u) from entry free list.\n", blk);
goto out_buf;
}
- dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)+1);
+ le16_add_cpu(&dh->dqdh_entries, 1);
memset(&fakedquot, 0, sizeof(struct v2_disk_dqblk));
/* Find free structure in block */
for (i = 0; i < V2_DQSTRINBLK && memcmp(&fakedquot, ddquot+i, sizeof(struct v2_disk_dqblk)); i++);
@@ -448,7 +448,7 @@ static int free_dqentry(struct dquot *dquot, uint blk)
goto out_buf;
}
dh = (struct v2_disk_dqdbheader *)buf;
- dh->dqdh_entries = cpu_to_le16(le16_to_cpu(dh->dqdh_entries)-1);
+ le16_add_cpu(&dh->dqdh_entries, -1);
if (!le16_to_cpu(dh->dqdh_entries)) { /* Block got free? */
if ((ret = remove_free_dqentry(sb, type, buf, blk)) < 0 ||
(ret = put_free_dqblk(sb, type, buf, blk)) < 0) {
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 8428d5b2711..b13123424e4 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -44,7 +44,7 @@ static const struct inode_operations ramfs_dir_inode_operations;
static struct backing_dev_info ramfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK |
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK |
BDI_CAP_MAP_DIRECT | BDI_CAP_MAP_COPY |
BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP,
};
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index da86042b3e0..e396b2fa474 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2574,11 +2574,9 @@ static int release_journal_dev(struct super_block *super,
result = 0;
- if (journal->j_dev_file != NULL) {
- result = filp_close(journal->j_dev_file, NULL);
- journal->j_dev_file = NULL;
- journal->j_dev_bd = NULL;
- } else if (journal->j_dev_bd != NULL) {
+ if (journal->j_dev_bd != NULL) {
+ if (journal->j_dev_bd->bd_dev != super->s_dev)
+ bd_release(journal->j_dev_bd);
result = blkdev_put(journal->j_dev_bd);
journal->j_dev_bd = NULL;
}
@@ -2603,7 +2601,6 @@ static int journal_init_dev(struct super_block *super,
result = 0;
journal->j_dev_bd = NULL;
- journal->j_dev_file = NULL;
jdev = SB_ONDISK_JOURNAL_DEVICE(super) ?
new_decode_dev(SB_ONDISK_JOURNAL_DEVICE(super)) : super->s_dev;
@@ -2620,35 +2617,34 @@ static int journal_init_dev(struct super_block *super,
"cannot init journal device '%s': %i",
__bdevname(jdev, b), result);
return result;
- } else if (jdev != super->s_dev)
+ } else if (jdev != super->s_dev) {
+ result = bd_claim(journal->j_dev_bd, journal);
+ if (result) {
+ blkdev_put(journal->j_dev_bd);
+ return result;
+ }
+
set_blocksize(journal->j_dev_bd, super->s_blocksize);
+ }
+
return 0;
}
- journal->j_dev_file = filp_open(jdev_name, 0, 0);
- if (!IS_ERR(journal->j_dev_file)) {
- struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
- if (!S_ISBLK(jdev_inode->i_mode)) {
- reiserfs_warning(super, "journal_init_dev: '%s' is "
- "not a block device", jdev_name);
- result = -ENOTBLK;
- release_journal_dev(super, journal);
- } else {
- /* ok */
- journal->j_dev_bd = I_BDEV(jdev_inode);
- set_blocksize(journal->j_dev_bd, super->s_blocksize);
- reiserfs_info(super,
- "journal_init_dev: journal device: %s\n",
- bdevname(journal->j_dev_bd, b));
- }
- } else {
- result = PTR_ERR(journal->j_dev_file);
- journal->j_dev_file = NULL;
+ journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal);
+ if (IS_ERR(journal->j_dev_bd)) {
+ result = PTR_ERR(journal->j_dev_bd);
+ journal->j_dev_bd = NULL;
reiserfs_warning(super,
"journal_init_dev: Cannot open '%s': %i",
jdev_name, result);
+ return result;
}
- return result;
+
+ set_blocksize(journal->j_dev_bd, super->s_blocksize);
+ reiserfs_info(super,
+ "journal_init_dev: journal device: %s\n",
+ bdevname(journal->j_dev_bd, b));
+ return 0;
}
/**
diff --git a/fs/select.c b/fs/select.c
index 00f58c5c7e0..8dda969614a 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -21,6 +21,7 @@
#include <linux/poll.h>
#include <linux/personality.h> /* for STICKY_TIMEOUTS */
#include <linux/file.h>
+#include <linux/fdtable.h>
#include <linux/fs.h>
#include <linux/rcupdate.h>
@@ -298,7 +299,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout)
#define MAX_SELECT_SECONDS \
((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1)
-static int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
+int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp,
fd_set __user *exp, s64 *timeout)
{
fd_set_bits fds;
@@ -425,7 +426,7 @@ sticky:
return ret;
}
-#ifdef TIF_RESTORE_SIGMASK
+#ifdef HAVE_SET_RESTORE_SIGMASK
asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp,
fd_set __user *exp, struct timespec __user *tsp,
const sigset_t __user *sigmask, size_t sigsetsize)
@@ -498,7 +499,7 @@ sticky:
if (sigmask) {
memcpy(&current->saved_sigmask, &sigsaved,
sizeof(sigsaved));
- set_thread_flag(TIF_RESTORE_SIGMASK);
+ set_restore_sigmask();
}
} else if (sigmask)
sigprocmask(SIG_SETMASK, &sigsaved, NULL);
@@ -528,7 +529,7 @@ asmlinkage long sys_pselect6(int n, fd_set __user *inp, fd_set __user *outp,
return sys_pselect7(n, inp, outp, exp, tsp, up, sigsetsize);
}
-#endif /* TIF_RESTORE_SIGMASK */
+#endif /* HAVE_SET_RESTORE_SIGMASK */
struct poll_list {
struct poll_list *next;
@@ -759,7 +760,7 @@ asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds,
return ret;
}
-#ifdef TIF_RESTORE_SIGMASK
+#ifdef HAVE_SET_RESTORE_SIGMASK
asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
struct timespec __user *tsp, const sigset_t __user *sigmask,
size_t sigsetsize)
@@ -805,7 +806,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
if (sigmask) {
memcpy(&current->saved_sigmask, &sigsaved,
sizeof(sigsaved));
- set_thread_flag(TIF_RESTORE_SIGMASK);
+ set_restore_sigmask();
}
ret = -ERESTARTNOHAND;
} else if (sigmask)
@@ -839,4 +840,4 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds,
return ret;
}
-#endif /* TIF_RESTORE_SIGMASK */
+#endif /* HAVE_SET_RESTORE_SIGMASK */
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 8ead0db3593..619725644c7 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -207,11 +207,8 @@ static const struct file_operations signalfd_fops = {
asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
{
- int error;
sigset_t sigmask;
struct signalfd_ctx *ctx;
- struct file *file;
- struct inode *inode;
if (sizemask != sizeof(sigset_t) ||
copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
@@ -230,12 +227,11 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
* When we call this, the initialization must be complete, since
* anon_inode_getfd() will install the fd.
*/
- error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]",
- &signalfd_fops, ctx);
- if (error)
- goto err_fdalloc;
+ ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx);
+ if (ufd < 0)
+ kfree(ctx);
} else {
- file = fget(ufd);
+ struct file *file = fget(ufd);
if (!file)
return -EBADF;
ctx = file->private_data;
@@ -252,9 +248,4 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas
}
return ufd;
-
-err_fdalloc:
- kfree(ctx);
- return error;
}
-
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
index 734972b9269..fc4b1a5dd75 100644
--- a/fs/smbfs/smb_debug.h
+++ b/fs/smbfs/smb_debug.h
@@ -11,14 +11,14 @@
* these are normally enabled.
*/
#ifdef SMBFS_PARANOIA
-# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __FUNCTION__ , ## a)
+# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
#else
# define PARANOIA(f, a...) do { ; } while(0)
#endif
/* lots of debug messages */
#ifdef SMBFS_DEBUG_VERBOSE
-# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a)
+# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
#else
# define VERBOSE(f, a...) do { ; } while(0)
#endif
@@ -28,7 +28,7 @@
* too common name.
*/
#ifdef SMBFS_DEBUG
-#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __FUNCTION__ , ## a)
+#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
#else
#define DEBUG1(f, a...) do { ; } while(0)
#endif
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index dbdfabbfd60..e7735f643cd 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -135,7 +135,7 @@ sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
goto out;
}
pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n",
- __FUNCTION__, count, *ppos, buffer->page);
+ __func__, count, *ppos, buffer->page);
retval = simple_read_from_buffer(buf, count, ppos, buffer->page,
buffer->count);
out:
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index d9262f74f94..eb53c632f85 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -30,7 +30,7 @@ static const struct address_space_operations sysfs_aops = {
static struct backing_dev_info sysfs_backing_dev_info = {
.ra_pages = 0, /* No readahead */
- .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+ .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK,
};
static const struct inode_operations sysfs_inode_operations ={
@@ -59,6 +59,8 @@ int sysfs_setattr(struct dentry * dentry, struct iattr * iattr)
if (error)
return error;
+ iattr->ia_valid &= ~ATTR_SIZE; /* ignore size changes */
+
error = inode_setattr(inode, iattr);
if (error)
return error;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 74168266cd5..14f0023984d 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -61,7 +61,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent)
/* instantiate and link root dentry */
root = d_alloc_root(inode);
if (!root) {
- pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
+ pr_debug("%s: could not get root dentry!\n",__func__);
iput(inode);
return -ENOMEM;
}
diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h
index 42d51d1c05c..38ebe3f85b3 100644
--- a/fs/sysv/sysv.h
+++ b/fs/sysv/sysv.h
@@ -217,9 +217,9 @@ static inline __fs32 fs32_add(struct sysv_sb_info *sbi, __fs32 *n, int d)
if (sbi->s_bytesex == BYTESEX_PDP)
*(__u32*)n = PDP_swab(PDP_swab(*(__u32*)n)+d);
else if (sbi->s_bytesex == BYTESEX_LE)
- *(__le32*)n = cpu_to_le32(le32_to_cpu(*(__le32*)n)+d);
+ le32_add_cpu((__le32 *)n, d);
else
- *(__be32*)n = cpu_to_be32(be32_to_cpu(*(__be32*)n)+d);
+ be32_add_cpu((__be32 *)n, d);
return *n;
}
@@ -242,9 +242,9 @@ static inline __fs16 cpu_to_fs16(struct sysv_sb_info *sbi, __u16 n)
static inline __fs16 fs16_add(struct sysv_sb_info *sbi, __fs16 *n, int d)
{
if (sbi->s_bytesex != BYTESEX_BE)
- *(__le16*)n = cpu_to_le16(le16_to_cpu(*(__le16 *)n)+d);
+ le16_add_cpu((__le16 *)n, d);
else
- *(__be16*)n = cpu_to_be16(be16_to_cpu(*(__be16 *)n)+d);
+ be16_add_cpu((__be16 *)n, d);
return *n;
}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 5400524e9cb..d87d354ec42 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -181,10 +181,8 @@ static struct file *timerfd_fget(int fd)
asmlinkage long sys_timerfd_create(int clockid, int flags)
{
- int error, ufd;
+ int ufd;
struct timerfd_ctx *ctx;
- struct file *file;
- struct inode *inode;
if (flags)
return -EINVAL;
@@ -200,12 +198,9 @@ asmlinkage long sys_timerfd_create(int clockid, int flags)
ctx->clockid = clockid;
hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
- error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
- &timerfd_fops, ctx);
- if (error) {
+ ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx);
+ if (ufd < 0)
kfree(ctx);
- return error;
- }
return ufd;
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index b564fc140fe..9fb18a340fc 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -240,7 +240,7 @@ static int udf_sb_alloc_partition_maps(struct super_block *sb, u32 count)
sbi->s_partmaps = kcalloc(count, sizeof(struct udf_part_map),
GFP_KERNEL);
if (!sbi->s_partmaps) {
- udf_error(sb, __FUNCTION__,
+ udf_error(sb, __func__,
"Unable to allocate space for %d partition maps",
count);
sbi->s_partitions = 0;
@@ -1086,7 +1086,7 @@ static struct udf_bitmap *udf_sb_alloc_bitmap(struct super_block *sb, u32 index)
bitmap = vmalloc(size); /* TODO: get rid of vmalloc */
if (bitmap == NULL) {
- udf_error(sb, __FUNCTION__,
+ udf_error(sb, __func__,
"Unable to allocate space for bitmap "
"and %d buffer_head pointers", nr_groups);
return NULL;
diff --git a/fs/utimes.c b/fs/utimes.c
index a2bef77dc9c..af059d5cb48 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -40,9 +40,14 @@ asmlinkage long sys_utime(char __user *filename, struct utimbuf __user *times)
#endif
+static bool nsec_special(long nsec)
+{
+ return nsec == UTIME_OMIT || nsec == UTIME_NOW;
+}
+
static bool nsec_valid(long nsec)
{
- if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
+ if (nsec_special(nsec))
return true;
return nsec >= 0 && nsec <= 999999999;
@@ -119,7 +124,15 @@ long do_utimes(int dfd, char __user *filename, struct timespec *times, int flags
newattrs.ia_mtime.tv_nsec = times[1].tv_nsec;
newattrs.ia_valid |= ATTR_MTIME_SET;
}
- } else {
+ }
+
+ /*
+ * If times is NULL or both times are either UTIME_OMIT or
+ * UTIME_NOW, then need to check permissions, because
+ * inode_change_ok() won't do it.
+ */
+ if (!times || (nsec_special(times[0].tv_nsec) &&
+ nsec_special(times[1].tv_nsec))) {
error = -EACCES;
if (IS_IMMUTABLE(inode))
goto mnt_drop_write_and_out;
diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c
index 5b66162d074..a3522727ea5 100644
--- a/fs/vfat/namei.c
+++ b/fs/vfat/namei.c
@@ -986,7 +986,7 @@ error_inode:
if (corrupt < 0) {
fat_fs_panic(new_dir->i_sb,
"%s: Filesystem corrupted (i_pos %lld)",
- __FUNCTION__, sinfo.i_pos);
+ __func__, sinfo.i_pos);
}
goto out;
}
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 524021ff543..3f53dd101f9 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -64,3 +64,16 @@ config XFS_RT
See the xfs man page in section 5 for additional information.
If unsure, say N.
+
+config XFS_DEBUG
+ bool "XFS Debugging support (EXPERIMENTAL)"
+ depends on XFS_FS && EXPERIMENTAL
+ help
+ Say Y here to get an XFS build with many debugging features,
+ including ASSERT checks, function wrappers around macros,
+ and extra sanity-checking functions in various code paths.
+
+ Note that the resulting code will be HUGE and SLOW, and probably
+ not useful unless you are debugging a particular problem.
+
+ Say N unless you are an XFS developer, or you play one on TV.
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index c110bb00266..ff6a19873e5 100644
--- a/fs/xfs/linux-2.6/mrlock.h
+++ b/fs/xfs/linux-2.6/mrlock.h
@@ -20,29 +20,24 @@
#include <linux/rwsem.h>
-enum { MR_NONE, MR_ACCESS, MR_UPDATE };
-
typedef struct {
struct rw_semaphore mr_lock;
+#ifdef DEBUG
int mr_writer;
+#endif
} mrlock_t;
+#ifdef DEBUG
#define mrinit(mrp, name) \
do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
+#else
+#define mrinit(mrp, name) \
+ do { init_rwsem(&(mrp)->mr_lock); } while (0)
+#endif
+
#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
#define mrfree(mrp) do { } while (0)
-static inline void mraccess(mrlock_t *mrp)
-{
- down_read(&mrp->mr_lock);
-}
-
-static inline void mrupdate(mrlock_t *mrp)
-{
- down_write(&mrp->mr_lock);
- mrp->mr_writer = 1;
-}
-
static inline void mraccess_nested(mrlock_t *mrp, int subclass)
{
down_read_nested(&mrp->mr_lock, subclass);
@@ -51,10 +46,11 @@ static inline void mraccess_nested(mrlock_t *mrp, int subclass)
static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
{
down_write_nested(&mrp->mr_lock, subclass);
+#ifdef DEBUG
mrp->mr_writer = 1;
+#endif
}
-
static inline int mrtryaccess(mrlock_t *mrp)
{
return down_read_trylock(&mrp->mr_lock);
@@ -64,39 +60,31 @@ static inline int mrtryupdate(mrlock_t *mrp)
{
if (!down_write_trylock(&mrp->mr_lock))
return 0;
+#ifdef DEBUG
mrp->mr_writer = 1;
+#endif
return 1;
}
-static inline void mrunlock(mrlock_t *mrp)
+static inline void mrunlock_excl(mrlock_t *mrp)
{
- if (mrp->mr_writer) {
- mrp->mr_writer = 0;
- up_write(&mrp->mr_lock);
- } else {
- up_read(&mrp->mr_lock);
- }
+#ifdef DEBUG
+ mrp->mr_writer = 0;
+#endif
+ up_write(&mrp->mr_lock);
}
-static inline void mrdemote(mrlock_t *mrp)
+static inline void mrunlock_shared(mrlock_t *mrp)
{
- mrp->mr_writer = 0;
- downgrade_write(&mrp->mr_lock);
+ up_read(&mrp->mr_lock);
}
-#ifdef DEBUG
-/*
- * Debug-only routine, without some platform-specific asm code, we can
- * now only answer requests regarding whether we hold the lock for write
- * (reader state is outside our visibility, we only track writer state).
- * Note: means !ismrlocked would give false positives, so don't do that.
- */
-static inline int ismrlocked(mrlock_t *mrp, int type)
+static inline void mrdemote(mrlock_t *mrp)
{
- if (mrp && type == MR_UPDATE)
- return mrp->mr_writer;
- return 1;
-}
+#ifdef DEBUG
+ mrp->mr_writer = 0;
#endif
+ downgrade_write(&mrp->mr_lock);
+}
#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 52f6846101d..5105015a75a 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -886,7 +886,7 @@ int
xfs_buf_lock_value(
xfs_buf_t *bp)
{
- return atomic_read(&bp->b_sema.count);
+ return bp->b_sema.count;
}
#endif
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index 265f0168ab7..c672b3238b1 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -133,7 +133,7 @@ xfs_nfs_get_inode(
if (!ip)
return ERR_PTR(-EIO);
- if (!ip->i_d.di_mode || ip->i_d.di_gen != generation) {
+ if (ip->i_d.di_gen != generation) {
xfs_iput_new(ip, XFS_ILOCK_SHARED);
return ERR_PTR(-ENOENT);
}
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 05905246434..65e78c13d4a 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -43,9 +43,6 @@
#include <linux/smp_lock.h>
static struct vm_operations_struct xfs_file_vm_ops;
-#ifdef CONFIG_XFS_DMAPI
-static struct vm_operations_struct xfs_dmapi_file_vm_ops;
-#endif
STATIC_INLINE ssize_t
__xfs_file_read(
@@ -202,22 +199,6 @@ xfs_file_fsync(
(xfs_off_t)0, (xfs_off_t)-1);
}
-#ifdef CONFIG_XFS_DMAPI
-STATIC int
-xfs_vm_fault(
- struct vm_area_struct *vma,
- struct vm_fault *vmf)
-{
- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
- bhv_vnode_t *vp = vn_from_inode(inode);
-
- ASSERT_ALWAYS(vp->v_vfsp->vfs_flag & VFS_DMI);
- if (XFS_SEND_MMAP(XFS_VFSTOM(vp->v_vfsp), vma, 0))
- return VM_FAULT_SIGBUS;
- return filemap_fault(vma, vmf);
-}
-#endif /* CONFIG_XFS_DMAPI */
-
/*
* Unfortunately we can't just use the clean and simple readdir implementation
* below, because nfs might call back into ->lookup from the filldir callback
@@ -386,11 +367,6 @@ xfs_file_mmap(
vma->vm_ops = &xfs_file_vm_ops;
vma->vm_flags |= VM_CAN_NONLINEAR;
-#ifdef CONFIG_XFS_DMAPI
- if (XFS_M(filp->f_path.dentry->d_inode->i_sb)->m_flags & XFS_MOUNT_DMAPI)
- vma->vm_ops = &xfs_dmapi_file_vm_ops;
-#endif /* CONFIG_XFS_DMAPI */
-
file_accessed(filp);
return 0;
}
@@ -437,47 +413,6 @@ xfs_file_ioctl_invis(
return error;
}
-#ifdef CONFIG_XFS_DMAPI
-#ifdef HAVE_VMOP_MPROTECT
-STATIC int
-xfs_vm_mprotect(
- struct vm_area_struct *vma,
- unsigned int newflags)
-{
- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
- struct xfs_mount *mp = XFS_M(inode->i_sb);
- int error = 0;
-
- if (mp->m_flags & XFS_MOUNT_DMAPI) {
- if ((vma->vm_flags & VM_MAYSHARE) &&
- (newflags & VM_WRITE) && !(vma->vm_flags & VM_WRITE))
- error = XFS_SEND_MMAP(mp, vma, VM_WRITE);
- }
- return error;
-}
-#endif /* HAVE_VMOP_MPROTECT */
-#endif /* CONFIG_XFS_DMAPI */
-
-#ifdef HAVE_FOP_OPEN_EXEC
-/* If the user is attempting to execute a file that is offline then
- * we have to trigger a DMAPI READ event before the file is marked as busy
- * otherwise the invisible I/O will not be able to write to the file to bring
- * it back online.
- */
-STATIC int
-xfs_file_open_exec(
- struct inode *inode)
-{
- struct xfs_mount *mp = XFS_M(inode->i_sb);
- struct xfs_inode *ip = XFS_I(inode);
-
- if (unlikely(mp->m_flags & XFS_MOUNT_DMAPI) &&
- DM_EVENT_ENABLED(ip, DM_EVENT_READ))
- return -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, 0, 0, 0, NULL);
- return 0;
-}
-#endif /* HAVE_FOP_OPEN_EXEC */
-
/*
* mmap()d file has taken write protection fault and is being made
* writable. We can set the page state up correctly for a writable
@@ -546,13 +481,3 @@ static struct vm_operations_struct xfs_file_vm_ops = {
.fault = filemap_fault,
.page_mkwrite = xfs_vm_page_mkwrite,
};
-
-#ifdef CONFIG_XFS_DMAPI
-static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
- .fault = xfs_vm_fault,
- .page_mkwrite = xfs_vm_page_mkwrite,
-#ifdef HAVE_VMOP_MPROTECT
- .mprotect = xfs_vm_mprotect,
-#endif
-};
-#endif /* CONFIG_XFS_DMAPI */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 4ddb86b73c6..a42ba9d7115 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -238,7 +238,7 @@ xfs_vget_fsop_handlereq(
return error;
if (ip == NULL)
return XFS_ERROR(EIO);
- if (ip->i_d.di_mode == 0 || ip->i_d.di_gen != igen) {
+ if (ip->i_d.di_gen != igen) {
xfs_iput_new(ip, XFS_ILOCK_SHARED);
return XFS_ERROR(ENOENT);
}
@@ -505,14 +505,14 @@ xfs_attrmulti_attr_get(
{
char *kbuf;
int error = EFAULT;
-
+
if (*len > XATTR_SIZE_MAX)
return EINVAL;
kbuf = kmalloc(*len, GFP_KERNEL);
if (!kbuf)
return ENOMEM;
- error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags, NULL);
+ error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
if (error)
goto out_kfree;
@@ -546,7 +546,7 @@ xfs_attrmulti_attr_set(
if (copy_from_user(kbuf, ubuf, len))
goto out_kfree;
-
+
error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
out_kfree:
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index a1237dad643..2bf287ef548 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -511,7 +511,8 @@ xfs_vn_rename(
xfs_dentry_to_name(&nname, ndentry);
error = xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
- XFS_I(ndir), &nname);
+ XFS_I(ndir), &nname, new_inode ?
+ XFS_I(new_inode) : NULL);
if (likely(!error)) {
if (new_inode)
xfs_validate_fields(new_inode);
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index e5143323e71..4edc46915b5 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -75,6 +75,7 @@
#include <linux/delay.h>
#include <linux/log2.h>
#include <linux/spinlock.h>
+#include <linux/random.h>
#include <asm/page.h>
#include <asm/div64.h>
@@ -99,7 +100,6 @@
/*
* Feature macros (disable/enable)
*/
-#define HAVE_SPLICE /* a splice(2) exists in 2.6, but not in 2.4 */
#ifdef CONFIG_SMP
#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
#else
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 1ebd8004469..5e3b57516ec 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -394,7 +394,7 @@ xfs_zero_last_block(
int error = 0;
xfs_bmbt_irec_t imap;
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
zero_offset = XFS_B_FSB_OFFSET(mp, isize);
if (zero_offset == 0) {
@@ -425,14 +425,14 @@ xfs_zero_last_block(
* out sync. We need to drop the ilock while we do this so we
* don't deadlock when the buffer cache calls back to us.
*/
- xfs_iunlock(ip, XFS_ILOCK_EXCL| XFS_EXTSIZE_RD);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
zero_len = mp->m_sb.sb_blocksize - zero_offset;
if (isize + zero_len > offset)
zero_len = offset - isize;
error = xfs_iozero(ip, isize, zero_len);
- xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(error >= 0);
return error;
}
@@ -465,8 +465,7 @@ xfs_zero_eof(
int error = 0;
xfs_bmbt_irec_t imap;
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(offset > isize);
/*
@@ -475,8 +474,7 @@ xfs_zero_eof(
*/
error = xfs_zero_last_block(ip, offset, isize);
if (error) {
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
}
@@ -507,8 +505,7 @@ xfs_zero_eof(
error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
0, NULL, 0, &imap, &nimaps, NULL, NULL);
if (error) {
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
}
ASSERT(nimaps > 0);
@@ -532,7 +529,7 @@ xfs_zero_eof(
* Drop the inode lock while we're doing the I/O.
* We'll still have the iolock to protect us.
*/
- xfs_iunlock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
@@ -548,13 +545,13 @@ xfs_zero_eof(
start_zero_fsb = imap.br_startoff + imap.br_blockcount;
ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
- xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
}
return 0;
out_lock:
- xfs_ilock(ip, XFS_ILOCK_EXCL|XFS_EXTSIZE_RD);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
ASSERT(error >= 0);
return error;
}
diff --git a/fs/xfs/linux-2.6/xfs_lrw.h b/fs/xfs/linux-2.6/xfs_lrw.h
index e1d498b4ba7..e6be37dbd0e 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.h
+++ b/fs/xfs/linux-2.6/xfs_lrw.h
@@ -50,7 +50,6 @@ struct xfs_iomap;
#define XFS_INVAL_CACHED 18
#define XFS_DIORD_ENTER 19
#define XFS_DIOWR_ENTER 20
-#define XFS_SENDFILE_ENTER 21
#define XFS_WRITEPAGE_ENTER 22
#define XFS_RELEASEPAGE_ENTER 23
#define XFS_INVALIDPAGE_ENTER 24
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 865eb708aa9..742b2c7852c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1181,7 +1181,7 @@ xfs_fs_statfs(
statp->f_fsid.val[0] = (u32)id;
statp->f_fsid.val[1] = (u32)(id >> 32);
- xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
+ xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
spin_lock(&mp->m_sb_lock);
statp->f_bsize = sbp->sb_blocksize;
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
index 8b4d63ce869..9d73cb5c0fc 100644
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ b/fs/xfs/linux-2.6/xfs_vnode.h
@@ -25,12 +25,6 @@ struct attrlist_cursor_kern;
typedef struct inode bhv_vnode_t;
-#define VN_ISLNK(vp) S_ISLNK((vp)->i_mode)
-#define VN_ISREG(vp) S_ISREG((vp)->i_mode)
-#define VN_ISDIR(vp) S_ISDIR((vp)->i_mode)
-#define VN_ISCHR(vp) S_ISCHR((vp)->i_mode)
-#define VN_ISBLK(vp) S_ISBLK((vp)->i_mode)
-
/*
* Vnode to Linux inode mapping.
*/
@@ -151,24 +145,6 @@ typedef struct bhv_vattr {
XFS_AT_TYPE|XFS_AT_BLKSIZE|XFS_AT_NBLOCKS|XFS_AT_VCODE|\
XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|XFS_AT_GENCOUNT)
-/*
- * Modes.
- */
-#define VSUID S_ISUID /* set user id on execution */
-#define VSGID S_ISGID /* set group id on execution */
-#define VSVTX S_ISVTX /* save swapped text even after use */
-#define VREAD S_IRUSR /* read, write, execute permissions */
-#define VWRITE S_IWUSR
-#define VEXEC S_IXUSR
-
-#define MODEMASK S_IALLUGO /* mode bits plus permission bits */
-
-/*
- * Check whether mandatory file locking is enabled.
- */
-#define MANDLOCK(vp, mode) \
- (VN_ISREG(vp) && ((mode) & (VSGID|(VEXEC>>3))) == VSGID)
-
extern void vn_init(void);
extern int vn_revalidate(bhv_vnode_t *);
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
index 631ebb31b29..85df3288efd 100644
--- a/fs/xfs/quota/xfs_dquot.c
+++ b/fs/xfs/quota/xfs_dquot.c
@@ -933,7 +933,7 @@ xfs_qm_dqget(
type == XFS_DQ_PROJ ||
type == XFS_DQ_GROUP);
if (ip) {
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (type == XFS_DQ_USER)
ASSERT(ip->i_udquot == NULL);
else
@@ -1088,7 +1088,7 @@ xfs_qm_dqget(
xfs_qm_mplist_unlock(mp);
XFS_DQ_HASH_UNLOCK(h);
dqret:
- ASSERT((ip == NULL) || XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
xfs_dqtrace_entry(dqp, "DQGET DONE");
*O_dqpp = dqp;
return (0);
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
index 40ea5640956..d31cce1165c 100644
--- a/fs/xfs/quota/xfs_qm.c
+++ b/fs/xfs/quota/xfs_qm.c
@@ -670,7 +670,7 @@ xfs_qm_dqattach_one(
xfs_dquot_t *dqp;
int error;
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
error = 0;
/*
* See if we already have it in the inode itself. IO_idqpp is
@@ -874,7 +874,7 @@ xfs_qm_dqattach(
return 0;
ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
- XFS_ISLOCKED_INODE_EXCL(ip));
+ xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (! (flags & XFS_QMOPT_ILOCKED))
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -888,7 +888,8 @@ xfs_qm_dqattach(
goto done;
nquotas++;
}
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (XFS_IS_OQUOTA_ON(mp)) {
error = XFS_IS_GQUOTA_ON(mp) ?
xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
@@ -913,7 +914,7 @@ xfs_qm_dqattach(
* This WON'T, in general, result in a thrash.
*/
if (nquotas == 2) {
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_udquot);
ASSERT(ip->i_gdquot);
@@ -956,7 +957,7 @@ xfs_qm_dqattach(
#ifdef QUOTADEBUG
else
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
#endif
return error;
}
@@ -1291,7 +1292,7 @@ xfs_qm_dqget_noattach(
xfs_mount_t *mp;
xfs_dquot_t *udqp, *gdqp;
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
mp = ip->i_mount;
udqp = NULL;
gdqp = NULL;
@@ -1392,7 +1393,7 @@ xfs_qm_qino_alloc(
* Keep an extra reference to this quota inode. This inode is
* locked exclusively and joined to the transaction already.
*/
- ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
+ ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
VN_HOLD(XFS_ITOV((*ip)));
/*
@@ -1737,12 +1738,6 @@ xfs_qm_dqusage_adjust(
return error;
}
- if (ip->i_d.di_mode == 0) {
- xfs_iput_new(ip, XFS_ILOCK_EXCL);
- *res = BULKSTAT_RV_NOTHING;
- return XFS_ERROR(ENOENT);
- }
-
/*
* Obtain the locked dquots. In case of an error (eg. allocation
* fails for ENOSPC), we return the negative of the error number
@@ -2563,7 +2558,7 @@ xfs_qm_vop_chown(
uint bfield = XFS_IS_REALTIME_INODE(ip) ?
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
/* old dquot */
@@ -2607,7 +2602,7 @@ xfs_qm_vop_chown_reserve(
uint delblks, blkflags, prjflags = 0;
xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
- ASSERT(XFS_ISLOCKED_INODE(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
mp = ip->i_mount;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -2717,7 +2712,7 @@ xfs_qm_vop_dqattach_and_dqmod_newinode(
if (!XFS_IS_QUOTA_ON(tp->t_mountp))
return;
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
if (udqp) {
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 8342823dbdc..768a3b27d2b 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -1366,12 +1366,6 @@ xfs_qm_internalqcheck_adjust(
return (error);
}
- if (ip->i_d.di_mode == 0) {
- xfs_iput_new(ip, lock_flags);
- *res = BULKSTAT_RV_NOTHING;
- return XFS_ERROR(ENOENT);
- }
-
/*
* This inode can have blocks after eof which can get released
* when we send it to inactive. Since we don't check the dquot
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
index a8b85e2be9d..5e4a40b1c56 100644
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ b/fs/xfs/quota/xfs_quota_priv.h
@@ -27,11 +27,6 @@
/* Number of dquots that fit in to a dquot block */
#define XFS_QM_DQPERBLK(mp) ((mp)->m_quotainfo->qi_dqperchunk)
-#define XFS_ISLOCKED_INODE(ip) (ismrlocked(&(ip)->i_lock, \
- MR_UPDATE | MR_ACCESS) != 0)
-#define XFS_ISLOCKED_INODE_EXCL(ip) (ismrlocked(&(ip)->i_lock, \
- MR_UPDATE) != 0)
-
#define XFS_DQ_IS_ADDEDTO_TRX(t, d) ((d)->q_transp == (t))
#define XFS_QI_MPLRECLAIMS(mp) ((mp)->m_quotainfo->qi_dqreclaims)
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
index f441f836ca8..99611381e74 100644
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ b/fs/xfs/quota/xfs_trans_dquot.c
@@ -834,7 +834,7 @@ xfs_trans_reserve_quota_nblks(
ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
- ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
XFS_TRANS_DQ_RES_RTBLKS ||
diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h
index 855da040864..75845f95081 100644
--- a/fs/xfs/support/debug.h
+++ b/fs/xfs/support/debug.h
@@ -49,8 +49,6 @@ extern void assfail(char *expr, char *f, int l);
#else /* DEBUG */
-#include <linux/random.h>
-
#define ASSERT(expr) \
(unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 765aaf65e2d..540e4c98982 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,7 +22,7 @@
#define STATIC
#define DEBUG 1
#define XFS_BUF_LOCK_TRACKING 1
-#define QUOTADEBUG 1
+/* #define QUOTADEBUG 1 */
#endif
#ifdef CONFIG_XFS_TRACE
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 8e130b9720a..ebee3a4f703 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -72,7 +72,7 @@ xfs_acl_vhasacl_default(
{
int error;
- if (!VN_ISDIR(vp))
+ if (!S_ISDIR(vp->i_mode))
return 0;
xfs_acl_get_attr(vp, NULL, _ACL_TYPE_DEFAULT, ATTR_KERNOVAL, &error);
return (error == 0);
@@ -238,15 +238,8 @@ xfs_acl_vget(
error = EINVAL;
goto out;
}
- if (kind == _ACL_TYPE_ACCESS) {
- bhv_vattr_t va;
-
- va.va_mask = XFS_AT_MODE;
- error = xfs_getattr(xfs_vtoi(vp), &va, 0);
- if (error)
- goto out;
- xfs_acl_sync_mode(va.va_mode, xfs_acl);
- }
+ if (kind == _ACL_TYPE_ACCESS)
+ xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, xfs_acl);
error = -posix_acl_xfs_to_xattr(xfs_acl, ext_acl, size);
}
out:
@@ -341,14 +334,15 @@ xfs_acl_iaccess(
{
xfs_acl_t *acl;
int rval;
+ struct xfs_name acl_name = {SGI_ACL_FILE, SGI_ACL_FILE_SIZE};
if (!(_ACL_ALLOC(acl)))
return -1;
/* If the file has no ACL return -1. */
rval = sizeof(xfs_acl_t);
- if (xfs_attr_fetch(ip, SGI_ACL_FILE, SGI_ACL_FILE_SIZE,
- (char *)acl, &rval, ATTR_ROOT | ATTR_KERNACCESS, cr)) {
+ if (xfs_attr_fetch(ip, &acl_name, (char *)acl, &rval,
+ ATTR_ROOT | ATTR_KERNACCESS)) {
_ACL_FREE(acl);
return -1;
}
@@ -373,23 +367,15 @@ xfs_acl_allow_set(
bhv_vnode_t *vp,
int kind)
{
- xfs_inode_t *ip = xfs_vtoi(vp);
- bhv_vattr_t va;
- int error;
-
if (vp->i_flags & (S_IMMUTABLE|S_APPEND))
return EPERM;
- if (kind == _ACL_TYPE_DEFAULT && !VN_ISDIR(vp))
+ if (kind == _ACL_TYPE_DEFAULT && !S_ISDIR(vp->i_mode))
return ENOTDIR;
if (vp->i_sb->s_flags & MS_RDONLY)
return EROFS;
- va.va_mask = XFS_AT_UID;
- error = xfs_getattr(ip, &va, 0);
- if (error)
- return error;
- if (va.va_uid != current->fsuid && !capable(CAP_FOWNER))
+ if (xfs_vtoi(vp)->i_d.di_uid != current->fsuid && !capable(CAP_FOWNER))
return EPERM;
- return error;
+ return 0;
}
/*
@@ -594,7 +580,7 @@ xfs_acl_get_attr(
*error = xfs_attr_get(xfs_vtoi(vp),
kind == _ACL_TYPE_ACCESS ?
SGI_ACL_FILE : SGI_ACL_DEFAULT,
- (char *)aclp, &len, flags, sys_cred);
+ (char *)aclp, &len, flags);
if (*error || (flags & ATTR_KERNOVAL))
return;
xfs_acl_get_endian(aclp);
@@ -643,7 +629,6 @@ xfs_acl_vtoacl(
xfs_acl_t *access_acl,
xfs_acl_t *default_acl)
{
- bhv_vattr_t va;
int error = 0;
if (access_acl) {
@@ -652,16 +637,10 @@ xfs_acl_vtoacl(
* be obtained for some reason, invalidate the access ACL.
*/
xfs_acl_get_attr(vp, access_acl, _ACL_TYPE_ACCESS, 0, &error);
- if (!error) {
- /* Got the ACL, need the mode... */
- va.va_mask = XFS_AT_MODE;
- error = xfs_getattr(xfs_vtoi(vp), &va, 0);
- }
-
if (error)
access_acl->acl_cnt = XFS_ACL_NOT_PRESENT;
else /* We have a good ACL and the file mode, synchronize. */
- xfs_acl_sync_mode(va.va_mode, access_acl);
+ xfs_acl_sync_mode(xfs_vtoi(vp)->i_d.di_mode, access_acl);
}
if (default_acl) {
@@ -719,7 +698,7 @@ xfs_acl_inherit(
* If the new file is a directory, its default ACL is a copy of
* the containing directory's default ACL.
*/
- if (VN_ISDIR(vp))
+ if (S_ISDIR(vp->i_mode))
xfs_acl_set_attr(vp, pdaclp, _ACL_TYPE_DEFAULT, &error);
if (!error && !basicperms)
xfs_acl_set_attr(vp, cacl, _ACL_TYPE_ACCESS, &error);
@@ -744,7 +723,7 @@ xfs_acl_setmode(
bhv_vattr_t va;
xfs_acl_entry_t *ap;
xfs_acl_entry_t *gap = NULL;
- int i, error, nomask = 1;
+ int i, nomask = 1;
*basicperms = 1;
@@ -756,11 +735,7 @@ xfs_acl_setmode(
* mode. The m:: bits take precedence over the g:: bits.
*/
va.va_mask = XFS_AT_MODE;
- error = xfs_getattr(xfs_vtoi(vp), &va, 0);
- if (error)
- return error;
-
- va.va_mask = XFS_AT_MODE;
+ va.va_mode = xfs_vtoi(vp)->i_d.di_mode;
va.va_mode &= ~(S_IRWXU|S_IRWXG|S_IRWXO);
ap = acl->acl_entry;
for (i = 0; i < acl->acl_cnt; ++i) {
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 36d781ee5fc..df151a85918 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -101,14 +101,28 @@ STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
ktrace_t *xfs_attr_trace_buf;
#endif
+STATIC int
+xfs_attr_name_to_xname(
+ struct xfs_name *xname,
+ const char *aname)
+{
+ if (!aname)
+ return EINVAL;
+ xname->name = aname;
+ xname->len = strlen(aname);
+ if (xname->len >= MAXNAMELEN)
+ return EFAULT; /* match IRIX behaviour */
+
+ return 0;
+}
/*========================================================================
* Overall external interface routines.
*========================================================================*/
int
-xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
- char *value, int *valuelenp, int flags, struct cred *cred)
+xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
+ char *value, int *valuelenp, int flags)
{
xfs_da_args_t args;
int error;
@@ -122,8 +136,8 @@ xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
* Fill in the arg structure for this request.
*/
memset((char *)&args, 0, sizeof(args));
- args.name = name;
- args.namelen = namelen;
+ args.name = name->name;
+ args.namelen = name->len;
args.value = value;
args.valuelen = *valuelenp;
args.flags = flags;
@@ -162,31 +176,29 @@ xfs_attr_get(
const char *name,
char *value,
int *valuelenp,
- int flags,
- cred_t *cred)
+ int flags)
{
- int error, namelen;
+ int error;
+ struct xfs_name xname;
XFS_STATS_INC(xs_attr_get);
- if (!name)
- return(EINVAL);
- namelen = strlen(name);
- if (namelen >= MAXNAMELEN)
- return(EFAULT); /* match IRIX behaviour */
-
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return(EIO);
+ error = xfs_attr_name_to_xname(&xname, name);
+ if (error)
+ return error;
+
xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred);
+ error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return(error);
}
-int
-xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
- char *value, int valuelen, int flags)
+STATIC int
+xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
+ char *value, int valuelen, int flags)
{
xfs_da_args_t args;
xfs_fsblock_t firstblock;
@@ -209,7 +221,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
*/
if (XFS_IFORK_Q(dp) == 0) {
int sf_size = sizeof(xfs_attr_sf_hdr_t) +
- XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen);
+ XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
return(error);
@@ -219,8 +231,8 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
* Fill in the arg structure for this request.
*/
memset((char *)&args, 0, sizeof(args));
- args.name = name;
- args.namelen = namelen;
+ args.name = name->name;
+ args.namelen = name->len;
args.value = value;
args.valuelen = valuelen;
args.flags = flags;
@@ -236,7 +248,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
* Determine space new attribute will use, and if it would be
* "local" or "remote" (note: local != inline).
*/
- size = xfs_attr_leaf_newentsize(namelen, valuelen,
+ size = xfs_attr_leaf_newentsize(name->len, valuelen,
mp->m_sb.sb_blocksize, &local);
nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
@@ -429,26 +441,27 @@ xfs_attr_set(
int valuelen,
int flags)
{
- int namelen;
-
- namelen = strlen(name);
- if (namelen >= MAXNAMELEN)
- return EFAULT; /* match IRIX behaviour */
+ int error;
+ struct xfs_name xname;
XFS_STATS_INC(xs_attr_set);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return (EIO);
- return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
+ error = xfs_attr_name_to_xname(&xname, name);
+ if (error)
+ return error;
+
+ return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
}
/*
* Generic handler routine to remove a name from an attribute list.
* Transitions attribute list from Btree to shortform as necessary.
*/
-int
-xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
+STATIC int
+xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
{
xfs_da_args_t args;
xfs_fsblock_t firstblock;
@@ -460,8 +473,8 @@ xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
* Fill in the arg structure for this request.
*/
memset((char *)&args, 0, sizeof(args));
- args.name = name;
- args.namelen = namelen;
+ args.name = name->name;
+ args.namelen = name->len;
args.flags = flags;
args.hashval = xfs_da_hashname(args.name, args.namelen);
args.dp = dp;
@@ -575,17 +588,18 @@ xfs_attr_remove(
const char *name,
int flags)
{
- int namelen;
-
- namelen = strlen(name);
- if (namelen >= MAXNAMELEN)
- return EFAULT; /* match IRIX behaviour */
+ int error;
+ struct xfs_name xname;
XFS_STATS_INC(xs_attr_remove);
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return (EIO);
+ error = xfs_attr_name_to_xname(&xname, name);
+ if (error)
+ return error;
+
xfs_ilock(dp, XFS_ILOCK_SHARED);
if (XFS_IFORK_Q(dp) == 0 ||
(dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
@@ -595,10 +609,10 @@ xfs_attr_remove(
}
xfs_iunlock(dp, XFS_ILOCK_SHARED);
- return xfs_attr_remove_int(dp, name, namelen, flags);
+ return xfs_attr_remove_int(dp, &xname, flags);
}
-int /* error */
+STATIC int
xfs_attr_list_int(xfs_attr_list_context_t *context)
{
int error;
@@ -2522,8 +2536,7 @@ attr_generic_get(
{
int error, asize = size;
- error = xfs_attr_get(xfs_vtoi(vp), name, data,
- &asize, xflags, NULL);
+ error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
if (!error)
return asize;
return -error;
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index 786eba3121c..6cfc9384fe3 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -158,14 +158,10 @@ struct xfs_da_args;
/*
* Overall external interface routines.
*/
-int xfs_attr_set_int(struct xfs_inode *, const char *, int, char *, int, int);
-int xfs_attr_remove_int(struct xfs_inode *, const char *, int, int);
-int xfs_attr_list_int(struct xfs_attr_list_context *);
int xfs_attr_inactive(struct xfs_inode *dp);
int xfs_attr_shortform_getvalue(struct xfs_da_args *);
-int xfs_attr_fetch(struct xfs_inode *, const char *, int,
- char *, int *, int, struct cred *);
+int xfs_attr_fetch(struct xfs_inode *, struct xfs_name *, char *, int *, int);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index eb198c01c35..53c259f5a5a 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4074,7 +4074,6 @@ xfs_bmap_add_attrfork(
error2:
xfs_bmap_cancel(&flist);
error1:
- ASSERT(ismrlocked(&ip->i_lock,MR_UPDATE));
xfs_iunlock(ip, XFS_ILOCK_EXCL);
error0:
xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3f53fad356a..5f3647cb988 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -162,7 +162,7 @@ xfs_swap_extents(
ips[1] = ip;
}
- xfs_lock_inodes(ips, 2, 0, lock_flags);
+ xfs_lock_inodes(ips, 2, lock_flags);
locked = 1;
/* Verify that both files have the same format */
@@ -265,7 +265,7 @@ xfs_swap_extents(
locked = 0;
goto error0;
}
- xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
+ xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
/*
* Count the number of extended attribute blocks
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index d3a0f538d6a..381ebda4f7b 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
xfs_mount_t *mp,
xfs_fsop_counts_t *cnt)
{
- xfs_icsb_sync_counters_flags(mp, XFS_ICSB_LAZY_COUNT);
+ xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
spin_lock(&mp->m_sb_lock);
cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
cnt->freertx = mp->m_sb.sb_frextents;
@@ -524,7 +524,7 @@ xfs_reserve_blocks(
*/
retry:
spin_lock(&mp->m_sb_lock);
- xfs_icsb_sync_counters_flags(mp, XFS_ICSB_SB_LOCKED);
+ xfs_icsb_sync_counters_locked(mp, 0);
/*
* If our previous reservation was larger than the current value,
@@ -552,11 +552,8 @@ retry:
mp->m_resblks += free;
mp->m_resblks_avail += free;
fdblks_delta = -free;
- mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
} else {
fdblks_delta = -delta;
- mp->m_sb.sb_fdblocks =
- lcounter + XFS_ALLOC_SET_ASIDE(mp);
mp->m_resblks = request;
mp->m_resblks_avail += delta;
}
@@ -587,7 +584,6 @@ out:
if (error == ENOSPC)
goto retry;
}
-
return 0;
}
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index a64dfbd565a..aad8c5da38a 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -147,6 +147,7 @@ xfs_ialloc_ag_alloc(
int version; /* inode version number to use */
int isaligned = 0; /* inode allocation at stripe unit */
/* boundary */
+ unsigned int gen;
args.tp = tp;
args.mp = tp->t_mountp;
@@ -290,6 +291,14 @@ xfs_ialloc_ag_alloc(
else
version = XFS_DINODE_VERSION_1;
+ /*
+ * Seed the new inode cluster with a random generation number. This
+ * prevents short-term reuse of generation numbers if a chunk is
+ * freed and then immediately reallocated. We use random numbers
+ * rather than a linear progression to prevent the next generation
+ * number from being easily guessable.
+ */
+ gen = random32();
for (j = 0; j < nbufs; j++) {
/*
* Get the block.
@@ -309,6 +318,7 @@ xfs_ialloc_ag_alloc(
free = XFS_MAKE_IPTR(args.mp, fbuf, i);
free->di_core.di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
free->di_core.di_version = version;
+ free->di_core.di_gen = cpu_to_be32(gen);
free->di_next_unlinked = cpu_to_be32(NULLAGINO);
xfs_ialloc_log_di(tp, fbuf, i,
XFS_DI_CORE_BITS | XFS_DI_NEXT_UNLINKED);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index e657c512846..b07604b94d9 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -593,8 +593,9 @@ xfs_iunlock_map_shared(
* XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL
*/
void
-xfs_ilock(xfs_inode_t *ip,
- uint lock_flags)
+xfs_ilock(
+ xfs_inode_t *ip,
+ uint lock_flags)
{
/*
* You can't set both SHARED and EXCL for the same lock,
@@ -607,16 +608,16 @@ xfs_ilock(xfs_inode_t *ip,
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
- if (lock_flags & XFS_IOLOCK_EXCL) {
+ if (lock_flags & XFS_IOLOCK_EXCL)
mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
- } else if (lock_flags & XFS_IOLOCK_SHARED) {
+ else if (lock_flags & XFS_IOLOCK_SHARED)
mraccess_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
- }
- if (lock_flags & XFS_ILOCK_EXCL) {
+
+ if (lock_flags & XFS_ILOCK_EXCL)
mrupdate_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
- } else if (lock_flags & XFS_ILOCK_SHARED) {
+ else if (lock_flags & XFS_ILOCK_SHARED)
mraccess_nested(&ip->i_lock, XFS_ILOCK_DEP(lock_flags));
- }
+
xfs_ilock_trace(ip, 1, lock_flags, (inst_t *)__return_address);
}
@@ -631,15 +632,12 @@ xfs_ilock(xfs_inode_t *ip,
* lock_flags -- this parameter indicates the inode's locks
* to be locked. See the comment for xfs_ilock() for a list
* of valid values.
- *
*/
int
-xfs_ilock_nowait(xfs_inode_t *ip,
- uint lock_flags)
+xfs_ilock_nowait(
+ xfs_inode_t *ip,
+ uint lock_flags)
{
- int iolocked;
- int ilocked;
-
/*
* You can't set both SHARED and EXCL for the same lock,
* and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
@@ -651,37 +649,30 @@ xfs_ilock_nowait(xfs_inode_t *ip,
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
- iolocked = 0;
if (lock_flags & XFS_IOLOCK_EXCL) {
- iolocked = mrtryupdate(&ip->i_iolock);
- if (!iolocked) {
- return 0;
- }
+ if (!mrtryupdate(&ip->i_iolock))
+ goto out;
} else if (lock_flags & XFS_IOLOCK_SHARED) {
- iolocked = mrtryaccess(&ip->i_iolock);
- if (!iolocked) {
- return 0;
- }
+ if (!mrtryaccess(&ip->i_iolock))
+ goto out;
}
if (lock_flags & XFS_ILOCK_EXCL) {
- ilocked = mrtryupdate(&ip->i_lock);
- if (!ilocked) {
- if (iolocked) {
- mrunlock(&ip->i_iolock);
- }
- return 0;
- }
+ if (!mrtryupdate(&ip->i_lock))
+ goto out_undo_iolock;
} else if (lock_flags & XFS_ILOCK_SHARED) {
- ilocked = mrtryaccess(&ip->i_lock);
- if (!ilocked) {
- if (iolocked) {
- mrunlock(&ip->i_iolock);
- }
- return 0;
- }
+ if (!mrtryaccess(&ip->i_lock))
+ goto out_undo_iolock;
}
xfs_ilock_trace(ip, 2, lock_flags, (inst_t *)__return_address);
return 1;
+
+ out_undo_iolock:
+ if (lock_flags & XFS_IOLOCK_EXCL)
+ mrunlock_excl(&ip->i_iolock);
+ else if (lock_flags & XFS_IOLOCK_SHARED)
+ mrunlock_shared(&ip->i_iolock);
+ out:
+ return 0;
}
/*
@@ -697,8 +688,9 @@ xfs_ilock_nowait(xfs_inode_t *ip,
*
*/
void
-xfs_iunlock(xfs_inode_t *ip,
- uint lock_flags)
+xfs_iunlock(
+ xfs_inode_t *ip,
+ uint lock_flags)
{
/*
* You can't set both SHARED and EXCL for the same lock,
@@ -713,31 +705,25 @@ xfs_iunlock(xfs_inode_t *ip,
XFS_LOCK_DEP_MASK)) == 0);
ASSERT(lock_flags != 0);
- if (lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) {
- ASSERT(!(lock_flags & XFS_IOLOCK_SHARED) ||
- (ismrlocked(&ip->i_iolock, MR_ACCESS)));
- ASSERT(!(lock_flags & XFS_IOLOCK_EXCL) ||
- (ismrlocked(&ip->i_iolock, MR_UPDATE)));
- mrunlock(&ip->i_iolock);
- }
+ if (lock_flags & XFS_IOLOCK_EXCL)
+ mrunlock_excl(&ip->i_iolock);
+ else if (lock_flags & XFS_IOLOCK_SHARED)
+ mrunlock_shared(&ip->i_iolock);
- if (lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) {
- ASSERT(!(lock_flags & XFS_ILOCK_SHARED) ||
- (ismrlocked(&ip->i_lock, MR_ACCESS)));
- ASSERT(!(lock_flags & XFS_ILOCK_EXCL) ||
- (ismrlocked(&ip->i_lock, MR_UPDATE)));
- mrunlock(&ip->i_lock);
+ if (lock_flags & XFS_ILOCK_EXCL)
+ mrunlock_excl(&ip->i_lock);
+ else if (lock_flags & XFS_ILOCK_SHARED)
+ mrunlock_shared(&ip->i_lock);
+ if ((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) &&
+ !(lock_flags & XFS_IUNLOCK_NONOTIFY) && ip->i_itemp) {
/*
* Let the AIL know that this item has been unlocked in case
* it is in the AIL and anyone is waiting on it. Don't do
* this if the caller has asked us not to.
*/
- if (!(lock_flags & XFS_IUNLOCK_NONOTIFY) &&
- ip->i_itemp != NULL) {
- xfs_trans_unlocked_item(ip->i_mount,
- (xfs_log_item_t*)(ip->i_itemp));
- }
+ xfs_trans_unlocked_item(ip->i_mount,
+ (xfs_log_item_t*)(ip->i_itemp));
}
xfs_ilock_trace(ip, 3, lock_flags, (inst_t *)__return_address);
}
@@ -747,21 +733,47 @@ xfs_iunlock(xfs_inode_t *ip,
* if it is being demoted.
*/
void
-xfs_ilock_demote(xfs_inode_t *ip,
- uint lock_flags)
+xfs_ilock_demote(
+ xfs_inode_t *ip,
+ uint lock_flags)
{
ASSERT(lock_flags & (XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL));
ASSERT((lock_flags & ~(XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL)) == 0);
- if (lock_flags & XFS_ILOCK_EXCL) {
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ if (lock_flags & XFS_ILOCK_EXCL)
mrdemote(&ip->i_lock);
- }
- if (lock_flags & XFS_IOLOCK_EXCL) {
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
+ if (lock_flags & XFS_IOLOCK_EXCL)
mrdemote(&ip->i_iolock);
+}
+
+#ifdef DEBUG
+/*
+ * Debug-only routine, without additional rw_semaphore APIs, we can
+ * now only answer requests regarding whether we hold the lock for write
+ * (reader state is outside our visibility, we only track writer state).
+ *
+ * Note: this means !xfs_isilocked would give false positives, so don't do that.
+ */
+int
+xfs_isilocked(
+ xfs_inode_t *ip,
+ uint lock_flags)
+{
+ if ((lock_flags & (XFS_ILOCK_EXCL|XFS_ILOCK_SHARED)) ==
+ XFS_ILOCK_EXCL) {
+ if (!ip->i_lock.mr_writer)
+ return 0;
}
+
+ if ((lock_flags & (XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED)) ==
+ XFS_IOLOCK_EXCL) {
+ if (!ip->i_iolock.mr_writer)
+ return 0;
+ }
+
+ return 1;
}
+#endif
/*
* The following three routines simply manage the i_flock
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ca12acb9039..cf0bb9c1d62 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1291,7 +1291,7 @@ xfs_file_last_byte(
xfs_fileoff_t size_last_block;
int error;
- ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE | MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
mp = ip->i_mount;
/*
@@ -1402,7 +1402,7 @@ xfs_itruncate_start(
bhv_vnode_t *vp;
int error = 0;
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT((new_size == 0) || (new_size <= ip->i_size));
ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
(flags == XFS_ITRUNC_MAYBE));
@@ -1528,8 +1528,7 @@ xfs_itruncate_finish(
xfs_bmap_free_t free_list;
int error;
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE) != 0);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT((new_size == 0) || (new_size <= ip->i_size));
ASSERT(*tp != NULL);
ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
@@ -1780,8 +1779,7 @@ xfs_igrow_start(
xfs_fsize_t new_size,
cred_t *credp)
{
- ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
- ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(new_size > ip->i_size);
/*
@@ -1809,8 +1807,7 @@ xfs_igrow_finish(
xfs_fsize_t new_size,
int change_flag)
{
- ASSERT(ismrlocked(&(ip->i_lock), MR_UPDATE) != 0);
- ASSERT(ismrlocked(&(ip->i_iolock), MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
ASSERT(ip->i_transp == tp);
ASSERT(new_size > ip->i_size);
@@ -2287,7 +2284,7 @@ xfs_ifree(
xfs_dinode_t *dip;
xfs_buf_t *ibp;
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_transp == tp);
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0);
@@ -2746,7 +2743,7 @@ void
xfs_ipin(
xfs_inode_t *ip)
{
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
atomic_inc(&ip->i_pincount);
}
@@ -2779,7 +2776,7 @@ __xfs_iunpin_wait(
{
xfs_inode_log_item_t *iip = ip->i_itemp;
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE | MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
if (atomic_read(&ip->i_pincount) == 0)
return;
@@ -2829,7 +2826,7 @@ xfs_iextents_copy(
xfs_fsblock_t start_block;
ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(ifp->if_bytes > 0);
nrecs = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
@@ -3132,7 +3129,7 @@ xfs_iflush(
XFS_STATS_INC(xs_iflush_count);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(issemalocked(&(ip->i_flock)));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
@@ -3297,7 +3294,7 @@ xfs_iflush_int(
int first;
#endif
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE|MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
ASSERT(issemalocked(&(ip->i_flock)));
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
ip->i_d.di_nextents > ip->i_df.if_ext_max);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 93c37697a72..0a999fee4f0 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -386,20 +386,9 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags)
#define XFS_ILOCK_EXCL (1<<2)
#define XFS_ILOCK_SHARED (1<<3)
#define XFS_IUNLOCK_NONOTIFY (1<<4)
-/* #define XFS_IOLOCK_NESTED (1<<5) */
-#define XFS_EXTENT_TOKEN_RD (1<<6)
-#define XFS_SIZE_TOKEN_RD (1<<7)
-#define XFS_EXTSIZE_RD (XFS_EXTENT_TOKEN_RD|XFS_SIZE_TOKEN_RD)
-#define XFS_WILLLEND (1<<8) /* Always acquire tokens for lending */
-#define XFS_EXTENT_TOKEN_WR (XFS_EXTENT_TOKEN_RD | XFS_WILLLEND)
-#define XFS_SIZE_TOKEN_WR (XFS_SIZE_TOKEN_RD | XFS_WILLLEND)
-#define XFS_EXTSIZE_WR (XFS_EXTSIZE_RD | XFS_WILLLEND)
-/* TODO:XFS_SIZE_TOKEN_WANT (1<<9) */
#define XFS_LOCK_MASK (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED \
- | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED \
- | XFS_EXTENT_TOKEN_RD | XFS_SIZE_TOKEN_RD \
- | XFS_WILLLEND)
+ | XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)
/*
* Flags for lockdep annotations.
@@ -483,6 +472,7 @@ void xfs_ilock(xfs_inode_t *, uint);
int xfs_ilock_nowait(xfs_inode_t *, uint);
void xfs_iunlock(xfs_inode_t *, uint);
void xfs_ilock_demote(xfs_inode_t *, uint);
+int xfs_isilocked(xfs_inode_t *, uint);
void xfs_iflock(xfs_inode_t *);
int xfs_iflock_nowait(xfs_inode_t *);
uint xfs_ilock_map_shared(xfs_inode_t *);
@@ -534,7 +524,7 @@ int xfs_iflush(xfs_inode_t *, uint);
void xfs_iflush_all(struct xfs_mount *);
void xfs_ichgtime(xfs_inode_t *, int);
xfs_fsize_t xfs_file_last_byte(xfs_inode_t *);
-void xfs_lock_inodes(xfs_inode_t **, int, int, uint);
+void xfs_lock_inodes(xfs_inode_t **, int, uint);
void xfs_synchronize_atime(xfs_inode_t *);
void xfs_mark_inode_dirty_sync(xfs_inode_t *);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 93b5db453ea..167b33f1577 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -547,7 +547,7 @@ STATIC void
xfs_inode_item_pin(
xfs_inode_log_item_t *iip)
{
- ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE));
+ ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
xfs_ipin(iip->ili_inode);
}
@@ -664,13 +664,13 @@ xfs_inode_item_unlock(
ASSERT(iip != NULL);
ASSERT(iip->ili_inode->i_itemp != NULL);
- ASSERT(ismrlocked(&(iip->ili_inode->i_lock), MR_UPDATE));
+ ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
XFS_ILI_IOLOCKED_EXCL)) ||
- ismrlocked(&(iip->ili_inode->i_iolock), MR_UPDATE));
+ xfs_isilocked(iip->ili_inode, XFS_IOLOCK_EXCL));
ASSERT((!(iip->ili_inode->i_itemp->ili_flags &
XFS_ILI_IOLOCKED_SHARED)) ||
- ismrlocked(&(iip->ili_inode->i_iolock), MR_ACCESS));
+ xfs_isilocked(iip->ili_inode, XFS_IOLOCK_SHARED));
/*
* Clear the transaction pointer in the inode.
*/
@@ -769,7 +769,7 @@ xfs_inode_item_pushbuf(
ip = iip->ili_inode;
- ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
/*
* The ili_pushbuf_flag keeps others from
@@ -857,7 +857,7 @@ xfs_inode_item_push(
ip = iip->ili_inode;
- ASSERT(ismrlocked(&(ip->i_lock), MR_ACCESS));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED));
ASSERT(issemalocked(&(ip->i_flock)));
/*
* Since we were able to lock the inode's flush lock and
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index fb3cf119141..7edcde691d1 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -196,14 +196,14 @@ xfs_iomap(
break;
case BMAPI_WRITE:
xfs_iomap_enter_trace(XFS_IOMAP_WRITE_ENTER, ip, offset, count);
- lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
+ lockmode = XFS_ILOCK_EXCL;
if (flags & BMAPI_IGNSTATE)
bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
xfs_ilock(ip, lockmode);
break;
case BMAPI_ALLOCATE:
xfs_iomap_enter_trace(XFS_IOMAP_ALLOC_ENTER, ip, offset, count);
- lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
+ lockmode = XFS_ILOCK_SHARED;
bmapi_flags = XFS_BMAPI_ENTIRE;
/* Attempt non-blocking lock */
@@ -523,8 +523,7 @@ xfs_iomap_write_direct(
goto error_out;
}
- if (unlikely(!imap.br_startblock &&
- !(XFS_IS_REALTIME_INODE(ip)))) {
+ if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
error = xfs_cmn_err_fsblock_zero(ip, &imap);
goto error_out;
}
@@ -624,7 +623,7 @@ xfs_iomap_write_delay(
int prealloc, fsynced = 0;
int error;
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE) != 0);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
* Make sure that the dquots are there. This doesn't hold
@@ -686,8 +685,7 @@ retry:
goto retry;
}
- if (unlikely(!imap[0].br_startblock &&
- !(XFS_IS_REALTIME_INODE(ip))))
+ if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
*ret_imap = imap[0];
@@ -838,9 +836,9 @@ xfs_iomap_write_allocate(
* See if we were able to allocate an extent that
* covers at least part of the callers request
*/
- if (unlikely(!imap.br_startblock &&
- XFS_IS_REALTIME_INODE(ip)))
+ if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
return xfs_cmn_err_fsblock_zero(ip, &imap);
+
if ((offset_fsb >= imap.br_startoff) &&
(offset_fsb < (imap.br_startoff +
imap.br_blockcount))) {
@@ -934,8 +932,7 @@ xfs_iomap_write_unwritten(
if (error)
return XFS_ERROR(error);
- if (unlikely(!imap.br_startblock &&
- !(XFS_IS_REALTIME_INODE(ip))))
+ if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
return xfs_cmn_err_fsblock_zero(ip, &imap);
if ((numblks_fsb = imap.br_blockcount) == 0) {
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index eb85bdedad0..419de15aeb4 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -71,11 +71,6 @@ xfs_bulkstat_one_iget(
ASSERT(ip != NULL);
ASSERT(ip->i_blkno != (xfs_daddr_t)0);
- if (ip->i_d.di_mode == 0) {
- *stat = BULKSTAT_RV_NOTHING;
- error = XFS_ERROR(ENOENT);
- goto out_iput;
- }
vp = XFS_ITOV(ip);
dic = &ip->i_d;
@@ -124,7 +119,6 @@ xfs_bulkstat_one_iget(
break;
}
- out_iput:
xfs_iput(ip, XFS_ILOCK_SHARED);
return error;
}
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 2fec452afbc..da3988453b7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -54,8 +54,9 @@ STATIC void xfs_unmountfs_wait(xfs_mount_t *);
#ifdef HAVE_PERCPU_SB
STATIC void xfs_icsb_destroy_counters(xfs_mount_t *);
STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
- int, int);
-STATIC void xfs_icsb_sync_counters(xfs_mount_t *);
+ int);
+STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
+ int);
STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
int64_t, int);
STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
@@ -63,8 +64,8 @@ STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
#else
#define xfs_icsb_destroy_counters(mp) do { } while (0)
-#define xfs_icsb_balance_counter(mp, a, b, c) do { } while (0)
-#define xfs_icsb_sync_counters(mp) do { } while (0)
+#define xfs_icsb_balance_counter(mp, a, b) do { } while (0)
+#define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0)
#define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0)
#endif
@@ -1400,7 +1401,7 @@ xfs_log_sbcount(
if (!xfs_fs_writable(mp))
return 0;
- xfs_icsb_sync_counters(mp);
+ xfs_icsb_sync_counters(mp, 0);
/*
* we don't need to do this if we are updating the superblock
@@ -2026,9 +2027,9 @@ xfs_icsb_cpu_notify(
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
xfs_icsb_lock(mp);
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
xfs_icsb_unlock(mp);
break;
case CPU_DEAD:
@@ -2048,12 +2049,9 @@ xfs_icsb_cpu_notify(
memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT,
- XFS_ICSB_SB_LOCKED, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE,
- XFS_ICSB_SB_LOCKED, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS,
- XFS_ICSB_SB_LOCKED, 0);
+ xfs_icsb_balance_counter_locked(mp, XFS_SBS_ICOUNT, 0);
+ xfs_icsb_balance_counter_locked(mp, XFS_SBS_IFREE, 0);
+ xfs_icsb_balance_counter_locked(mp, XFS_SBS_FDBLOCKS, 0);
spin_unlock(&mp->m_sb_lock);
xfs_icsb_unlock(mp);
break;
@@ -2105,9 +2103,9 @@ xfs_icsb_reinit_counters(
* initial balance kicks us off correctly
*/
mp->m_icsb_counters = -1;
- xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0, 0);
- xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
+ xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
xfs_icsb_unlock(mp);
}
@@ -2223,7 +2221,7 @@ xfs_icsb_disable_counter(
if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
/* drain back to superblock */
- xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
+ xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
switch(field) {
case XFS_SBS_ICOUNT:
mp->m_sb.sb_icount = cnt.icsb_icount;
@@ -2278,38 +2276,33 @@ xfs_icsb_enable_counter(
}
void
-xfs_icsb_sync_counters_flags(
+xfs_icsb_sync_counters_locked(
xfs_mount_t *mp,
int flags)
{
xfs_icsb_cnts_t cnt;
- /* Pass 1: lock all counters */
- if ((flags & XFS_ICSB_SB_LOCKED) == 0)
- spin_lock(&mp->m_sb_lock);
-
xfs_icsb_count(mp, &cnt, flags);
- /* Step 3: update mp->m_sb fields */
if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
mp->m_sb.sb_icount = cnt.icsb_icount;
if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
mp->m_sb.sb_ifree = cnt.icsb_ifree;
if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
-
- if ((flags & XFS_ICSB_SB_LOCKED) == 0)
- spin_unlock(&mp->m_sb_lock);
}
/*
* Accurate update of per-cpu counters to incore superblock
*/
-STATIC void
+void
xfs_icsb_sync_counters(
- xfs_mount_t *mp)
+ xfs_mount_t *mp,
+ int flags)
{
- xfs_icsb_sync_counters_flags(mp, 0);
+ spin_lock(&mp->m_sb_lock);
+ xfs_icsb_sync_counters_locked(mp, flags);
+ spin_unlock(&mp->m_sb_lock);
}
/*
@@ -2332,19 +2325,15 @@ xfs_icsb_sync_counters(
#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
(uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
STATIC void
-xfs_icsb_balance_counter(
+xfs_icsb_balance_counter_locked(
xfs_mount_t *mp,
xfs_sb_field_t field,
- int flags,
int min_per_cpu)
{
uint64_t count, resid;
int weight = num_online_cpus();
uint64_t min = (uint64_t)min_per_cpu;
- if (!(flags & XFS_ICSB_SB_LOCKED))
- spin_lock(&mp->m_sb_lock);
-
/* disable counter and sync counter */
xfs_icsb_disable_counter(mp, field);
@@ -2354,19 +2343,19 @@ xfs_icsb_balance_counter(
count = mp->m_sb.sb_icount;
resid = do_div(count, weight);
if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
- goto out;
+ return;
break;
case XFS_SBS_IFREE:
count = mp->m_sb.sb_ifree;
resid = do_div(count, weight);
if (count < max(min, XFS_ICSB_INO_CNTR_REENABLE))
- goto out;
+ return;
break;
case XFS_SBS_FDBLOCKS:
count = mp->m_sb.sb_fdblocks;
resid = do_div(count, weight);
if (count < max(min, XFS_ICSB_FDBLK_CNTR_REENABLE(mp)))
- goto out;
+ return;
break;
default:
BUG();
@@ -2375,9 +2364,17 @@ xfs_icsb_balance_counter(
}
xfs_icsb_enable_counter(mp, field, count, resid);
-out:
- if (!(flags & XFS_ICSB_SB_LOCKED))
- spin_unlock(&mp->m_sb_lock);
+}
+
+STATIC void
+xfs_icsb_balance_counter(
+ xfs_mount_t *mp,
+ xfs_sb_field_t fields,
+ int min_per_cpu)
+{
+ spin_lock(&mp->m_sb_lock);
+ xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
+ spin_unlock(&mp->m_sb_lock);
}
STATIC int
@@ -2484,7 +2481,7 @@ slow_path:
* we are done.
*/
if (ret != ENOSPC)
- xfs_icsb_balance_counter(mp, field, 0, 0);
+ xfs_icsb_balance_counter(mp, field, 0);
xfs_icsb_unlock(mp);
return ret;
@@ -2508,7 +2505,7 @@ balance_counter:
* will either succeed through the fast path or slow path without
* another balance operation being required.
*/
- xfs_icsb_balance_counter(mp, field, 0, delta);
+ xfs_icsb_balance_counter(mp, field, delta);
xfs_icsb_unlock(mp);
goto again;
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1ed575110ff..63e0693a358 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -206,17 +206,18 @@ typedef struct xfs_icsb_cnts {
#define XFS_ICSB_FLAG_LOCK (1 << 0) /* counter lock bit */
-#define XFS_ICSB_SB_LOCKED (1 << 0) /* sb already locked */
#define XFS_ICSB_LAZY_COUNT (1 << 1) /* accuracy not needed */
extern int xfs_icsb_init_counters(struct xfs_mount *);
extern void xfs_icsb_reinit_counters(struct xfs_mount *);
-extern void xfs_icsb_sync_counters_flags(struct xfs_mount *, int);
+extern void xfs_icsb_sync_counters(struct xfs_mount *, int);
+extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int);
#else
#define xfs_icsb_init_counters(mp) (0)
#define xfs_icsb_reinit_counters(mp) do { } while (0)
-#define xfs_icsb_sync_counters_flags(mp, flags) do { } while (0)
+#define xfs_icsb_sync_counters(mp, flags) do { } while (0)
+#define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0)
#endif
typedef struct xfs_ail {
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index ee371890d85..d8063e1ad29 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -55,85 +55,32 @@ xfs_rename_unlock4(
xfs_iunlock(i_tab[0], lock_mode);
for (i = 1; i < 4; i++) {
- if (i_tab[i] == NULL) {
+ if (i_tab[i] == NULL)
break;
- }
+
/*
* Watch out for duplicate entries in the table.
*/
- if (i_tab[i] != i_tab[i-1]) {
+ if (i_tab[i] != i_tab[i-1])
xfs_iunlock(i_tab[i], lock_mode);
- }
}
}
-#ifdef DEBUG
-int xfs_rename_skip, xfs_rename_nskip;
-#endif
-
/*
- * The following routine will acquire the locks required for a rename
- * operation. The code understands the semantics of renames and will
- * validate that name1 exists under dp1 & that name2 may or may not
- * exist under dp2.
- *
- * We are renaming dp1/name1 to dp2/name2.
- *
- * Return ENOENT if dp1 does not exist, other lookup errors, or 0 for success.
+ * Enter all inodes for a rename transaction into a sorted array.
*/
-STATIC int
-xfs_lock_for_rename(
+STATIC void
+xfs_sort_for_rename(
xfs_inode_t *dp1, /* in: old (source) directory inode */
xfs_inode_t *dp2, /* in: new (target) directory inode */
xfs_inode_t *ip1, /* in: inode of old entry */
- struct xfs_name *name2, /* in: new entry name */
- xfs_inode_t **ipp2, /* out: inode of new entry, if it
+ xfs_inode_t *ip2, /* in: inode of new entry, if it
already exists, NULL otherwise. */
xfs_inode_t **i_tab,/* out: array of inode returned, sorted */
int *num_inodes) /* out: number of inodes in array */
{
- xfs_inode_t *ip2 = NULL;
xfs_inode_t *temp;
- xfs_ino_t inum1, inum2;
- int error;
int i, j;
- uint lock_mode;
- int diff_dirs = (dp1 != dp2);
-
- /*
- * First, find out the current inums of the entries so that we
- * can determine the initial locking order. We'll have to
- * sanity check stuff after all the locks have been acquired
- * to see if we still have the right inodes, directories, etc.
- */
- lock_mode = xfs_ilock_map_shared(dp1);
- IHOLD(ip1);
- xfs_itrace_ref(ip1);
-
- inum1 = ip1->i_ino;
-
- /*
- * Unlock dp1 and lock dp2 if they are different.
- */
- if (diff_dirs) {
- xfs_iunlock_map_shared(dp1, lock_mode);
- lock_mode = xfs_ilock_map_shared(dp2);
- }
-
- error = xfs_dir_lookup_int(dp2, lock_mode, name2, &inum2, &ip2);
- if (error == ENOENT) { /* target does not need to exist. */
- inum2 = 0;
- } else if (error) {
- /*
- * If dp2 and dp1 are the same, the next line unlocks dp1.
- * Got it?
- */
- xfs_iunlock_map_shared(dp2, lock_mode);
- IRELE (ip1);
- return error;
- } else {
- xfs_itrace_ref(ip2);
- }
/*
* i_tab contains a list of pointers to inodes. We initialize
@@ -145,21 +92,20 @@ xfs_lock_for_rename(
i_tab[0] = dp1;
i_tab[1] = dp2;
i_tab[2] = ip1;
- if (inum2 == 0) {
- *num_inodes = 3;
- i_tab[3] = NULL;
- } else {
+ if (ip2) {
*num_inodes = 4;
i_tab[3] = ip2;
+ } else {
+ *num_inodes = 3;
+ i_tab[3] = NULL;
}
- *ipp2 = i_tab[3];
/*
* Sort the elements via bubble sort. (Remember, there are at
* most 4 elements to sort, so this is adequate.)
*/
- for (i=0; i < *num_inodes; i++) {
- for (j=1; j < *num_inodes; j++) {
+ for (i = 0; i < *num_inodes; i++) {
+ for (j = 1; j < *num_inodes; j++) {
if (i_tab[j]->i_ino < i_tab[j-1]->i_ino) {
temp = i_tab[j];
i_tab[j] = i_tab[j-1];
@@ -167,30 +113,6 @@ xfs_lock_for_rename(
}
}
}
-
- /*
- * We have dp2 locked. If it isn't first, unlock it.
- * If it is first, tell xfs_lock_inodes so it can skip it
- * when locking. if dp1 == dp2, xfs_lock_inodes will skip both
- * since they are equal. xfs_lock_inodes needs all these inodes
- * so that it can unlock and retry if there might be a dead-lock
- * potential with the log.
- */
-
- if (i_tab[0] == dp2 && lock_mode == XFS_ILOCK_SHARED) {
-#ifdef DEBUG
- xfs_rename_skip++;
-#endif
- xfs_lock_inodes(i_tab, *num_inodes, 1, XFS_ILOCK_SHARED);
- } else {
-#ifdef DEBUG
- xfs_rename_nskip++;
-#endif
- xfs_iunlock_map_shared(dp2, lock_mode);
- xfs_lock_inodes(i_tab, *num_inodes, 0, XFS_ILOCK_SHARED);
- }
-
- return 0;
}
/*
@@ -202,10 +124,10 @@ xfs_rename(
struct xfs_name *src_name,
xfs_inode_t *src_ip,
xfs_inode_t *target_dp,
- struct xfs_name *target_name)
+ struct xfs_name *target_name,
+ xfs_inode_t *target_ip)
{
- xfs_trans_t *tp;
- xfs_inode_t *target_ip;
+ xfs_trans_t *tp = NULL;
xfs_mount_t *mp = src_dp->i_mount;
int new_parent; /* moving to a new dir */
int src_is_directory; /* src_name is a directory */
@@ -215,9 +137,7 @@ xfs_rename(
int cancel_flags;
int committed;
xfs_inode_t *inodes[4];
- int target_ip_dropped = 0; /* dropped target_ip link? */
int spaceres;
- int target_link_zero = 0;
int num_inodes;
xfs_itrace_entry(src_dp);
@@ -230,64 +150,27 @@ xfs_rename(
target_dp, DM_RIGHT_NULL,
src_name->name, target_name->name,
0, 0, 0);
- if (error) {
+ if (error)
return error;
- }
}
/* Return through std_return after this point. */
- /*
- * Lock all the participating inodes. Depending upon whether
- * the target_name exists in the target directory, and
- * whether the target directory is the same as the source
- * directory, we can lock from 2 to 4 inodes.
- * xfs_lock_for_rename() will return ENOENT if src_name
- * does not exist in the source directory.
- */
- tp = NULL;
- error = xfs_lock_for_rename(src_dp, target_dp, src_ip, target_name,
- &target_ip, inodes, &num_inodes);
- if (error) {
- /*
- * We have nothing locked, no inode references, and
- * no transaction, so just get out.
- */
- goto std_return;
- }
-
- ASSERT(src_ip != NULL);
+ new_parent = (src_dp != target_dp);
+ src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
- if ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (src_is_directory) {
/*
* Check for link count overflow on target_dp
*/
- if (target_ip == NULL && (src_dp != target_dp) &&
+ if (target_ip == NULL && new_parent &&
target_dp->i_d.di_nlink >= XFS_MAXLINK) {
error = XFS_ERROR(EMLINK);
- xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
- goto rele_return;
+ goto std_return;
}
}
- /*
- * If we are using project inheritance, we only allow renames
- * into our tree when the project IDs are the same; else the
- * tree quota mechanism would be circumvented.
- */
- if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
- (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
- error = XFS_ERROR(EXDEV);
- xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
- goto rele_return;
- }
-
- new_parent = (src_dp != target_dp);
- src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
-
- /*
- * Drop the locks on our inodes so that we can start the transaction.
- */
- xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
+ xfs_sort_for_rename(src_dp, target_dp, src_ip, target_ip,
+ inodes, &num_inodes);
XFS_BMAP_INIT(&free_list, &first_block);
tp = xfs_trans_alloc(mp, XFS_TRANS_RENAME);
@@ -302,7 +185,7 @@ xfs_rename(
}
if (error) {
xfs_trans_cancel(tp, 0);
- goto rele_return;
+ goto std_return;
}
/*
@@ -310,13 +193,29 @@ xfs_rename(
*/
if ((error = XFS_QM_DQVOPRENAME(mp, inodes))) {
xfs_trans_cancel(tp, cancel_flags);
- goto rele_return;
+ goto std_return;
}
/*
- * Reacquire the inode locks we dropped above.
+ * Lock all the participating inodes. Depending upon whether
+ * the target_name exists in the target directory, and
+ * whether the target directory is the same as the source
+ * directory, we can lock from 2 to 4 inodes.
+ */
+ xfs_lock_inodes(inodes, num_inodes, XFS_ILOCK_EXCL);
+
+ /*
+ * If we are using project inheritance, we only allow renames
+ * into our tree when the project IDs are the same; else the
+ * tree quota mechanism would be circumvented.
*/
- xfs_lock_inodes(inodes, num_inodes, 0, XFS_ILOCK_EXCL);
+ if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
+ (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) {
+ error = XFS_ERROR(EXDEV);
+ xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED);
+ xfs_trans_cancel(tp, cancel_flags);
+ goto std_return;
+ }
/*
* Join all the inodes to the transaction. From this point on,
@@ -328,17 +227,17 @@ xfs_rename(
*/
IHOLD(src_dp);
xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
+
if (new_parent) {
IHOLD(target_dp);
xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
}
- if ((src_ip != src_dp) && (src_ip != target_dp)) {
- xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
- }
- if ((target_ip != NULL) &&
- (target_ip != src_ip) &&
- (target_ip != src_dp) &&
- (target_ip != target_dp)) {
+
+ IHOLD(src_ip);
+ xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
+
+ if (target_ip) {
+ IHOLD(target_ip);
xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
}
@@ -412,7 +311,6 @@ xfs_rename(
error = xfs_droplink(tp, target_ip);
if (error)
goto abort_return;
- target_ip_dropped = 1;
if (src_is_directory) {
/*
@@ -422,10 +320,6 @@ xfs_rename(
if (error)
goto abort_return;
}
-
- /* Do this test while we still hold the locks */
- target_link_zero = (target_ip)->i_d.di_nlink==0;
-
} /* target_ip != NULL */
/*
@@ -492,15 +386,6 @@ xfs_rename(
}
/*
- * If there was a target inode, take an extra reference on
- * it here so that it doesn't go to xfs_inactive() from
- * within the commit.
- */
- if (target_ip != NULL) {
- IHOLD(target_ip);
- }
-
- /*
* If this is a synchronous mount, make sure that the
* rename transaction goes to disk before returning to
* the user.
@@ -509,30 +394,11 @@ xfs_rename(
xfs_trans_set_sync(tp);
}
- /*
- * Take refs. for vop_link_removed calls below. No need to worry
- * about directory refs. because the caller holds them.
- *
- * Do holds before the xfs_bmap_finish since it might rele them down
- * to zero.
- */
-
- if (target_ip_dropped)
- IHOLD(target_ip);
- IHOLD(src_ip);
-
error = xfs_bmap_finish(&tp, &free_list, &committed);
if (error) {
xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT));
- if (target_ip != NULL) {
- IRELE(target_ip);
- }
- if (target_ip_dropped) {
- IRELE(target_ip);
- }
- IRELE(src_ip);
goto std_return;
}
@@ -541,15 +407,6 @@ xfs_rename(
* the vnode references.
*/
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- if (target_ip != NULL)
- IRELE(target_ip);
- /*
- * Let interposed file systems know about removed links.
- */
- if (target_ip_dropped)
- IRELE(target_ip);
-
- IRELE(src_ip);
/* Fall through to std_return with error = 0 or errno from
* xfs_trans_commit */
@@ -571,11 +428,4 @@ std_return:
xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, cancel_flags);
goto std_return;
-
- rele_return:
- IRELE(src_ip);
- if (target_ip != NULL) {
- IRELE(target_ip);
- }
- goto std_return;
}
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b8db1d5cde5..4c70bf5e998 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -111,13 +111,13 @@ xfs_trans_iget(
*/
ASSERT(ip->i_itemp != NULL);
ASSERT(lock_flags & XFS_ILOCK_EXCL);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
- ismrlocked(&ip->i_iolock, MR_UPDATE));
+ xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT((!(lock_flags & XFS_IOLOCK_EXCL)) ||
(ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_EXCL));
ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
- ismrlocked(&ip->i_iolock, (MR_UPDATE | MR_ACCESS)));
+ xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
ASSERT((!(lock_flags & XFS_IOLOCK_SHARED)) ||
(ip->i_itemp->ili_flags & XFS_ILI_IOLOCKED_ANY));
@@ -185,7 +185,7 @@ xfs_trans_ijoin(
xfs_inode_log_item_t *iip;
ASSERT(ip->i_transp == NULL);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(lock_flags & XFS_ILOCK_EXCL);
if (ip->i_itemp == NULL)
xfs_inode_item_init(ip, ip->i_mount);
@@ -232,7 +232,7 @@ xfs_trans_ihold(
{
ASSERT(ip->i_transp == tp);
ASSERT(ip->i_itemp != NULL);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ip->i_itemp->ili_flags |= XFS_ILI_HOLD;
}
@@ -257,7 +257,7 @@ xfs_trans_log_inode(
ASSERT(ip->i_transp == tp);
ASSERT(ip->i_itemp != NULL);
- ASSERT(ismrlocked(&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
lidp = xfs_trans_find_item(tp, (xfs_log_item_t*)(ip->i_itemp));
ASSERT(lidp != NULL);
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 2b8dc7e4077..98e5f110ba5 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -41,49 +41,6 @@
#include "xfs_utils.h"
-int
-xfs_dir_lookup_int(
- xfs_inode_t *dp,
- uint lock_mode,
- struct xfs_name *name,
- xfs_ino_t *inum,
- xfs_inode_t **ipp)
-{
- int error;
-
- xfs_itrace_entry(dp);
-
- error = xfs_dir_lookup(NULL, dp, name, inum);
- if (!error) {
- /*
- * Unlock the directory. We do this because we can't
- * hold the directory lock while doing the vn_get()
- * in xfs_iget(). Doing so could cause us to hold
- * a lock while waiting for the inode to finish
- * being inactive while it's waiting for a log
- * reservation in the inactive routine.
- */
- xfs_iunlock(dp, lock_mode);
- error = xfs_iget(dp->i_mount, NULL, *inum, 0, 0, ipp, 0);
- xfs_ilock(dp, lock_mode);
-
- if (error) {
- *ipp = NULL;
- } else if ((*ipp)->i_d.di_mode == 0) {
- /*
- * The inode has been freed. Something is
- * wrong so just get out of here.
- */
- xfs_iunlock(dp, lock_mode);
- xfs_iput_new(*ipp, 0);
- *ipp = NULL;
- xfs_ilock(dp, lock_mode);
- error = XFS_ERROR(ENOENT);
- }
- }
- return error;
-}
-
/*
* Allocates a new inode from disk and return a pointer to the
* incore copy. This routine will internally commit the current
@@ -310,7 +267,7 @@ xfs_bump_ino_vers2(
{
xfs_mount_t *mp;
- ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(ip->i_d.di_version == XFS_DINODE_VERSION_1);
ip->i_d.di_version = XFS_DINODE_VERSION_2;
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h
index 175b126d2ca..f316cb85d8e 100644
--- a/fs/xfs/xfs_utils.h
+++ b/fs/xfs/xfs_utils.h
@@ -21,8 +21,6 @@
#define IRELE(ip) VN_RELE(XFS_ITOV(ip))
#define IHOLD(ip) VN_HOLD(XFS_ITOV(ip))
-extern int xfs_dir_lookup_int(xfs_inode_t *, uint, struct xfs_name *,
- xfs_ino_t *, xfs_inode_t **);
extern int xfs_truncate_file(xfs_mount_t *, xfs_inode_t *);
extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t,
xfs_dev_t, cred_t *, prid_t, int,
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index fc48158fe47..30bacd8bb0e 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -186,6 +186,7 @@ xfs_cleanup(void)
kmem_zone_destroy(xfs_efi_zone);
kmem_zone_destroy(xfs_ifork_zone);
kmem_zone_destroy(xfs_ili_zone);
+ kmem_zone_destroy(xfs_log_ticket_zone);
}
/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6650601c64f..70702a60b4b 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -76,132 +76,6 @@ xfs_open(
}
/*
- * xfs_getattr
- */
-int
-xfs_getattr(
- xfs_inode_t *ip,
- bhv_vattr_t *vap,
- int flags)
-{
- bhv_vnode_t *vp = XFS_ITOV(ip);
- xfs_mount_t *mp = ip->i_mount;
-
- xfs_itrace_entry(ip);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
-
- if (!(flags & ATTR_LAZY))
- xfs_ilock(ip, XFS_ILOCK_SHARED);
-
- vap->va_size = XFS_ISIZE(ip);
- if (vap->va_mask == XFS_AT_SIZE)
- goto all_done;
-
- vap->va_nblocks =
- XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
- vap->va_nodeid = ip->i_ino;
-#if XFS_BIG_INUMS
- vap->va_nodeid += mp->m_inoadd;
-#endif
- vap->va_nlink = ip->i_d.di_nlink;
-
- /*
- * Quick exit for non-stat callers
- */
- if ((vap->va_mask &
- ~(XFS_AT_SIZE|XFS_AT_FSID|XFS_AT_NODEID|
- XFS_AT_NLINK|XFS_AT_BLKSIZE)) == 0)
- goto all_done;
-
- /*
- * Copy from in-core inode.
- */
- vap->va_mode = ip->i_d.di_mode;
- vap->va_uid = ip->i_d.di_uid;
- vap->va_gid = ip->i_d.di_gid;
- vap->va_projid = ip->i_d.di_projid;
-
- /*
- * Check vnode type block/char vs. everything else.
- */
- switch (ip->i_d.di_mode & S_IFMT) {
- case S_IFBLK:
- case S_IFCHR:
- vap->va_rdev = ip->i_df.if_u2.if_rdev;
- vap->va_blocksize = BLKDEV_IOSIZE;
- break;
- default:
- vap->va_rdev = 0;
-
- if (!(XFS_IS_REALTIME_INODE(ip))) {
- vap->va_blocksize = xfs_preferred_iosize(mp);
- } else {
-
- /*
- * If the file blocks are being allocated from a
- * realtime partition, then return the inode's
- * realtime extent size or the realtime volume's
- * extent size.
- */
- vap->va_blocksize =
- xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
- }
- break;
- }
-
- vn_atime_to_timespec(vp, &vap->va_atime);
- vap->va_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
- vap->va_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
- vap->va_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
- vap->va_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
-
- /*
- * Exit for stat callers. See if any of the rest of the fields
- * to be filled in are needed.
- */
- if ((vap->va_mask &
- (XFS_AT_XFLAGS|XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
- XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
- goto all_done;
-
- /*
- * Convert di_flags to xflags.
- */
- vap->va_xflags = xfs_ip2xflags(ip);
-
- /*
- * Exit for inode revalidate. See if any of the rest of
- * the fields to be filled in are needed.
- */
- if ((vap->va_mask &
- (XFS_AT_EXTSIZE|XFS_AT_NEXTENTS|XFS_AT_ANEXTENTS|
- XFS_AT_GENCOUNT|XFS_AT_VCODE)) == 0)
- goto all_done;
-
- vap->va_extsize = ip->i_d.di_extsize << mp->m_sb.sb_blocklog;
- vap->va_nextents =
- (ip->i_df.if_flags & XFS_IFEXTENTS) ?
- ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) :
- ip->i_d.di_nextents;
- if (ip->i_afp)
- vap->va_anextents =
- (ip->i_afp->if_flags & XFS_IFEXTENTS) ?
- ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) :
- ip->i_d.di_anextents;
- else
- vap->va_anextents = 0;
- vap->va_gen = ip->i_d.di_gen;
-
- all_done:
- if (!(flags & ATTR_LAZY))
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return 0;
-}
-
-
-/*
* xfs_setattr
*/
int
@@ -211,7 +85,6 @@ xfs_setattr(
int flags,
cred_t *credp)
{
- bhv_vnode_t *vp = XFS_ITOV(ip);
xfs_mount_t *mp = ip->i_mount;
xfs_trans_t *tp;
int mask;
@@ -222,7 +95,6 @@ xfs_setattr(
gid_t gid=0, igid=0;
int timeflags = 0;
xfs_prid_t projid=0, iprojid=0;
- int mandlock_before, mandlock_after;
struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
int file_owner;
int need_iolock = 1;
@@ -383,7 +255,7 @@ xfs_setattr(
m |= S_ISGID;
#if 0
/* Linux allows this, Irix doesn't. */
- if ((vap->va_mode & S_ISVTX) && !VN_ISDIR(vp))
+ if ((vap->va_mode & S_ISVTX) && !S_ISDIR(ip->i_d.di_mode))
m |= S_ISVTX;
#endif
if (m && !capable(CAP_FSETID))
@@ -461,10 +333,10 @@ xfs_setattr(
goto error_return;
}
- if (VN_ISDIR(vp)) {
+ if (S_ISDIR(ip->i_d.di_mode)) {
code = XFS_ERROR(EISDIR);
goto error_return;
- } else if (!VN_ISREG(vp)) {
+ } else if (!S_ISREG(ip->i_d.di_mode)) {
code = XFS_ERROR(EINVAL);
goto error_return;
}
@@ -626,9 +498,6 @@ xfs_setattr(
xfs_trans_ihold(tp, ip);
}
- /* determine whether mandatory locking mode changes */
- mandlock_before = MANDLOCK(vp, ip->i_d.di_mode);
-
/*
* Truncate file. Must have write permission and not be a directory.
*/
@@ -858,13 +727,6 @@ xfs_setattr(
code = xfs_trans_commit(tp, commit_flags);
}
- /*
- * If the (regular) file's mandatory locking mode changed, then
- * notify the vnode. We do this under the inode lock to prevent
- * racing calls to vop_vnode_change.
- */
- mandlock_after = MANDLOCK(vp, ip->i_d.di_mode);
-
xfs_iunlock(ip, lock_flags);
/*
@@ -1443,7 +1305,7 @@ xfs_inactive_attrs(
int error;
xfs_mount_t *mp;
- ASSERT(ismrlocked(&ip->i_iolock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
tp = *tpp;
mp = ip->i_mount;
ASSERT(ip->i_d.di_forkoff != 0);
@@ -1491,7 +1353,7 @@ xfs_release(
xfs_mount_t *mp = ip->i_mount;
int error;
- if (!VN_ISREG(vp) || (ip->i_d.di_mode == 0))
+ if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
return 0;
/* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1774,8 +1636,7 @@ xfs_lookup(
struct xfs_name *name,
xfs_inode_t **ipp)
{
- xfs_inode_t *ip;
- xfs_ino_t e_inum;
+ xfs_ino_t inum;
int error;
uint lock_mode;
@@ -1785,12 +1646,21 @@ xfs_lookup(
return XFS_ERROR(EIO);
lock_mode = xfs_ilock_map_shared(dp);
- error = xfs_dir_lookup_int(dp, lock_mode, name, &e_inum, &ip);
- if (!error) {
- *ipp = ip;
- xfs_itrace_ref(ip);
- }
+ error = xfs_dir_lookup(NULL, dp, name, &inum);
xfs_iunlock_map_shared(dp, lock_mode);
+
+ if (error)
+ goto out;
+
+ error = xfs_iget(dp->i_mount, NULL, inum, 0, 0, ipp, 0);
+ if (error)
+ goto out;
+
+ xfs_itrace_ref(*ipp);
+ return 0;
+
+ out:
+ *ipp = NULL;
return error;
}
@@ -1906,7 +1776,7 @@ xfs_create(
* It is locked (and joined to the transaction).
*/
- ASSERT(ismrlocked (&ip->i_lock, MR_UPDATE));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
* Now we join the directory inode to the transaction. We do not do it
@@ -2112,7 +1982,7 @@ again:
ips[0] = ip;
ips[1] = dp;
- xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
+ xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
}
/* else e_inum == dp->i_ino */
/* This can happen if we're asked to lock /x/..
@@ -2160,7 +2030,6 @@ void
xfs_lock_inodes(
xfs_inode_t **ips,
int inodes,
- int first_locked,
uint lock_mode)
{
int attempts = 0, i, j, try_lock;
@@ -2168,13 +2037,8 @@ xfs_lock_inodes(
ASSERT(ips && (inodes >= 2)); /* we need at least two */
- if (first_locked) {
- try_lock = 1;
- i = 1;
- } else {
- try_lock = 0;
- i = 0;
- }
+ try_lock = 0;
+ i = 0;
again:
for (; i < inodes; i++) {
@@ -2298,29 +2162,14 @@ xfs_remove(
return error;
}
- /*
- * We need to get a reference to ip before we get our log
- * reservation. The reason for this is that we cannot call
- * xfs_iget for an inode for which we do not have a reference
- * once we've acquired a log reservation. This is because the
- * inode we are trying to get might be in xfs_inactive going
- * for a log reservation. Since we'll have to wait for the
- * inactive code to complete before returning from xfs_iget,
- * we need to make sure that we don't have log space reserved
- * when we call xfs_iget. Instead we get an unlocked reference
- * to the inode before getting our log reservation.
- */
- IHOLD(ip);
-
xfs_itrace_entry(ip);
xfs_itrace_ref(ip);
error = XFS_QM_DQATTACH(mp, dp, 0);
- if (!error && dp != ip)
+ if (!error)
error = XFS_QM_DQATTACH(mp, ip, 0);
if (error) {
REMOVE_DEBUG_TRACE(__LINE__);
- IRELE(ip);
goto std_return;
}
@@ -2347,7 +2196,6 @@ xfs_remove(
ASSERT(error != ENOSPC);
REMOVE_DEBUG_TRACE(__LINE__);
xfs_trans_cancel(tp, 0);
- IRELE(ip);
return error;
}
@@ -2355,7 +2203,6 @@ xfs_remove(
if (error) {
REMOVE_DEBUG_TRACE(__LINE__);
xfs_trans_cancel(tp, cancel_flags);
- IRELE(ip);
goto std_return;
}
@@ -2363,23 +2210,18 @@ xfs_remove(
* At this point, we've gotten both the directory and the entry
* inodes locked.
*/
+ IHOLD(ip);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
- if (dp != ip) {
- /*
- * Increment vnode ref count only in this case since
- * there's an extra vnode reference in the case where
- * dp == ip.
- */
- IHOLD(dp);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- }
+
+ IHOLD(dp);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
* Entry must exist since we did a lookup in xfs_lock_dir_and_entry.
*/
XFS_BMAP_INIT(&free_list, &first_block);
error = xfs_dir_removename(tp, dp, name, ip->i_ino,
- &first_block, &free_list, 0);
+ &first_block, &free_list, resblks);
if (error) {
ASSERT(error != ENOENT);
REMOVE_DEBUG_TRACE(__LINE__);
@@ -2402,12 +2244,6 @@ xfs_remove(
link_zero = (ip)->i_d.di_nlink==0;
/*
- * Take an extra ref on the inode so that it doesn't
- * go to xfs_inactive() from within the commit.
- */
- IHOLD(ip);
-
- /*
* If this is a synchronous mount, make sure that the
* remove transaction goes to disk before returning to
* the user.
@@ -2423,10 +2259,8 @@ xfs_remove(
}
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- if (error) {
- IRELE(ip);
+ if (error)
goto std_return;
- }
/*
* If we are using filestreams, kill the stream association.
@@ -2438,7 +2272,6 @@ xfs_remove(
xfs_filestream_deassociate(ip);
xfs_itrace_exit(ip);
- IRELE(ip);
/* Fall through to std_return with error = 0 */
std_return:
@@ -2467,8 +2300,6 @@ xfs_remove(
cancel_flags |= XFS_TRANS_ABORT;
xfs_trans_cancel(tp, cancel_flags);
- IRELE(ip);
-
goto std_return;
}
@@ -2536,7 +2367,7 @@ xfs_link(
ips[1] = sip;
}
- xfs_lock_inodes(ips, 2, 0, XFS_ILOCK_EXCL);
+ xfs_lock_inodes(ips, 2, XFS_ILOCK_EXCL);
/*
* Increment vnode ref counts since xfs_trans_commit &
@@ -2840,7 +2671,6 @@ xfs_rmdir(
struct xfs_name *name,
xfs_inode_t *cdp)
{
- bhv_vnode_t *dir_vp = XFS_ITOV(dp);
xfs_mount_t *mp = dp->i_mount;
xfs_trans_t *tp;
int error;
@@ -2866,27 +2696,12 @@ xfs_rmdir(
}
/*
- * We need to get a reference to cdp before we get our log
- * reservation. The reason for this is that we cannot call
- * xfs_iget for an inode for which we do not have a reference
- * once we've acquired a log reservation. This is because the
- * inode we are trying to get might be in xfs_inactive going
- * for a log reservation. Since we'll have to wait for the
- * inactive code to complete before returning from xfs_iget,
- * we need to make sure that we don't have log space reserved
- * when we call xfs_iget. Instead we get an unlocked reference
- * to the inode before getting our log reservation.
- */
- IHOLD(cdp);
-
- /*
* Get the dquots for the inodes.
*/
error = XFS_QM_DQATTACH(mp, dp, 0);
- if (!error && dp != cdp)
+ if (!error)
error = XFS_QM_DQATTACH(mp, cdp, 0);
if (error) {
- IRELE(cdp);
REMOVE_DEBUG_TRACE(__LINE__);
goto std_return;
}
@@ -2913,7 +2728,6 @@ xfs_rmdir(
if (error) {
ASSERT(error != ENOSPC);
cancel_flags = 0;
- IRELE(cdp);
goto error_return;
}
XFS_BMAP_INIT(&free_list, &first_block);
@@ -2927,21 +2741,13 @@ xfs_rmdir(
error = xfs_lock_dir_and_entry(dp, cdp);
if (error) {
xfs_trans_cancel(tp, cancel_flags);
- IRELE(cdp);
goto std_return;
}
+ IHOLD(dp);
xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
- if (dp != cdp) {
- /*
- * Only increment the parent directory vnode count if
- * we didn't bump it in looking up cdp. The only time
- * we don't bump it is when we're looking up ".".
- */
- VN_HOLD(dir_vp);
- }
- xfs_itrace_ref(cdp);
+ IHOLD(cdp);
xfs_trans_ijoin(tp, cdp, XFS_ILOCK_EXCL);
ASSERT(cdp->i_d.di_nlink >= 2);
@@ -2995,12 +2801,6 @@ xfs_rmdir(
last_cdp_link = (cdp)->i_d.di_nlink==0;
/*
- * Take an extra ref on the child vnode so that it
- * does not go to xfs_inactive() from within the commit.
- */
- IHOLD(cdp);
-
- /*
* If this is a synchronous mount, make sure that the
* rmdir transaction goes to disk before returning to
* the user.
@@ -3014,19 +2814,15 @@ xfs_rmdir(
xfs_bmap_cancel(&free_list);
xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT));
- IRELE(cdp);
goto std_return;
}
error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
if (error) {
- IRELE(cdp);
goto std_return;
}
- IRELE(cdp);
-
/* Fall through to std_return with error = 0 or the errno
* from xfs_trans_commit. */
std_return:
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 24c53923dc2..8abe8f186e2 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -15,7 +15,6 @@ struct xfs_iomap;
int xfs_open(struct xfs_inode *ip);
-int xfs_getattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags);
int xfs_setattr(struct xfs_inode *ip, struct bhv_vattr *vap, int flags,
struct cred *credp);
int xfs_readlink(struct xfs_inode *ip, char *link);
@@ -48,9 +47,9 @@ int xfs_change_file_space(struct xfs_inode *ip, int cmd,
struct cred *credp, int attr_flags);
int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
struct xfs_inode *src_ip, struct xfs_inode *target_dp,
- struct xfs_name *target_name);
+ struct xfs_name *target_name, struct xfs_inode *target_ip);
int xfs_attr_get(struct xfs_inode *ip, const char *name, char *value,
- int *valuelenp, int flags, cred_t *cred);
+ int *valuelenp, int flags);
int xfs_attr_set(struct xfs_inode *dp, const char *name, char *value,
int valuelen, int flags);
int xfs_attr_remove(struct xfs_inode *dp, const char *name, int flags);
@@ -61,9 +60,6 @@ int xfs_ioctl(struct xfs_inode *ip, struct file *filp,
ssize_t xfs_read(struct xfs_inode *ip, struct kiocb *iocb,
const struct iovec *iovp, unsigned int segs,
loff_t *offset, int ioflags);
-ssize_t xfs_sendfile(struct xfs_inode *ip, struct file *filp,
- loff_t *offset, int ioflags, size_t count,
- read_actor_t actor, void *target);
ssize_t xfs_splice_read(struct xfs_inode *ip, struct file *infilp,
loff_t *ppos, struct pipe_inode_info *pipe, size_t count,
int flags, int ioflags);