aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorPaul Mundt <lethal@linux-sh.org>2009-12-21 11:59:55 +0900
committerPaul Mundt <lethal@linux-sh.org>2009-12-21 11:59:55 +0900
commit76e7461a21dfe13565b2a323b53c8cc963541126 (patch)
tree8e399c71b38bb80c00d8a82310021b55ab6e8e33 /fs
parentd0b873fc73b793277c8a0824ce986b5bfeaef157 (diff)
parentdd59f6c76b265ed2ff18b497d6105a9511b1feb1 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/anon_inodes.c17
-rw-r--r--fs/binfmt_aout.c13
-rw-r--r--fs/binfmt_elf.c24
-rw-r--r--fs/binfmt_elf_fdpic.c29
-rw-r--r--fs/binfmt_flat.c6
-rw-r--r--fs/binfmt_som.c2
-rw-r--r--fs/btrfs/Kconfig1
-rw-r--r--fs/btrfs/acl.c23
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c229
-rw-r--r--fs/btrfs/ctree.h40
-rw-r--r--fs/btrfs/dir-item.c19
-rw-r--r--fs/btrfs/disk-io.c27
-rw-r--r--fs/btrfs/extent-tree.c72
-rw-r--r--fs/btrfs/file.c669
-rw-r--r--fs/btrfs/inode.c567
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/ordered-data.c115
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c38
-rw-r--r--fs/btrfs/super.c15
-rw-r--r--fs/btrfs/transaction.c44
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/tree-log.c86
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/btrfs/xattr.c80
-rw-r--r--fs/btrfs/xattr.h9
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/inode.c6
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/exec.c40
-rw-r--r--fs/ext4/Kconfig1
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/inode.c2
-rw-r--r--fs/inode.c26
-rw-r--r--fs/jbd/Kconfig1
-rw-r--r--fs/jbd2/Kconfig1
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/namei.c2
-rw-r--r--fs/namespace.c3
-rw-r--r--fs/nfs/super.c8
-rw-r--r--fs/nilfs2/Kconfig1
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/pipe.c18
-rw-r--r--fs/proc/array.c19
-rw-r--r--fs/ramfs/file-nommu.c2
-rw-r--r--fs/reiserfs/Kconfig1
-rw-r--r--fs/reiserfs/inode.c18
-rw-r--r--fs/stack.c71
-rw-r--r--fs/sync.c59
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/xfs_iget.c4
55 files changed, 1383 insertions, 1100 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index f8fccaaad62..64d44efad7a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,10 +6,6 @@ menu "File systems"
if BLOCK
-config FS_JOURNAL_INFO
- bool
- default n
-
source "fs/ext2/Kconfig"
source "fs/ext3/Kconfig"
source "fs/ext4/Kconfig"
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 94f5110c465..2c994591f4d 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
mnt);
}
-static int anon_inodefs_delete_dentry(struct dentry *dentry)
+/*
+ * anon_inodefs_dname() is called from d_path().
+ */
+static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
- /*
- * We faked vfs to believe the dentry was hashed when we created it.
- * Now we restore the flag so that dput() will work correctly.
- */
- dentry->d_flags |= DCACHE_UNHASHED;
- return 1;
+ return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
+ dentry->d_name.name);
}
static struct file_system_type anon_inode_fs_type = {
@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super,
};
static const struct dentry_operations anon_inodefs_dentry_operations = {
- .d_delete = anon_inodefs_delete_dentry,
+ .d_dname = anon_inodefs_dname,
};
/*
@@ -119,8 +118,6 @@ struct file *anon_inode_getfile(const char *name,
atomic_inc(&anon_inode_inode->i_count);
path.dentry->d_op = &anon_inodefs_dentry_operations;
- /* Do not publish this dentry inside the global dentry hash table */
- path.dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(path.dentry, anon_inode_inode);
error = -ENFILE;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf7c77..346b6940536 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
static int load_aout_library(struct file*);
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(struct coredump_params *cprm);
static struct linux_binfmt aout_format = {
.module = THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
* dumping of the process results in another error..
*/
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
{
+ struct file *file = cprm->file;
mm_segment_t fs;
int has_dumped = 0;
unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
dump.u_ar0 = offsetof(struct user, regs);
- dump.signal = signr;
- aout_dump_thread(regs, &dump);
+ dump.signal = cprm->signr;
+ aout_dump_thread(cprm->regs, &dump);
/* If the size of the dump file exceeds the rlimit, then see what would happen
if we wrote the stack, but not the data area. */
- if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
+ if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
dump.u_dsize = 0;
/* Make sure we have enough room to write the stack and data areas. */
- if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+ if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
dump.u_ssize = 0;
/* make sure we actually have a data and stack area to dump */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 97b6e9efeb7..edd90c49003 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
* don't even try.
*/
#ifdef CONFIG_ELF_CORE
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump NULL
#endif
@@ -1272,8 +1272,9 @@ static int writenote(struct memelfnote *men, struct file *file,
}
#undef DUMP_WRITE
-#define DUMP_WRITE(addr, nr) \
- if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr) \
+ if ((size += (nr)) > cprm->limit || \
+ !dump_write(cprm->file, (addr), (nr))) \
goto end_coredump;
static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1901,7 +1902,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
* and then they are actually written out. If we run out of core limit
* we just truncate.
*/
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
mm_segment_t fs;
@@ -1947,7 +1948,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
* notes. This also sets up the file header.
*/
if (!fill_note_info(elf, segs + 1, /* including notes section */
- &info, signr, regs))
+ &info, cprm->signr, cprm->regs))
goto cleanup;
has_dumped = 1;
@@ -2009,14 +2010,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
#endif
/* write out the notes section */
- if (!write_note_info(&info, file, &foffset))
+ if (!write_note_info(&info, cprm->file, &foffset))
goto end_coredump;
- if (elf_coredump_extra_notes_write(file, &foffset))
+ if (elf_coredump_extra_notes_write(cprm->file, &foffset))
goto end_coredump;
/* Align to page */
- if (!dump_seek(file, dataoff - foffset))
+ if (!dump_seek(cprm->file, dataoff - foffset))
goto end_coredump;
for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2033,12 +2034,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
- stop = ((size += PAGE_SIZE) > limit) ||
- !dump_write(file, kaddr, PAGE_SIZE);
+ stop = ((size += PAGE_SIZE) > cprm->limit) ||
+ !dump_write(cprm->file, kaddr,
+ PAGE_SIZE);
kunmap(page);
page_cache_release(page);
} else
- stop = !dump_seek(file, PAGE_SIZE);
+ stop = !dump_seek(cprm->file, PAGE_SIZE);
if (stop)
goto end_coredump;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7b055385db8..c25256a5c5b 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
struct file *, struct mm_struct *);
#ifdef CONFIG_ELF_CORE
-static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
+static int elf_fdpic_core_dump(struct coredump_params *cprm);
#endif
static struct linux_binfmt elf_fdpic_format = {
@@ -1326,8 +1326,9 @@ static int writenote(struct memelfnote *men, struct file *file)
#undef DUMP_WRITE
#undef DUMP_SEEK
-#define DUMP_WRITE(addr, nr) \
- if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr) \
+ if ((size += (nr)) > cprm->limit || \
+ !dump_write(cprm->file, (addr), (nr))) \
goto end_coredump;
static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1582,8 +1583,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
* and then they are actually written out. If we run out of core limit
* we just truncate.
*/
-static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
- struct file *file, unsigned long limit)
+static int elf_fdpic_core_dump(struct coredump_params *cprm)
{
#define NUM_NOTES 6
int has_dumped = 0;
@@ -1642,7 +1642,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
goto cleanup;
#endif
- if (signr) {
+ if (cprm->signr) {
struct core_thread *ct;
struct elf_thread_status *tmp;
@@ -1661,14 +1661,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
int sz;
tmp = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(signr, tmp);
+ sz = elf_dump_thread_status(cprm->signr, tmp);
thread_status_size += sz;
}
}
/* now collect the dump for the current */
- fill_prstatus(prstatus, current, signr);
- elf_core_copy_regs(&prstatus->pr_reg, regs);
+ fill_prstatus(prstatus, current, cprm->signr);
+ elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
@@ -1703,7 +1703,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
/* Try to dump the FPU. */
if ((prstatus->pr_fpvalid =
- elf_core_copy_task_fpregs(current, regs, fpu)))
+ elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
fill_note(notes + numnote++,
"CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
@@ -1774,7 +1774,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
/* write out the notes section */
for (i = 0; i < numnote; i++)
- if (!writenote(notes + i, file))
+ if (!writenote(notes + i, cprm->file))
goto end_coredump;
/* write out the thread status notes section */
@@ -1783,14 +1783,15 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
list_entry(t, struct elf_thread_status, list);
for (i = 0; i < tmp->num_notes; i++)
- if (!writenote(&tmp->notes[i], file))
+ if (!writenote(&tmp->notes[i], cprm->file))
goto end_coredump;
}
- if (!dump_seek(file, dataoff))
+ if (!dump_seek(cprm->file, dataoff))
goto end_coredump;
- if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
+ if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+ mm_flags) < 0)
goto end_coredump;
#ifdef ELF_CORE_WRITE_EXTRA_DATA
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a2796651e75..d4a00ea1054 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
#endif
static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int flat_core_dump(struct coredump_params *cprm);
static struct linux_binfmt flat_format = {
.module = THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
* Currently only a stub-function.
*/
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int flat_core_dump(struct coredump_params *cprm)
{
printk("Process %s:%d received signr %d and should have core dumped\n",
- current->comm, current->pid, (int) signr);
+ current->comm, current->pid, (int) cprm->signr);
return(1);
}
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9c9e7..2a9b5330cc5 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
* don't even try.
*/
#if 0
-static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
+static int som_core_dump(struct coredump_params *cprm);
#else
#define som_core_dump NULL
#endif
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 402afe0a0bf..7bb3c020e57 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,7 +4,6 @@ config BTRFS_FS
select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
- select FS_JOURNAL_INFO
help
Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features.
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 52cbe47022b..2e9e69987a8 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -94,7 +94,8 @@ static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
/*
* Needs to be called with fs_mutex held
*/
-static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+static int btrfs_set_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct posix_acl *acl, int type)
{
int ret, size = 0;
const char *name;
@@ -140,8 +141,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
goto out;
}
- ret = __btrfs_setxattr(inode, name, value, size, 0);
-
+ ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
out:
kfree(value);
@@ -154,7 +154,7 @@ out:
static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags, int type)
{
- int ret = 0;
+ int ret;
struct posix_acl *acl = NULL;
if (value) {
@@ -167,7 +167,7 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
}
}
- ret = btrfs_set_acl(dentry->d_inode, acl, type);
+ ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
posix_acl_release(acl);
@@ -196,7 +196,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
* stuff has been fixed to work with that. If the locking stuff changes, we
* need to re-evaluate the acl locking stuff.
*/
-int btrfs_init_acl(struct inode *inode, struct inode *dir)
+int btrfs_init_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
{
struct posix_acl *acl = NULL;
int ret = 0;
@@ -221,7 +222,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
mode_t mode;
if (S_ISDIR(inode->i_mode)) {
- ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
+ ret = btrfs_set_acl(trans, inode, acl,
+ ACL_TYPE_DEFAULT);
if (ret)
goto failed;
}
@@ -236,7 +238,7 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
inode->i_mode = mode;
if (ret > 0) {
/* we need an acl */
- ret = btrfs_set_acl(inode, clone,
+ ret = btrfs_set_acl(trans, inode, clone,
ACL_TYPE_ACCESS);
}
}
@@ -269,7 +271,7 @@ int btrfs_acl_chmod(struct inode *inode)
ret = posix_acl_chmod_masq(clone, inode->i_mode);
if (!ret)
- ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
+ ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
posix_acl_release(clone);
@@ -297,7 +299,8 @@ int btrfs_acl_chmod(struct inode *inode)
return 0;
}
-int btrfs_init_acl(struct inode *inode, struct inode *dir)
+int btrfs_init_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
{
return 0;
}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f6783a42f01..3f1f50d9d91 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
*/
struct extent_io_tree io_failure_tree;
- /* held while inesrting or deleting extents from files */
- struct mutex extent_mutex;
-
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
@@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
static inline void btrfs_i_size_write(struct inode *inode, u64 size)
{
- inode->i_size = size;
+ i_size_write(inode, size);
BTRFS_I(inode)->disk_i_size = size;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d53..c4bc570a396 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
+static int setup_items_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size,
+ u32 total_data, u32 total_size, int nr);
+
struct btrfs_path *btrfs_alloc_path(void)
{
@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
extent_buffer_get(cow);
spin_unlock(&root->node_lock);
- btrfs_free_extent(trans, root, buf->start, buf->len,
- parent_start, root->root_key.objectid,
- level, 0);
+ btrfs_free_tree_block(trans, root, buf->start, buf->len,
+ parent_start, root->root_key.objectid, level);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- btrfs_free_extent(trans, root, buf->start, buf->len,
- parent_start, root->root_key.objectid,
- level, 0);
+ btrfs_free_tree_block(trans, root, buf->start, buf->len,
+ parent_start, root->root_key.objectid, level);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
- ret = btrfs_free_extent(trans, root, mid->start, mid->len,
- 0, root->root_key.objectid, level, 1);
+ ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
+ 0, root->root_key.objectid, level);
/* once for the root ptr */
free_extent_buffer(mid);
return ret;
@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1);
if (wret)
ret = wret;
- wret = btrfs_free_extent(trans, root, bytenr,
- blocksize, 0,
- root->root_key.objectid,
- level, 0);
+ wret = btrfs_free_tree_block(trans, root,
+ bytenr, blocksize, 0,
+ root->root_key.objectid,
+ level);
if (wret)
ret = wret;
} else {
@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
wret = del_ptr(trans, root, path, level + 1, pslot);
if (wret)
ret = wret;
- wret = btrfs_free_extent(trans, root, bytenr, blocksize,
- 0, root->root_key.objectid,
- level, 0);
+ wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
+ 0, root->root_key.objectid, level);
if (wret)
ret = wret;
} else {
@@ -2997,75 +2999,85 @@ again:
return ret;
}
-/*
- * This function splits a single item into two items,
- * giving 'new_key' to the new item and splitting the
- * old one at split_offset (from the start of the item).
- *
- * The path may be released by this operation. After
- * the split, the path is pointing to the old item. The
- * new item is going to be in the same node as the old one.
- *
- * Note, the item being split must be smaller enough to live alone on
- * a tree block with room for one extra struct btrfs_item
- *
- * This allows us to split the item in place, keeping a lock on the
- * leaf the entire time.
- */
-int btrfs_split_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *new_key,
- unsigned long split_offset)
+static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int ins_len)
{
- u32 item_size;
+ struct btrfs_key key;
struct extent_buffer *leaf;
- struct btrfs_key orig_key;
- struct btrfs_item *item;
- struct btrfs_item *new_item;
- int ret = 0;
- int slot;
- u32 nritems;
- u32 orig_offset;
- struct btrfs_disk_key disk_key;
- char *buf;
+ struct btrfs_file_extent_item *fi;
+ u64 extent_len = 0;
+ u32 item_size;
+ int ret;
leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
- if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
- goto split;
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
+ key.type != BTRFS_EXTENT_CSUM_KEY);
+
+ if (btrfs_leaf_free_space(root, leaf) >= ins_len)
+ return 0;
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (key.type == BTRFS_EXTENT_DATA_KEY) {
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+ }
btrfs_release_path(root, path);
- path->search_for_split = 1;
path->keep_locks = 1;
-
- ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
+ path->search_for_split = 1;
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
path->search_for_split = 0;
+ if (ret < 0)
+ goto err;
+ ret = -EAGAIN;
+ leaf = path->nodes[0];
/* if our item isn't there or got smaller, return now */
- if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
- path->slots[0])) {
- path->keep_locks = 0;
- return -EAGAIN;
+ if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
+ goto err;
+
+ if (key.type == BTRFS_EXTENT_DATA_KEY) {
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
+ goto err;
}
btrfs_set_path_blocking(path);
- ret = split_leaf(trans, root, &orig_key, path,
- sizeof(struct btrfs_item), 1);
- path->keep_locks = 0;
+ ret = split_leaf(trans, root, &key, path, ins_len, 1);
BUG_ON(ret);
+ path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1);
+ return 0;
+err:
+ path->keep_locks = 0;
+ return ret;
+}
+
+static noinline int split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ unsigned long split_offset)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+ struct btrfs_item *new_item;
+ int slot;
+ char *buf;
+ u32 nritems;
+ u32 item_size;
+ u32 orig_offset;
+ struct btrfs_disk_key disk_key;
+
leaf = path->nodes[0];
BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
-split:
- /*
- * make sure any changes to the path from split_leaf leave it
- * in a blocking state
- */
btrfs_set_path_blocking(path);
item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3085,19 @@ split:
item_size = btrfs_item_size(leaf, item);
buf = kmalloc(item_size, GFP_NOFS);
+ if (!buf)
+ return -ENOMEM;
+
read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
path->slots[0]), item_size);
- slot = path->slots[0] + 1;
- leaf = path->nodes[0];
+ slot = path->slots[0] + 1;
nritems = btrfs_header_nritems(leaf);
-
if (slot != nritems) {
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
- btrfs_item_nr_offset(slot),
- (nritems - slot) * sizeof(struct btrfs_item));
-
+ btrfs_item_nr_offset(slot),
+ (nritems - slot) * sizeof(struct btrfs_item));
}
btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3125,81 @@ split:
item_size - split_offset);
btrfs_mark_buffer_dirty(leaf);
- ret = 0;
- if (btrfs_leaf_free_space(root, leaf) < 0) {
- btrfs_print_leaf(root, leaf);
- BUG();
- }
+ BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
kfree(buf);
+ return 0;
+}
+
+/*
+ * This function splits a single item into two items,
+ * giving 'new_key' to the new item and splitting the
+ * old one at split_offset (from the start of the item).
+ *
+ * The path may be released by this operation. After
+ * the split, the path is pointing to the old item. The
+ * new item is going to be in the same node as the old one.
+ *
+ * Note, the item being split must be smaller enough to live alone on
+ * a tree block with room for one extra struct btrfs_item
+ *
+ * This allows us to split the item in place, keeping a lock on the
+ * leaf the entire time.
+ */
+int btrfs_split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ unsigned long split_offset)
+{
+ int ret;
+ ret = setup_leaf_for_split(trans, root, path,
+ sizeof(struct btrfs_item));
+ if (ret)
+ return ret;
+
+ ret = split_item(trans, root, path, new_key, split_offset);
return ret;
}
/*
+ * This function duplicate a item, giving 'new_key' to the new item.
+ * It guarantees both items live in the same tree leaf and the new item
+ * is contiguous with the original item.
+ *
+ * This allows us to split file extent in place, keeping a lock on the
+ * leaf the entire time.
+ */
+int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key)
+{
+ struct extent_buffer *leaf;
+ int ret;
+ u32 item_size;
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ ret = setup_leaf_for_split(trans, root, path,
+ item_size + sizeof(struct btrfs_item));
+ if (ret)
+ return ret;
+
+ path->slots[0]++;
+ ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
+ item_size, item_size +
+ sizeof(struct btrfs_item), 1);
+ BUG_ON(ret);
+
+ leaf = path->nodes[0];
+ memcpy_extent_buffer(leaf,
+ btrfs_item_ptr_offset(leaf, path->slots[0]),
+ btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
+ item_size);
+ return 0;
+}
+
+/*
* make the item pointed to by the path smaller. new_size indicates
* how small to make it, and from_end tells us if we just chop bytes
* off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/
btrfs_unlock_up_safe(path, 0);
- ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
- 0, root->root_key.objectid, 0, 0);
+ ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
+ 0, root->root_key.objectid, 0);
return ret;
}
/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 444b3e9b92a..9f806dd04c2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,6 +310,9 @@ struct btrfs_header {
#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) - \
sizeof(struct btrfs_file_extent_item))
+#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
+ sizeof(struct btrfs_item) -\
+ sizeof(struct btrfs_dir_item))
/*
@@ -859,8 +862,9 @@ struct btrfs_fs_info {
struct mutex ordered_operations_mutex;
struct rw_semaphore extent_commit_sem;
- struct rw_semaphore subvol_sem;
+ struct rw_semaphore cleanup_work_sem;
+ struct rw_semaphore subvol_sem;
struct srcu_struct subvol_srcu;
struct list_head trans_list;
@@ -868,6 +872,9 @@ struct btrfs_fs_info {
struct list_head dead_roots;
struct list_head caching_block_groups;
+ spinlock_t delayed_iput_lock;
+ struct list_head delayed_iputs;
+
atomic_t nr_async_submits;
atomic_t async_submit_draining;
atomic_t nr_async_bios;
@@ -1034,12 +1041,12 @@ struct btrfs_root {
int ref_cows;
int track_dirty;
int in_radix;
+ int clean_orphans;
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
int defrag_running;
- int defrag_level;
char *name;
int in_sysfs;
@@ -1975,6 +1982,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
+int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u32 blocksize,
+ u64 parent, u64 root_objectid, int level);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
@@ -2089,6 +2100,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_key *new_key,
unsigned long split_offset);
+int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key);
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow);
@@ -2196,9 +2211,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_dir_item *di);
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *name,
- u16 name_len, const void *data, u16 data_len,
- u64 dir);
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 objectid,
+ const char *name, u16 name_len,
+ const void *data, u16 data_len);
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
@@ -2292,7 +2308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct inode *inode, u64 new_size,
u32 min_type);
-int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
@@ -2332,6 +2348,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root);
+void btrfs_add_delayed_iput(struct inode *inode);
+void btrfs_run_delayed_iputs(struct btrfs_root *root);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
@@ -2345,12 +2363,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
extern const struct file_operations btrfs_file_operations;
-int btrfs_drop_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- u64 start, u64 end, u64 locked_end,
- u64 inline_limit, u64 *hint_block, int drop_cache);
+int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
+ u64 start, u64 end, u64 *hint_byte, int drop_cache);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct inode *inode, u64 start, u64 end);
int btrfs_release_file(struct inode *inode, struct file *file);
@@ -2380,7 +2395,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
#else
#define btrfs_check_acl NULL
#endif
-int btrfs_init_acl(struct inode *inode, struct inode *dir);
+int btrfs_init_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir);
int btrfs_acl_chmod(struct inode *inode);
/* relocation.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519c..e9103b3baa4 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
* into the tree
*/
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *name,
- u16 name_len, const void *data, u16 data_len,
- u64 dir)
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 objectid,
+ const char *name, u16 name_len,
+ const void *data, u16 data_len)
{
int ret = 0;
- struct btrfs_path *path;
struct btrfs_dir_item *dir_item;
unsigned long name_ptr, data_ptr;
struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
u32 data_size;
- key.objectid = dir;
+ BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
+
+ key.objectid = objectid;
btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
key.offset = btrfs_name_hash(name, name_len);
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- if (name_len + data_len + sizeof(struct btrfs_dir_item) >
- BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
- return -ENOSPC;
data_size = sizeof(*dir_item) + name_len + data_len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
write_extent_buffer(leaf, data, data_ptr, data_len);
btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02b6afbd745..009e3bd18f2 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->stripesize = stripesize;
root->ref_cows = 0;
root->track_dirty = 0;
+ root->in_radix = 0;
+ root->clean_orphans = 0;
root->fs_info = fs_info;
root->objectid = objectid;
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->defrag_trans_start = fs_info->generation;
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
- root->defrag_level = 0;
root->root_key.objectid = objectid;
root->anon_super.s_root = NULL;
root->anon_super.s_dev = 0;
@@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
while (1) {
ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY);
+ 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
if (ret)
break;
- clear_extent_dirty(&log_root_tree->dirty_log_pages,
- start, end, GFP_NOFS);
+ clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
+ EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
eb = fs_info->log_root_tree->node;
@@ -1210,8 +1211,10 @@ again:
ret = radix_tree_insert(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
root);
- if (ret == 0)
+ if (ret == 0) {
root->in_radix = 1;
+ root->clean_orphans = 1;
+ }
spin_unlock(&fs_info->fs_roots_radix_lock);
radix_tree_preload_end();
if (ret) {
@@ -1225,10 +1228,6 @@ again:
ret = btrfs_find_dead_roots(fs_info->tree_root,
root->root_key.objectid);
WARN_ON(ret);
-
- if (!(fs_info->sb->s_flags & MS_RDONLY))
- btrfs_orphan_cleanup(root);
-
return root;
fail:
free_fs_root(root);
@@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg)
if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
mutex_trylock(&root->fs_info->cleaner_mutex)) {
+ btrfs_run_delayed_iputs(root);
btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
}
@@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
+ INIT_LIST_HEAD(&fs_info->delayed_iputs);
INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
+ spin_lock_init(&fs_info->delayed_iput_lock);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
@@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
init_rwsem(&fs_info->extent_commit_sem);
+ init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -2386,8 +2389,14 @@ int btrfs_commit_super(struct btrfs_root *root)
int ret;
mutex_lock(&root->fs_info->cleaner_mutex);
+ btrfs_run_delayed_iputs(root);
btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ /* wait until ongoing cleanup work done */
+ down_write(&root->fs_info->cleanup_work_sem);
+ up_write(&root->fs_info->cleanup_work_sem);
+
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 94627c4cc19..56e50137d0e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -195,6 +195,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
int stripe_len;
int i, nr, ret;
+ if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
+ stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
+ cache->bytes_super += stripe_len;
+ ret = add_excluded_extent(root, cache->key.objectid,
+ stripe_len);
+ BUG_ON(ret);
+ }
+
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +263,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
if (ret)
break;
- if (extent_start == start) {
+ if (extent_start <= start) {
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
@@ -2880,9 +2888,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
root = async->root;
info = async->info;
- btrfs_start_delalloc_inodes(root);
+ btrfs_start_delalloc_inodes(root, 0);
wake_up(&info->flush_wait);
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_wait_ordered_extents(root, 0, 0);
spin_lock(&info->lock);
info->flushing = 0;
@@ -2956,8 +2964,8 @@ static void flush_delalloc(struct btrfs_root *root,
return;
flush:
- btrfs_start_delalloc_inodes(root);
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_start_delalloc_inodes(root, 0);
+ btrfs_wait_ordered_extents(root, 0, 0);
spin_lock(&info->lock);
info->flushing = 0;
@@ -3454,14 +3462,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
else
old_val -= num_bytes;
btrfs_set_super_bytes_used(&info->super_copy, old_val);
-
- /* block accounting for root item */
- old_val = btrfs_root_used(&root->root_item);
- if (alloc)
- old_val += num_bytes;
- else
- old_val -= num_bytes;
- btrfs_set_root_used(&root->root_item, old_val);
spin_unlock(&info->delalloc_lock);
while (total) {
@@ -4049,6 +4049,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
return ret;
}
+int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u32 blocksize,
+ u64 parent, u64 root_objectid, int level)
+{
+ u64 used;
+ spin_lock(&root->node_lock);
+ used = btrfs_root_used(&root->root_item) - blocksize;
+ btrfs_set_root_used(&root->root_item, used);
+ spin_unlock(&root->node_lock);
+
+ return btrfs_free_extent(trans, root, bytenr, blocksize,
+ parent, root_objectid, level, 0);
+}
+
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
u64 mask = ((u64)root->stripesize - 1);
@@ -4578,7 +4593,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
{
int ret;
u64 search_start = 0;
- struct btrfs_fs_info *info = root->fs_info;
data = btrfs_get_alloc_profile(root, data);
again:
@@ -4586,17 +4600,9 @@ again:
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations
*/
- if (empty_size || root->ref_cows) {
- if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- 2 * 1024 * 1024,
- BTRFS_BLOCK_GROUP_METADATA |
- (info->metadata_alloc_profile &
- info->avail_metadata_alloc_bits), 0);
- }
+ if (empty_size || root->ref_cows)
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
num_bytes + 2 * 1024 * 1024, data, 0);
- }
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4897,6 +4903,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op);
BUG_ON(ret);
}
+
+ if (root_objectid == root->root_key.objectid) {
+ u64 used;
+ spin_lock(&root->node_lock);
+ used = btrfs_root_used(&root->root_item) + num_bytes;
+ btrfs_set_root_used(&root->root_item, used);
+ spin_unlock(&root->node_lock);
+ }
return ret;
}
@@ -4919,8 +4933,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
btrfs_set_buffer_uptodate(buf);
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
- set_extent_dirty(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1, GFP_NOFS);
+ /*
+ * we allow two log transactions at a time, use different
+ * EXENT bit to differentiate dirty pages.
+ */
+ if (root->log_transid % 2 == 0)
+ set_extent_dirty(&root->dirty_log_pages, buf->start,
+ buf->start + buf->len - 1, GFP_NOFS);
+ else
+ set_extent_new(&root->dirty_log_pages, buf->start,
+ buf->start + buf->len - 1, GFP_NOFS);
} else {
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77f759302e1..feaa13b105d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
}
flags = em->flags;
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
- if (em->start <= start &&
- (!testend || em->start + em->len >= start + len)) {
+ if (testend && em->start + em->len >= start + len) {
free_extent_map(em);
write_unlock(&em_tree->lock);
break;
}
- if (start < em->start) {
- len = em->start - start;
- } else {
+ start = em->start + em->len;
+ if (testend)
len = start + len - (em->start + em->len);
- start = em->start + em->len;
- }
free_extent_map(em);
write_unlock(&em_tree->lock);
continue;
@@ -265,319 +261,247 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
* If an extent intersects the range but is not entirely inside the range
* it is either truncated or split. Anything entirely inside the range
* is deleted from the tree.
- *
- * inline_limit is used to tell this code which offsets in the file to keep
- * if they contain inline extents.
*/
-noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- u64 start, u64 end, u64 locked_end,
- u64 inline_limit, u64 *hint_byte, int drop_cache)
+int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
+ u64 start, u64 end, u64 *hint_byte, int drop_cache)
{
- u64 extent_end = 0;
- u64 search_start = start;
- u64 ram_bytes = 0;
- u64 disk_bytenr = 0;
- u64 orig_locked_end = locked_end;
- u8 compression;
- u8 encryption;
- u16 other_encoding = 0;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_buffer *leaf;
- struct btrfs_file_extent_item *extent;
+ struct btrfs_file_extent_item *fi;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_file_extent_item old;
- int keep;
- int slot;
- int bookend;
- int found_type = 0;
- int found_extent;
- int found_inline;
+ struct btrfs_key new_key;
+ u64 search_start = start;
+ u64 disk_bytenr = 0;
+ u64 num_bytes = 0;
+ u64 extent_offset = 0;
+ u64 extent_end = 0;
+ int del_nr = 0;
+ int del_slot = 0;
+ int extent_type;
int recow;
int ret;
- inline_limit = 0;
if (drop_cache)
btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+
while (1) {
recow = 0;
- btrfs_release_path(root, path);
ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
search_start, -1);
if (ret < 0)
- goto out;
- if (ret > 0) {
- if (path->slots[0] == 0) {
- ret = 0;
- goto out;
- }
- path->slots[0]--;
+ break;
+ if (ret > 0 && path->slots[0] > 0 && search_start == start) {
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
+ if (key.objectid == inode->i_ino &&
+ key.type == BTRFS_EXTENT_DATA_KEY)
+ path->slots[0]--;
}
+ ret = 0;
next_slot:
- keep = 0;
- bookend = 0;
- found_extent = 0;
- found_inline = 0;
- compression = 0;
- encryption = 0;
- extent = NULL;
leaf = path->nodes[0];
- slot = path->slots[0];
- ret = 0;
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
- key.offset >= end) {
- goto out;
- }
- if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
- key.objectid != inode->i_ino) {
- goto out;
- }
- if (recow) {
- search_start = max(key.offset, start);
- continue;
- }
- if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
- found_type = btrfs_file_extent_type(leaf, extent);
- compression = btrfs_file_extent_compression(leaf,
- extent);
- encryption = btrfs_file_extent_encryption(leaf,
- extent);
- other_encoding = btrfs_file_extent_other_encoding(leaf,
- extent);
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- extent_end =
- btrfs_file_extent_disk_bytenr(leaf,
- extent);
- if (extent_end)
- *hint_byte = extent_end;
-
- extent_end = key.offset +
- btrfs_file_extent_num_bytes(leaf, extent);
- ram_bytes = btrfs_file_extent_ram_bytes(leaf,
- extent);
- found_extent = 1;
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- found_inline = 1;
- extent_end = key.offset +
- btrfs_file_extent_inline_len(leaf, extent);
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ BUG_ON(del_nr > 0);
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ break;
+ if (ret > 0) {
+ ret = 0;
+ break;
}
+ leaf = path->nodes[0];
+ recow = 1;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid > inode->i_ino ||
+ key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
+ break;
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(leaf, fi);
+
+ if (extent_type == BTRFS_FILE_EXTENT_REG ||
+ extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+ extent_offset = btrfs_file_extent_offset(leaf, fi);
+ extent_end = key.offset +
+ btrfs_file_extent_num_bytes(leaf, fi);
+ } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ extent_end = key.offset +
+ btrfs_file_extent_inline_len(leaf, fi);
} else {
+ WARN_ON(1);
extent_end = search_start;
}
- /* we found nothing we can drop */
- if ((!found_extent && !found_inline) ||
- search_start >= extent_end) {
- int nextret;
- u32 nritems;
- nritems = btrfs_header_nritems(leaf);
- if (slot >= nritems - 1) {
- nextret = btrfs_next_leaf(root, path);
- if (nextret)
- goto out;
- recow = 1;
- } else {
- path->slots[0]++;
- }
+ if (extent_end <= search_start) {
+ path->slots[0]++;
goto next_slot;
}
- if (end <= extent_end && start >= key.offset && found_inline)
- *hint_byte = EXTENT_MAP_INLINE;
-
- if (found_extent) {
- read_extent_buffer(leaf, &old, (unsigned long)extent,
- sizeof(old));
- }
-
- if (end < extent_end && end >= key.offset) {
- bookend = 1;
- if (found_inline && start <= key.offset)
- keep = 1;
+ search_start = max(key.offset, start);
+ if (recow) {
+ btrfs_release_path(root, path);
+ continue;
}
- if (bookend && found_extent) {
- if (locked_end < extent_end) {
- ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
- locked_end, extent_end - 1,
- GFP_NOFS);
- if (!ret) {
- btrfs_release_path(root, path);
- lock_extent(&BTRFS_I(inode)->io_tree,
- locked_end, extent_end - 1,
- GFP_NOFS);
- locked_end = extent_end;
- continue;
- }
- locked_end = extent_end;
+ /*
+ * | - range to drop - |
+ * | -------- extent -------- |
+ */
+ if (start > key.offset && end < extent_end) {
+ BUG_ON(del_nr > 0);
+ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
+
+ memcpy(&new_key, &key, sizeof(new_key));
+ new_key.offset = start;
+ ret = btrfs_duplicate_item(trans, root, path,
+ &new_key);
+ if (ret == -EAGAIN) {
+ btrfs_release_path(root, path);
+ continue;
}
- disk_bytenr = le64_to_cpu(old.disk_bytenr);
- if (disk_bytenr != 0) {
+ if (ret < 0)
+ break;
+
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ start - key.offset);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ extent_offset += start - key.offset;
+ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - start);
+ btrfs_mark_buffer_dirty(leaf);
+
+ if (disk_bytenr > 0) {
ret = btrfs_inc_extent_ref(trans, root,
- disk_bytenr,
- le64_to_cpu(old.disk_num_bytes), 0,
- root->root_key.objectid,
- key.objectid, key.offset -
- le64_to_cpu(old.offset));
+ disk_bytenr, num_bytes, 0,
+ root->root_key.objectid,
+ new_key.objectid,
+ start - extent_offset);
BUG_ON(ret);
+ *hint_byte = disk_bytenr;
}
+ key.offset = start;
}
+ /*
+ * | ---- range to drop ----- |
+ * | -------- extent -------- |
+ */
+ if (start <= key.offset && end < extent_end) {
+ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
- if (found_inline) {
- u64 mask = root->sectorsize - 1;
- search_start = (extent_end + mask) & ~mask;
- } else
- search_start = extent_end;
-
- /* truncate existing extent */
- if (start > key.offset) {
- u64 new_num;
- u64 old_num;
- keep = 1;
- WARN_ON(start & (root->sectorsize - 1));
- if (found_extent) {
- new_num = start - key.offset;
- old_num = btrfs_file_extent_num_bytes(leaf,
- extent);
- *hint_byte =
- btrfs_file_extent_disk_bytenr(leaf,
- extent);
- if (btrfs_file_extent_disk_bytenr(leaf,
- extent)) {
- inode_sub_bytes(inode, old_num -
- new_num);
- }
- btrfs_set_file_extent_num_bytes(leaf,
- extent, new_num);
- btrfs_mark_buffer_dirty(leaf);
- } else if (key.offset < inline_limit &&
- (end > extent_end) &&
- (inline_limit < extent_end)) {
- u32 new_size;
- new_size = btrfs_file_extent_calc_inline_size(
- inline_limit - key.offset);
- inode_sub_bytes(inode, extent_end -
- inline_limit);
- btrfs_set_file_extent_ram_bytes(leaf, extent,
- new_size);
- if (!compression && !encryption) {
- btrfs_truncate_item(trans, root, path,
- new_size, 1);
- }
+ memcpy(&new_key, &key, sizeof(new_key));
+ new_key.offset = end;
+ btrfs_set_item_key_safe(trans, root, path, &new_key);
+
+ extent_offset += end - key.offset;
+ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - end);
+ btrfs_mark_buffer_dirty(leaf);
+ if (disk_bytenr > 0) {
+ inode_sub_bytes(inode, end - key.offset);
+ *hint_byte = disk_bytenr;
}
+ break;
}
- /* delete the entire extent */
- if (!keep) {
- if (found_inline)
- inode_sub_bytes(inode, extent_end -
- key.offset);
- ret = btrfs_del_item(trans, root, path);
- /* TODO update progress marker and return */
- BUG_ON(ret);
- extent = NULL;
- btrfs_release_path(root, path);
- /* the extent will be freed later */
- }
- if (bookend && found_inline && start <= key.offset) {
- u32 new_size;
- new_size = btrfs_file_extent_calc_inline_size(
- extent_end - end);
- inode_sub_bytes(inode, end - key.offset);
- btrfs_set_file_extent_ram_bytes(leaf, extent,
- new_size);
- if (!compression && !encryption)
- ret = btrfs_truncate_item(trans, root, path,
- new_size, 0);
- BUG_ON(ret);
- }
- /* create bookend, splitting the extent in two */
- if (bookend && found_extent) {
- struct btrfs_key ins;
- ins.objectid = inode->i_ino;
- ins.offset = end;
- btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
- btrfs_release_path(root, path);
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_item(trans, root, path, &ins,
- sizeof(*extent));
- BUG_ON(ret);
+ search_start = extent_end;
+ /*
+ * | ---- range to drop ----- |
+ * | -------- extent -------- |
+ */
+ if (start > key.offset && end >= extent_end) {
+ BUG_ON(del_nr > 0);
+ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- write_extent_buffer(leaf, &old,
- (unsigned long)extent, sizeof(old));
-
- btrfs_set_file_extent_compression(leaf, extent,
- compression);
- btrfs_set_file_extent_encryption(leaf, extent,
- encryption);
- btrfs_set_file_extent_other_encoding(leaf, extent,
- other_encoding);
- btrfs_set_file_extent_offset(leaf, extent,
- le64_to_cpu(old.offset) + end - key.offset);
- WARN_ON(le64_to_cpu(old.num_bytes) <
- (extent_end - end));
- btrfs_set_file_extent_num_bytes(leaf, extent,
- extent_end - end);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ start - key.offset);
+ btrfs_mark_buffer_dirty(leaf);
+ if (disk_bytenr > 0) {
+ inode_sub_bytes(inode, extent_end - start);
+ *hint_byte = disk_bytenr;
+ }
+ if (end == extent_end)
+ break;
- /*
- * set the ram bytes to the size of the full extent
- * before splitting. This is a worst case flag,
- * but its the best we can do because we don't know
- * how splitting affects compression
- */
- btrfs_set_file_extent_ram_bytes(leaf, extent,
- ram_bytes);
- btrfs_set_file_extent_type(leaf, extent, found_type);
-
- btrfs_unlock_up_safe(path, 1);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_set_lock_blocking(path->nodes[0]);
-
- path->leave_spinning = 0;
- btrfs_release_path(root, path);
- if (disk_bytenr != 0)
- inode_add_bytes(inode, extent_end - end);
+ path->slots[0]++;
+ goto next_slot;
}
- if (found_extent && !keep) {
- u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr);
+ /*
+ * | ---- range to drop ----- |
+ * | ------ extent ------ |
+ */
+ if (start <= key.offset && end >= extent_end) {
+ if (del_nr == 0) {
+ del_slot = path->slots[0];
+ del_nr = 1;
+ } else {
+ BUG_ON(del_slot + del_nr != path->slots[0]);
+ del_nr++;
+ }
- if (old_disk_bytenr != 0) {
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
inode_sub_bytes(inode,
- le64_to_cpu(old.num_bytes));
+ extent_end - key.offset);
+ extent_end = ALIGN(extent_end,
+ root->sectorsize);
+ } else if (disk_bytenr > 0) {
ret = btrfs_free_extent(trans, root,
- old_disk_bytenr,
- le64_to_cpu(old.disk_num_bytes),
- 0, root->root_key.objectid,
+ disk_bytenr, num_bytes, 0,
+ root->root_key.objectid,
key.objectid, key.offset -
- le64_to_cpu(old.offset));
+ extent_offset);
BUG_ON(ret);
- *hint_byte = old_disk_bytenr;
+ inode_sub_bytes(inode,
+ extent_end - key.offset);
+ *hint_byte = disk_bytenr;
}
- }
- if (search_start >= end) {
- ret = 0;
- goto out;
+ if (end == extent_end)
+ break;
+
+ if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
+ path->slots[0]++;
+ goto next_slot;
+ }
+
+ ret = btrfs_del_items(trans, root, path, del_slot,
+ del_nr);
+ BUG_ON(ret);
+
+ del_nr = 0;
+ del_slot = 0;
+
+ btrfs_release_path(root, path);
+ continue;
}
+
+ BUG_ON(1);
}
-out:
- btrfs_free_path(path);
- if (locked_end > orig_locked_end) {
- unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
- locked_end - 1, GFP_NOFS);
+
+ if (del_nr > 0) {
+ ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+ BUG_ON(ret);
}
+
+ btrfs_free_path(path);
return ret;
}
@@ -620,23 +544,23 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
* two or three.
*/
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct inode *inode, u64 start, u64 end)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
+ struct btrfs_key new_key;
u64 bytenr;
u64 num_bytes;
u64 extent_end;
u64 orig_offset;
u64 other_start;
u64 other_end;
- u64 split = start;
- u64 locked_end = end;
- int extent_type;
- int split_end = 1;
+ u64 split;
+ int del_nr = 0;
+ int del_slot = 0;
int ret;
btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +568,10 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
again:
+ split = start;
key.objectid = inode->i_ino;
key.type = BTRFS_EXTENT_DATA_KEY;
- if (split == start)
- key.offset = split;
- else
- key.offset = split - 1;
+ key.offset = split;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0 && path->slots[0] > 0)
@@ -661,8 +583,8 @@ again:
key.type != BTRFS_EXTENT_DATA_KEY);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- extent_type = btrfs_file_extent_type(leaf, fi);
- BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC);
+ BUG_ON(btrfs_file_extent_type(leaf, fi) !=
+ BTRFS_FILE_EXTENT_PREALLOC);
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
BUG_ON(key.offset > start || extent_end < end);
@@ -670,150 +592,91 @@ again:
num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
- if (key.offset == start)
- split = end;
-
- if (key.offset == start && extent_end == end) {
- int del_nr = 0;
- int del_slot = 0;
- other_start = end;
- other_end = 0;
- if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
- bytenr, &other_start, &other_end)) {
- extent_end = other_end;
- del_slot = path->slots[0] + 1;
- del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- inode->i_ino, orig_offset);
- BUG_ON(ret);
- }
- other_start = 0;
- other_end = start;
- if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
- bytenr, &other_start, &other_end)) {
- key.offset = other_start;
- del_slot = path->slots[0];
- del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- inode->i_ino, orig_offset);
- BUG_ON(ret);
- }
- split_end = 0;
- if (del_nr == 0) {
- btrfs_set_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_REG);
- goto done;
+ while (start > key.offset || end < extent_end) {
+ if (key.offset == start)
+ split = end;
+
+ memcpy(&new_key, &key, sizeof(new_key));
+ new_key.offset = split;
+ ret = btrfs_duplicate_item(trans, root, path, &new_key);
+ if (ret == -EAGAIN) {
+ btrfs_release_path(root, path);
+ goto again;
}
+ BUG_ON(ret < 0);
- fi = btrfs_item_ptr(leaf, del_slot - 1,
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
struct btrfs_file_extent_item);
- btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_end - key.offset);
+ split - key.offset);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - split);
btrfs_mark_buffer_dirty(leaf);
- ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+ ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
+ root->root_key.objectid,
+ inode->i_ino, orig_offset);
BUG_ON(ret);
- goto release;
- } else if (split == start) {
- if (locked_end < extent_end) {
- ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
- locked_end, extent_end - 1, GFP_NOFS);
- if (!ret) {
- btrfs_release_path(root, path);
- lock_extent(&BTRFS_I(inode)->io_tree,
- locked_end, extent_end - 1, GFP_NOFS);
- locked_end = extent_end;
- goto again;
- }
- locked_end = extent_end;
- }
- btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
- } else {
- BUG_ON(key.offset != start);
- key.offset = split;
- btrfs_set_file_extent_offset(leaf, fi, key.offset -
- orig_offset);
- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
- btrfs_set_item_key_safe(trans, root, path, &key);
- extent_end = split;
- }
- if (extent_end == end) {
- split_end = 0;
- extent_type = BTRFS_FILE_EXTENT_REG;
- }
- if (extent_end == end && split == start) {
- other_start = end;
- other_end = 0;
- if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
- bytenr, &other_start, &other_end)) {
- path->slots[0]++;
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- key.offset = split;
- btrfs_set_item_key_safe(trans, root, path, &key);
- btrfs_set_file_extent_offset(leaf, fi, key.offset -
- orig_offset);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- other_end - split);
- goto done;
- }
- }
- if (extent_end == end && split == end) {
- other_start = 0;
- other_end = start;
- if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
- bytenr, &other_start, &other_end)) {
+ if (split == start) {
+ key.offset = start;
+ } else {
+ BUG_ON(start != key.offset);
path->slots[0]--;
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
- other_start);
- goto done;
+ extent_end = end;
}
}
- btrfs_mark_buffer_dirty(leaf);
-
- ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
- root->root_key.objectid,
- inode->i_ino, orig_offset);
- BUG_ON(ret);
- btrfs_release_path(root, path);
-
- key.offset = start;
- ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
- BUG_ON(ret);
-
- leaf = path->nodes[0];
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- btrfs_set_file_extent_generation(leaf, fi, trans->transid);
- btrfs_set_file_extent_type(leaf, fi, extent_type);
- btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
- btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
- btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
- btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
- btrfs_set_file_extent_compression(leaf, fi, 0);
- btrfs_set_file_extent_encryption(leaf, fi, 0);
- btrfs_set_file_extent_other_encoding(leaf, fi, 0);
-done:
- btrfs_mark_buffer_dirty(leaf);
-release:
- btrfs_release_path(root, path);
- if (split_end && split == start) {
- split = end;
- goto again;
+ other_start = end;
+ other_end = 0;
+ if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
+ bytenr, &other_start, &other_end)) {
+ extent_end = other_end;
+ del_slot = path->slots[0] + 1;
+ del_nr++;
+ ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+ 0, root->root_key.objectid,
+ inode->i_ino, orig_offset);
+ BUG_ON(ret);
}
- if (locked_end > end) {
- unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
- GFP_NOFS);
+ other_start = 0;
+ other_end = start;
+ if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
+ bytenr, &other_start, &other_end)) {
+ key.offset = other_start;
+ del_slot = path->slots[0];
+ del_nr++;
+ ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+ 0, root->root_key.objectid,
+ inode->i_ino, orig_offset);
+ BUG_ON(ret);
}
+ if (del_nr == 0) {
+ btrfs_set_file_extent_type(leaf, fi,
+ BTRFS_FILE_EXTENT_REG);
+ btrfs_mark_buffer_dirty(leaf);
+ goto out;
+ }
+
+ fi = btrfs_item_ptr(leaf, del_slot - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - key.offset);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+ BUG_ON(ret);
+out:
btrfs_free_path(path);
return 0;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bf..5440bab2363 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
-static int btrfs_init_inode_security(struct inode *inode, struct inode *dir)
+static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
{
int err;
- err = btrfs_init_acl(inode, dir);
+ err = btrfs_init_acl(trans, inode, dir);
if (!err)
- err = btrfs_xattr_security_init(inode, dir);
+ err = btrfs_xattr_security_init(trans, inode, dir);
return err;
}
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
+ /*
+ * we're an inline extent, so nobody can
+ * extend the file past i_size without locking
+ * a page we already have locked.
+ *
+ * We must do any isize and inode updates
+ * before we unlock the pages. Otherwise we
+ * could end up racing with unlink.
+ */
BTRFS_I(inode)->disk_i_size = inode->i_size;
btrfs_update_inode(trans, root, inode);
+
return 0;
fail:
btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
return 1;
}
- ret = btrfs_drop_extents(trans, root, inode, start,
- aligned_end, aligned_end, start,
+ ret = btrfs_drop_extents(trans, inode, start, aligned_end,
&hint_byte, 1);
BUG_ON(ret);
@@ -416,7 +426,6 @@ again:
start, end,
total_compressed, pages);
}
- btrfs_end_transaction(trans, root);
if (ret == 0) {
/*
* inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
EXTENT_CLEAR_DELALLOC |
EXTENT_CLEAR_ACCOUNTING |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
- ret = 0;
+
+ btrfs_end_transaction(trans, root);
goto free_pages_out;
}
+ btrfs_end_transaction(trans, root);
}
if (will_compress) {
@@ -543,7 +554,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
if (list_empty(&async_cow->extents))
return 0;
- trans = btrfs_join_transaction(root, 1);
while (!list_empty(&async_cow->extents)) {
async_extent = list_entry(async_cow->extents.next,
@@ -590,19 +600,15 @@ retry:
lock_extent(io_tree, async_extent->start,
async_extent->start + async_extent->ram_size - 1,
GFP_NOFS);
- /*
- * here we're doing allocation and writeback of the
- * compressed pages
- */
- btrfs_drop_extent_cache(inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1, 0);
+ trans = btrfs_join_transaction(root, 1);
ret = btrfs_reserve_extent(trans, root,
async_extent->compressed_size,
async_extent->compressed_size,
0, alloc_hint,
(u64)-1, &ins, 1);
+ btrfs_end_transaction(trans, root);
+
if (ret) {
int i;
for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +624,14 @@ retry:
goto retry;
}
+ /*
+ * here we're doing allocation and writeback of the
+ * compressed pages
+ */
+ btrfs_drop_extent_cache(inode, async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1, 0);
+
em = alloc_extent_map(GFP_NOFS);
em->start = async_extent->start;
em->len = async_extent->ram_size;
@@ -649,8 +663,6 @@ retry:
BTRFS_ORDERED_COMPRESSED);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
-
/*
* clear dirty, set writeback and unlock the pages.
*/
@@ -672,13 +684,11 @@ retry:
async_extent->nr_pages);
BUG_ON(ret);
- trans = btrfs_join_transaction(root, 1);
alloc_hint = ins.objectid + ins.offset;
kfree(async_extent);
cond_resched();
}
- btrfs_end_transaction(trans, root);
return 0;
}
@@ -742,6 +752,7 @@ static noinline int cow_file_range(struct inode *inode,
EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK |
EXTENT_END_WRITEBACK);
+
*nr_written = *nr_written +
(end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
*page_started = 1;
@@ -1596,7 +1607,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct inode *inode, u64 file_pos,
u64 disk_bytenr, u64 disk_num_bytes,
u64 num_bytes, u64 ram_bytes,
- u64 locked_end,
u8 compression, u8 encryption,
u16 other_encoding, int extent_type)
{
@@ -1622,9 +1632,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
* the caller is expected to unpin it and allow it to be merged
* with the others.
*/
- ret = btrfs_drop_extents(trans, root, inode, file_pos,
- file_pos + num_bytes, locked_end,
- file_pos, &hint, 0);
+ ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
+ &hint, 0);
BUG_ON(ret);
ins.objectid = inode->i_ino;
@@ -1730,23 +1739,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
}
}
- trans = btrfs_join_transaction(root, 1);
-
if (!ordered_extent)
ordered_extent = btrfs_lookup_ordered_extent(inode, start);
BUG_ON(!ordered_extent);
- if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
- goto nocow;
+ if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
+ BUG_ON(!list_empty(&ordered_extent->list));
+ ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+ if (!ret) {
+ trans = btrfs_join_transaction(root, 1);
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+ btrfs_end_transaction(trans, root);
+ }
+ goto out;
+ }
lock_extent(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
GFP_NOFS);
+ trans = btrfs_join_transaction(root, 1);
+
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compressed = 1;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compressed);
- ret = btrfs_mark_extent_written(trans, root, inode,
+ ret = btrfs_mark_extent_written(trans, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len);
@@ -1758,8 +1776,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->disk_len,
ordered_extent->len,
ordered_extent->len,
- ordered_extent->file_offset +
- ordered_extent->len,
compressed, 0, 0,
BTRFS_FILE_EXTENT_REG);
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1770,22 +1786,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
unlock_extent(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
GFP_NOFS);
-nocow:
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
- mutex_lock(&BTRFS_I(inode)->extent_mutex);
- btrfs_ordered_update_i_size(inode, ordered_extent);
- btrfs_update_inode(trans, root, inode);
- btrfs_remove_ordered_extent(inode, ordered_extent);
- mutex_unlock(&BTRFS_I(inode)->extent_mutex);
-
+ /* this also removes the ordered extent from the tree */
+ btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+ btrfs_end_transaction(trans, root);
+out:
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
btrfs_put_ordered_extent(ordered_extent);
- btrfs_end_transaction(trans, root);
return 0;
}
@@ -2008,6 +2022,54 @@ zeroit:
return -EIO;
}
+struct delayed_iput {
+ struct list_head list;
+ struct inode *inode;
+};
+
+void btrfs_add_delayed_iput(struct inode *inode)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct delayed_iput *delayed;
+
+ if (atomic_add_unless(&inode->i_count, -1, 1))
+ return;
+
+ delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
+ delayed->inode = inode;
+
+ spin_lock(&fs_info->delayed_iput_lock);
+ list_add_tail(&delayed->list, &fs_info->delayed_iputs);
+ spin_unlock(&fs_info->delayed_iput_lock);
+}
+
+void btrfs_run_delayed_iputs(struct btrfs_root *root)
+{
+ LIST_HEAD(list);
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct delayed_iput *delayed;
+ int empty;
+
+ spin_lock(&fs_info->delayed_iput_lock);
+ empty = list_empty(&fs_info->delayed_iputs);
+ spin_unlock(&fs_info->delayed_iput_lock);
+ if (empty)
+ return;
+
+ down_read(&root->fs_info->cleanup_work_sem);
+ spin_lock(&fs_info->delayed_iput_lock);
+ list_splice_init(&fs_info->delayed_iputs, &list);
+ spin_unlock(&fs_info->delayed_iput_lock);
+
+ while (!list_empty(&list)) {
+ delayed = list_entry(list.next, struct delayed_iput, list);
+ list_del(&delayed->list);
+ iput(delayed->inode);
+ kfree(delayed);
+ }
+ up_read(&root->fs_info->cleanup_work_sem);
+}
+
/*
* This creates an orphan entry for the given inode in case something goes
* wrong in the middle of an unlink/truncate.
@@ -2080,16 +2142,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
struct inode *inode;
int ret = 0, nr_unlink = 0, nr_truncate = 0;
- path = btrfs_alloc_path();
- if (!path)
+ if (!xchg(&root->clean_orphans, 0))
return;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
path->reada = -1;
key.objectid = BTRFS_ORPHAN_OBJECTID;
btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
key.offset = (u64)-1;
-
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
@@ -2834,37 +2897,40 @@ out:
* min_type is the minimum key type to truncate down to. If set to 0, this
* will kill all the items on this inode, including the INODE_ITEM_KEY.
*/
-noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode,
- u64 new_size, u32 min_type)
+int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ u64 new_size, u32 min_type)
{
- int ret;
struct btrfs_path *path;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u32 found_type = (u8)-1;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
u64 extent_start = 0;
u64 extent_num_bytes = 0;
u64 extent_offset = 0;
u64 item_end = 0;
+ u64 mask = root->sectorsize - 1;
+ u32 found_type = (u8)-1;
int found_extent;
int del_item;
int pending_del_nr = 0;
int pending_del_slot = 0;
int extent_type = -1;
int encoding;
- u64 mask = root->sectorsize - 1;
+ int ret;
+ int err = 0;
+
+ BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
if (root->ref_cows)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
+
path = btrfs_alloc_path();
BUG_ON(!path);
path->reada = -1;
- /* FIXME, add redo link to tree so we don't leak on crash */
key.objectid = inode->i_ino;
key.offset = (u64)-1;
key.type = (u8)-1;
@@ -2872,17 +2938,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
search_again:
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- goto error;
+ if (ret < 0) {
+ err = ret;
+ goto out;
+ }
if (ret > 0) {
/* there are no items in the tree for us to truncate, we're
* done
*/
- if (path->slots[0] == 0) {
- ret = 0;
- goto error;
- }
+ if (path->slots[0] == 0)
+ goto out;
path->slots[0]--;
}
@@ -2917,28 +2983,17 @@ search_again:
}
item_end--;
}
- if (item_end < new_size) {
- if (found_type == BTRFS_DIR_ITEM_KEY)
- found_type = BTRFS_INODE_ITEM_KEY;
- else if (found_type == BTRFS_EXTENT_ITEM_KEY)
- found_type = BTRFS_EXTENT_DATA_KEY;
- else if (found_type == BTRFS_EXTENT_DATA_KEY)
- found_type = BTRFS_XATTR_ITEM_KEY;
- else if (found_type == BTRFS_XATTR_ITEM_KEY)
- found_type = BTRFS_INODE_REF_KEY;
- else if (found_type)
- found_type--;
- else
+ if (found_type > min_type) {
+ del_item = 1;
+ } else {
+ if (item_end < new_size)
break;
- btrfs_set_key_type(&key, found_type);
- goto next;
+ if (found_key.offset >= new_size)
+ del_item = 1;
+ else
+ del_item = 0;
}
- if (found_key.offset >= new_size)
- del_item = 1;
- else
- del_item = 0;
found_extent = 0;
-
/* FIXME, shrink the extent if the ref count is only 1 */
if (found_type != BTRFS_EXTENT_DATA_KEY)
goto delete;
@@ -3025,42 +3080,36 @@ delete:
inode->i_ino, extent_offset);
BUG_ON(ret);
}
-next:
- if (path->slots[0] == 0) {
- if (pending_del_nr)
- goto del_pending;
- btrfs_release_path(root, path);
- if (found_type == BTRFS_INODE_ITEM_KEY)
- break;
- goto search_again;
- }
- path->slots[0]--;
- if (pending_del_nr &&
- path->slots[0] + 1 != pending_del_slot) {
- struct btrfs_key debug;
-del_pending:
- btrfs_item_key_to_cpu(path->nodes[0], &debug,
- pending_del_slot);
- ret = btrfs_del_items(trans, root, path,
- pending_del_slot,
- pending_del_nr);
- BUG_ON(ret);
- pending_del_nr = 0;
+ if (found_type == BTRFS_INODE_ITEM_KEY)
+ break;
+
+ if (path->slots[0] == 0 ||
+ path->slots[0] != pending_del_slot) {
+ if (root->ref_cows) {
+ err = -EAGAIN;
+ goto out;
+ }
+ if (pending_del_nr) {
+ ret = btrfs_del_items(trans, root, path,
+ pending_del_slot,
+ pending_del_nr);
+ BUG_ON(ret);
+ pending_del_nr = 0;
+ }
btrfs_release_path(root, path);
- if (found_type == BTRFS_INODE_ITEM_KEY)
- break;
goto search_again;
+ } else {
+ path->slots[0]--;
}
}
- ret = 0;
-error:
+out:
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
}
btrfs_free_path(path);
- return ret;
+ return err;
}
/*
@@ -3180,10 +3229,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
if (size <= hole_start)
return 0;
- err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
- if (err)
- return err;
-
while (1) {
struct btrfs_ordered_extent *ordered;
btrfs_wait_ordered_range(inode, hole_start,
@@ -3196,9 +3241,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
btrfs_put_ordered_extent(ordered);
}
- trans = btrfs_start_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
-
cur_offset = hole_start;
while (1) {
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3248,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
BUG_ON(IS_ERR(em) || !em);
last_byte = min(extent_map_end(em), block_end);
last_byte = (last_byte + mask) & ~mask;
- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+ if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
u64 hint_byte = 0;
hole_size = last_byte - cur_offset;
- err = btrfs_drop_extents(trans, root, inode,
- cur_offset,
- cur_offset + hole_size,
- block_end,
- cur_offset, &hint_byte, 1);
- if (err)
- break;
- err = btrfs_reserve_metadata_space(root, 1);
+ err = btrfs_reserve_metadata_space(root, 2);
if (err)
break;
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+
+ err = btrfs_drop_extents(trans, inode, cur_offset,
+ cur_offset + hole_size,
+ &hint_byte, 1);
+ BUG_ON(err);
+
err = btrfs_insert_file_extent(trans, root,
inode->i_ino, cur_offset, 0,
0, hole_size, 0, hole_size,
0, 0, 0);
+ BUG_ON(err);
+
btrfs_drop_extent_cache(inode, hole_start,
last_byte - 1, 0);
- btrfs_unreserve_metadata_space(root, 1);
+
+ btrfs_end_transaction(trans, root);
+ btrfs_unreserve_metadata_space(root, 2);
}
free_extent_map(em);
cur_offset = last_byte;
- if (err || cur_offset >= block_end)
+ if (cur_offset >= block_end)
break;
}
- btrfs_end_transaction(trans, root);
unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
return err;
}
+static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
+ unsigned long nr;
+ int ret;
+
+ if (attr->ia_size == inode->i_size)
+ return 0;
+
+ if (attr->ia_size > inode->i_size) {
+ unsigned long limit;
+ limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+ if (attr->ia_size > inode->i_sb->s_maxbytes)
+ return -EFBIG;
+ if (limit != RLIM_INFINITY && attr->ia_size > limit) {
+ send_sig(SIGXFSZ, current, 0);
+ return -EFBIG;
+ }
+ }
+
+ ret = btrfs_reserve_metadata_space(root, 1);
+ if (ret)
+ return ret;
+
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+
+ ret = btrfs_orphan_add(trans, inode);
+ BUG_ON(ret);
+
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_unreserve_metadata_space(root, 1);
+ btrfs_btree_balance_dirty(root, nr);
+
+ if (attr->ia_size > inode->i_size) {
+ ret = btrfs_cont_expand(inode, attr->ia_size);
+ if (ret) {
+ btrfs_truncate(inode);
+ return ret;
+ }
+
+ i_size_write(inode, attr->ia_size);
+ btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+ if (inode->i_nlink > 0) {
+ ret = btrfs_orphan_del(trans, inode);
+ BUG_ON(ret);
+ }
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+ return 0;
+ }
+
+ /*
+ * We're truncating a file that used to have good data down to
+ * zero. Make sure it gets into the ordered flush list so that
+ * any new writes get down to disk quickly.
+ */
+ if (attr->ia_size == 0)
+ BTRFS_I(inode)->ordered_data_close = 1;
+
+ /* we don't support swapfiles, so vmtruncate shouldn't fail */
+ ret = vmtruncate(inode, attr->ia_size);
+ BUG_ON(ret);
+
+ return 0;
+}
+
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
@@ -3250,23 +3372,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
- if (attr->ia_size > inode->i_size) {
- err = btrfs_cont_expand(inode, attr->ia_size);
- if (err)
- return err;
- } else if (inode->i_size > 0 &&
- attr->ia_size == 0) {
-
- /* we're truncating a file that used to have good
- * data down to zero. Make sure it gets into
- * the ordered flush list so that any new writes
- * get down to disk quickly.
- */
- BTRFS_I(inode)->ordered_data_close = 1;
- }
+ err = btrfs_setattr_size(inode, attr);
+ if (err)
+ return err;
}
+ attr->ia_valid &= ~ATTR_SIZE;
- err = inode_setattr(inode, attr);
+ if (attr->ia_valid)
+ err = inode_setattr(inode, attr);
if (!err && ((attr->ia_valid & ATTR_MODE)))
err = btrfs_acl_chmod(inode);
@@ -3287,36 +3400,43 @@ void btrfs_delete_inode(struct inode *inode)
}
btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (root->fs_info->log_root_recovering) {
+ BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
+ goto no_delete;
+ }
+
if (inode->i_nlink > 0) {
BUG_ON(btrfs_root_refs(&root->root_item) != 0);
goto no_delete;
}
btrfs_i_size_write(inode, 0);
- trans = btrfs_join_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
- ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0);
- if (ret) {
- btrfs_orphan_del(NULL, inode);
- goto no_delete_lock;
- }
+ while (1) {
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
- btrfs_orphan_del(trans, inode);
+ if (ret != -EAGAIN)
+ break;
- nr = trans->blocks_used;
- clear_inode(inode);
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ trans = NULL;
+ btrfs_btree_balance_dirty(root, nr);
+ }
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
- return;
+ if (ret == 0) {
+ ret = btrfs_orphan_del(trans, inode);
+ BUG_ON(ret);
+ }
-no_delete_lock:
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
no_delete:
clear_inode(inode);
+ return;
}
/*
@@ -3569,7 +3689,6 @@ static noinline void init_btrfs_i(struct inode *inode)
INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
- mutex_init(&BTRFS_I(inode)->extent_mutex);
mutex_init(&BTRFS_I(inode)->log_mutex);
}
@@ -3695,6 +3814,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
}
srcu_read_unlock(&root->fs_info->subvol_srcu, index);
+ if (root != sub_root) {
+ down_read(&root->fs_info->cleanup_work_sem);
+ if (!(inode->i_sb->s_flags & MS_RDONLY))
+ btrfs_orphan_cleanup(sub_root);
+ up_read(&root->fs_info->cleanup_work_sem);
+ }
+
return inode;
}
@@ -4219,7 +4345,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4290,7 +4416,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4336,6 +4462,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink == 0)
return -ENOENT;
+ /* do not allow sys_link's with other subvols of the same device */
+ if (root->objectid != BTRFS_I(inode)->root->objectid)
+ return -EPERM;
+
/*
* 1 item for inode ref
* 2 items for dir items
@@ -4423,7 +4553,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
drop_on_err = 1;
- err = btrfs_init_inode_security(inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir);
if (err)
goto out_fail;
@@ -5074,17 +5204,20 @@ static void btrfs_truncate(struct inode *inode)
unsigned long nr;
u64 mask = root->sectorsize - 1;
- if (!S_ISREG(inode->i_mode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ if (!S_ISREG(inode->i_mode)) {
+ WARN_ON(1);
return;
+ }
ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (ret)
return;
+
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
+ btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
/*
* setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5239,32 @@ static void btrfs_truncate(struct inode *inode)
if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
btrfs_add_ordered_operation(trans, root, inode);
- btrfs_set_trans_block_group(trans, inode);
- btrfs_i_size_write(inode, inode->i_size);
+ while (1) {
+ ret = btrfs_truncate_inode_items(trans, root, inode,
+ inode->i_size,
+ BTRFS_EXTENT_DATA_KEY);
+ if (ret != -EAGAIN)
+ break;
- ret = btrfs_orphan_add(trans, inode);
- if (ret)
- goto out;
- /* FIXME, add redo link to tree so we don't leak on crash */
- ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
- BTRFS_EXTENT_DATA_KEY);
- btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ }
- ret = btrfs_orphan_del(trans, inode);
+ if (ret == 0 && inode->i_nlink > 0) {
+ ret = btrfs_orphan_del(trans, inode);
+ BUG_ON(ret);
+ }
+
+ ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
-out:
nr = trans->blocks_used;
ret = btrfs_end_transaction_throttle(trans, root);
BUG_ON(ret);
@@ -5217,9 +5361,9 @@ void btrfs_destroy_inode(struct inode *inode)
spin_lock(&root->list_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
- printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
- " list\n", inode->i_ino);
- dump_stack();
+ printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
+ inode->i_ino);
+ list_del_init(&BTRFS_I(inode)->i_orphan);
}
spin_unlock(&root->list_lock);
@@ -5476,7 +5620,7 @@ out_fail:
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-int btrfs_start_delalloc_inodes(struct btrfs_root *root)
+int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode;
@@ -5495,7 +5639,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
filemap_flush(inode->i_mapping);
- iput(inode);
+ if (delay_iput)
+ btrfs_add_delayed_iput(inode);
+ else
+ iput(inode);
}
cond_resched();
spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5716,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -5641,10 +5788,10 @@ out_fail:
return err;
}
-static int prealloc_file_range(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 start, u64 end,
- u64 locked_end, u64 alloc_hint, int mode)
+static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
+ u64 alloc_hint, int mode)
{
+ struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 alloc_size;
@@ -5655,43 +5802,56 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,
while (num_bytes > 0) {
alloc_size = min(num_bytes, root->fs_info->max_extent);
- ret = btrfs_reserve_metadata_space(root, 1);
- if (ret)
- goto out;
+ trans = btrfs_start_transaction(root, 1);
ret = btrfs_reserve_extent(trans, root, alloc_size,
root->sectorsize, 0, alloc_hint,
(u64)-1, &ins, 1);
if (ret) {
WARN_ON(1);
- goto out;
+ goto stop_trans;
+ }
+
+ ret = btrfs_reserve_metadata_space(root, 3);
+ if (ret) {
+ btrfs_free_reserved_extent(root, ins.objectid,
+ ins.offset);
+ goto stop_trans;
}
+
ret = insert_reserved_file_extent(trans, inode,
cur_offset, ins.objectid,
ins.offset, ins.offset,
- ins.offset, locked_end,
- 0, 0, 0,
+ ins.offset, 0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
BUG_ON(ret);
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + ins.offset -1, 0);
+
num_bytes -= ins.offset;
cur_offset += ins.offset;
alloc_hint = ins.objectid + ins.offset;
- btrfs_unreserve_metadata_space(root, 1);
- }
-out:
- if (cur_offset > start) {
+
inode->i_ctime = CURRENT_TIME;
BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- cur_offset > i_size_read(inode))
- btrfs_i_size_write(inode, cur_offset);
+ cur_offset > inode->i_size) {
+ i_size_write(inode, cur_offset);
+ btrfs_ordered_update_i_size(inode, cur_offset, NULL);
+ }
+
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
+
+ btrfs_end_transaction(trans, root);
+ btrfs_unreserve_metadata_space(root, 3);
}
+ return ret;
+stop_trans:
+ btrfs_end_transaction(trans, root);
return ret;
+
}
static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5705,8 +5865,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
u64 locked_end;
u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
struct extent_map *em;
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root;
int ret;
alloc_start = offset & ~mask;
@@ -5725,9 +5883,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
goto out;
}
- root = BTRFS_I(inode)->root;
-
- ret = btrfs_check_data_free_space(root, inode,
+ ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
alloc_end - alloc_start);
if (ret)
goto out;
@@ -5736,12 +5892,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
while (1) {
struct btrfs_ordered_extent *ordered;
- trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
- if (!trans) {
- ret = -EIO;
- goto out_free;
- }
-
/* the extent lock is ordered inside the running
* transaction
*/
@@ -5755,8 +5905,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
btrfs_put_ordered_extent(ordered);
unlock_extent(&BTRFS_I(inode)->io_tree,
alloc_start, locked_end, GFP_NOFS);
- btrfs_end_transaction(trans, BTRFS_I(inode)->root);
-
/*
* we can't wait on the range with the transaction
* running or with the extent lock held
@@ -5777,10 +5925,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
BUG_ON(IS_ERR(em) || !em);
last_byte = min(extent_map_end(em), alloc_end);
last_byte = (last_byte + mask) & ~mask;
- if (em->block_start == EXTENT_MAP_HOLE) {
- ret = prealloc_file_range(trans, inode, cur_offset,
- last_byte, locked_end + 1,
- alloc_hint, mode);
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ (cur_offset >= inode->i_size &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+ ret = prealloc_file_range(inode,
+ cur_offset, last_byte,
+ alloc_hint, mode);
if (ret < 0) {
free_extent_map(em);
break;
@@ -5799,9 +5949,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
GFP_NOFS);
- btrfs_end_transaction(trans, BTRFS_I(inode)->root);
-out_free:
- btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
+ btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
+ alloc_end - alloc_start);
out:
mutex_unlock(&inode->i_mutex);
return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b..645a17927a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
- unsigned long nr = 1;
/*
* 1 - inode item
@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
btrfs_set_root_generation(&root_item, trans->transid);
btrfs_set_root_level(&root_item, 0);
btrfs_set_root_refs(&root_item, 1);
- btrfs_set_root_used(&root_item, 0);
+ btrfs_set_root_used(&root_item, leaf->len);
btrfs_set_root_last_snapshot(&root_item, 0);
memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
- nr = trans->blocks_used;
err = btrfs_commit_transaction(trans, root);
if (err && !ret)
ret = err;
btrfs_unreserve_metadata_space(root, 6);
- btrfs_btree_balance_dirty(root, nr);
return ret;
}
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen)
{
+ struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
- int ret = 0;
- int err;
- unsigned long nr = 0;
+ int ret;
if (!root->ref_cows)
return -EINVAL;
@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
*/
ret = btrfs_reserve_metadata_space(root, 6);
if (ret)
- goto fail_unlock;
+ goto fail;
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot) {
ret = -ENOMEM;
btrfs_unreserve_metadata_space(root, 6);
- goto fail_unlock;
+ goto fail;
}
pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
if (!pending_snapshot->name) {
ret = -ENOMEM;
kfree(pending_snapshot);
btrfs_unreserve_metadata_space(root, 6);
- goto fail_unlock;
+ goto fail;
}
memcpy(pending_snapshot->name, name, namelen);
pending_snapshot->name[namelen] = '\0';
@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
pending_snapshot->root = root;
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
- err = btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ btrfs_unreserve_metadata_space(root, 6);
-fail_unlock:
- btrfs_btree_balance_dirty(root, nr);
+ inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ goto fail;
+ }
+ BUG_ON(!inode);
+ d_instantiate(dentry, inode);
+ ret = 0;
+fail:
return ret;
}
@@ -1027,8 +1032,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
BUG_ON(!trans);
/* punch hole in destination first */
- btrfs_drop_extents(trans, root, inode, off, off + len,
- off + len, 0, &hint_byte, 1);
+ btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
/* clone data */
key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a30..b10a49d4bc6 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
/*
* remove an ordered extent from the tree. No references are dropped
- * but, anyone waiting on this extent is woken up.
+ * and you must wake_up entry->wait. You must hold the tree mutex
+ * while you call this function.
*/
-int btrfs_remove_ordered_extent(struct inode *inode,
+static int __btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
tree = &BTRFS_I(inode)->ordered_tree;
- mutex_lock(&tree->mutex);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
tree->last = NULL;
@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
}
spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+ return 0;
+}
+
+/*
+ * remove an ordered extent from the tree. No references are dropped
+ * but any waiters are woken.
+ */
+int btrfs_remove_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry)
+{
+ struct btrfs_ordered_inode_tree *tree;
+ int ret;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+ mutex_lock(&tree->mutex);
+ ret = __btrfs_remove_ordered_extent(inode, entry);
mutex_unlock(&tree->mutex);
wake_up(&entry->wait);
- return 0;
+
+ return ret;
}
/*
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
+int btrfs_wait_ordered_extents(struct btrfs_root *root,
+ int nocow_only, int delay_iput)
{
struct list_head splice;
struct list_head *cur;
@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
if (inode) {
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
- iput(inode);
+ if (delay_iput)
+ btrfs_add_delayed_iput(inode);
+ else
+ iput(inode);
} else {
btrfs_put_ordered_extent(ordered);
}
@@ -430,7 +451,7 @@ again:
btrfs_wait_ordered_range(inode, 0, (u64)-1);
else
filemap_flush(inode->i_mapping);
- iput(inode);
+ btrfs_add_delayed_iput(inode);
}
cond_resched();
@@ -589,7 +610,7 @@ out:
* After an extent is done, call this to conditionally update the on disk
* i_size. i_size is updated to cover any fully written part of the file.
*/
-int btrfs_ordered_update_i_size(struct inode *inode,
+int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered)
{
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +618,30 @@ int btrfs_ordered_update_i_size(struct inode *inode,
u64 disk_i_size;
u64 new_i_size;
u64 i_size_test;
+ u64 i_size = i_size_read(inode);
struct rb_node *node;
+ struct rb_node *prev = NULL;
struct btrfs_ordered_extent *test;
+ int ret = 1;
+
+ if (ordered)
+ offset = entry_end(ordered);
mutex_lock(&tree->mutex);
disk_i_size = BTRFS_I(inode)->disk_i_size;
+ /* truncate file */
+ if (disk_i_size > i_size) {
+ BTRFS_I(inode)->disk_i_size = i_size;
+ ret = 0;
+ goto out;
+ }
+
/*
* if the disk i_size is already at the inode->i_size, or
* this ordered extent is inside the disk i_size, we're done
*/
- if (disk_i_size >= inode->i_size ||
- ordered->file_offset + ordered->len <= disk_i_size) {
+ if (disk_i_size == i_size || offset <= disk_i_size) {
goto out;
}
@@ -616,8 +649,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* we can't update the disk_isize if there are delalloc bytes
* between disk_i_size and this ordered extent
*/
- if (test_range_bit(io_tree, disk_i_size,
- ordered->file_offset + ordered->len - 1,
+ if (test_range_bit(io_tree, disk_i_size, offset - 1,
EXTENT_DELALLOC, 0, NULL)) {
goto out;
}
@@ -626,20 +658,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* if we find an ordered extent then we can't update disk i_size
* yet
*/
- node = &ordered->rb_node;
- while (1) {
- node = rb_prev(node);
- if (!node)
- break;
+ if (ordered) {
+ node = rb_prev(&ordered->rb_node);
+ } else {
+ prev = tree_search(tree, offset);
+ /*
+ * we insert file extents without involving ordered struct,
+ * so there should be no ordered struct cover this offset
+ */
+ if (prev) {
+ test = rb_entry(prev, struct btrfs_ordered_extent,
+ rb_node);
+ BUG_ON(offset_in_entry(test, offset));
+ }
+ node = prev;
+ }
+ while (node) {
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (test->file_offset + test->len <= disk_i_size)
break;
- if (test->file_offset >= inode->i_size)
+ if (test->file_offset >= i_size)
break;
if (test->file_offset >= disk_i_size)
goto out;
+ node = rb_prev(node);
}
- new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode));
+ new_i_size = min_t(u64, offset, i_size);
/*
* at this point, we know we can safely update i_size to at least
@@ -647,7 +691,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* walk forward and see if ios from higher up in the file have
* finished.
*/
- node = rb_next(&ordered->rb_node);
+ if (ordered) {
+ node = rb_next(&ordered->rb_node);
+ } else {
+ if (prev)
+ node = rb_next(prev);
+ else
+ node = rb_first(&tree->tree);
+ }
i_size_test = 0;
if (node) {
/*
@@ -655,10 +706,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* between our ordered extent and the next one.
*/
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (test->file_offset > entry_end(ordered))
+ if (test->file_offset > offset)
i_size_test = test->file_offset;
} else {
- i_size_test = i_size_read(inode);
+ i_size_test = i_size;
}
/*
@@ -667,15 +718,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* are no delalloc bytes in this area, it is safe to update
* disk_i_size to the end of the region.
*/
- if (i_size_test > entry_end(ordered) &&
- !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
- EXTENT_DELALLOC, 0, NULL)) {
- new_i_size = min_t(u64, i_size_test, i_size_read(inode));
+ if (i_size_test > offset &&
+ !test_range_bit(io_tree, offset, i_size_test - 1,
+ EXTENT_DELALLOC, 0, NULL)) {
+ new_i_size = min_t(u64, i_size_test, i_size);
}
BTRFS_I(inode)->disk_i_size = new_i_size;
+ ret = 0;
out:
+ /*
+ * we need to remove the ordered extent with the tree lock held
+ * so that other people calling this function don't find our fully
+ * processed ordered entry and skip updating the i_size
+ */
+ if (ordered)
+ __btrfs_remove_ordered_extent(inode, ordered);
mutex_unlock(&tree->mutex);
- return 0;
+ if (ordered)
+ wake_up(&ordered->wait);
+ return ret;
}
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca..1fe1282ef47 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
-int btrfs_ordered_update_i_size(struct inode *inode,
+int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
+int btrfs_wait_ordered_extents(struct btrfs_root *root,
+ int nocow_only, int delay_iput);
#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7..a9728680eca 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
return 0;
}
+static void put_inodes(struct list_head *list)
+{
+ struct inodevec *ivec;
+ while (!list_empty(list)) {
+ ivec = list_entry(list->next, struct inodevec, list);
+ list_del(&ivec->list);
+ while (ivec->nr > 0) {
+ ivec->nr--;
+ iput(ivec->inode[ivec->nr]);
+ }
+ kfree(ivec);
+ }
+}
+
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key)
@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
btrfs_btree_balance_dirty(root, nr);
+ /*
+ * put inodes outside transaction, otherwise we may deadlock.
+ */
+ put_inodes(&inode_list);
+
if (replaced && rc->stage == UPDATE_DATA_PTRS)
invalidate_extent_cache(root, &key, &next_key);
}
@@ -1752,19 +1771,7 @@ out:
btrfs_btree_balance_dirty(root, nr);
- /*
- * put inodes while we aren't holding the tree locks
- */
- while (!list_empty(&inode_list)) {
- struct inodevec *ivec;
- ivec = list_entry(inode_list.next, struct inodevec, list);
- list_del(&ivec->list);
- while (ivec->nr > 0) {
- ivec->nr--;
- iput(ivec->inode[ivec->nr]);
- }
- kfree(ivec);
- }
+ put_inodes(&inode_list);
if (replaced && rc->stage == UPDATE_DATA_PTRS)
invalidate_extent_cache(root, &key, &next_key);
@@ -3534,8 +3541,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
(unsigned long long)rc->block_group->key.objectid,
(unsigned long long)rc->block_group->flags);
- btrfs_start_delalloc_inodes(fs_info->tree_root);
- btrfs_wait_ordered_extents(fs_info->tree_root, 0);
+ btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
+ btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
while (1) {
rc->extents_found = 0;
@@ -3755,6 +3762,7 @@ out:
BTRFS_DATA_RELOC_TREE_OBJECTID);
if (IS_ERR(fs_root))
err = PTR_ERR(fs_root);
+ btrfs_orphan_cleanup(fs_root);
}
return err;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf5..3f9b45704fc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -128,6 +128,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
substring_t args[MAX_OPT_ARGS];
char *p, *num;
int intarg;
+ int ret = 0;
if (!options)
return 0;
@@ -262,12 +263,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_discard:
btrfs_set_opt(info->mount_opt, DISCARD);
break;
+ case Opt_err:
+ printk(KERN_INFO "btrfs: unrecognized mount option "
+ "'%s'\n", p);
+ ret = -EINVAL;
+ goto out;
default:
break;
}
}
+out:
kfree(options);
- return 0;
+ return ret;
}
/*
@@ -405,8 +412,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_start_delalloc_inodes(root);
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_start_delalloc_inodes(root, 0);
+ btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
@@ -450,6 +457,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",notreelog");
if (btrfs_test_opt(root, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
+ if (btrfs_test_opt(root, DISCARD))
+ seq_puts(seq, ",discard");
if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
seq_puts(seq, ",noacl");
return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c207e8c32c9..b2acc79f1b3 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans);
+ if (throttle)
+ btrfs_run_delayed_iputs(root);
+
return 0;
}
@@ -354,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
* those extents are sent to disk but does not wait on them
*/
int btrfs_write_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages)
+ struct extent_io_tree *dirty_pages, int mark)
{
int ret;
int err = 0;
@@ -367,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
while (1) {
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
- EXTENT_DIRTY);
+ mark);
if (ret)
break;
while (start <= end) {
@@ -413,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
* on all the pages and clear them from the dirty pages state tree
*/
int btrfs_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages)
+ struct extent_io_tree *dirty_pages, int mark)
{
int ret;
int err = 0;
@@ -425,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
unsigned long index;
while (1) {
- ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
- EXTENT_DIRTY);
+ ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark);
if (ret)
break;
- clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
+ clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
while (start <= end) {
index = start >> PAGE_CACHE_SHIFT;
start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -460,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
* those extents are on disk for transaction or log commit
*/
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages)
+ struct extent_io_tree *dirty_pages, int mark)
{
int ret;
int ret2;
- ret = btrfs_write_marked_extents(root, dirty_pages);
- ret2 = btrfs_wait_marked_extents(root, dirty_pages);
+ ret = btrfs_write_marked_extents(root, dirty_pages, mark);
+ ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
return ret || ret2;
}
@@ -479,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
return filemap_write_and_wait(btree_inode->i_mapping);
}
return btrfs_write_and_wait_marked_extents(root,
- &trans->transaction->dirty_pages);
+ &trans->transaction->dirty_pages,
+ EXTENT_DIRTY);
}
/*
@@ -497,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
{
int ret;
u64 old_root_bytenr;
+ u64 old_root_used;
struct btrfs_root *tree_root = root->fs_info->tree_root;
+ old_root_used = btrfs_root_used(&root->root_item);
btrfs_write_dirty_block_groups(trans, root);
while (1) {
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
- if (old_root_bytenr == root->node->start)
+ if (old_root_bytenr == root->node->start &&
+ old_root_used == btrfs_root_used(&root->root_item))
break;
btrfs_set_root_node(&root->root_item, root->node);
@@ -512,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
&root->root_item);
BUG_ON(ret);
+ old_root_used = btrfs_root_used(&root->root_item);
ret = btrfs_write_dirty_block_groups(trans, root);
BUG_ON(ret);
}
@@ -795,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
memcpy(&pending->root_key, &key, sizeof(key));
fail:
kfree(new_root_item);
- btrfs_unreserve_metadata_space(root, 6);
return ret;
}
@@ -807,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
u64 index = 0;
struct btrfs_trans_handle *trans;
struct inode *parent_inode;
- struct inode *inode;
struct btrfs_root *parent_root;
parent_inode = pending->dentry->d_parent->d_inode;
@@ -839,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
BUG_ON(ret);
- inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
- d_instantiate(pending->dentry, inode);
fail:
btrfs_end_transaction(trans, fs_info->fs_root);
return ret;
@@ -994,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_unlock(&root->fs_info->trans_mutex);
if (flush_on_commit) {
- btrfs_start_delalloc_inodes(root);
- ret = btrfs_wait_ordered_extents(root, 0);
+ btrfs_start_delalloc_inodes(root, 1);
+ ret = btrfs_wait_ordered_extents(root, 0, 1);
BUG_ON(ret);
} else if (snap_pending) {
- ret = btrfs_wait_ordered_extents(root, 1);
+ ret = btrfs_wait_ordered_extents(root, 0, 1);
BUG_ON(ret);
}
@@ -1116,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
current->journal_info = NULL;
kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
+ if (current != root->fs_info->transaction_kthread)
+ btrfs_run_delayed_iputs(root);
+
return ret;
}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d4e3e7a6938..93c7ccb3311 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages);
+ struct extent_io_tree *dirty_pages, int mark);
int btrfs_write_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages);
+ struct extent_io_tree *dirty_pages, int mark);
int btrfs_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages);
+ struct extent_io_tree *dirty_pages, int mark);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 741666a7676..4a9434b622e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
saved_nbytes = inode_get_bytes(inode);
/* drop any overlapping extents */
- ret = btrfs_drop_extents(trans, root, inode,
- start, extent_end, extent_end, start, &alloc_hint, 1);
+ ret = btrfs_drop_extents(trans, inode, start, extent_end,
+ &alloc_hint, 1);
BUG_ON(ret);
if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +930,17 @@ out_nowrite:
return 0;
}
+static int insert_orphan_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 offset)
+{
+ int ret;
+ ret = btrfs_find_orphan_item(root, offset);
+ if (ret > 0)
+ ret = btrfs_insert_orphan_item(trans, root, offset);
+ return ret;
+}
+
+
/*
* There are a few corners where the link count of the file can't
* be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
}
BTRFS_I(inode)->index_cnt = (u64)-1;
- if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) {
- ret = replay_dir_deletes(trans, root, NULL, path,
- inode->i_ino, 1);
+ if (inode->i_nlink == 0) {
+ if (S_ISDIR(inode->i_mode)) {
+ ret = replay_dir_deletes(trans, root, NULL, path,
+ inode->i_ino, 1);
+ BUG_ON(ret);
+ }
+ ret = insert_orphan_item(trans, root, inode->i_ino);
BUG_ON(ret);
}
btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
/* inode keys are done during the first stage */
if (key.type == BTRFS_INODE_ITEM_KEY &&
wc->stage == LOG_WALK_REPLAY_INODES) {
- struct inode *inode;
struct btrfs_inode_item *inode_item;
u32 mode;
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
eb, i, &key);
BUG_ON(ret);
- /* for regular files, truncate away
- * extents past the new EOF
+ /* for regular files, make sure corresponding
+ * orhpan item exist. extents past the new EOF
+ * will be truncated later by orphan cleanup.
*/
if (S_ISREG(mode)) {
- inode = read_one_inode(root,
- key.objectid);
- BUG_ON(!inode);
-
- ret = btrfs_truncate_inode_items(wc->trans,
- root, inode, inode->i_size,
- BTRFS_EXTENT_DATA_KEY);
+ ret = insert_orphan_item(wc->trans, root,
+ key.objectid);
BUG_ON(ret);
-
- /* if the nlink count is zero here, the iput
- * will free the inode. We bump it to make
- * sure it doesn't get freed until the link
- * count fixup is done
- */
- if (inode->i_nlink == 0) {
- btrfs_inc_nlink(inode);
- btrfs_update_inode(wc->trans,
- root, inode);
- }
- iput(inode);
}
+
ret = link_to_fixup_dir(wc->trans, root,
path, key.objectid);
BUG_ON(ret);
@@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
{
int index1;
int index2;
+ int mark;
int ret;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
- u64 log_transid = 0;
+ unsigned long log_transid = 0;
mutex_lock(&root->log_mutex);
index1 = root->log_transid % 2;
@@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out;
}
+ log_transid = root->log_transid;
+ if (log_transid % 2 == 0)
+ mark = EXTENT_DIRTY;
+ else
+ mark = EXTENT_NEW;
+
/* we start IO on all the marked extents here, but we don't actually
* wait for them until later.
*/
- ret = btrfs_write_marked_extents(log, &log->dirty_log_pages);
+ ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
BUG_ON(ret);
btrfs_set_root_node(&log->root_item, log->node);
root->log_batch = 0;
- log_transid = root->log_transid;
root->log_transid++;
log->log_transid = root->log_transid;
root->log_start_pid = 0;
smp_mb();
/*
- * log tree has been flushed to disk, new modifications of
- * the log will be written to new positions. so it's safe to
- * allow log writers to go in.
+ * IO has been started, blocks of the log tree have WRITTEN flag set
+ * in their headers. new modifications of the log will be written to
+ * new positions. so it's safe to allow log writers to go in.
*/
mutex_unlock(&root->log_mutex);
@@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
index2 = log_root_tree->log_transid % 2;
if (atomic_read(&log_root_tree->log_commit[index2])) {
- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* check the full commit flag again
*/
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
}
ret = btrfs_write_and_wait_marked_extents(log_root_tree,
- &log_root_tree->dirty_log_pages);
+ &log_root_tree->dirty_log_pages,
+ EXTENT_DIRTY | EXTENT_NEW);
BUG_ON(ret);
- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
btrfs_set_super_log_root(&root->fs_info->super_for_commit,
log_root_tree->node->start);
@@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY);
+ 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
if (ret)
break;
- clear_extent_dirty(&log->dirty_log_pages,
- start, end, GFP_NOFS);
+ clear_extent_bits(&log->dirty_log_pages, start, end,
+ EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
if (log->log_transid > 0) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5..198cff28766 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_chunk_size = 10 * calc_size;
min_stripe_size = 64 * 1024 * 1024;
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
- max_chunk_size = 4 * calc_size;
+ max_chunk_size = 256 * 1024 * 1024;
min_stripe_size = 32 * 1024 * 1024;
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
calc_size = 8 * 1024 * 1024;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b6dd5967c48..193b58f7d3f 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
return ret;
}
-int __btrfs_setxattr(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+static int do_setxattr(struct btrfs_trans_handle *trans,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
{
struct btrfs_dir_item *di;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
struct btrfs_path *path;
- int ret = 0, mod = 0;
+ size_t name_len = strlen(name);
+ int ret = 0;
+
+ if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
+ return -ENOSPC;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- trans = btrfs_join_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
-
/* first lets see if we already have this xattr */
di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
}
ret = btrfs_delete_one_dir_name(trans, root, path, di);
- if (ret)
- goto out;
+ BUG_ON(ret);
btrfs_release_path(root, path);
/* if we don't have a value then we are removing the xattr */
- if (!value) {
- mod = 1;
+ if (!value)
goto out;
- }
} else {
btrfs_release_path(root, path);
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
}
/* ok we have to create a completely new xattr */
- ret = btrfs_insert_xattr_item(trans, root, name, strlen(name),
- value, size, inode->i_ino);
+ ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
+ name, name_len, value, size);
+ BUG_ON(ret);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+int __btrfs_setxattr(struct btrfs_trans_handle *trans,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ if (trans)
+ return do_setxattr(trans, inode, name, value, size, flags);
+
+ ret = btrfs_reserve_metadata_space(root, 2);
if (ret)
- goto out;
- mod = 1;
+ return ret;
-out:
- if (mod) {
- inode->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, inode);
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans) {
+ ret = -ENOMEM;
+ goto out;
}
+ btrfs_set_trans_block_group(trans, inode);
- btrfs_end_transaction(trans, root);
- btrfs_free_path(path);
+ ret = do_setxattr(trans, inode, name, value, size, flags);
+ if (ret)
+ goto out;
+
+ inode->i_ctime = CURRENT_TIME;
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+out:
+ btrfs_end_transaction_throttle(trans, root);
+ btrfs_unreserve_metadata_space(root, 2);
return ret;
}
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (size == 0)
value = ""; /* empty EA, do not remove */
- return __btrfs_setxattr(dentry->d_inode, name, value, size, flags);
+
+ return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
+ flags);
}
int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
if (!btrfs_is_valid_xattr(name))
return -EOPNOTSUPP;
- return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+
+ return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
+ XATTR_REPLACE);
}
-int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
+int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
{
int err;
size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
} else {
strcpy(name, XATTR_SECURITY_PREFIX);
strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
- err = __btrfs_setxattr(inode, name, value, len, 0);
+ err = __btrfs_setxattr(trans, inode, name, value, len, 0);
kfree(name);
}
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f..721efa0346e 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
void *buffer, size_t size);
-extern int __btrfs_setxattr(struct inode *inode, const char *name,
- const void *value, size_t size, int flags);
-
+extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags);
extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size);
extern int btrfs_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
-extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir);
+extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir);
#endif /* __XATTR__ */
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4012885d027..e82adc2debb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1206,7 +1206,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
* NOTE: filesystems with their own locking have to handle this
* on their own.
*/
- if (dio->flags & DIO_LOCKING) {
+ if (flags & DIO_LOCKING) {
if (unlikely((rw & WRITE) && retval < 0)) {
loff_t isize = i_size_read(inode);
if (end > isize)
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75b..8f006a0d607 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
struct inode *lower_inode =
ecryptfs_inode_to_lower(dentry->d_inode);
- fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
+ fsstack_copy_attr_all(dentry->d_inode, lower_inode);
}
out:
return rc;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0d..429ca0b3ba0 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -626,9 +626,9 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
lower_new_dir_dentry->d_inode, lower_new_dentry);
if (rc)
goto out_lock;
- fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
+ fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
if (new_dir != old_dir)
- fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
+ fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
out_lock:
unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
dput(lower_new_dentry->d_parent);
@@ -967,7 +967,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
rc = notify_change(lower_dentry, ia);
mutex_unlock(&lower_dentry->d_inode->i_mutex);
out:
- fsstack_copy_attr_all(inode, lower_inode, NULL);
+ fsstack_copy_attr_all(inode, lower_inode);
return rc;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 101fe4c7b1e..567bc4b9f70 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -189,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
init_special_inode(inode, lower_inode->i_mode,
lower_inode->i_rdev);
dentry->d_op = &ecryptfs_dops;
- fsstack_copy_attr_all(inode, lower_inode, NULL);
+ fsstack_copy_attr_all(inode, lower_inode);
/* This size will be overwritten for real files w/ headers and
* other metadata */
fsstack_copy_inode_size(inode, lower_inode);
diff --git a/fs/exec.c b/fs/exec.c
index 623a5cc3076..632b02e34ec 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -826,7 +826,9 @@ static int de_thread(struct task_struct *tsk)
attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
transfer_pid(leader, tsk, PIDTYPE_PGID);
transfer_pid(leader, tsk, PIDTYPE_SID);
+
list_replace_rcu(&leader->tasks, &tsk->tasks);
+ list_replace_init(&leader->sibling, &tsk->sibling);
tsk->group_leader = tsk;
leader->group_leader = tsk;
@@ -1761,17 +1763,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
struct inode * inode;
- struct file * file;
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
int flag = 0;
int ispipe = 0;
- unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
char **helper_argv = NULL;
int helper_argc = 0;
int dump_count = 0;
static atomic_t core_dump_count = ATOMIC_INIT(0);
+ struct coredump_params cprm = {
+ .signr = signr,
+ .regs = regs,
+ .limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
+ };
audit_core_dumps(signr);
@@ -1827,15 +1832,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
ispipe = format_corename(corename, signr);
unlock_kernel();
- if ((!ispipe) && (core_limit < binfmt->min_coredump))
+ if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
goto fail_unlock;
if (ispipe) {
- if (core_limit == 0) {
+ if (cprm.limit == 0) {
/*
* Normally core limits are irrelevant to pipes, since
* we're not writing to the file system, but we use
- * core_limit of 0 here as a speacial value. Any
+ * cprm.limit of 0 here as a speacial value. Any
* non-zero limit gets set to RLIM_INFINITY below, but
* a limit of 0 skips the dump. This is a consistent
* way to catch recursive crashes. We can still crash
@@ -1868,25 +1873,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
goto fail_dropcount;
}
- core_limit = RLIM_INFINITY;
+ cprm.limit = RLIM_INFINITY;
/* SIGPIPE can happen, but it's just never processed */
if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
- &file)) {
+ &cprm.file)) {
printk(KERN_INFO "Core dump to %s pipe failed\n",
corename);
goto fail_dropcount;
}
} else
- file = filp_open(corename,
+ cprm.file = filp_open(corename,
O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
0600);
- if (IS_ERR(file))
+ if (IS_ERR(cprm.file))
goto fail_dropcount;
- inode = file->f_path.dentry->d_inode;
+ inode = cprm.file->f_path.dentry->d_inode;
if (inode->i_nlink > 1)
goto close_fail; /* multiple links - don't dump */
- if (!ispipe && d_unhashed(file->f_path.dentry))
+ if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
goto close_fail;
/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1899,21 +1904,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
*/
if (inode->i_uid != current_fsuid())
goto close_fail;
- if (!file->f_op)
+ if (!cprm.file->f_op)
goto close_fail;
- if (!file->f_op->write)
+ if (!cprm.file->f_op->write)
goto close_fail;
- if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+ if (!ispipe &&
+ do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
goto close_fail;
- retval = binfmt->core_dump(signr, regs, file, core_limit);
+ retval = binfmt->core_dump(&cprm);
if (retval)
current->signal->group_exit_code |= 0x80;
close_fail:
if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(file);
- filp_close(file, NULL);
+ wait_for_dump_helpers(cprm.file);
+ filp_close(cprm.file, NULL);
fail_dropcount:
if (dump_count)
atomic_dec(&core_dump_count);
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e5f6774846e..9acf7e80813 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -2,7 +2,6 @@ config EXT4_FS
tristate "The Extended 4 (ext4) filesystem"
select JBD2
select CRC16
- select FS_JOURNAL_INFO
help
This is the next generation of the ext3 filesystem.
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index b192c661caa..4dcddf83326 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -10,7 +10,6 @@ config GFS2_FS
select SLOW_WORK
select QUOTA
select QUOTACTL
- select FS_JOURNAL_INFO
help
A cluster filesystem.
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 3ff32fa793d..6e220f4eee7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
* directory entry when gfs2_inode_lookup() is invoked. Part of the code
* segment inside gfs2_inode_lookup code needs to get moved around.
*
- * Clean up I_LOCK and I_NEW as well.
+ * Clears I_NEW as well.
**/
void gfs2_set_iop(struct inode *inode)
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02de61..03dfeb2e392 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
* Prevent speculative execution through spin_unlock(&inode_lock);
*/
smp_mb();
- wake_up_bit(&inode->i_state, __I_LOCK);
+ wake_up_bit(&inode->i_state, __I_NEW);
}
/**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
}
#endif
/*
- * This is special! We do not need the spinlock when clearing I_LOCK,
+ * This is special! We do not need the spinlock when clearing I_NEW,
* because we're guaranteed that nobody else tries to do anything about
* the state of the inode when it is locked, as we just created it (so
- * there can be no old holders that haven't tested I_LOCK).
+ * there can be no old holders that haven't tested I_NEW).
* However we must emit the memory barrier so that other CPUs reliably
- * see the clearing of I_LOCK after the other inode initialisation has
+ * see the clearing of I_NEW after the other inode initialisation has
* completed.
*/
smp_mb();
- WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
- inode->i_state &= ~(I_LOCK|I_NEW);
+ WARN_ON(!(inode->i_state & I_NEW));
+ inode->i_state &= ~I_NEW;
wake_up_inode(inode);
}
EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
goto set_failed;
__inode_add_to_lists(sb, head, inode);
- inode->i_state = I_LOCK|I_NEW;
+ inode->i_state = I_NEW;
spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
if (!old) {
inode->i_ino = ino;
__inode_add_to_lists(sb, head, inode);
- inode->i_state = I_LOCK|I_NEW;
+ inode->i_state = I_NEW;
spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
ino_t ino = inode->i_ino;
struct hlist_head *head = inode_hashtable + hash(sb, ino);
- inode->i_state |= I_LOCK|I_NEW;
+ inode->i_state |= I_NEW;
while (1) {
struct hlist_node *node;
struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
struct super_block *sb = inode->i_sb;
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
- inode->i_state |= I_LOCK|I_NEW;
+ inode->i_state |= I_NEW;
while (1) {
struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
* until the deletion _might_ have completed. Callers are responsible
* to recheck inode state.
*
- * It doesn't matter if I_LOCK is not set initially, a call to
+ * It doesn't matter if I_NEW is not set initially, a call to
* wake_up_inode() after removing from the hash list will DTRT.
*
* This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
static void __wait_on_freeing_inode(struct inode *inode)
{
wait_queue_head_t *wq;
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
- wq = bit_waitqueue(&inode->i_state, __I_LOCK);
+ DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+ wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode_lock);
schedule();
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
index a8408983abd..4e28beeed15 100644
--- a/fs/jbd/Kconfig
+++ b/fs/jbd/Kconfig
@@ -1,6 +1,5 @@
config JBD
tristate
- select FS_JOURNAL_INFO
help
This is a generic journalling layer for block devices. It is
currently used by the ext3 file system, but it could also be
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 0f7d1ceafdf..f32f346f4b0 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,7 +1,6 @@
config JBD2
tristate
select CRC32
- select FS_JOURNAL_INFO
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada..d945ea76b44 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */
*/
/*
* I believe this code is no longer needed. Splitting I_LOCK
- * into two bits, I_LOCK and I_SYNC should prevent this
+ * into two bits, I_NEW and I_SYNC should prevent this
* deadlock as well. But since I don't have a JFS testload
* to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
* Joern
diff --git a/fs/namei.c b/fs/namei.c
index d2783c8a770..dad4b80257d 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1764,7 +1764,7 @@ do_last:
path_to_nameidata(&path, &nd);
error = -EISDIR;
- if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
+ if (S_ISDIR(path.dentry->d_inode->i_mode))
goto exit;
ok:
/*
diff --git a/fs/namespace.c b/fs/namespace.c
index faab1273281..7d70d63ceb2 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2068,7 +2068,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
* create_mnt_ns - creates a private namespace and adds a root filesystem
* @mnt: pointer to the new root filesystem mountpoint
*/
-static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
+struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
{
struct mnt_namespace *new_ns;
@@ -2080,6 +2080,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *mnt)
}
return new_ns;
}
+EXPORT_SYMBOL(create_mnt_ns);
SYSCALL_DEFINE5(mount, char __user *, dev_name, char __user *, dir_name,
char __user *, type, unsigned long, flags, void __user *, data)
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d5b112bcf3d..ce907efc550 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2648,13 +2648,21 @@ out_freepage:
static int nfs_follow_remote_path(struct vfsmount *root_mnt,
const char *export_path, struct vfsmount *mnt_target)
{
+ struct mnt_namespace *ns_private;
struct nameidata nd;
struct super_block *s;
int ret;
+ ns_private = create_mnt_ns(root_mnt);
+ ret = PTR_ERR(ns_private);
+ if (IS_ERR(ns_private))
+ goto out_mntput;
+
ret = vfs_path_lookup(root_mnt->mnt_root, root_mnt,
export_path, LOOKUP_FOLLOW, &nd);
+ put_mnt_ns(ns_private);
+
if (ret != 0)
goto out_err;
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 1225af7b216..251da07b2a1 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -2,7 +2,6 @@ config NILFS2_FS
tristate "NILFS2 file system support (EXPERIMENTAL)"
depends on EXPERIMENTAL
select CRC32
- select FS_JOURNAL_INFO
help
NILFS2 is a log-structured file system (LFS) supporting continuous
snapshotting. In addition to versioning capability of the entire
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762c..dc2505abb6d 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
* the ntfs inode.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
* i_flags is set to 0 and we have no business touching it. Only an ioctl()
* is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
* necessary fields in @vi as well as initializing the ntfs inode.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
*
* Return 0 on success and -errno on error. In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
* normal directory inodes.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
*
* Return 0 on success and -errno on error. In the error case, the inode will
diff --git a/fs/pipe.c b/fs/pipe.c
index 43d79da5c57..37ba29ff315 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
}
static struct vfsmount *pipe_mnt __read_mostly;
-static int pipefs_delete_dentry(struct dentry *dentry)
-{
- /*
- * At creation time, we pretended this dentry was hashed
- * (by clearing DCACHE_UNHASHED bit in d_flags)
- * At delete time, we restore the truth : not hashed.
- * (so that dput() can proceed correctly)
- */
- dentry->d_flags |= DCACHE_UNHASHED;
- return 0;
-}
/*
* pipefs_dname() is called from d_path().
@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
}
static const struct dentry_operations pipefs_dentry_operations = {
- .d_delete = pipefs_delete_dentry,
.d_dname = pipefs_dname,
};
@@ -989,12 +977,6 @@ struct file *create_write_pipe(int flags)
path.mnt = mntget(pipe_mnt);
path.dentry->d_op = &pipefs_dentry_operations;
- /*
- * We dont want to publish this dentry into global dentry hash table.
- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
- * This permits a working /proc/$pid/fd/XXX on pipes
- */
- path.dentry->d_flags &= ~DCACHE_UNHASHED;
d_instantiate(path.dentry, inode);
err = -ENFILE;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 4badde179b1..f560325c444 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -134,13 +134,16 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
* simple bit tests.
*/
static const char *task_state_array[] = {
- "R (running)", /* 0 */
- "S (sleeping)", /* 1 */
- "D (disk sleep)", /* 2 */
- "T (stopped)", /* 4 */
- "T (tracing stop)", /* 8 */
- "Z (zombie)", /* 16 */
- "X (dead)" /* 32 */
+ "R (running)", /* 0 */
+ "S (sleeping)", /* 1 */
+ "D (disk sleep)", /* 2 */
+ "T (stopped)", /* 4 */
+ "t (tracing stop)", /* 8 */
+ "Z (zombie)", /* 16 */
+ "X (dead)", /* 32 */
+ "x (dead)", /* 64 */
+ "K (wakekill)", /* 128 */
+ "W (waking)", /* 256 */
};
static inline const char *get_task_state(struct task_struct *tsk)
@@ -148,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk)
unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
const char **p = &task_state_array[0];
+ BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+
while (state) {
p++;
state >>= 1;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 32fae4040eb..2efc57173fd 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -60,7 +60,7 @@ const struct inode_operations ramfs_file_inode_operations = {
*/
int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
{
- unsigned long npages, xpages, loop, limit;
+ unsigned long npages, xpages, loop;
struct page *pages;
unsigned order;
void *data;
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig
index ac7cd75c86f..513f431038f 100644
--- a/fs/reiserfs/Kconfig
+++ b/fs/reiserfs/Kconfig
@@ -1,7 +1,6 @@
config REISERFS_FS
tristate "Reiserfs support"
select CRC32
- select FS_JOURNAL_INFO
help
Stores not just filenames but the files themselves in a balanced
tree. Uses journalling.
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3a28e7751b3..290ae38fca8 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2538,6 +2538,12 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
return reiserfs_write_full_page(page, wbc);
}
+static void reiserfs_truncate_failed_write(struct inode *inode)
+{
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ reiserfs_truncate_file(inode, 0);
+}
+
static int reiserfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -2604,6 +2610,8 @@ static int reiserfs_write_begin(struct file *file,
if (ret) {
unlock_page(page);
page_cache_release(page);
+ /* Truncate allocated blocks */
+ reiserfs_truncate_failed_write(inode);
}
return ret;
}
@@ -2701,9 +2709,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
** transaction tracking stuff when the size changes. So, we have
** to do the i_size updates here.
*/
- pos += copied;
-
- if (pos > inode->i_size) {
+ if (pos + copied > inode->i_size) {
struct reiserfs_transaction_handle myth;
lock_depth = reiserfs_write_lock_once(inode->i_sb);
locked = true;
@@ -2721,7 +2727,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
goto journal_error;
reiserfs_update_inode_transaction(inode);
- inode->i_size = pos;
+ inode->i_size = pos + copied;
/*
* this will just nest into our transaction. It's important
* to use mark_inode_dirty so the inode gets pushed around on the
@@ -2751,6 +2757,10 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
reiserfs_write_unlock_once(inode->i_sb, lock_depth);
unlock_page(page);
page_cache_release(page);
+
+ if (pos + len > inode->i_size)
+ reiserfs_truncate_failed_write(inode);
+
return ret == 0 ? copied : ret;
journal_error:
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4..4a6f7f44065 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,18 +7,63 @@
* This function cannot be inlined since i_size_{read,write} is rather
* heavy-weight on 32-bit systems
*/
-void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
+void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
{
- i_size_write(dst, i_size_read((struct inode *)src));
- dst->i_blocks = src->i_blocks;
+ loff_t i_size;
+ blkcnt_t i_blocks;
+
+ /*
+ * i_size_read() includes its own seqlocking and protection from
+ * preemption (see include/linux/fs.h): we need nothing extra for
+ * that here, and prefer to avoid nesting locks than attempt to keep
+ * i_size and i_blocks in sync together.
+ */
+ i_size = i_size_read(src);
+
+ /*
+ * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
+ * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
+ * though stat's generic_fillattr() doesn't bother, and we won't be
+ * applying quotas (where i_blocks does become important) at the
+ * upper level.
+ *
+ * We don't actually know what locking is used at the lower level;
+ * but if it's a filesystem that supports quotas, it will be using
+ * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
+ * its 32-bit is (just) able to exceed 2TB i_size with the aid of
+ * holes; but its i_blocks cannot carry into the upper long without
+ * almost 2TB swap - let's ignore that case.
+ */
+ if (sizeof(i_blocks) > sizeof(long))
+ spin_lock(&src->i_lock);
+ i_blocks = src->i_blocks;
+ if (sizeof(i_blocks) > sizeof(long))
+ spin_unlock(&src->i_lock);
+
+ /*
+ * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
+ * fsstack_copy_inode_size() to hold some lock around
+ * i_size_write(), otherwise i_size_read() may spin forever (see
+ * include/linux/fs.h). We don't necessarily hold i_mutex when this
+ * is called, so take i_lock for that case.
+ *
+ * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the
+ * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
+ * for that case too, and do both at once by combining the tests.
+ *
+ * There is none of this locking overhead in the 64-bit case.
+ */
+ if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+ spin_lock(&dst->i_lock);
+ i_size_write(dst, i_size);
+ dst->i_blocks = i_blocks;
+ if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+ spin_unlock(&dst->i_lock);
}
EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
-/* copy all attributes; get_nlinks is optional way to override the i_nlink
- * copying
- */
-void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
- int (*get_nlinks)(struct inode *))
+/* copy all attributes */
+void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
{
dest->i_mode = src->i_mode;
dest->i_uid = src->i_uid;
@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
dest->i_ctime = src->i_ctime;
dest->i_blkbits = src->i_blkbits;
dest->i_flags = src->i_flags;
-
- /*
- * Update the nlinks AFTER updating the above fields, because the
- * get_links callback may depend on them.
- */
- if (!get_nlinks)
- dest->i_nlink = src->i_nlink;
- else
- dest->i_nlink = (*get_nlinks)(dest);
+ dest->i_nlink = src->i_nlink;
}
EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/sync.c b/fs/sync.c
index 36752a68348..418727a2a23 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
{
int ret;
struct file *file;
+ struct address_space *mapping;
loff_t endbyte; /* inclusive */
int fput_needed;
umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
!S_ISLNK(i_mode))
goto out_put;
- ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags);
+ mapping = file->f_mapping;
+ if (!mapping) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ret = 0;
+ if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
+ ret = filemap_fdatawait_range(mapping, offset, endbyte);
+ if (ret < 0)
+ goto out_put;
+ }
+
+ if (flags & SYNC_FILE_RANGE_WRITE) {
+ ret = filemap_fdatawrite_range(mapping, offset, endbyte);
+ if (ret < 0)
+ goto out_put;
+ }
+
+ if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
+ ret = filemap_fdatawait_range(mapping, offset, endbyte);
+
out_put:
fput_light(file, fput_needed);
out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
}
SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
#endif
-
-/*
- * `endbyte' is inclusive
- */
-int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
- loff_t endbyte, unsigned int flags)
-{
- int ret;
-
- if (!mapping) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = 0;
- if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
- ret = filemap_fdatawait_range(mapping, offset, endbyte);
- if (ret < 0)
- goto out;
- }
-
- if (flags & SYNC_FILE_RANGE_WRITE) {
- ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
- WB_SYNC_ALL);
- if (ret < 0)
- goto out;
- }
-
- if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
- ret = filemap_fdatawait_range(mapping, offset, endbyte);
- }
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f887e7..16a6444330e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
*
* Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
* read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
- * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not
+ * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
* set as well. However, UBIFS disables readahead.
*/
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1d5b298ba8b..225946012d0 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -794,7 +794,7 @@ xfs_setup_inode(
struct inode *inode = &ip->i_vnode;
inode->i_ino = ip->i_ino;
- inode->i_state = I_NEW|I_LOCK;
+ inode->i_state = I_NEW;
inode_add_to_lists(ip->i_mount->m_super, inode);
inode->i_mode = ip->i_d.di_mode;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 0de36c2a46f..fa402a6bbbc 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -91,7 +91,7 @@ xfs_inode_alloc(
ip->i_new_size = 0;
/* prevent anyone from using this yet */
- VFS_I(ip)->i_state = I_NEW|I_LOCK;
+ VFS_I(ip)->i_state = I_NEW;
return ip;
}
@@ -217,7 +217,7 @@ xfs_iget_cache_hit(
trace_xfs_iget_reclaim(ip);
goto out_error;
}
- inode->i_state = I_LOCK|I_NEW;
+ inode->i_state = I_NEW;
} else {
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {