aboutsummaryrefslogtreecommitdiff
path: root/fs/ntfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ntfs/inode.c')
-rw-r--r--fs/ntfs/inode.c514
1 files changed, 473 insertions, 41 deletions
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 7ec04513180..b24f4c4b2c5 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -30,6 +30,7 @@
#include "debug.h"
#include "inode.h"
#include "attrib.h"
+#include "lcnalloc.h"
#include "malloc.h"
#include "mft.h"
#include "time.h"
@@ -2291,11 +2292,16 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
#ifdef NTFS_RW
+static const char *es = " Leaving inconsistent metadata. Unmount and run "
+ "chkdsk.";
+
/**
* ntfs_truncate - called when the i_size of an ntfs inode is changed
* @vi: inode for which the i_size was changed
*
- * We do not support i_size changes yet.
+ * We only support i_size changes for normal files at present, i.e. not
+ * compressed and not encrypted. This is enforced in ntfs_setattr(), see
+ * below.
*
* The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
* that the change is allowed.
@@ -2306,80 +2312,499 @@ int ntfs_show_options(struct seq_file *sf, struct vfsmount *mnt)
* Returns 0 on success or -errno on error.
*
* Called with ->i_sem held. In all but one case ->i_alloc_sem is held for
- * writing. The only case where ->i_alloc_sem is not held is
+ * writing. The only case in the kernel where ->i_alloc_sem is not held is
* mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
- * with the current i_size as the offset which means that it is a noop as far
- * as ntfs_truncate() is concerned.
+ * with the current i_size as the offset. The analogous place in NTFS is in
+ * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
+ * without holding ->i_alloc_sem.
*/
int ntfs_truncate(struct inode *vi)
{
- ntfs_inode *ni = NTFS_I(vi);
+ s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
+ VCN highest_vcn;
+ unsigned long flags;
+ ntfs_inode *base_ni, *ni = NTFS_I(vi);
ntfs_volume *vol = ni->vol;
ntfs_attr_search_ctx *ctx;
MFT_RECORD *m;
ATTR_RECORD *a;
const char *te = " Leaving file length out of sync with i_size.";
- int err;
+ int err, mp_size, size_change, alloc_change;
+ u32 attr_len;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
BUG_ON(NInoAttr(ni));
+ BUG_ON(S_ISDIR(vi->i_mode));
+ BUG_ON(NInoMstProtected(ni));
BUG_ON(ni->nr_extents < 0);
- m = map_mft_record(ni);
+retry_truncate:
+ /*
+ * Lock the runlist for writing and map the mft record to ensure it is
+ * safe to mess with the attribute runlist and sizes.
+ */
+ down_write(&ni->runlist.lock);
+ if (!NInoAttr(ni))
+ base_ni = ni;
+ else
+ base_ni = ni->ext.base_ntfs_ino;
+ m = map_mft_record(base_ni);
if (IS_ERR(m)) {
err = PTR_ERR(m);
ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
"(error code %d).%s", vi->i_ino, err, te);
ctx = NULL;
m = NULL;
- goto err_out;
+ goto old_bad_out;
}
- ctx = ntfs_attr_get_search_ctx(ni, m);
+ ctx = ntfs_attr_get_search_ctx(base_ni, m);
if (unlikely(!ctx)) {
ntfs_error(vi->i_sb, "Failed to allocate a search context for "
"inode 0x%lx (not enough memory).%s",
vi->i_ino, te);
err = -ENOMEM;
- goto err_out;
+ goto old_bad_out;
}
err = ntfs_attr_lookup(ni->type, ni->name, ni->name_len,
CASE_SENSITIVE, 0, NULL, 0, ctx);
if (unlikely(err)) {
- if (err == -ENOENT)
+ if (err == -ENOENT) {
ntfs_error(vi->i_sb, "Open attribute is missing from "
"mft record. Inode 0x%lx is corrupt. "
- "Run chkdsk.", vi->i_ino);
- else
+ "Run chkdsk.%s", vi->i_ino, te);
+ err = -EIO;
+ } else
ntfs_error(vi->i_sb, "Failed to lookup attribute in "
- "inode 0x%lx (error code %d).",
- vi->i_ino, err);
- goto err_out;
+ "inode 0x%lx (error code %d).%s",
+ vi->i_ino, err, te);
+ goto old_bad_out;
}
+ m = ctx->mrec;
a = ctx->attr;
- /* If the size has not changed there is nothing to do. */
- if (ntfs_attr_size(a) == i_size_read(vi))
- goto done;
- // TODO: Implement the truncate...
- ntfs_error(vi->i_sb, "Inode size has changed but this is not "
- "implemented yet. Resetting inode size to old value. "
- " This is most likely a bug in the ntfs driver!");
- i_size_write(vi, ntfs_attr_size(a));
-done:
+ /*
+ * The i_size of the vfs inode is the new size for the attribute value.
+ */
+ new_size = i_size_read(vi);
+ /* The current size of the attribute value is the old size. */
+ old_size = ntfs_attr_size(a);
+ /* Calculate the new allocated size. */
+ if (NInoNonResident(ni))
+ new_alloc_size = (new_size + vol->cluster_size - 1) &
+ ~(s64)vol->cluster_size_mask;
+ else
+ new_alloc_size = (new_size + 7) & ~7;
+ /* The current allocated size is the old allocated size. */
+ read_lock_irqsave(&ni->size_lock, flags);
+ old_alloc_size = ni->allocated_size;
+ read_unlock_irqrestore(&ni->size_lock, flags);
+ /*
+ * The change in the file size. This will be 0 if no change, >0 if the
+ * size is growing, and <0 if the size is shrinking.
+ */
+ size_change = -1;
+ if (new_size - old_size >= 0) {
+ size_change = 1;
+ if (new_size == old_size)
+ size_change = 0;
+ }
+ /* As above for the allocated size. */
+ alloc_change = -1;
+ if (new_alloc_size - old_alloc_size >= 0) {
+ alloc_change = 1;
+ if (new_alloc_size == old_alloc_size)
+ alloc_change = 0;
+ }
+ /*
+ * If neither the size nor the allocation are being changed there is
+ * nothing to do.
+ */
+ if (!size_change && !alloc_change)
+ goto unm_done;
+ /* If the size is changing, check if new size is allowed in $AttrDef. */
+ if (size_change) {
+ err = ntfs_attr_size_bounds_check(vol, ni->type, new_size);
+ if (unlikely(err)) {
+ if (err == -ERANGE) {
+ ntfs_error(vol->sb, "Truncate would cause the "
+ "inode 0x%lx to %simum size "
+ "for its attribute type "
+ "(0x%x). Aborting truncate.",
+ vi->i_ino,
+ new_size > old_size ? "exceed "
+ "the max" : "go under the min",
+ le32_to_cpu(ni->type));
+ err = -EFBIG;
+ } else {
+ ntfs_error(vol->sb, "Inode 0x%lx has unknown "
+ "attribute type 0x%x. "
+ "Aborting truncate.",
+ vi->i_ino,
+ le32_to_cpu(ni->type));
+ err = -EIO;
+ }
+ /* Reset the vfs inode size to the old size. */
+ i_size_write(vi, old_size);
+ goto err_out;
+ }
+ }
+ if (NInoCompressed(ni) || NInoEncrypted(ni)) {
+ ntfs_warning(vi->i_sb, "Changes in inode size are not "
+ "supported yet for %s files, ignoring.",
+ NInoCompressed(ni) ? "compressed" :
+ "encrypted");
+ err = -EOPNOTSUPP;
+ goto bad_out;
+ }
+ if (a->non_resident)
+ goto do_non_resident_truncate;
+ BUG_ON(NInoNonResident(ni));
+ /* Resize the attribute record to best fit the new attribute size. */
+ if (new_size < vol->mft_record_size &&
+ !ntfs_resident_attr_value_resize(m, a, new_size)) {
+ unsigned long flags;
+
+ /* The resize succeeded! */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ write_lock_irqsave(&ni->size_lock, flags);
+ /* Update the sizes in the ntfs inode and all is done. */
+ ni->allocated_size = le32_to_cpu(a->length) -
+ le16_to_cpu(a->data.resident.value_offset);
+ /*
+ * Note ntfs_resident_attr_value_resize() has already done any
+ * necessary data clearing in the attribute record. When the
+ * file is being shrunk vmtruncate() will already have cleared
+ * the top part of the last partial page, i.e. since this is
+ * the resident case this is the page with index 0. However,
+ * when the file is being expanded, the page cache page data
+ * between the old data_size, i.e. old_size, and the new_size
+ * has not been zeroed. Fortunately, we do not need to zero it
+ * either since on one hand it will either already be zero due
+ * to both readpage and writepage clearing partial page data
+ * beyond i_size in which case there is nothing to do or in the
+ * case of the file being mmap()ped at the same time, POSIX
+ * specifies that the behaviour is unspecified thus we do not
+ * have to do anything. This means that in our implementation
+ * in the rare case that the file is mmap()ped and a write
+ * occured into the mmap()ped region just beyond the file size
+ * and writepage has not yet been called to write out the page
+ * (which would clear the area beyond the file size) and we now
+ * extend the file size to incorporate this dirty region
+ * outside the file size, a write of the page would result in
+ * this data being written to disk instead of being cleared.
+ * Given both POSIX and the Linux mmap(2) man page specify that
+ * this corner case is undefined, we choose to leave it like
+ * that as this is much simpler for us as we cannot lock the
+ * relevant page now since we are holding too many ntfs locks
+ * which would result in a lock reversal deadlock.
+ */
+ ni->initialized_size = new_size;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ goto unm_done;
+ }
+ /* If the above resize failed, this must be an attribute extension. */
+ BUG_ON(size_change < 0);
+ /*
+ * We have to drop all the locks so we can call
+ * ntfs_attr_make_non_resident(). This could be optimised by try-
+ * locking the first page cache page and only if that fails dropping
+ * the locks, locking the page, and redoing all the locking and
+ * lookups. While this would be a huge optimisation, it is not worth
+ * it as this is definitely a slow code path as it only ever can happen
+ * once for any given file.
+ */
ntfs_attr_put_search_ctx(ctx);
- unmap_mft_record(ni);
- NInoClearTruncateFailed(ni);
- ntfs_debug("Done.");
- return 0;
-err_out:
- if (err != -ENOMEM) {
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+ /*
+ * Not enough space in the mft record, try to make the attribute
+ * non-resident and if successful restart the truncation process.
+ */
+ err = ntfs_attr_make_non_resident(ni, old_size);
+ if (likely(!err))
+ goto retry_truncate;
+ /*
+ * Could not make non-resident. If this is due to this not being
+ * permitted for this attribute type or there not being enough space,
+ * try to make other attributes non-resident. Otherwise fail.
+ */
+ if (unlikely(err != -EPERM && err != -ENOSPC)) {
+ ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
+ "type 0x%x, because the conversion from "
+ "resident to non-resident attribute failed "
+ "with error code %i.", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), err);
+ if (err != -ENOMEM)
+ err = -EIO;
+ goto conv_err_out;
+ }
+ /* TODO: Not implemented from here, abort. */
+ if (err == -ENOSPC)
+ ntfs_error(vol->sb, "Not enough space in the mft record/on "
+ "disk for the non-resident attribute value. "
+ "This case is not implemented yet.");
+ else /* if (err == -EPERM) */
+ ntfs_error(vol->sb, "This attribute type may not be "
+ "non-resident. This case is not implemented "
+ "yet.");
+ err = -EOPNOTSUPP;
+ goto conv_err_out;
+#if 0
+ // TODO: Attempt to make other attributes non-resident.
+ if (!err)
+ goto do_resident_extend;
+ /*
+ * Both the attribute list attribute and the standard information
+ * attribute must remain in the base inode. Thus, if this is one of
+ * these attributes, we have to try to move other attributes out into
+ * extent mft records instead.
+ */
+ if (ni->type == AT_ATTRIBUTE_LIST ||
+ ni->type == AT_STANDARD_INFORMATION) {
+ // TODO: Attempt to move other attributes into extent mft
+ // records.
+ err = -EOPNOTSUPP;
+ if (!err)
+ goto do_resident_extend;
+ goto err_out;
+ }
+ // TODO: Attempt to move this attribute to an extent mft record, but
+ // only if it is not already the only attribute in an mft record in
+ // which case there would be nothing to gain.
+ err = -EOPNOTSUPP;
+ if (!err)
+ goto do_resident_extend;
+ /* There is nothing we can do to make enough space. )-: */
+ goto err_out;
+#endif
+do_non_resident_truncate:
+ BUG_ON(!NInoNonResident(ni));
+ if (alloc_change < 0) {
+ highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
+ if (highest_vcn > 0 &&
+ old_alloc_size >> vol->cluster_size_bits >
+ highest_vcn + 1) {
+ /*
+ * This attribute has multiple extents. Not yet
+ * supported.
+ */
+ ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
+ "attribute type 0x%x, because the "
+ "attribute is highly fragmented (it "
+ "consists of multiple extents) and "
+ "this case is not implemented yet.",
+ vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type));
+ err = -EOPNOTSUPP;
+ goto bad_out;
+ }
+ }
+ /*
+ * If the size is shrinking, need to reduce the initialized_size and
+ * the data_size before reducing the allocation.
+ */
+ if (size_change < 0) {
+ /*
+ * Make the valid size smaller (i_size is already up-to-date).
+ */
+ write_lock_irqsave(&ni->size_lock, flags);
+ if (new_size < ni->initialized_size) {
+ ni->initialized_size = new_size;
+ a->data.non_resident.initialized_size =
+ cpu_to_sle64(new_size);
+ }
+ a->data.non_resident.data_size = cpu_to_sle64(new_size);
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+ /* If the allocated size is not changing, we are done. */
+ if (!alloc_change)
+ goto unm_done;
+ /*
+ * If the size is shrinking it makes no sense for the
+ * allocation to be growing.
+ */
+ BUG_ON(alloc_change > 0);
+ } else /* if (size_change >= 0) */ {
+ /*
+ * The file size is growing or staying the same but the
+ * allocation can be shrinking, growing or staying the same.
+ */
+ if (alloc_change > 0) {
+ /*
+ * We need to extend the allocation and possibly update
+ * the data size. If we are updating the data size,
+ * since we are not touching the initialized_size we do
+ * not need to worry about the actual data on disk.
+ * And as far as the page cache is concerned, there
+ * will be no pages beyond the old data size and any
+ * partial region in the last page between the old and
+ * new data size (or the end of the page if the new
+ * data size is outside the page) does not need to be
+ * modified as explained above for the resident
+ * attribute truncate case. To do this, we simply drop
+ * the locks we hold and leave all the work to our
+ * friendly helper ntfs_attr_extend_allocation().
+ */
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+ err = ntfs_attr_extend_allocation(ni, new_size,
+ size_change > 0 ? new_size : -1, -1);
+ /*
+ * ntfs_attr_extend_allocation() will have done error
+ * output already.
+ */
+ goto done;
+ }
+ if (!alloc_change)
+ goto alloc_done;
+ }
+ /* alloc_change < 0 */
+ /* Free the clusters. */
+ nr_freed = ntfs_cluster_free(ni, new_alloc_size >>
+ vol->cluster_size_bits, -1, ctx);
+ m = ctx->mrec;
+ a = ctx->attr;
+ if (unlikely(nr_freed < 0)) {
+ ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
+ "%lli). Unmount and run chkdsk to recover "
+ "the lost cluster(s).", (long long)nr_freed);
NVolSetErrors(vol);
+ nr_freed = 0;
+ }
+ /* Truncate the runlist. */
+ err = ntfs_rl_truncate_nolock(vol, &ni->runlist,
+ new_alloc_size >> vol->cluster_size_bits);
+ /*
+ * If the runlist truncation failed and/or the search context is no
+ * longer valid, we cannot resize the attribute record or build the
+ * mapping pairs array thus we mark the inode bad so that no access to
+ * the freed clusters can happen.
+ */
+ if (unlikely(err || IS_ERR(m))) {
+ ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
+ IS_ERR(m) ?
+ "restore attribute search context" :
+ "truncate attribute runlist",
+ IS_ERR(m) ? PTR_ERR(m) : err, es);
+ err = -EIO;
+ goto bad_out;
+ }
+ /* Get the size for the shrunk mapping pairs array for the runlist. */
+ mp_size = ntfs_get_size_for_mapping_pairs(vol, ni->runlist.rl, 0, -1);
+ if (unlikely(mp_size <= 0)) {
+ ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
+ "attribute type 0x%x, because determining the "
+ "size for the mapping pairs failed with error "
+ "code %i.%s", vi->i_ino,
+ (unsigned)le32_to_cpu(ni->type), mp_size, es);
+ err = -EIO;
+ goto bad_out;
+ }
+ /*
+ * Shrink the attribute record for the new mapping pairs array. Note,
+ * this cannot fail since we are making the attribute smaller thus by
+ * definition there is enough space to do so.
+ */
+ attr_len = le32_to_cpu(a->length);
+ err = ntfs_attr_record_resize(m, a, mp_size +
+ le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
+ BUG_ON(err);
+ /*
+ * Generate the mapping pairs array directly into the attribute record.
+ */
+ err = ntfs_mapping_pairs_build(vol, (u8*)a +
+ le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
+ mp_size, ni->runlist.rl, 0, -1, NULL);
+ if (unlikely(err)) {
+ ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
+ "attribute type 0x%x, because building the "
+ "mapping pairs failed with error code %i.%s",
+ vi->i_ino, (unsigned)le32_to_cpu(ni->type),
+ err, es);
+ err = -EIO;
+ goto bad_out;
+ }
+ /* Update the allocated/compressed size as well as the highest vcn. */
+ a->data.non_resident.highest_vcn = cpu_to_sle64((new_alloc_size >>
+ vol->cluster_size_bits) - 1);
+ write_lock_irqsave(&ni->size_lock, flags);
+ ni->allocated_size = new_alloc_size;
+ a->data.non_resident.allocated_size = cpu_to_sle64(new_alloc_size);
+ if (NInoSparse(ni) || NInoCompressed(ni)) {
+ if (nr_freed) {
+ ni->itype.compressed.size -= nr_freed <<
+ vol->cluster_size_bits;
+ BUG_ON(ni->itype.compressed.size < 0);
+ a->data.non_resident.compressed_size = cpu_to_sle64(
+ ni->itype.compressed.size);
+ vi->i_blocks = ni->itype.compressed.size >> 9;
+ }
+ } else
+ vi->i_blocks = new_alloc_size >> 9;
+ write_unlock_irqrestore(&ni->size_lock, flags);
+ /*
+ * We have shrunk the allocation. If this is a shrinking truncate we
+ * have already dealt with the initialized_size and the data_size above
+ * and we are done. If the truncate is only changing the allocation
+ * and not the data_size, we are also done. If this is an extending
+ * truncate, need to extend the data_size now which is ensured by the
+ * fact that @size_change is positive.
+ */
+alloc_done:
+ /*
+ * If the size is growing, need to update it now. If it is shrinking,
+ * we have already updated it above (before the allocation change).
+ */
+ if (size_change > 0)
+ a->data.non_resident.data_size = cpu_to_sle64(new_size);
+ /* Ensure the modified mft record is written out. */
+ flush_dcache_mft_record_page(ctx->ntfs_ino);
+ mark_mft_record_dirty(ctx->ntfs_ino);
+unm_done:
+ ntfs_attr_put_search_ctx(ctx);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+done:
+ /* Update the mtime and ctime on the base inode. */
+ inode_update_time(VFS_I(base_ni), 1);
+ if (likely(!err)) {
+ NInoClearTruncateFailed(ni);
+ ntfs_debug("Done.");
+ }
+ return err;
+old_bad_out:
+ old_size = -1;
+bad_out:
+ if (err != -ENOMEM && err != -EOPNOTSUPP) {
make_bad_inode(vi);
+ make_bad_inode(VFS_I(base_ni));
+ NVolSetErrors(vol);
}
+ if (err != -EOPNOTSUPP)
+ NInoSetTruncateFailed(ni);
+ else if (old_size >= 0)
+ i_size_write(vi, old_size);
+err_out:
if (ctx)
ntfs_attr_put_search_ctx(ctx);
if (m)
- unmap_mft_record(ni);
- NInoSetTruncateFailed(ni);
+ unmap_mft_record(base_ni);
+ up_write(&ni->runlist.lock);
+out:
+ ntfs_debug("Failed. Returning error code %i.", err);
return err;
+conv_err_out:
+ if (err != -ENOMEM && err != -EOPNOTSUPP) {
+ make_bad_inode(vi);
+ make_bad_inode(VFS_I(base_ni));
+ NVolSetErrors(vol);
+ }
+ if (err != -EOPNOTSUPP)
+ NInoSetTruncateFailed(ni);
+ else
+ i_size_write(vi, old_size);
+ goto out;
}
/**
@@ -2420,8 +2845,7 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
err = inode_change_ok(vi, attr);
if (err)
- return err;
-
+ goto out;
/* We do not support NTFS ACLs yet. */
if (ia_valid & (ATTR_UID | ATTR_GID | ATTR_MODE)) {
ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
@@ -2429,14 +2853,22 @@ int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
err = -EOPNOTSUPP;
goto out;
}
-
if (ia_valid & ATTR_SIZE) {
if (attr->ia_size != i_size_read(vi)) {
- ntfs_warning(vi->i_sb, "Changes in inode size are not "
- "supported yet, ignoring.");
- err = -EOPNOTSUPP;
- // TODO: Implement...
- // err = vmtruncate(vi, attr->ia_size);
+ ntfs_inode *ni = NTFS_I(vi);
+ /*
+ * FIXME: For now we do not support resizing of
+ * compressed or encrypted files yet.
+ */
+ if (NInoCompressed(ni) || NInoEncrypted(ni)) {
+ ntfs_warning(vi->i_sb, "Changes in inode size "
+ "are not supported yet for "
+ "%s files, ignoring.",
+ NInoCompressed(ni) ?
+ "compressed" : "encrypted");
+ err = -EOPNOTSUPP;
+ } else
+ err = vmtruncate(vi, attr->ia_size);
if (err || ia_valid == ATTR_SIZE)
goto out;
} else {