From aa6f5ffbdba45aa8e19e5048648fc6c7b25376d3 Mon Sep 17 00:00:00 2001 From: merge Date: Thu, 22 Jan 2009 13:55:32 +0000 Subject: MERGE-via-pending-tracking-hist-MERGE-via-stable-tracking-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040-1232632141 pending-tracking-hist top was MERGE-via-stable-tracking-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040-1232632141 / fdf777a63bcb59e0dfd78bfe2c6242e01f6d4eb9 ... parent commitmessage: From: merge MERGE-via-stable-tracking-hist-MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040 stable-tracking-hist top was MERGE-via-mokopatches-tracking-fix-stray-endmenu-patch-1232632040 / 90463bfd2d5a3c8b52f6e6d71024a00e052b0ced ... parent commitmessage: From: merge MERGE-via-mokopatches-tracking-hist-fix-stray-endmenu-patch mokopatches-tracking-hist top was fix-stray-endmenu-patch / 3630e0be570de8057e7f8d2fe501ed353cdf34e6 ... parent commitmessage: From: Andy Green fix-stray-endmenu.patch Signed-off-by: Andy Green --- fs/xfs/linux-2.6/sv.h | 22 +- fs/xfs/linux-2.6/xfs_aops.c | 66 ++- fs/xfs/linux-2.6/xfs_aops.h | 5 +- fs/xfs/linux-2.6/xfs_buf.c | 166 +++----- fs/xfs/linux-2.6/xfs_buf.h | 30 +- fs/xfs/linux-2.6/xfs_cred.h | 12 +- fs/xfs/linux-2.6/xfs_export.c | 24 +- fs/xfs/linux-2.6/xfs_file.c | 189 +++------ fs/xfs/linux-2.6/xfs_fs_subr.c | 23 +- fs/xfs/linux-2.6/xfs_globals.c | 8 - fs/xfs/linux-2.6/xfs_globals.h | 1 - fs/xfs/linux-2.6/xfs_ioctl.c | 241 +++++------ fs/xfs/linux-2.6/xfs_ioctl.h | 82 ++++ fs/xfs/linux-2.6/xfs_ioctl32.c | 851 +++++++++++++++++++++++++-------------- fs/xfs/linux-2.6/xfs_ioctl32.h | 214 +++++++++- fs/xfs/linux-2.6/xfs_iops.c | 122 +++++- fs/xfs/linux-2.6/xfs_iops.h | 1 - fs/xfs/linux-2.6/xfs_linux.h | 13 +- fs/xfs/linux-2.6/xfs_lrw.c | 50 +-- fs/xfs/linux-2.6/xfs_stats.c | 6 +- fs/xfs/linux-2.6/xfs_stats.h | 65 +++ fs/xfs/linux-2.6/xfs_super.c | 894 ++++++++++++++--------------------------- fs/xfs/linux-2.6/xfs_super.h | 15 - fs/xfs/linux-2.6/xfs_sync.c | 762 +++++++++++++++++++++++++++++++++++ fs/xfs/linux-2.6/xfs_sync.h | 55 +++ fs/xfs/linux-2.6/xfs_sysctl.c | 11 - fs/xfs/linux-2.6/xfs_sysctl.h | 3 +- fs/xfs/linux-2.6/xfs_vfs.h | 77 ---- fs/xfs/linux-2.6/xfs_vnode.c | 145 ------- fs/xfs/linux-2.6/xfs_vnode.h | 72 +--- 30 files changed, 2496 insertions(+), 1729 deletions(-) create mode 100644 fs/xfs/linux-2.6/xfs_ioctl.h create mode 100644 fs/xfs/linux-2.6/xfs_sync.c create mode 100644 fs/xfs/linux-2.6/xfs_sync.h delete mode 100644 fs/xfs/linux-2.6/xfs_vfs.h delete mode 100644 fs/xfs/linux-2.6/xfs_vnode.c (limited to 'fs/xfs/linux-2.6') diff --git a/fs/xfs/linux-2.6/sv.h b/fs/xfs/linux-2.6/sv.h index 351a8f454bd..4dfc7c37081 100644 --- a/fs/xfs/linux-2.6/sv.h +++ b/fs/xfs/linux-2.6/sv.h @@ -32,23 +32,15 @@ typedef struct sv_s { wait_queue_head_t waiters; } sv_t; -#define SV_FIFO 0x0 /* sv_t is FIFO type */ -#define SV_LIFO 0x2 /* sv_t is LIFO type */ -#define SV_PRIO 0x4 /* sv_t is PRIO type */ -#define SV_KEYED 0x6 /* sv_t is KEYED type */ -#define SV_DEFAULT SV_FIFO - - -static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, - unsigned long timeout) +static inline void _sv_wait(sv_t *sv, spinlock_t *lock) { DECLARE_WAITQUEUE(wait, current); add_wait_queue_exclusive(&sv->waiters, &wait); - __set_current_state(state); + __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock(lock); - schedule_timeout(timeout); + schedule(); remove_wait_queue(&sv->waiters, &wait); } @@ -58,13 +50,7 @@ static inline void _sv_wait(sv_t *sv, spinlock_t *lock, int state, #define sv_destroy(sv) \ /*NOTHING*/ #define sv_wait(sv, pri, lock, s) \ - _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) -#define sv_wait_sig(sv, pri, lock, s) \ - _sv_wait(sv, lock, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT) -#define sv_timedwait(sv, pri, lock, s, svf, ts, rts) \ - _sv_wait(sv, lock, TASK_UNINTERRUPTIBLE, timespec_to_jiffies(ts)) -#define sv_timedwait_sig(sv, pri, lock, s, svf, ts, rts) \ - _sv_wait(sv, lock, TASK_INTERRUPTIBLE, timespec_to_jiffies(ts)) + _sv_wait(sv, lock) #define sv_signal(sv) \ wake_up(&(sv)->waiters) #define sv_broadcast(sv) \ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index a44d68eb50b..de3a198f771 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -42,6 +42,40 @@ #include #include + +/* + * Prime number of hash buckets since address is used as the key. + */ +#define NVSYNC 37 +#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC]) +static wait_queue_head_t xfs_ioend_wq[NVSYNC]; + +void __init +xfs_ioend_init(void) +{ + int i; + + for (i = 0; i < NVSYNC; i++) + init_waitqueue_head(&xfs_ioend_wq[i]); +} + +void +xfs_ioend_wait( + xfs_inode_t *ip) +{ + wait_queue_head_t *wq = to_ioend_wq(ip); + + wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); +} + +STATIC void +xfs_ioend_wake( + xfs_inode_t *ip) +{ + if (atomic_dec_and_test(&ip->i_iocount)) + wake_up(to_ioend_wq(ip)); +} + STATIC void xfs_count_page_state( struct page *page, @@ -146,16 +180,25 @@ xfs_destroy_ioend( xfs_ioend_t *ioend) { struct buffer_head *bh, *next; + struct xfs_inode *ip = XFS_I(ioend->io_inode); for (bh = ioend->io_buffer_head; bh; bh = next) { next = bh->b_private; bh->b_end_io(bh, !ioend->io_error); } - if (unlikely(ioend->io_error)) { - vn_ioerror(XFS_I(ioend->io_inode), ioend->io_error, - __FILE__,__LINE__); + + /* + * Volume managers supporting multiple paths can send back ENODEV + * when the final path disappears. In this case continuing to fill + * the page cache with dirty data which cannot be written out is + * evil, so prevent that. + */ + if (unlikely(ioend->io_error == -ENODEV)) { + xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, + __FILE__, __LINE__); } - vn_iowake(XFS_I(ioend->io_inode)); + + xfs_ioend_wake(ip); mempool_free(ioend, xfs_ioend_pool); } @@ -191,7 +234,7 @@ xfs_setfilesize( ip->i_d.di_size = isize; ip->i_update_core = 1; ip->i_update_size = 1; - mark_inode_dirty_sync(ioend->io_inode); + xfs_mark_inode_dirty_sync(ip); } xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -317,14 +360,9 @@ xfs_map_blocks( xfs_iomap_t *mapp, int flags) { - xfs_inode_t *ip = XFS_I(inode); - int error, nmaps = 1; - - error = xfs_iomap(ip, offset, count, - flags, mapp, &nmaps); - if (!error && (flags & (BMAPI_WRITE|BMAPI_ALLOCATE))) - xfs_iflags_set(ip, XFS_IMODIFIED); - return -error; + int nmaps = 1; + + return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps); } STATIC_INLINE int @@ -512,7 +550,7 @@ xfs_cancel_ioend( unlock_buffer(bh); } while ((bh = next_bh) != NULL); - vn_iowake(XFS_I(ioend->io_inode)); + xfs_ioend_wake(XFS_I(ioend->io_inode)); mempool_free(ioend, xfs_ioend_pool); } while ((ioend = next) != NULL); } diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h index 3ba0631a381..1dd52884975 100644 --- a/fs/xfs/linux-2.6/xfs_aops.h +++ b/fs/xfs/linux-2.6/xfs_aops.h @@ -21,8 +21,6 @@ extern struct workqueue_struct *xfsdatad_workqueue; extern mempool_t *xfs_ioend_pool; -typedef void (*xfs_ioend_func_t)(void *); - /* * xfs_ioend struct manages large extent writes for XFS. * It can manage several multi-page bio's at once. @@ -43,4 +41,7 @@ typedef struct xfs_ioend { extern const struct address_space_operations xfs_address_space_operations; extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int); +extern void xfs_ioend_init(void); +extern void xfs_ioend_wait(struct xfs_inode *); + #endif /* __XFS_AOPS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 36d5fcd3f59..d71dc44e21e 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -165,75 +165,6 @@ test_page_region( return (mask && (page_private(page) & mask) == mask); } -/* - * Mapping of multi-page buffers into contiguous virtual space - */ - -typedef struct a_list { - void *vm_addr; - struct a_list *next; -} a_list_t; - -static a_list_t *as_free_head; -static int as_list_len; -static DEFINE_SPINLOCK(as_lock); - -/* - * Try to batch vunmaps because they are costly. - */ -STATIC void -free_address( - void *addr) -{ - a_list_t *aentry; - -#ifdef CONFIG_XEN - /* - * Xen needs to be able to make sure it can get an exclusive - * RO mapping of pages it wants to turn into a pagetable. If - * a newly allocated page is also still being vmap()ed by xfs, - * it will cause pagetable construction to fail. This is a - * quick workaround to always eagerly unmap pages so that Xen - * is happy. - */ - vunmap(addr); - return; -#endif - - aentry = kmalloc(sizeof(a_list_t), GFP_NOWAIT); - if (likely(aentry)) { - spin_lock(&as_lock); - aentry->next = as_free_head; - aentry->vm_addr = addr; - as_free_head = aentry; - as_list_len++; - spin_unlock(&as_lock); - } else { - vunmap(addr); - } -} - -STATIC void -purge_addresses(void) -{ - a_list_t *aentry, *old; - - if (as_free_head == NULL) - return; - - spin_lock(&as_lock); - aentry = as_free_head; - as_free_head = NULL; - as_list_len = 0; - spin_unlock(&as_lock); - - while ((old = aentry) != NULL) { - vunmap(aentry->vm_addr); - aentry = aentry->next; - kfree(old); - } -} - /* * Internal xfs_buf_t object manipulation */ @@ -333,7 +264,7 @@ xfs_buf_free( uint i; if ((bp->b_flags & XBF_MAPPED) && (bp->b_page_count > 1)) - free_address(bp->b_addr - bp->b_offset); + vm_unmap_ram(bp->b_addr - bp->b_offset, bp->b_page_count); for (i = 0; i < bp->b_page_count; i++) { struct page *page = bp->b_pages[i]; @@ -455,10 +386,8 @@ _xfs_buf_map_pages( bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset; bp->b_flags |= XBF_MAPPED; } else if (flags & XBF_MAPPED) { - if (as_list_len > 64) - purge_addresses(); - bp->b_addr = vmap(bp->b_pages, bp->b_page_count, - VM_MAP, PAGE_KERNEL); + bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, + -1, PAGE_KERNEL); if (unlikely(bp->b_addr == NULL)) return -ENOMEM; bp->b_addr += bp->b_offset; @@ -630,6 +559,29 @@ xfs_buf_get_flags( return NULL; } +STATIC int +_xfs_buf_read( + xfs_buf_t *bp, + xfs_buf_flags_t flags) +{ + int status; + + XB_TRACE(bp, "_xfs_buf_read", (unsigned long)flags); + + ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE))); + ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); + + bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \ + XBF_READ_AHEAD | _XBF_RUN_QUEUES); + + status = xfs_buf_iorequest(bp); + if (!status && !(flags & XBF_ASYNC)) + status = xfs_buf_iowait(bp); + return status; +} + xfs_buf_t * xfs_buf_read_flags( xfs_buftarg_t *target, @@ -646,7 +598,7 @@ xfs_buf_read_flags( if (!XFS_BUF_ISDONE(bp)) { XB_TRACE(bp, "read", (unsigned long)flags); XFS_STATS_INC(xb_get_read); - xfs_buf_iostart(bp, flags); + _xfs_buf_read(bp, flags); } else if (flags & XBF_ASYNC) { XB_TRACE(bp, "read_async", (unsigned long)flags); /* @@ -1048,50 +1000,39 @@ xfs_buf_ioerror( XB_TRACE(bp, "ioerror", (unsigned long)error); } -/* - * Initiate I/O on a buffer, based on the flags supplied. - * The b_iodone routine in the buffer supplied will only be called - * when all of the subsidiary I/O requests, if any, have been completed. - */ int -xfs_buf_iostart( - xfs_buf_t *bp, - xfs_buf_flags_t flags) +xfs_bawrite( + void *mp, + struct xfs_buf *bp) { - int status = 0; + XB_TRACE(bp, "bawrite", 0); - XB_TRACE(bp, "iostart", (unsigned long)flags); + ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL); - if (flags & XBF_DELWRI) { - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC); - bp->b_flags |= flags & (XBF_DELWRI | XBF_ASYNC); - xfs_buf_delwri_queue(bp, 1); - return 0; - } + xfs_buf_delwri_dequeue(bp); - bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \ - XBF_READ_AHEAD | _XBF_RUN_QUEUES); - bp->b_flags |= flags & (XBF_READ | XBF_WRITE | XBF_ASYNC | \ - XBF_READ_AHEAD | _XBF_RUN_QUEUES); + bp->b_flags &= ~(XBF_READ | XBF_DELWRI | XBF_READ_AHEAD); + bp->b_flags |= (XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); - BUG_ON(bp->b_bn == XFS_BUF_DADDR_NULL); + bp->b_mount = mp; + bp->b_strat = xfs_bdstrat_cb; + return xfs_bdstrat_cb(bp); +} - /* For writes allow an alternate strategy routine to precede - * the actual I/O request (which may not be issued at all in - * a shutdown situation, for example). - */ - status = (flags & XBF_WRITE) ? - xfs_buf_iostrategy(bp) : xfs_buf_iorequest(bp); +void +xfs_bdwrite( + void *mp, + struct xfs_buf *bp) +{ + XB_TRACE(bp, "bdwrite", 0); - /* Wait for I/O if we are not an async request. - * Note: async I/O request completion will release the buffer, - * and that can already be done by this point. So using the - * buffer pointer from here on, after async I/O, is invalid. - */ - if (!status && !(flags & XBF_ASYNC)) - status = xfs_buf_iowait(bp); + bp->b_strat = xfs_bdstrat_cb; + bp->b_mount = mp; - return status; + bp->b_flags &= ~XBF_READ; + bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); + + xfs_buf_delwri_queue(bp, 1); } STATIC_INLINE void @@ -1114,8 +1055,7 @@ xfs_buf_bio_end_io( unsigned int blocksize = bp->b_target->bt_bsize; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; - if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - bp->b_error = EIO; + xfs_buf_ioerror(bp, -error); do { struct page *page = bvec->bv_page; @@ -1732,8 +1672,6 @@ xfsbufd( count++; } - if (as_list_len > 0) - purge_addresses(); if (count) blk_run_address_space(target->bt_mapping); diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 456519a088c..288ae7c4c80 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h @@ -168,7 +168,7 @@ typedef struct xfs_buf { struct completion b_iowait; /* queue for I/O waiters */ void *b_fspriv; void *b_fspriv2; - void *b_fspriv3; + struct xfs_mount *b_mount; unsigned short b_error; /* error code on I/O */ unsigned int b_page_count; /* size of page array */ unsigned int b_offset; /* page offset in first page */ @@ -214,9 +214,10 @@ extern void xfs_buf_lock(xfs_buf_t *); extern void xfs_buf_unlock(xfs_buf_t *); /* Buffer Read and Write Routines */ +extern int xfs_bawrite(void *mp, xfs_buf_t *bp); +extern void xfs_bdwrite(void *mp, xfs_buf_t *bp); extern void xfs_buf_ioend(xfs_buf_t *, int); extern void xfs_buf_ioerror(xfs_buf_t *, int); -extern int xfs_buf_iostart(xfs_buf_t *, xfs_buf_flags_t); extern int xfs_buf_iorequest(xfs_buf_t *); extern int xfs_buf_iowait(xfs_buf_t *); extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, xfs_caddr_t, @@ -311,10 +312,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED) #define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED) -#define XFS_BUF_SHUT(bp) do { } while (0) -#define XFS_BUF_UNSHUT(bp) do { } while (0) -#define XFS_BUF_ISSHUT(bp) (0) - #define XFS_BUF_HOLD(bp) xfs_buf_hold(bp) #define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ) #define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ) @@ -334,8 +331,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val)) #define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2) #define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val)) -#define XFS_BUF_FSPRIVATE3(bp, type) ((type)(bp)->b_fspriv3) -#define XFS_BUF_SET_FSPRIVATE3(bp, val) ((bp)->b_fspriv3 = (void*)(val)) #define XFS_BUF_SET_START(bp) do { } while (0) #define XFS_BUF_SET_BRELSE_FUNC(bp, func) ((bp)->b_relse = (func)) @@ -366,14 +361,6 @@ extern void xfs_buf_trace(xfs_buf_t *, char *, void *, void *); #define XFS_BUF_TARGET(bp) ((bp)->b_target) #define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target) -static inline int xfs_bawrite(void *mp, xfs_buf_t *bp) -{ - bp->b_fspriv3 = mp; - bp->b_strat = xfs_bdstrat_cb; - xfs_buf_delwri_dequeue(bp); - return xfs_buf_iostart(bp, XBF_WRITE | XBF_ASYNC | _XBF_RUN_QUEUES); -} - static inline void xfs_buf_relse(xfs_buf_t *bp) { if (!bp->b_relse) @@ -414,17 +401,6 @@ static inline int XFS_bwrite(xfs_buf_t *bp) return error; } -/* - * No error can be returned from xfs_buf_iostart for delwri - * buffers as they are queued and no I/O is issued. - */ -static inline void xfs_bdwrite(void *mp, xfs_buf_t *bp) -{ - bp->b_strat = xfs_bdstrat_cb; - bp->b_fspriv3 = mp; - (void)xfs_buf_iostart(bp, XBF_DELWRI | XBF_ASYNC); -} - #define XFS_bdstrat(bp) xfs_buf_iorequest(bp) #define xfs_iowait(bp) xfs_buf_iowait(bp) diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 652721ce0ea..55bddf3b609 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -23,16 +23,6 @@ /* * Credentials */ -typedef struct cred { - /* EMPTY */ -} cred_t; - -extern struct cred *sys_cred; - -/* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ -static inline int capable_cred(cred_t *cr, int cid) -{ - return (cr == sys_cred) ? 1 : capable(cid); -} +typedef const struct cred cred_t; #endif /* __XFS_CRED_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 7f7abec25e1..87b8cbd23d4 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -29,7 +29,6 @@ #include "xfs_vnodeops.h" #include "xfs_bmap_btree.h" #include "xfs_inode.h" -#include "xfs_vfsops.h" /* * Note that we only accept fileids which are long enough rather than allow @@ -127,11 +126,26 @@ xfs_nfs_get_inode( if (ino == 0) return ERR_PTR(-ESTALE); - error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); - if (error) + /* + * The XFS_IGET_BULKSTAT means that an invalid inode number is just + * fine and not an indication of a corrupted filesystem. Because + * clients can send any kind of invalid file handle, e.g. after + * a restore on the server we have to deal with this case gracefully. + */ + error = xfs_iget(mp, NULL, ino, XFS_IGET_BULKSTAT, + XFS_ILOCK_SHARED, &ip, 0); + if (error) { + /* + * EINVAL means the inode cluster doesn't exist anymore. + * This implies the filehandle is stale, so we should + * translate it here. + * We don't use ESTALE directly down the chain to not + * confuse applications using bulkstat that expect EINVAL. + */ + if (error == EINVAL) + error = ESTALE; return ERR_PTR(-error); - if (!ip) - return ERR_PTR(-EIO); + } if (ip->i_d.di_gen != generation) { xfs_iput_new(ip, XFS_ILOCK_SHARED); diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 3fee790f138..e14c4e3aea0 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -36,88 +36,53 @@ #include "xfs_inode.h" #include "xfs_error.h" #include "xfs_rw.h" -#include "xfs_ioctl32.h" #include "xfs_vnodeops.h" +#include "xfs_da_btree.h" +#include "xfs_ioctl.h" #include #include static struct vm_operations_struct xfs_file_vm_ops; -STATIC_INLINE ssize_t -__xfs_file_read( +STATIC ssize_t +xfs_file_aio_read( struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - int ioflags, loff_t pos) { struct file *file = iocb->ki_filp; + int ioflags = IO_ISAIO; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; return xfs_read(XFS_I(file->f_path.dentry->d_inode), iocb, iov, nr_segs, &iocb->ki_pos, ioflags); } STATIC ssize_t -xfs_file_aio_read( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos); -} - -STATIC ssize_t -xfs_file_aio_read_invis( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); -} - -STATIC_INLINE ssize_t -__xfs_file_write( +xfs_file_aio_write( struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, - int ioflags, loff_t pos) { - struct file *file = iocb->ki_filp; + struct file *file = iocb->ki_filp; + int ioflags = IO_ISAIO; BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; + if (file->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; return xfs_write(XFS_I(file->f_mapping->host), iocb, iov, nr_segs, &iocb->ki_pos, ioflags); } -STATIC ssize_t -xfs_file_aio_write( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos); -} - -STATIC ssize_t -xfs_file_aio_write_invis( - struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) -{ - return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); -} - STATIC ssize_t xfs_file_splice_read( struct file *infilp, @@ -126,20 +91,13 @@ xfs_file_splice_read( size_t len, unsigned int flags) { - return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), - infilp, ppos, pipe, len, flags, 0); -} + int ioflags = 0; + + if (infilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; -STATIC ssize_t -xfs_file_splice_read_invis( - struct file *infilp, - loff_t *ppos, - struct pipe_inode_info *pipe, - size_t len, - unsigned int flags) -{ return xfs_splice_read(XFS_I(infilp->f_path.dentry->d_inode), - infilp, ppos, pipe, len, flags, IO_INVIS); + infilp, ppos, pipe, len, flags, ioflags); } STATIC ssize_t @@ -150,30 +108,49 @@ xfs_file_splice_write( size_t len, unsigned int flags) { - return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), - pipe, outfilp, ppos, len, flags, 0); -} + int ioflags = 0; + + if (outfilp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; -STATIC ssize_t -xfs_file_splice_write_invis( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t len, - unsigned int flags) -{ return xfs_splice_write(XFS_I(outfilp->f_path.dentry->d_inode), - pipe, outfilp, ppos, len, flags, IO_INVIS); + pipe, outfilp, ppos, len, flags, ioflags); } STATIC int xfs_file_open( struct inode *inode, - struct file *filp) + struct file *file) { - if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) + if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) return -EFBIG; - return -xfs_open(XFS_I(inode)); + if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) + return -EIO; + return 0; +} + +STATIC int +xfs_dir_open( + struct inode *inode, + struct file *file) +{ + struct xfs_inode *ip = XFS_I(inode); + int mode; + int error; + + error = xfs_file_open(inode, file); + if (error) + return error; + + /* + * If there are any blocks, read-ahead block 0 as we're almost + * certain to have the next operation be a read there. + */ + mode = xfs_ilock_map_shared(ip); + if (ip->i_d.di_nextents > 0) + xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK); + xfs_iunlock(ip, mode); + return 0; } STATIC int @@ -227,7 +204,7 @@ xfs_file_readdir( * point we can change the ->readdir prototype to include the * buffer size. */ - bufsize = (size_t)min_t(loff_t, PAGE_SIZE, inode->i_size); + bufsize = (size_t)min_t(loff_t, PAGE_SIZE, ip->i_d.di_size); error = xfs_readdir(ip, dirent, bufsize, (xfs_off_t *)&filp->f_pos, filldir); @@ -248,48 +225,6 @@ xfs_file_mmap( return 0; } -STATIC long -xfs_file_ioctl( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - int error; - struct inode *inode = filp->f_path.dentry->d_inode; - - error = xfs_ioctl(XFS_I(inode), filp, 0, cmd, (void __user *)p); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); - - /* NOTE: some of the ioctl's return positive #'s as a - * byte count indicating success, such as - * readlink_by_handle. So we don't "sign flip" - * like most other routines. This means true - * errors need to be returned as a negative value. - */ - return error; -} - -STATIC long -xfs_file_ioctl_invis( - struct file *filp, - unsigned int cmd, - unsigned long p) -{ - int error; - struct inode *inode = filp->f_path.dentry->d_inode; - - error = xfs_ioctl(XFS_I(inode), filp, IO_INVIS, cmd, (void __user *)p); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); - - /* NOTE: some of the ioctl's return positive #'s as a - * byte count indicating success, such as - * readlink_by_handle. So we don't "sign flip" - * like most other routines. This means true - * errors need to be returned as a negative value. - */ - return error; -} - /* * mmap()d file has taken write protection fault and is being made * writable. We can set the page state up correctly for a writable @@ -325,26 +260,8 @@ const struct file_operations xfs_file_operations = { #endif }; -const struct file_operations xfs_invis_file_operations = { - .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read_invis, - .aio_write = xfs_file_aio_write_invis, - .splice_read = xfs_file_splice_read_invis, - .splice_write = xfs_file_splice_write_invis, - .unlocked_ioctl = xfs_file_ioctl_invis, -#ifdef CONFIG_COMPAT - .compat_ioctl = xfs_file_compat_invis_ioctl, -#endif - .mmap = xfs_file_mmap, - .open = xfs_file_open, - .release = xfs_file_release, - .fsync = xfs_file_fsync, -}; - - const struct file_operations xfs_dir_file_operations = { + .open = xfs_dir_open, .read = generic_read_dir, .readdir = xfs_file_readdir, .llseek = generic_file_llseek, diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 36caa6d957d..5aeb7777696 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c @@ -24,6 +24,10 @@ int fs_noerr(void) { return 0; } int fs_nosys(void) { return ENOSYS; } void fs_noval(void) { return; } +/* + * note: all filemap functions return negative error codes. These + * need to be inverted before returning to the xfs core functions. + */ void xfs_tosspages( xfs_inode_t *ip, @@ -53,7 +57,7 @@ xfs_flushinval_pages( if (!ret) truncate_inode_pages(mapping, first); } - return ret; + return -ret; } int @@ -72,10 +76,23 @@ xfs_flush_pages( xfs_iflags_clear(ip, XFS_ITRUNCATED); ret = filemap_fdatawrite(mapping); if (flags & XFS_B_ASYNC) - return ret; + return -ret; ret2 = filemap_fdatawait(mapping); if (!ret) ret = ret2; } - return ret; + return -ret; +} + +int +xfs_wait_on_pages( + xfs_inode_t *ip, + xfs_off_t first, + xfs_off_t last) +{ + struct address_space *mapping = VFS_I(ip)->i_mapping; + + if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + return -filemap_fdatawait(mapping); + return 0; } diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index ef90e64641e..2ae8b1ccb02 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -26,7 +26,6 @@ */ xfs_param_t xfs_params = { /* MIN DFLT MAX */ - .restrict_chown = { 0, 1, 1 }, .sgid_inherit = { 0, 0, 1 }, .symlink_mode = { 0, 0, 1 }, .panic_mask = { 0, 0, 255 }, @@ -43,10 +42,3 @@ xfs_param_t xfs_params = { .inherit_nodfrg = { 0, 1, 1 }, .fstrm_timer = { 1, 30*100, 3600*100}, }; - -/* - * Global system credential structure. - */ -static cred_t sys_cred_val; -cred_t *sys_cred = &sys_cred_val; - diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h index 2770b0085ee..69f71caf061 100644 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -19,6 +19,5 @@ #define __XFS_GLOBALS_H__ extern uint64_t xfs_panic_mask; /* set to cause more panics */ -extern struct cred *sys_cred; #endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index d3438c72dca..e5be1e0be80 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -68,26 +68,22 @@ * XFS_IOC_PATH_TO_HANDLE * returns full handle for a path */ -STATIC int +int xfs_find_handle( unsigned int cmd, - void __user *arg) + xfs_fsop_handlereq_t *hreq) { int hsize; xfs_handle_t handle; - xfs_fsop_handlereq_t hreq; struct inode *inode; - if (copy_from_user(&hreq, arg, sizeof(hreq))) - return -XFS_ERROR(EFAULT); - memset((char *)&handle, 0, sizeof(handle)); switch (cmd) { case XFS_IOC_PATH_TO_FSHANDLE: case XFS_IOC_PATH_TO_HANDLE: { struct path path; - int error = user_lpath((const char __user *)hreq.path, &path); + int error = user_lpath((const char __user *)hreq->path, &path); if (error) return error; @@ -101,7 +97,7 @@ xfs_find_handle( case XFS_IOC_FD_TO_HANDLE: { struct file *file; - file = fget(hreq.fd); + file = fget(hreq->fd); if (!file) return -EBADF; @@ -158,8 +154,8 @@ xfs_find_handle( } /* now copy our handle into the user buffer & write out the size */ - if (copy_to_user(hreq.ohandle, &handle, hsize) || - copy_to_user(hreq.ohandlen, &hsize, sizeof(__s32))) { + if (copy_to_user(hreq->ohandle, &handle, hsize) || + copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32))) { iput(inode); return -XFS_ERROR(EFAULT); } @@ -249,27 +245,25 @@ xfs_vget_fsop_handlereq( return 0; } -STATIC int +int xfs_open_by_handle( xfs_mount_t *mp, - void __user *arg, + xfs_fsop_handlereq_t *hreq, struct file *parfilp, struct inode *parinode) { + const struct cred *cred = current_cred(); int error; int new_fd; int permflag; struct file *filp; struct inode *inode; struct dentry *dentry; - xfs_fsop_handlereq_t hreq; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode); if (error) return -error; @@ -280,10 +274,10 @@ xfs_open_by_handle( } #if BITS_PER_LONG != 32 - hreq.oflags |= O_LARGEFILE; + hreq->oflags |= O_LARGEFILE; #endif /* Put open permission in namei format. */ - permflag = hreq.oflags; + permflag = hreq->oflags; if ((permflag+1) & O_ACCMODE) permflag++; if (permflag & O_TRUNC) @@ -321,15 +315,16 @@ xfs_open_by_handle( mntget(parfilp->f_path.mnt); /* Create file pointer. */ - filp = dentry_open(dentry, parfilp->f_path.mnt, hreq.oflags); + filp = dentry_open(dentry, parfilp->f_path.mnt, hreq->oflags, cred); if (IS_ERR(filp)) { put_unused_fd(new_fd); return -XFS_ERROR(-PTR_ERR(filp)); } + if (inode->i_mode & S_IFREG) { /* invisible operation should not change atime */ filp->f_flags |= O_NOATIME; - filp->f_op = &xfs_invis_file_operations; + filp->f_mode |= FMODE_NOCMTIME; } fd_install(new_fd, filp); @@ -362,24 +357,21 @@ do_readlink( } -STATIC int +int xfs_readlink_by_handle( xfs_mount_t *mp, - void __user *arg, + xfs_fsop_handlereq_t *hreq, struct inode *parinode) { struct inode *inode; - xfs_fsop_handlereq_t hreq; __u32 olen; void *link; int error; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); - if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) - return -XFS_ERROR(EFAULT); - error = xfs_vget_fsop_handlereq(mp, parinode, &hreq, &inode); + error = xfs_vget_fsop_handlereq(mp, parinode, hreq, &inode); if (error) return -error; @@ -389,7 +381,7 @@ xfs_readlink_by_handle( goto out_iput; } - if (copy_from_user(&olen, hreq.ohandlen, sizeof(__u32))) { + if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) { error = -XFS_ERROR(EFAULT); goto out_iput; } @@ -401,7 +393,7 @@ xfs_readlink_by_handle( error = -xfs_readlink(XFS_I(inode), link); if (error) goto out_kfree; - error = do_readlink(hreq.ohandle, olen, link); + error = do_readlink(hreq->ohandle, olen, link); if (error) goto out_kfree; @@ -500,7 +492,7 @@ xfs_attrlist_by_handle( return -error; } -STATIC int +int xfs_attrmulti_attr_get( struct inode *inode, char *name, @@ -529,7 +521,7 @@ xfs_attrmulti_attr_get( return error; } -STATIC int +int xfs_attrmulti_attr_set( struct inode *inode, char *name, @@ -559,7 +551,7 @@ xfs_attrmulti_attr_set( return error; } -STATIC int +int xfs_attrmulti_attr_remove( struct inode *inode, char *name, @@ -661,19 +653,26 @@ xfs_attrmulti_by_handle( return -error; } -STATIC int +int xfs_ioc_space( struct xfs_inode *ip, struct inode *inode, struct file *filp, int ioflags, unsigned int cmd, - void __user *arg) + xfs_flock64_t *bf) { - xfs_flock64_t bf; int attr_flags = 0; int error; + /* + * Only allow the sys admin to reserve space unless + * unwritten extents are enabled. + */ + if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) && + !capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (inode->i_flags & (S_IMMUTABLE|S_APPEND)) return -XFS_ERROR(EPERM); @@ -683,16 +682,12 @@ xfs_ioc_space( if (!S_ISREG(inode->i_mode)) return -XFS_ERROR(EINVAL); - if (copy_from_user(&bf, arg, sizeof(bf))) - return -XFS_ERROR(EFAULT); - if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) attr_flags |= XFS_ATTR_NONBLOCK; if (ioflags & IO_INVIS) attr_flags |= XFS_ATTR_DMI; - error = xfs_change_file_space(ip, cmd, &bf, filp->f_pos, - NULL, attr_flags); + error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); return -error; } @@ -1007,7 +1002,7 @@ xfs_ioctl_setattr( * to the file owner ID, except in cases where the * CAP_FSETID capability is applicable. */ - if (current->fsuid != ip->i_d.di_uid && !capable(CAP_FOWNER)) { + if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) { code = XFS_ERROR(EPERM); goto error_return; } @@ -1104,10 +1099,6 @@ xfs_ioctl_setattr( /* * Change file ownership. Must be the owner or privileged. - * If the system was configured with the "restricted_chown" - * option, the owner is not permitted to give away the file, - * and can change the group id only to a group of which he - * or she is a member. */ if (mask & FSX_PROJID) { /* @@ -1136,7 +1127,7 @@ xfs_ioctl_setattr( * the superblock version number since projids didn't * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. */ - if (ip->i_d.di_version == XFS_DINODE_VERSION_1) + if (ip->i_d.di_version == 1) xfs_bump_ino_vers2(tp, ip); } @@ -1254,6 +1245,19 @@ xfs_ioc_setxflags( return -xfs_ioctl_setattr(ip, &fa, mask); } +STATIC int +xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmap __user *base = *ap; + + /* copy only getbmap portion (not getbmapx) */ + if (copy_to_user(base, bmv, sizeof(struct getbmap))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmap); + return 0; +} + STATIC int xfs_ioc_getbmap( struct xfs_inode *ip, @@ -1261,37 +1265,48 @@ xfs_ioc_getbmap( unsigned int cmd, void __user *arg) { - struct getbmap bm; - int iflags; + struct getbmapx bmx; int error; - if (copy_from_user(&bm, arg, sizeof(bm))) + if (copy_from_user(&bmx, arg, sizeof(struct getbmapx))) return -XFS_ERROR(EFAULT); - if (bm.bmv_count < 2) + if (bmx.bmv_count < 2) return -XFS_ERROR(EINVAL); - iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); + bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0); if (ioflags & IO_INVIS) - iflags |= BMV_IF_NO_DMAPI_READ; + bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ; - error = xfs_getbmap(ip, &bm, (struct getbmap __user *)arg+1, iflags); + error = xfs_getbmap(ip, &bmx, xfs_getbmap_format, + (struct getbmap *)arg+1); if (error) return -error; - if (copy_to_user(arg, &bm, sizeof(bm))) + /* copy back header - only size of getbmap */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmap))) return -XFS_ERROR(EFAULT); return 0; } +STATIC int +xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full) +{ + struct getbmapx __user *base = *ap; + + if (copy_to_user(base, bmv, sizeof(struct getbmapx))) + return XFS_ERROR(EFAULT); + + *ap += sizeof(struct getbmapx); + return 0; +} + STATIC int xfs_ioc_getbmapx( struct xfs_inode *ip, void __user *arg) { struct getbmapx bmx; - struct getbmap bm; - int iflags; int error; if (copy_from_user(&bmx, arg, sizeof(bmx))) @@ -1300,46 +1315,46 @@ xfs_ioc_getbmapx( if (bmx.bmv_count < 2) return -XFS_ERROR(EINVAL); - /* - * Map input getbmapx structure to a getbmap - * structure for xfs_getbmap. - */ - GETBMAP_CONVERT(bmx, bm); - - iflags = bmx.bmv_iflags; - - if (iflags & (~BMV_IF_VALID)) + if (bmx.bmv_iflags & (~BMV_IF_VALID)) return -XFS_ERROR(EINVAL); - iflags |= BMV_IF_EXTENDED; - - error = xfs_getbmap(ip, &bm, (struct getbmapx __user *)arg+1, iflags); + error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format, + (struct getbmapx *)arg+1); if (error) return -error; - GETBMAP_CONVERT(bm, bmx); - - if (copy_to_user(arg, &bmx, sizeof(bmx))) + /* copy back header */ + if (copy_to_user(arg, &bmx, sizeof(struct getbmapx))) return -XFS_ERROR(EFAULT); return 0; } -int -xfs_ioctl( - xfs_inode_t *ip, +/* + * Note: some of the ioctl's return positive numbers as a + * byte count indicating success, such as readlink_by_handle. + * So we don't "sign flip" like most other routines. This means + * true errors need to be returned as a negative value. + */ +long +xfs_file_ioctl( struct file *filp, - int ioflags, unsigned int cmd, - void __user *arg) + unsigned long p) { struct inode *inode = filp->f_path.dentry->d_inode; - xfs_mount_t *mp = ip->i_mount; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; int error; - xfs_itrace_entry(XFS_I(inode)); - switch (cmd) { + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + xfs_itrace_entry(ip); + + switch (cmd) { case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: @@ -1347,17 +1362,13 @@ xfs_ioctl( case XFS_IOC_ALLOCSP64: case XFS_IOC_FREESP64: case XFS_IOC_RESVSP64: - case XFS_IOC_UNRESVSP64: - /* - * Only allow the sys admin to reserve space unless - * unwritten extents are enabled. - */ - if (!xfs_sb_version_hasextflgbit(&mp->m_sb) && - !capable(CAP_SYS_ADMIN)) - return -EPERM; - - return xfs_ioc_space(ip, inode, filp, ioflags, cmd, arg); + case XFS_IOC_UNRESVSP64: { + xfs_flock64_t bf; + if (copy_from_user(&bf, arg, sizeof(bf))) + return -XFS_ERROR(EFAULT); + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } case XFS_IOC_DIOINFO: { struct dioattr da; xfs_buftarg_t *target = @@ -1417,18 +1428,30 @@ xfs_ioctl( case XFS_IOC_FD_TO_HANDLE: case XFS_IOC_PATH_TO_HANDLE: - case XFS_IOC_PATH_TO_FSHANDLE: - return xfs_find_handle(cmd, arg); + case XFS_IOC_PATH_TO_FSHANDLE: { + xfs_fsop_handlereq_t hreq; - case XFS_IOC_OPEN_BY_HANDLE: - return xfs_open_by_handle(mp, arg, filp, inode); + if (copy_from_user(&hreq, arg, sizeof(hreq))) + return -XFS_ERROR(EFAULT); + return xfs_find_handle(cmd, &hreq); + } + case XFS_IOC_OPEN_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(mp, &hreq, filp, inode); + } case XFS_IOC_FSSETDM_BY_HANDLE: return xfs_fssetdm_by_handle(mp, arg, inode); - case XFS_IOC_READLINK_BY_HANDLE: - return xfs_readlink_by_handle(mp, arg, inode); + case XFS_IOC_READLINK_BY_HANDLE: { + xfs_fsop_handlereq_t hreq; + if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(mp, &hreq, inode); + } case XFS_IOC_ATTRLIST_BY_HANDLE: return xfs_attrlist_by_handle(mp, arg, inode); @@ -1436,7 +1459,11 @@ xfs_ioctl( return xfs_attrmulti_by_handle(mp, arg, filp, inode); case XFS_IOC_SWAPEXT: { - error = xfs_swapext((struct xfs_swapext __user *)arg); + struct xfs_swapext sxp; + + if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t))) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); return -error; } @@ -1492,9 +1519,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSDATA: { xfs_growfs_data_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); @@ -1505,9 +1529,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSLOG: { xfs_growfs_log_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); @@ -1518,9 +1539,6 @@ xfs_ioctl( case XFS_IOC_FSGROWFSRT: { xfs_growfs_rt_t in; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (copy_from_user(&in, arg, sizeof(in))) return -XFS_ERROR(EFAULT); @@ -1528,21 +1546,6 @@ xfs_ioctl( return -error; } - case XFS_IOC_FREEZE: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (inode->i_sb->s_frozen == SB_UNFROZEN) - freeze_bdev(inode->i_sb->s_bdev); - return 0; - - case XFS_IOC_THAW: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (inode->i_sb->s_frozen != SB_UNFROZEN) - thaw_bdev(inode->i_sb->s_bdev, inode->i_sb); - return 0; - case XFS_IOC_GOINGDOWN: { __uint32_t in; diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h new file mode 100644 index 00000000000..8c16bf2d7e0 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_ioctl.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2008 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef __XFS_IOCTL_H__ +#define __XFS_IOCTL_H__ + +extern int +xfs_ioc_space( + struct xfs_inode *ip, + struct inode *inode, + struct file *filp, + int ioflags, + unsigned int cmd, + xfs_flock64_t *bf); + +extern int +xfs_find_handle( + unsigned int cmd, + xfs_fsop_handlereq_t *hreq); + +extern int +xfs_open_by_handle( + xfs_mount_t *mp, + xfs_fsop_handlereq_t *hreq, + struct file *parfilp, + struct inode *parinode); + +extern int +xfs_readlink_by_handle( + xfs_mount_t *mp, + xfs_fsop_handlereq_t *hreq, + struct inode *parinode); + +extern int +xfs_attrmulti_attr_get( + struct inode *inode, + char *name, + char __user *ubuf, + __uint32_t *len, + __uint32_t flags); + +extern int + xfs_attrmulti_attr_set( + struct inode *inode, + char *name, + const char __user *ubuf, + __uint32_t len, + __uint32_t flags); + +extern int +xfs_attrmulti_attr_remove( + struct inode *inode, + char *name, + __uint32_t flags); + +extern long +xfs_file_ioctl( + struct file *filp, + unsigned int cmd, + unsigned long p); + +extern long +xfs_file_compat_ioctl( + struct file *file, + unsigned int cmd, + unsigned long arg); + +#endif diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index a4b254eb43b..50903ad3182 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c @@ -16,11 +16,7 @@ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include -#include #include -#include -#include -#include #include #include "xfs.h" #include "xfs_fs.h" @@ -36,7 +32,6 @@ #include "xfs_bmap_btree.h" #include "xfs_attr_sf.h" #include "xfs_dir2_sf.h" -#include "xfs_vfs.h" #include "xfs_vnode.h" #include "xfs_dinode.h" #include "xfs_inode.h" @@ -44,221 +39,219 @@ #include "xfs_error.h" #include "xfs_dfrag.h" #include "xfs_vnodeops.h" +#include "xfs_fsops.h" +#include "xfs_alloc.h" +#include "xfs_rtalloc.h" +#include "xfs_attr.h" +#include "xfs_ioctl.h" #include "xfs_ioctl32.h" #define _NATIVE_IOC(cmd, type) \ _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type)) -#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) -#define BROKEN_X86_ALIGNMENT -#define _PACKED __attribute__((packed)) -/* on ia32 l_start is on a 32-bit boundary */ -typedef struct xfs_flock64_32 { - __s16 l_type; - __s16 l_whence; - __s64 l_start __attribute__((packed)); - /* len == 0 means until end of file */ - __s64 l_len __attribute__((packed)); - __s32 l_sysid; - __u32 l_pid; - __s32 l_pad[4]; /* reserve area */ -} xfs_flock64_32_t; - -#define XFS_IOC_ALLOCSP_32 _IOW ('X', 10, struct xfs_flock64_32) -#define XFS_IOC_FREESP_32 _IOW ('X', 11, struct xfs_flock64_32) -#define XFS_IOC_ALLOCSP64_32 _IOW ('X', 36, struct xfs_flock64_32) -#define XFS_IOC_FREESP64_32 _IOW ('X', 37, struct xfs_flock64_32) -#define XFS_IOC_RESVSP_32 _IOW ('X', 40, struct xfs_flock64_32) -#define XFS_IOC_UNRESVSP_32 _IOW ('X', 41, struct xfs_flock64_32) -#define XFS_IOC_RESVSP64_32 _IOW ('X', 42, struct xfs_flock64_32) -#define XFS_IOC_UNRESVSP64_32 _IOW ('X', 43, struct xfs_flock64_32) - -/* just account for different alignment */ -STATIC unsigned long -xfs_ioctl32_flock( - unsigned long arg) +#ifdef BROKEN_X86_ALIGNMENT +STATIC int +xfs_compat_flock64_copyin( + xfs_flock64_t *bf, + compat_xfs_flock64_t __user *arg32) { - xfs_flock64_32_t __user *p32 = (void __user *)arg; - xfs_flock64_t __user *p = compat_alloc_user_space(sizeof(*p)); - - if (copy_in_user(&p->l_type, &p32->l_type, sizeof(s16)) || - copy_in_user(&p->l_whence, &p32->l_whence, sizeof(s16)) || - copy_in_user(&p->l_start, &p32->l_start, sizeof(s64)) || - copy_in_user(&p->l_len, &p32->l_len, sizeof(s64)) || - copy_in_user(&p->l_sysid, &p32->l_sysid, sizeof(s32)) || - copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) || - copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32))) - return -EFAULT; - - return (unsigned long)p; + if (get_user(bf->l_type, &arg32->l_type) || + get_user(bf->l_whence, &arg32->l_whence) || + get_user(bf->l_start, &arg32->l_start) || + get_user(bf->l_len, &arg32->l_len) || + get_user(bf->l_sysid, &arg32->l_sysid) || + get_user(bf->l_pid, &arg32->l_pid) || + copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32))) + return -XFS_ERROR(EFAULT); + return 0; } -typedef struct compat_xfs_fsop_geom_v1 { - __u32 blocksize; /* filesystem (data) block size */ - __u32 rtextsize; /* realtime extent size */ - __u32 agblocks; /* fsblocks in an AG */ - __u32 agcount; /* number of allocation groups */ - __u32 logblocks; /* fsblocks in the log */ - __u32 sectsize; /* (data) sector size, bytes */ - __u32 inodesize; /* inode size in bytes */ - __u32 imaxpct; /* max allowed inode space(%) */ - __u64 datablocks; /* fsblocks in data subvolume */ - __u64 rtblocks; /* fsblocks in realtime subvol */ - __u64 rtextents; /* rt extents in realtime subvol*/ - __u64 logstart; /* starting fsblock of the log */ - unsigned char uuid[16]; /* unique id of the filesystem */ - __u32 sunit; /* stripe unit, fsblocks */ - __u32 swidth; /* stripe width, fsblocks */ - __s32 version; /* structure version */ - __u32 flags; /* superblock version flags */ - __u32 logsectsize; /* log sector size, bytes */ - __u32 rtsectsize; /* realtime sector size, bytes */ - __u32 dirblocksize; /* directory block size, bytes */ -} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; - -#define XFS_IOC_FSGEOMETRY_V1_32 \ - _IOR ('X', 100, struct compat_xfs_fsop_geom_v1) - -STATIC unsigned long xfs_ioctl32_geom_v1(unsigned long arg) +STATIC int +xfs_compat_ioc_fsgeometry_v1( + struct xfs_mount *mp, + compat_xfs_fsop_geom_v1_t __user *arg32) { - compat_xfs_fsop_geom_v1_t __user *p32 = (void __user *)arg; - xfs_fsop_geom_v1_t __user *p = compat_alloc_user_space(sizeof(*p)); + xfs_fsop_geom_t fsgeo; + int error; - if (copy_in_user(p, p32, sizeof(*p32))) - return -EFAULT; - return (unsigned long)p; + error = xfs_fs_geometry(mp, &fsgeo, 3); + if (error) + return -error; + /* The 32-bit variant simply has some padding at the end */ + if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1))) + return -XFS_ERROR(EFAULT); + return 0; } -typedef struct compat_xfs_inogrp { - __u64 xi_startino; /* starting inode number */ - __s32 xi_alloccount; /* # bits set in allocmask */ - __u64 xi_allocmask; /* mask of allocated inodes */ -} __attribute__((packed)) compat_xfs_inogrp_t; - -STATIC int xfs_inumbers_fmt_compat( - void __user *ubuffer, - const xfs_inogrp_t *buffer, - long count, - long *written) +STATIC int +xfs_compat_growfs_data_copyin( + struct xfs_growfs_data *in, + compat_xfs_growfs_data_t __user *arg32) { - compat_xfs_inogrp_t __user *p32 = ubuffer; - long i; + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->imaxpct, &arg32->imaxpct)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_compat_growfs_rt_copyin( + struct xfs_growfs_rt *in, + compat_xfs_growfs_rt_t __user *arg32) +{ + if (get_user(in->newblocks, &arg32->newblocks) || + get_user(in->extsize, &arg32->extsize)) + return -XFS_ERROR(EFAULT); + return 0; +} + +STATIC int +xfs_inumbers_fmt_compat( + void __user *ubuffer, + const xfs_inogrp_t *buffer, + long count, + long *written) +{ + compat_xfs_inogrp_t __user *p32 = ubuffer; + long i; for (i = 0; i < count; i++) { if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) || put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) || put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask)) - return -EFAULT; + return -XFS_ERROR(EFAULT); } *written = count * sizeof(*p32); return 0; } #else - #define xfs_inumbers_fmt_compat xfs_inumbers_fmt -#define _PACKED +#endif /* BROKEN_X86_ALIGNMENT */ -#endif +STATIC int +xfs_ioctl32_bstime_copyin( + xfs_bstime_t *bstime, + compat_xfs_bstime_t __user *bstime32) +{ + compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */ -/* XFS_IOC_FSBULKSTAT and friends */ + if (get_user(sec32, &bstime32->tv_sec) || + get_user(bstime->tv_nsec, &bstime32->tv_nsec)) + return -XFS_ERROR(EFAULT); + bstime->tv_sec = sec32; + return 0; +} + +/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */ +STATIC int +xfs_ioctl32_bstat_copyin( + xfs_bstat_t *bstat, + compat_xfs_bstat_t __user *bstat32) +{ + if (get_user(bstat->bs_ino, &bstat32->bs_ino) || + get_user(bstat->bs_mode, &bstat32->bs_mode) || + get_user(bstat->bs_nlink, &bstat32->bs_nlink) || + get_user(bstat->bs_uid, &bstat32->bs_uid) || + get_user(bstat->bs_gid, &bstat32->bs_gid) || + get_user(bstat->bs_rdev, &bstat32->bs_rdev) || + get_user(bstat->bs_blksize, &bstat32->bs_blksize) || + get_user(bstat->bs_size, &bstat32->bs_size) || + xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) || + xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) || + get_user(bstat->bs_blocks, &bstat32->bs_size) || + get_user(bstat->bs_xflags, &bstat32->bs_size) || + get_user(bstat->bs_extsize, &bstat32->bs_extsize) || + get_user(bstat->bs_extents, &bstat32->bs_extents) || + get_user(bstat->bs_gen, &bstat32->bs_gen) || + get_user(bstat->bs_projid, &bstat32->bs_projid) || + get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || + get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || + get_user(bstat->bs_aextents, &bstat32->bs_aextents)) + return -XFS_ERROR(EFAULT); + return 0; +} -typedef struct compat_xfs_bstime { - __s32 tv_sec; /* seconds */ - __s32 tv_nsec; /* and nanoseconds */ -} compat_xfs_bstime_t; +/* XFS_IOC_FSBULKSTAT and friends */ -STATIC int xfs_bstime_store_compat( - compat_xfs_bstime_t __user *p32, - const xfs_bstime_t *p) +STATIC int +xfs_bstime_store_compat( + compat_xfs_bstime_t __user *p32, + const xfs_bstime_t *p) { - __s32 sec32; + __s32 sec32; sec32 = p->tv_sec; if (put_user(sec32, &p32->tv_sec) || put_user(p->tv_nsec, &p32->tv_nsec)) - return -EFAULT; + return -XFS_ERROR(EFAULT); return 0; } -typedef struct compat_xfs_bstat { - __u64 bs_ino; /* inode number */ - __u16 bs_mode; /* type and mode */ - __u16 bs_nlink; /* number of links */ - __u32 bs_uid; /* user id */ - __u32 bs_gid; /* group id */ - __u32 bs_rdev; /* device value */ - __s32 bs_blksize; /* block size */ - __s64 bs_size; /* file size */ - compat_xfs_bstime_t bs_atime; /* access time */ - compat_xfs_bstime_t bs_mtime; /* modify time */ - compat_xfs_bstime_t bs_ctime; /* inode change time */ - int64_t bs_blocks; /* number of blocks */ - __u32 bs_xflags; /* extended flags */ - __s32 bs_extsize; /* extent size */ - __s32 bs_extents; /* number of extents */ - __u32 bs_gen; /* generation count */ - __u16 bs_projid; /* project id */ - unsigned char bs_pad[14]; /* pad space, unused */ - __u32 bs_dmevmask; /* DMIG event mask */ - __u16 bs_dmstate; /* DMIG state info */ - __u16 bs_aextents; /* attribute number of extents */ -} _PACKED compat_xfs_bstat_t; - -STATIC int xfs_bulkstat_one_fmt_compat( +/* Return 0 on success or positive error (to xfs_bulkstat()) */ +STATIC int +xfs_bulkstat_one_fmt_compat( void __user *ubuffer, + int ubsize, + int *ubused, const xfs_bstat_t *buffer) { - compat_xfs_bstat_t __user *p32 = ubuffer; - - if (put_user(buffer->bs_ino, &p32->bs_ino) || - put_user(buffer->bs_mode, &p32->bs_mode) || - put_user(buffer->bs_nlink, &p32->bs_nlink) || - put_user(buffer->bs_uid, &p32->bs_uid) || - put_user(buffer->bs_gid, &p32->bs_gid) || - put_user(buffer->bs_rdev, &p32->bs_rdev) || - put_user(buffer->bs_blksize, &p32->bs_blksize) || - put_user(buffer->bs_size, &p32->bs_size) || + compat_xfs_bstat_t __user *p32 = ubuffer; + + if (ubsize < sizeof(*p32)) + return XFS_ERROR(ENOMEM); + + if (put_user(buffer->bs_ino, &p32->bs_ino) || + put_user(buffer->bs_mode, &p32->bs_mode) || + put_user(buffer->bs_nlink, &p32->bs_nlink) || + put_user(buffer->bs_uid, &p32->bs_uid) || + put_user(buffer->bs_gid, &p32->bs_gid) || + put_user(buffer->bs_rdev, &p32->bs_rdev) || + put_user(buffer->bs_blksize, &p32->bs_blksize) || + put_user(buffer->bs_size, &p32->bs_size) || xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) || xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) || xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) || - put_user(buffer->bs_blocks, &p32->bs_blocks) || - put_user(buffer->bs_xflags, &p32->bs_xflags) || - put_user(buffer->bs_extsize, &p32->bs_extsize) || - put_user(buffer->bs_extents, &p32->bs_extents) || - put_user(buffer->bs_gen, &p32->bs_gen) || - put_user(buffer->bs_projid, &p32->bs_projid) || - put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || - put_user(buffer->bs_dmstate, &p32->bs_dmstate) || + put_user(buffer->bs_blocks, &p32->bs_blocks) || + put_user(buffer->bs_xflags, &p32->bs_xflags) || + put_user(buffer->bs_extsize, &p32->bs_extsize) || + put_user(buffer->bs_extents, &p32->bs_extents) || + put_user(buffer->bs_gen, &p32->bs_gen) || + put_user(buffer->bs_projid, &p32->bs_projid) || + put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || + put_user(buffer->bs_dmstate, &p32->bs_dmstate) || put_user(buffer->bs_aextents, &p32->bs_aextents)) - return -EFAULT; - return sizeof(*p32); + return XFS_ERROR(EFAULT); + if (ubused) + *ubused = sizeof(*p32); + return 0; } - - -typedef struct compat_xfs_fsop_bulkreq { - compat_uptr_t lastip; /* last inode # pointer */ - __s32 icount; /* count of entries in buffer */ - compat_uptr_t ubuffer; /* user buffer for inode desc. */ - compat_uptr_t ocount; /* output count pointer */ -} compat_xfs_fsop_bulkreq_t; - -#define XFS_IOC_FSBULKSTAT_32 \ - _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ - _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) -#define XFS_IOC_FSINUMBERS_32 \ - _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) +STATIC int +xfs_bulkstat_one_compat( + xfs_mount_t *mp, /* mount point for filesystem */ + xfs_ino_t ino, /* inode number to get data for */ + void __user *buffer, /* buffer to place output in */ + int ubsize, /* size of buffer */ + void *private_data, /* my private data */ + xfs_daddr_t bno, /* starting bno of inode cluster */ + int *ubused, /* bytes used by me */ + void *dibuff, /* on-disk inode buffer */ + int *stat) /* BULKSTAT_RV_... */ +{ + return xfs_bulkstat_one_int(mp, ino, buffer, ubsize, + xfs_bulkstat_one_fmt_compat, bno, + ubused, dibuff, stat); +} /* copied from xfs_ioctl.c */ STATIC int -xfs_ioc_bulkstat_compat( - xfs_mount_t *mp, - unsigned int cmd, - void __user *arg) +xfs_compat_ioc_bulkstat( + xfs_mount_t *mp, + unsigned int cmd, + compat_xfs_fsop_bulkreq_t __user *p32) { - compat_xfs_fsop_bulkreq_t __user *p32 = (void __user *)arg; u32 addr; xfs_fsop_bulkreq_t bulkreq; int count; /* # of records returned */ @@ -270,20 +263,20 @@ xfs_ioc_bulkstat_compat( /* should be called again (unused here, but used in dmapi) */ if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + return -XFS_ERROR(EPERM); if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); if (get_user(addr, &p32->lastip)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.lastip = compat_ptr(addr); if (get_user(bulkreq.icount, &p32->icount) || get_user(addr, &p32->ubuffer)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.ubuffer = compat_ptr(addr); if (get_user(addr, &p32->ocount)) - return -EFAULT; + return -XFS_ERROR(EFAULT); bulkreq.ocount = compat_ptr(addr); if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64))) @@ -295,17 +288,22 @@ xfs_ioc_bulkstat_compat( if (bulkreq.ubuffer == NULL) return -XFS_ERROR(EINVAL); - if (cmd == XFS_IOC_FSINUMBERS) + if (cmd == XFS_IOC_FSINUMBERS_32) { error = xfs_inumbers(mp, &inlast, &count, bulkreq.ubuffer, xfs_inumbers_fmt_compat); - else { - /* declare a var to get a warning in case the type changes */ - bulkstat_one_fmt_pf formatter = xfs_bulkstat_one_fmt_compat; + } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) { + int res; + + error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer, + sizeof(compat_xfs_bstat_t), + NULL, 0, NULL, NULL, &res); + } else if (cmd == XFS_IOC_FSBULKSTAT_32) { error = xfs_bulkstat(mp, &inlast, &count, - xfs_bulkstat_one, formatter, + xfs_bulkstat_one_compat, NULL, sizeof(compat_xfs_bstat_t), bulkreq.ubuffer, BULKSTAT_FG_QUICK, &done); - } + } else + error = XFS_ERROR(EINVAL); if (error) return -error; @@ -321,63 +319,306 @@ xfs_ioc_bulkstat_compat( return 0; } +STATIC int +xfs_compat_handlereq_copyin( + xfs_fsop_handlereq_t *hreq, + compat_xfs_fsop_handlereq_t __user *arg32) +{ + compat_xfs_fsop_handlereq_t hreq32; + + if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t))) + return -XFS_ERROR(EFAULT); + + hreq->fd = hreq32.fd; + hreq->path = compat_ptr(hreq32.path); + hreq->oflags = hreq32.oflags; + hreq->ihandle = compat_ptr(hreq32.ihandle); + hreq->ihandlen = hreq32.ihandlen; + hreq->ohandle = compat_ptr(hreq32.ohandle); + hreq->ohandlen = compat_ptr(hreq32.ohandlen); + return 0; +} -typedef struct compat_xfs_fsop_handlereq { - __u32 fd; /* fd for FD_TO_HANDLE */ - compat_uptr_t path; /* user pathname */ - __u32 oflags; /* open flags */ - compat_uptr_t ihandle; /* user supplied handle */ - __u32 ihandlen; /* user supplied length */ - compat_uptr_t ohandle; /* user buffer for handle */ - compat_uptr_t ohandlen; /* user buffer length */ -} compat_xfs_fsop_handlereq_t; - -#define XFS_IOC_PATH_TO_FSHANDLE_32 \ - _IOWR('X', 104, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_PATH_TO_HANDLE_32 \ - _IOWR('X', 105, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_FD_TO_HANDLE_32 \ - _IOWR('X', 106, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_OPEN_BY_HANDLE_32 \ - _IOWR('X', 107, struct compat_xfs_fsop_handlereq) -#define XFS_IOC_READLINK_BY_HANDLE_32 \ - _IOWR('X', 108, struct compat_xfs_fsop_handlereq) - -STATIC unsigned long xfs_ioctl32_fshandle(unsigned long arg) +/* + * Convert userspace handle data into inode. + * + * We use the fact that all the fsop_handlereq ioctl calls have a data + * structure argument whose first component is always a xfs_fsop_handlereq_t, + * so we can pass that sub structure into this handy, shared routine. + * + * If no error, caller must always iput the returned inode. + */ +STATIC int +xfs_vget_fsop_handlereq_compat( + xfs_mount_t *mp, + struct inode *parinode, /* parent inode pointer */ + compat_xfs_fsop_handlereq_t *hreq, + struct inode **inode) { - compat_xfs_fsop_handlereq_t __user *p32 = (void __user *)arg; - xfs_fsop_handlereq_t __user *p = compat_alloc_user_space(sizeof(*p)); - u32 addr; - - if (copy_in_user(&p->fd, &p32->fd, sizeof(__u32)) || - get_user(addr, &p32->path) || - put_user(compat_ptr(addr), &p->path) || - copy_in_user(&p->oflags, &p32->oflags, sizeof(__u32)) || - get_user(addr, &p32->ihandle) || - put_user(compat_ptr(addr), &p->ihandle) || - copy_in_user(&p->ihandlen, &p32->ihandlen, sizeof(__u32)) || - get_user(addr, &p32->ohandle) || - put_user(compat_ptr(addr), &p->ohandle) || - get_user(addr, &p32->ohandlen) || - put_user(compat_ptr(addr), &p->ohandlen)) - return -EFAULT; - - return (unsigned long)p; + void __user *hanp; + size_t hlen; + xfs_fid_t *xfid; + xfs_handle_t *handlep; + xfs_handle_t handle; + xfs_inode_t *ip; + xfs_ino_t ino; + __u32 igen; + int error; + + /* + * Only allow handle opens under a directory. + */ + if (!S_ISDIR(parinode->i_mode)) + return XFS_ERROR(ENOTDIR); + + hanp = compat_ptr(hreq->ihandle); + hlen = hreq->ihandlen; + handlep = &handle; + + if (hlen < sizeof(handlep->ha_fsid) || hlen > sizeof(*handlep)) + return XFS_ERROR(EINVAL); + if (copy_from_user(handlep, hanp, hlen)) + return XFS_ERROR(EFAULT); + if (hlen < sizeof(*handlep)) + memset(((char *)handlep) + hlen, 0, sizeof(*handlep) - hlen); + if (hlen > sizeof(handlep->ha_fsid)) { + if (handlep->ha_fid.fid_len != + (hlen - sizeof(handlep->ha_fsid) - + sizeof(handlep->ha_fid.fid_len)) || + handlep->ha_fid.fid_pad) + return XFS_ERROR(EINVAL); + } + + /* + * Crack the handle, obtain the inode # & generation # + */ + xfid = (struct xfs_fid *)&handlep->ha_fid; + if (xfid->fid_len == sizeof(*xfid) - sizeof(xfid->fid_len)) { + ino = xfid->fid_ino; + igen = xfid->fid_gen; + } else { + return XFS_ERROR(EINVAL); + } + + /* + * Get the XFS inode, building a Linux inode to go with it. + */ + error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, 0); + if (error) + return error; + if (ip == NULL) + return XFS_ERROR(EIO); + if (ip->i_d.di_gen != igen) { + xfs_iput_new(ip, XFS_ILOCK_SHARED); + return XFS_ERROR(ENOENT); + } + + xfs_iunlock(ip, XFS_ILOCK_SHARED); + + *inode = VFS_I(ip); + return 0; } +STATIC int +xfs_compat_attrlist_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + attrlist_cursor_kern_t *cursor; + compat_xfs_fsop_attrlist_handlereq_t al_hreq; + struct inode *inode; + char *kbuf; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&al_hreq, arg, + sizeof(compat_xfs_fsop_attrlist_handlereq_t))) + return -XFS_ERROR(EFAULT); + if (al_hreq.buflen > XATTR_LIST_MAX) + return -XFS_ERROR(EINVAL); + + /* + * Reject flags, only allow namespaces. + */ + if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE)) + return -XFS_ERROR(EINVAL); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &al_hreq.hreq, + &inode); + if (error) + goto out; + + kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL); + if (!kbuf) + goto out_vn_rele; + + cursor = (attrlist_cursor_kern_t *)&al_hreq.pos; + error = xfs_attr_list(XFS_I(inode), kbuf, al_hreq.buflen, + al_hreq.flags, cursor); + if (error) + goto out_kfree; + + if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen)) + error = -EFAULT; + + out_kfree: + kfree(kbuf); + out_vn_rele: + iput(inode); + out: + return -error; +} -STATIC long -xfs_compat_ioctl( - int mode, - struct file *file, - unsigned cmd, - unsigned long arg) +STATIC int +xfs_compat_attrmulti_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + compat_xfs_attr_multiop_t *ops; + compat_xfs_fsop_attrmulti_handlereq_t am_hreq; + struct inode *inode; + unsigned int i, size; + char *attr_name; + + if (!capable(CAP_SYS_ADMIN)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&am_hreq, arg, + sizeof(compat_xfs_fsop_attrmulti_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &am_hreq.hreq, + &inode); + if (error) + goto out; + + error = E2BIG; + size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t); + if (!size || size > 16 * PAGE_SIZE) + goto out_vn_rele; + + error = ENOMEM; + ops = kmalloc(size, GFP_KERNEL); + if (!ops) + goto out_vn_rele; + + error = EFAULT; + if (copy_from_user(ops, compat_ptr(am_hreq.ops), size)) + goto out_kfree_ops; + + attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL); + if (!attr_name) + goto out_kfree_ops; + + + error = 0; + for (i = 0; i < am_hreq.opcount; i++) { + ops[i].am_error = strncpy_from_user(attr_name, + compat_ptr(ops[i].am_attrname), + MAXNAMELEN); + if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) + error = -ERANGE; + if (ops[i].am_error < 0) + break; + + switch (ops[i].am_opcode) { + case ATTR_OP_GET: + ops[i].am_error = xfs_attrmulti_attr_get(inode, + attr_name, + compat_ptr(ops[i].am_attrvalue), + &ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_SET: + ops[i].am_error = xfs_attrmulti_attr_set(inode, + attr_name, + compat_ptr(ops[i].am_attrvalue), + ops[i].am_length, ops[i].am_flags); + break; + case ATTR_OP_REMOVE: + ops[i].am_error = xfs_attrmulti_attr_remove(inode, + attr_name, ops[i].am_flags); + break; + default: + ops[i].am_error = EINVAL; + } + } + + if (copy_to_user(compat_ptr(am_hreq.ops), ops, size)) + error = XFS_ERROR(EFAULT); + + kfree(attr_name); + out_kfree_ops: + kfree(ops); + out_vn_rele: + iput(inode); + out: + return -error; +} + +STATIC int +xfs_compat_fssetdm_by_handle( + xfs_mount_t *mp, + void __user *arg, + struct inode *parinode) +{ + int error; + struct fsdmidata fsd; + compat_xfs_fsop_setdm_handlereq_t dmhreq; + struct inode *inode; + + if (!capable(CAP_MKNOD)) + return -XFS_ERROR(EPERM); + if (copy_from_user(&dmhreq, arg, + sizeof(compat_xfs_fsop_setdm_handlereq_t))) + return -XFS_ERROR(EFAULT); + + error = xfs_vget_fsop_handlereq_compat(mp, parinode, &dmhreq.hreq, + &inode); + if (error) + return -error; + + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) { + error = -XFS_ERROR(EPERM); + goto out; + } + + if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) { + error = -XFS_ERROR(EFAULT); + goto out; + } + + error = -xfs_set_dmattrs(XFS_I(inode), fsd.fsd_dmevmask, + fsd.fsd_dmstate); + +out: + iput(inode); + return error; +} + +long +xfs_file_compat_ioctl( + struct file *filp, + unsigned cmd, + unsigned long p) { - struct inode *inode = file->f_path.dentry->d_inode; - int error; + struct inode *inode = filp->f_path.dentry->d_inode; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + void __user *arg = (void __user *)p; + int ioflags = 0; + int error; + + if (filp->f_mode & FMODE_NOCMTIME) + ioflags |= IO_INVIS; + + xfs_itrace_entry(ip); switch (cmd) { + /* No size or alignment issues on any arch */ case XFS_IOC_DIOINFO: case XFS_IOC_FSGEOMETRY: case XFS_IOC_FSGETXATTR: @@ -387,48 +628,16 @@ xfs_compat_ioctl( case XFS_IOC_GETBMAP: case XFS_IOC_GETBMAPA: case XFS_IOC_GETBMAPX: -/* not handled - case XFS_IOC_FSSETDM_BY_HANDLE: - case XFS_IOC_ATTRLIST_BY_HANDLE: - case XFS_IOC_ATTRMULTI_BY_HANDLE: -*/ case XFS_IOC_FSCOUNTS: case XFS_IOC_SET_RESBLKS: case XFS_IOC_GET_RESBLKS: - case XFS_IOC_FSGROWFSDATA: case XFS_IOC_FSGROWFSLOG: - case XFS_IOC_FSGROWFSRT: - case XFS_IOC_FREEZE: - case XFS_IOC_THAW: case XFS_IOC_GOINGDOWN: case XFS_IOC_ERROR_INJECTION: case XFS_IOC_ERROR_CLEARALL: - break; - - case XFS_IOC32_GETXFLAGS: - case XFS_IOC32_SETXFLAGS: - case XFS_IOC32_GETVERSION: - cmd = _NATIVE_IOC(cmd, long); - break; -#ifdef BROKEN_X86_ALIGNMENT - /* xfs_flock_t has wrong u32 vs u64 alignment */ - case XFS_IOC_ALLOCSP_32: - case XFS_IOC_FREESP_32: - case XFS_IOC_ALLOCSP64_32: - case XFS_IOC_FREESP64_32: - case XFS_IOC_RESVSP_32: - case XFS_IOC_UNRESVSP_32: - case XFS_IOC_RESVSP64_32: - case XFS_IOC_UNRESVSP64_32: - arg = xfs_ioctl32_flock(arg); - cmd = _NATIVE_IOC(cmd, struct xfs_flock64); - break; - case XFS_IOC_FSGEOMETRY_V1_32: - arg = xfs_ioctl32_geom_v1(arg); - cmd = _NATIVE_IOC(cmd, struct xfs_fsop_geom_v1); - break; - -#else /* These are handled fine if no alignment issues */ + return xfs_file_ioctl(filp, cmd, p); +#ifndef BROKEN_X86_ALIGNMENT + /* These are handled fine if no alignment issues */ case XFS_IOC_ALLOCSP: case XFS_IOC_FREESP: case XFS_IOC_RESVSP: @@ -438,51 +647,97 @@ xfs_compat_ioctl( case XFS_IOC_RESVSP64: case XFS_IOC_UNRESVSP64: case XFS_IOC_FSGEOMETRY_V1: - break; + case XFS_IOC_FSGROWFSDATA: + case XFS_IOC_FSGROWFSRT: + return xfs_file_ioctl(filp, cmd, p); +#else + case XFS_IOC_ALLOCSP_32: + case XFS_IOC_FREESP_32: + case XFS_IOC_ALLOCSP64_32: + case XFS_IOC_FREESP64_32: + case XFS_IOC_RESVSP_32: + case XFS_IOC_UNRESVSP_32: + case XFS_IOC_RESVSP64_32: + case XFS_IOC_UNRESVSP64_32: { + struct xfs_flock64 bf; - /* xfs_bstat_t still has wrong u32 vs u64 alignment */ - case XFS_IOC_SWAPEXT: - break; + if (xfs_compat_flock64_copyin(&bf, arg)) + return -XFS_ERROR(EFAULT); + cmd = _NATIVE_IOC(cmd, struct xfs_flock64); + return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf); + } + case XFS_IOC_FSGEOMETRY_V1_32: + return xfs_compat_ioc_fsgeometry_v1(mp, arg); + case XFS_IOC_FSGROWFSDATA_32: { + struct xfs_growfs_data in; + + if (xfs_compat_growfs_data_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_data(mp, &in); + return -error; + } + case XFS_IOC_FSGROWFSRT_32: { + struct xfs_growfs_rt in; + if (xfs_compat_growfs_rt_copyin(&in, arg)) + return -XFS_ERROR(EFAULT); + error = xfs_growfs_rt(mp, &in); + return -error; + } #endif + /* long changes size, but xfs only copiese out 32 bits */ + case XFS_IOC_GETXFLAGS_32: + case XFS_IOC_SETXFLAGS_32: + case XFS_IOC_GETVERSION_32: + cmd = _NATIVE_IOC(cmd, long); + return xfs_file_ioctl(filp, cmd, p); + case XFS_IOC_SWAPEXT: { + struct xfs_swapext sxp; + struct compat_xfs_swapext __user *sxu = arg; + + /* Bulk copy in up to the sx_stat field, then copy bstat */ + if (copy_from_user(&sxp, sxu, + offsetof(struct xfs_swapext, sx_stat)) || + xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat)) + return -XFS_ERROR(EFAULT); + error = xfs_swapext(&sxp); + return -error; + } case XFS_IOC_FSBULKSTAT_32: case XFS_IOC_FSBULKSTAT_SINGLE_32: case XFS_IOC_FSINUMBERS_32: - cmd = _NATIVE_IOC(cmd, struct xfs_fsop_bulkreq); - return xfs_ioc_bulkstat_compat(XFS_I(inode)->i_mount, - cmd, (void __user*)arg); + return xfs_compat_ioc_bulkstat(mp, cmd, arg); case XFS_IOC_FD_TO_HANDLE_32: case XFS_IOC_PATH_TO_HANDLE_32: - case XFS_IOC_PATH_TO_FSHANDLE_32: - case XFS_IOC_OPEN_BY_HANDLE_32: - case XFS_IOC_READLINK_BY_HANDLE_32: - arg = xfs_ioctl32_fshandle(arg); + case XFS_IOC_PATH_TO_FSHANDLE_32: { + struct xfs_fsop_handlereq hreq; + + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq); - break; - default: - return -ENOIOCTLCMD; + return xfs_find_handle(cmd, &hreq); } + case XFS_IOC_OPEN_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; - error = xfs_ioctl(XFS_I(inode), file, mode, cmd, (void __user *)arg); - xfs_iflags_set(XFS_I(inode), XFS_IMODIFIED); - - return error; -} - -long -xfs_file_compat_ioctl( - struct file *file, - unsigned cmd, - unsigned long arg) -{ - return xfs_compat_ioctl(0, file, cmd, arg); -} + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_open_by_handle(mp, &hreq, filp, inode); + } + case XFS_IOC_READLINK_BY_HANDLE_32: { + struct xfs_fsop_handlereq hreq; -long -xfs_file_compat_invis_ioctl( - struct file *file, - unsigned cmd, - unsigned long arg) -{ - return xfs_compat_ioctl(IO_INVIS, file, cmd, arg); + if (xfs_compat_handlereq_copyin(&hreq, arg)) + return -XFS_ERROR(EFAULT); + return xfs_readlink_by_handle(mp, &hreq, inode); + } + case XFS_IOC_ATTRLIST_BY_HANDLE_32: + return xfs_compat_attrlist_by_handle(mp, arg, inode); + case XFS_IOC_ATTRMULTI_BY_HANDLE_32: + return xfs_compat_attrmulti_by_handle(mp, arg, inode); + case XFS_IOC_FSSETDM_BY_HANDLE_32: + return xfs_compat_fssetdm_by_handle(mp, arg, inode); + default: + return -XFS_ERROR(ENOIOCTLCMD); + } } diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 02de6e62ee3..1024c4f8ba0 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h @@ -18,7 +18,217 @@ #ifndef __XFS_IOCTL32_H__ #define __XFS_IOCTL32_H__ -extern long xfs_file_compat_ioctl(struct file *, unsigned, unsigned long); -extern long xfs_file_compat_invis_ioctl(struct file *, unsigned, unsigned long); +#include + +/* + * on 32-bit arches, ioctl argument structures may have different sizes + * and/or alignment. We define compat structures which match the + * 32-bit sizes/alignments here, and their associated ioctl numbers. + * + * xfs_ioctl32.c contains routines to copy these structures in and out. + */ + +/* stock kernel-level ioctls we support */ +#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS +#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS +#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION + +/* + * On intel, even if sizes match, alignment and/or padding may differ. + */ +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) +#define BROKEN_X86_ALIGNMENT +#define __compat_packed __attribute__((packed)) +#else +#define __compat_packed +#endif + +typedef struct compat_xfs_bstime { + compat_time_t tv_sec; /* seconds */ + __s32 tv_nsec; /* and nanoseconds */ +} compat_xfs_bstime_t; + +typedef struct compat_xfs_bstat { + __u64 bs_ino; /* inode number */ + __u16 bs_mode; /* type and mode */ + __u16 bs_nlink; /* number of links */ + __u32 bs_uid; /* user id */ + __u32 bs_gid; /* group id */ + __u32 bs_rdev; /* device value */ + __s32 bs_blksize; /* block size */ + __s64 bs_size; /* file size */ + compat_xfs_bstime_t bs_atime; /* access time */ + compat_xfs_bstime_t bs_mtime; /* modify time */ + compat_xfs_bstime_t bs_ctime; /* inode change time */ + int64_t bs_blocks; /* number of blocks */ + __u32 bs_xflags; /* extended flags */ + __s32 bs_extsize; /* extent size */ + __s32 bs_extents; /* number of extents */ + __u32 bs_gen; /* generation count */ + __u16 bs_projid; /* project id */ + unsigned char bs_pad[14]; /* pad space, unused */ + __u32 bs_dmevmask; /* DMIG event mask */ + __u16 bs_dmstate; /* DMIG state info */ + __u16 bs_aextents; /* attribute number of extents */ +} __compat_packed compat_xfs_bstat_t; + +typedef struct compat_xfs_fsop_bulkreq { + compat_uptr_t lastip; /* last inode # pointer */ + __s32 icount; /* count of entries in buffer */ + compat_uptr_t ubuffer; /* user buffer for inode desc. */ + compat_uptr_t ocount; /* output count pointer */ +} compat_xfs_fsop_bulkreq_t; + +#define XFS_IOC_FSBULKSTAT_32 \ + _IOWR('X', 101, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSBULKSTAT_SINGLE_32 \ + _IOWR('X', 102, struct compat_xfs_fsop_bulkreq) +#define XFS_IOC_FSINUMBERS_32 \ + _IOWR('X', 103, struct compat_xfs_fsop_bulkreq) + +typedef struct compat_xfs_fsop_handlereq { + __u32 fd; /* fd for FD_TO_HANDLE */ + compat_uptr_t path; /* user pathname */ + __u32 oflags; /* open flags */ + compat_uptr_t ihandle; /* user supplied handle */ + __u32 ihandlen; /* user supplied length */ + compat_uptr_t ohandle; /* user buffer for handle */ + compat_uptr_t ohandlen; /* user buffer length */ +} compat_xfs_fsop_handlereq_t; + +#define XFS_IOC_PATH_TO_FSHANDLE_32 \ + _IOWR('X', 104, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_PATH_TO_HANDLE_32 \ + _IOWR('X', 105, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_FD_TO_HANDLE_32 \ + _IOWR('X', 106, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_OPEN_BY_HANDLE_32 \ + _IOWR('X', 107, struct compat_xfs_fsop_handlereq) +#define XFS_IOC_READLINK_BY_HANDLE_32 \ + _IOWR('X', 108, struct compat_xfs_fsop_handlereq) + +/* The bstat field in the swapext struct needs translation */ +typedef struct compat_xfs_swapext { + __int64_t sx_version; /* version */ + __int64_t sx_fdtarget; /* fd of target file */ + __int64_t sx_fdtmp; /* fd of tmp file */ + xfs_off_t sx_offset; /* offset into file */ + xfs_off_t sx_length; /* leng from offset */ + char sx_pad[16]; /* pad space, unused */ + compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */ +} __compat_packed compat_xfs_swapext_t; + +#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext) + +typedef struct compat_xfs_fsop_attrlist_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */ + __u32 flags; /* which namespace to use */ + __u32 buflen; /* length of buffer supplied */ + compat_uptr_t buffer; /* returned names */ +} __compat_packed compat_xfs_fsop_attrlist_handlereq_t; + +/* Note: actually this is read/write */ +#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \ + _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq) + +/* am_opcodes defined in xfs_fs.h */ +typedef struct compat_xfs_attr_multiop { + __u32 am_opcode; + __s32 am_error; + compat_uptr_t am_attrname; + compat_uptr_t am_attrvalue; + __u32 am_length; + __u32 am_flags; +} compat_xfs_attr_multiop_t; + +typedef struct compat_xfs_fsop_attrmulti_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */ + __u32 opcount;/* count of following multiop */ + /* ptr to compat_xfs_attr_multiop */ + compat_uptr_t ops; /* attr_multi data */ +} compat_xfs_fsop_attrmulti_handlereq_t; + +#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \ + _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq) + +typedef struct compat_xfs_fsop_setdm_handlereq { + struct compat_xfs_fsop_handlereq hreq; /* handle information */ + /* ptr to struct fsdmidata */ + compat_uptr_t data; /* DMAPI data */ +} compat_xfs_fsop_setdm_handlereq_t; + +#define XFS_IOC_FSSETDM_BY_HANDLE_32 \ + _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq) + +#ifdef BROKEN_X86_ALIGNMENT +/* on ia32 l_start is on a 32-bit boundary */ +typedef struct compat_xfs_flock64 { + __s16 l_type; + __s16 l_whence; + __s64 l_start __attribute__((packed)); + /* len == 0 means until end of file */ + __s64 l_len __attribute__((packed)); + __s32 l_sysid; + __u32 l_pid; + __s32 l_pad[4]; /* reserve area */ +} compat_xfs_flock64_t; + +#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64) +#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64) +#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64) +#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) +#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) +#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) + +typedef struct compat_xfs_fsop_geom_v1 { + __u32 blocksize; /* filesystem (data) block size */ + __u32 rtextsize; /* realtime extent size */ + __u32 agblocks; /* fsblocks in an AG */ + __u32 agcount; /* number of allocation groups */ + __u32 logblocks; /* fsblocks in the log */ + __u32 sectsize; /* (data) sector size, bytes */ + __u32 inodesize; /* inode size in bytes */ + __u32 imaxpct; /* max allowed inode space(%) */ + __u64 datablocks; /* fsblocks in data subvolume */ + __u64 rtblocks; /* fsblocks in realtime subvol */ + __u64 rtextents; /* rt extents in realtime subvol*/ + __u64 logstart; /* starting fsblock of the log */ + unsigned char uuid[16]; /* unique id of the filesystem */ + __u32 sunit; /* stripe unit, fsblocks */ + __u32 swidth; /* stripe width, fsblocks */ + __s32 version; /* structure version */ + __u32 flags; /* superblock version flags */ + __u32 logsectsize; /* log sector size, bytes */ + __u32 rtsectsize; /* realtime sector size, bytes */ + __u32 dirblocksize; /* directory block size, bytes */ +} __attribute__((packed)) compat_xfs_fsop_geom_v1_t; + +#define XFS_IOC_FSGEOMETRY_V1_32 \ + _IOR('X', 100, struct compat_xfs_fsop_geom_v1) + +typedef struct compat_xfs_inogrp { + __u64 xi_startino; /* starting inode number */ + __s32 xi_alloccount; /* # bits set in allocmask */ + __u64 xi_allocmask; /* mask of allocated inodes */ +} __attribute__((packed)) compat_xfs_inogrp_t; + +/* These growfs input structures have padding on the end, so must translate */ +typedef struct compat_xfs_growfs_data { + __u64 newblocks; /* new data subvol size, fsblocks */ + __u32 imaxpct; /* new inode space percentage limit */ +} __attribute__((packed)) compat_xfs_growfs_data_t; + +typedef struct compat_xfs_growfs_rt { + __u64 newblocks; /* new realtime size, fsblocks */ + __u32 extsize; /* new realtime extent size, fsblocks */ +} __attribute__((packed)) compat_xfs_growfs_rt_t; + +#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data) +#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt) + +#endif /* BROKEN_X86_ALIGNMENT */ #endif /* __XFS_IOCTL32_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 095d271f343..7aa53fefc67 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -53,6 +53,7 @@ #include #include #include +#include /* * Bring the atime in the XFS inode uptodate. @@ -64,14 +65,14 @@ xfs_synchronize_atime( { struct inode *inode = VFS_I(ip); - if (inode) { + if (!(inode->i_state & I_CLEAR)) { ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; } } /* - * If the linux inode exists, mark it dirty. + * If the linux inode is valid, mark it dirty. * Used when commiting a dirty inode into a transaction so that * the inode will get written back by the linux code */ @@ -81,7 +82,7 @@ xfs_mark_inode_dirty_sync( { struct inode *inode = VFS_I(ip); - if (inode) + if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR))) mark_inode_dirty_sync(inode); } @@ -128,7 +129,7 @@ xfs_ichgtime( if (sync_it) { SYNCHRONIZE(); ip->i_update_core = 1; - mark_inode_dirty_sync(inode); + xfs_mark_inode_dirty_sync(ip); } } @@ -158,8 +159,6 @@ xfs_init_security( } error = xfs_attr_set(ip, name, value, length, ATTR_SECURE); - if (!error) - xfs_iflags_set(ip, XFS_IMODIFIED); kfree(name); kfree(value); @@ -260,7 +259,6 @@ xfs_vn_mknod( error = _ACL_INHERIT(inode, mode, default_acl); if (unlikely(error)) goto out_cleanup_inode; - xfs_iflags_set(ip, XFS_IMODIFIED); _ACL_FREE(default_acl); } @@ -366,21 +364,17 @@ xfs_vn_link( struct inode *dir, struct dentry *dentry) { - struct inode *inode; /* inode of guy being linked to */ + struct inode *inode = old_dentry->d_inode; struct xfs_name name; int error; - inode = old_dentry->d_inode; xfs_dentry_to_name(&name, dentry); - igrab(inode); error = xfs_link(XFS_I(dir), XFS_I(inode), &name); - if (unlikely(error)) { - iput(inode); + if (unlikely(error)) return -error; - } - xfs_iflags_set(XFS_I(dir), XFS_IMODIFIED); + atomic_inc(&inode->i_count); d_instantiate(dentry, inode); return 0; } @@ -601,7 +595,7 @@ xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { - return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0, NULL); + return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0); } /* @@ -642,7 +636,7 @@ xfs_vn_fallocate( xfs_ilock(ip, XFS_IOLOCK_EXCL); error = xfs_change_file_space(ip, XFS_IOC_RESVSP, &bf, - 0, NULL, XFS_ATTR_NOLOCK); + 0, XFS_ATTR_NOLOCK); if (!error && !(mode & FALLOC_FL_KEEP_SIZE) && offset + len > i_size_read(inode)) new_size = offset + len; @@ -653,7 +647,7 @@ xfs_vn_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK, NULL); + error = xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); @@ -661,6 +655,88 @@ out_error: return error; } +#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) + +/* + * Call fiemap helper to fill in user data. + * Returns positive errors to xfs_getbmap. + */ +STATIC int +xfs_fiemap_format( + void **arg, + struct getbmapx *bmv, + int *full) +{ + int error; + struct fiemap_extent_info *fieinfo = *arg; + u32 fiemap_flags = 0; + u64 logical, physical, length; + + /* Do nothing for a hole */ + if (bmv->bmv_block == -1LL) + return 0; + + logical = BBTOB(bmv->bmv_offset); + physical = BBTOB(bmv->bmv_block); + length = BBTOB(bmv->bmv_length); + + if (bmv->bmv_oflags & BMV_OF_PREALLOC) + fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN; + else if (bmv->bmv_oflags & BMV_OF_DELALLOC) { + fiemap_flags |= FIEMAP_EXTENT_DELALLOC; + physical = 0; /* no block yet */ + } + if (bmv->bmv_oflags & BMV_OF_LAST) + fiemap_flags |= FIEMAP_EXTENT_LAST; + + error = fiemap_fill_next_extent(fieinfo, logical, physical, + length, fiemap_flags); + if (error > 0) { + error = 0; + *full = 1; /* user array now full */ + } + + return -error; +} + +STATIC int +xfs_vn_fiemap( + struct inode *inode, + struct fiemap_extent_info *fieinfo, + u64 start, + u64 length) +{ + xfs_inode_t *ip = XFS_I(inode); + struct getbmapx bm; + int error; + + error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS); + if (error) + return error; + + /* Set up bmap header for xfs internal routine */ + bm.bmv_offset = BTOBB(start); + /* Special case for whole file */ + if (length == FIEMAP_MAX_OFFSET) + bm.bmv_length = -1LL; + else + bm.bmv_length = BTOBB(length); + + /* our formatter will tell xfs_getbmap when to stop. */ + bm.bmv_count = MAXEXTNUM; + bm.bmv_iflags = BMV_IF_PREALLOC; + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) + bm.bmv_iflags |= BMV_IF_ATTRFORK; + if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC)) + bm.bmv_iflags |= BMV_IF_DELALLOC; + + error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo); + if (error) + return -error; + + return 0; +} + static const struct inode_operations xfs_inode_operations = { .permission = xfs_vn_permission, .truncate = xfs_vn_truncate, @@ -671,6 +747,7 @@ static const struct inode_operations xfs_inode_operations = { .removexattr = generic_removexattr, .listxattr = xfs_vn_listxattr, .fallocate = xfs_vn_fallocate, + .fiemap = xfs_vn_fiemap, }; static const struct inode_operations xfs_dir_inode_operations = { @@ -766,12 +843,20 @@ xfs_diflags_to_iflags( * When reading existing inodes from disk this is called directly * from xfs_iget, when creating a new inode it is called from * xfs_ialloc after setting up the inode. + * + * We are always called with an uninitialised linux inode here. + * We need to initialise the necessary fields and take a reference + * on it. */ void xfs_setup_inode( struct xfs_inode *ip) { - struct inode *inode = ip->i_vnode; + struct inode *inode = &ip->i_vnode; + + inode->i_ino = ip->i_ino; + inode->i_state = I_NEW|I_LOCK; + inode_add_to_lists(ip->i_mount->m_super, inode); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; @@ -799,7 +884,6 @@ xfs_setup_inode( inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec; inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; xfs_diflags_to_iflags(inode, ip); - xfs_iflags_clear(ip, XFS_IMODIFIED); switch (inode->i_mode & S_IFMT) { case S_IFREG: diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h index 8b1a1e31dc2..ef41c92ce66 100644 --- a/fs/xfs/linux-2.6/xfs_iops.h +++ b/fs/xfs/linux-2.6/xfs_iops.h @@ -22,7 +22,6 @@ struct xfs_inode; extern const struct file_operations xfs_file_operations; extern const struct file_operations xfs_dir_file_operations; -extern const struct file_operations xfs_invis_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index cc0f7b3a979..507492d6dcc 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h @@ -21,18 +21,12 @@ #include /* - * Some types are conditional depending on the target system. * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits. - * XFS_BIG_INUMS needs the VFS inode number to be 64 bits, as well - * as requiring XFS_BIG_BLKNOS to be set. + * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set. */ #if defined(CONFIG_LBD) || (BITS_PER_LONG == 64) # define XFS_BIG_BLKNOS 1 -# if BITS_PER_LONG == 64 -# define XFS_BIG_INUMS 1 -# else -# define XFS_BIG_INUMS 0 -# endif +# define XFS_BIG_INUMS 1 #else # define XFS_BIG_BLKNOS 0 # define XFS_BIG_INUMS 0 @@ -77,6 +71,7 @@ #include #include #include +#include #include #include @@ -85,7 +80,6 @@ #include #include -#include #include #include #include @@ -107,7 +101,6 @@ #undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */ #endif -#define restricted_chown xfs_params.restrict_chown.val #define irix_sgid_inherit xfs_params.sgid_inherit.val #define irix_symlink_mode xfs_params.symlink_mode.val #define xfs_panic_mask xfs_params.panic_mask.val diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 1957e5357d0..7e90daa0d1d 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -51,7 +51,6 @@ #include "xfs_vnodeops.h" #include -#include #include @@ -243,7 +242,7 @@ xfs_read( if (unlikely(ioflags & IO_ISDIRECT)) { if (inode->i_mapping->nrpages) - ret = xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), + ret = -xfs_flushinval_pages(ip, (*offset & PAGE_CACHE_MASK), -1, FI_REMAPF_LOCKED); mutex_unlock(&inode->i_mutex); if (ret) { @@ -668,15 +667,8 @@ start: if (new_size > xip->i_size) xip->i_new_size = new_size; - /* - * We're not supposed to change timestamps in readonly-mounted - * filesystems. Throw it away if anyone asks us. - */ - if (likely(!(ioflags & IO_INVIS) && - !mnt_want_write(file->f_path.mnt))) { + if (likely(!(ioflags & IO_INVIS))) xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); - mnt_drop_write(file->f_path.mnt); - } /* * If the offset is beyond the size of the file, we have a couple @@ -715,7 +707,6 @@ start: } } -retry: /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; @@ -771,6 +762,17 @@ retry: if (ret == -EIOCBQUEUED && !(ioflags & IO_ISAIO)) ret = wait_on_sync_kiocb(iocb); + isize = i_size_read(inode); + if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) + *offset = isize; + + if (*offset > xip->i_size) { + xfs_ilock(xip, XFS_ILOCK_EXCL); + if (*offset > xip->i_size) + xip->i_size = *offset; + xfs_iunlock(xip, XFS_ILOCK_EXCL); + } + if (ret == -ENOSPC && DM_EVENT_ENABLED(xip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) { xfs_iunlock(xip, iolock); @@ -784,20 +786,7 @@ retry: xfs_ilock(xip, iolock); if (error) goto out_unlock_internal; - pos = xip->i_size; - ret = 0; - goto retry; - } - - isize = i_size_read(inode); - if (unlikely(ret < 0 && ret != -EFAULT && *offset > isize)) - *offset = isize; - - if (*offset > xip->i_size) { - xfs_ilock(xip, XFS_ILOCK_EXCL); - if (*offset > xip->i_size) - xip->i_size = *offset; - xfs_iunlock(xip, XFS_ILOCK_EXCL); + goto start; } error = -ret; @@ -855,13 +844,7 @@ retry: int xfs_bdstrat_cb(struct xfs_buf *bp) { - xfs_mount_t *mp; - - mp = XFS_BUF_FSPRIVATE3(bp, xfs_mount_t *); - if (!XFS_FORCED_SHUTDOWN(mp)) { - xfs_buf_iorequest(bp); - return 0; - } else { + if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { xfs_buftrace("XFS__BDSTRAT IOERROR", bp); /* * Metadata write that didn't get logged but @@ -874,6 +857,9 @@ xfs_bdstrat_cb(struct xfs_buf *bp) else return (xfs_bioerror(bp)); } + + xfs_buf_iorequest(bp); + return 0; } /* diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c index 3d5b67c075c..c3526d445f6 100644 --- a/fs/xfs/linux-2.6/xfs_stats.c +++ b/fs/xfs/linux-2.6/xfs_stats.c @@ -53,11 +53,15 @@ xfs_read_xfsstats( { "icluster", XFSSTAT_END_INODE_CLUSTER }, { "vnodes", XFSSTAT_END_VNODE_OPS }, { "buf", XFSSTAT_END_BUF }, + { "abtb2", XFSSTAT_END_ABTB_V2 }, + { "abtc2", XFSSTAT_END_ABTC_V2 }, + { "bmbt2", XFSSTAT_END_BMBT_V2 }, + { "ibt2", XFSSTAT_END_IBT_V2 }, }; /* Loop over all stats groups */ for (i=j=len = 0; i < ARRAY_SIZE(xstats); i++) { - len += sprintf(buffer + len, xstats[i].desc); + len += sprintf(buffer + len, "%s", xstats[i].desc); /* inner loop does each group */ while (j < xstats[i].endpoint) { val = 0; diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h index e83820febc9..736854b1ca1 100644 --- a/fs/xfs/linux-2.6/xfs_stats.h +++ b/fs/xfs/linux-2.6/xfs_stats.h @@ -118,6 +118,71 @@ struct xfsstats { __uint32_t xb_page_retries; __uint32_t xb_page_found; __uint32_t xb_get_read; +/* Version 2 btree counters */ +#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15) + __uint32_t xs_abtb_2_lookup; + __uint32_t xs_abtb_2_compare; + __uint32_t xs_abtb_2_insrec; + __uint32_t xs_abtb_2_delrec; + __uint32_t xs_abtb_2_newroot; + __uint32_t xs_abtb_2_killroot; + __uint32_t xs_abtb_2_increment; + __uint32_t xs_abtb_2_decrement; + __uint32_t xs_abtb_2_lshift; + __uint32_t xs_abtb_2_rshift; + __uint32_t xs_abtb_2_split; + __uint32_t xs_abtb_2_join; + __uint32_t xs_abtb_2_alloc; + __uint32_t xs_abtb_2_free; + __uint32_t xs_abtb_2_moves; +#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15) + __uint32_t xs_abtc_2_lookup; + __uint32_t xs_abtc_2_compare; + __uint32_t xs_abtc_2_insrec; + __uint32_t xs_abtc_2_delrec; + __uint32_t xs_abtc_2_newroot; + __uint32_t xs_abtc_2_killroot; + __uint32_t xs_abtc_2_increment; + __uint32_t xs_abtc_2_decrement; + __uint32_t xs_abtc_2_lshift; + __uint32_t xs_abtc_2_rshift; + __uint32_t xs_abtc_2_split; + __uint32_t xs_abtc_2_join; + __uint32_t xs_abtc_2_alloc; + __uint32_t xs_abtc_2_free; + __uint32_t xs_abtc_2_moves; +#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15) + __uint32_t xs_bmbt_2_lookup; + __uint32_t xs_bmbt_2_compare; + __uint32_t xs_bmbt_2_insrec; + __uint32_t xs_bmbt_2_delrec; + __uint32_t xs_bmbt_2_newroot; + __uint32_t xs_bmbt_2_killroot; + __uint32_t xs_bmbt_2_increment; + __uint32_t xs_bmbt_2_decrement; + __uint32_t xs_bmbt_2_lshift; + __uint32_t xs_bmbt_2_rshift; + __uint32_t xs_bmbt_2_split; + __uint32_t xs_bmbt_2_join; + __uint32_t xs_bmbt_2_alloc; + __uint32_t xs_bmbt_2_free; + __uint32_t xs_bmbt_2_moves; +#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15) + __uint32_t xs_ibt_2_lookup; + __uint32_t xs_ibt_2_compare; + __uint32_t xs_ibt_2_insrec; + __uint32_t xs_ibt_2_delrec; + __uint32_t xs_ibt_2_newroot; + __uint32_t xs_ibt_2_killroot; + __uint32_t xs_ibt_2_increment; + __uint32_t xs_ibt_2_decrement; + __uint32_t xs_ibt_2_lshift; + __uint32_t xs_ibt_2_rshift; + __uint32_t xs_ibt_2_split; + __uint32_t xs_ibt_2_join; + __uint32_t xs_ibt_2_alloc; + __uint32_t xs_ibt_2_free; + __uint32_t xs_ibt_2_moves; /* Extra precision counters */ __uint64_t xs_xstrat_bytes; __uint64_t xs_write_bytes; diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 37ebe36056e..95a97108036 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -18,7 +18,6 @@ #include "xfs.h" #include "xfs_bit.h" #include "xfs_log.h" -#include "xfs_clnt.h" #include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" @@ -36,6 +35,7 @@ #include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_btree_trace.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_rtalloc.h" @@ -48,7 +48,6 @@ #include "xfs_buf_item.h" #include "xfs_utils.h" #include "xfs_vnodeops.h" -#include "xfs_vfsops.h" #include "xfs_version.h" #include "xfs_log_priv.h" #include "xfs_trans_priv.h" @@ -58,6 +57,7 @@ #include "xfs_extfree_item.h" #include "xfs_mru_cache.h" #include "xfs_inode_item.h" +#include "xfs_sync.h" #include #include @@ -70,36 +70,9 @@ static struct quotactl_ops xfs_quotactl_operations; static struct super_operations xfs_super_operations; -static kmem_zone_t *xfs_vnode_zone; static kmem_zone_t *xfs_ioend_zone; mempool_t *xfs_ioend_pool; -STATIC struct xfs_mount_args * -xfs_args_allocate( - struct super_block *sb, - int silent) -{ - struct xfs_mount_args *args; - - args = kzalloc(sizeof(struct xfs_mount_args), GFP_KERNEL); - if (!args) - return NULL; - - args->logbufs = args->logbufsize = -1; - strncpy(args->fsname, sb->s_id, MAXNAMELEN); - - /* Copy the already-parsed mount(2) flags we're interested in */ - if (sb->s_flags & MS_DIRSYNC) - args->flags |= XFSMNT_DIRSYNC; - if (sb->s_flags & MS_SYNCHRONOUS) - args->flags |= XFSMNT_WSYNC; - if (silent) - args->flags |= XFSMNT_QUIET; - args->flags |= XFSMNT_32BITINODES; - - return args; -} - #define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */ #define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */ #define MNTOPT_LOGDEV "logdev" /* log device */ @@ -188,26 +161,54 @@ suffix_strtoul(char *s, char **endp, unsigned int base) return simple_strtoul((const char *)s, endp, base) << shift_left_factor; } +/* + * This function fills in xfs_mount_t fields based on mount args. + * Note: the superblock has _not_ yet been read in. + * + * Note that this function leaks the various device name allocations on + * failure. The caller takes care of them. + */ STATIC int xfs_parseargs( struct xfs_mount *mp, char *options, - struct xfs_mount_args *args, - int update) + char **mtpt) { + struct super_block *sb = mp->m_super; char *this_char, *value, *eov; - int dsunit, dswidth, vol_dsunit, vol_dswidth; - int iosize; + int dsunit = 0; + int dswidth = 0; + int iosize = 0; int dmapi_implies_ikeep = 1; + uchar_t iosizelog = 0; + + /* + * Copy binary VFS mount flags we are interested in. + */ + if (sb->s_flags & MS_RDONLY) + mp->m_flags |= XFS_MOUNT_RDONLY; + if (sb->s_flags & MS_DIRSYNC) + mp->m_flags |= XFS_MOUNT_DIRSYNC; + if (sb->s_flags & MS_SYNCHRONOUS) + mp->m_flags |= XFS_MOUNT_WSYNC; + + /* + * Set some default flags that could be cleared by the mount option + * parsing. + */ + mp->m_flags |= XFS_MOUNT_BARRIER; + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; + mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - args->flags |= XFSMNT_BARRIER; - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + /* + * These can be overridden by the mount option parsing. + */ + mp->m_logbufs = -1; + mp->m_logbsize = -1; if (!options) goto done; - iosize = dsunit = dswidth = vol_dsunit = vol_dswidth = 0; - while ((this_char = strsep(&options, ",")) != NULL) { if (!*this_char) continue; @@ -221,7 +222,7 @@ xfs_parseargs( this_char); return EINVAL; } - args->logbufs = simple_strtoul(value, &eov, 10); + mp->m_logbufs = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -229,7 +230,7 @@ xfs_parseargs( this_char); return EINVAL; } - args->logbufsize = suffix_strtoul(value, &eov, 10); + mp->m_logbsize = suffix_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_LOGDEV)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -237,7 +238,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->logname, value, MAXNAMELEN); + mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_logname) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_MTPT)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -245,7 +248,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->mtpt, value, MAXNAMELEN); + *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!*mtpt) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_RTDEV)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -253,7 +258,9 @@ xfs_parseargs( this_char); return EINVAL; } - strncpy(args->rtname, value, MAXNAMELEN); + mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_rtname) + return ENOMEM; } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -262,8 +269,7 @@ xfs_parseargs( return EINVAL; } iosize = simple_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = (uint8_t) iosize; + iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -272,8 +278,7 @@ xfs_parseargs( return EINVAL; } iosize = suffix_strtoul(value, &eov, 10); - args->flags |= XFSMNT_IOSIZE; - args->iosizelog = ffs(iosize) - 1; + iosizelog = ffs(iosize) - 1; } else if (!strcmp(this_char, MNTOPT_GRPID) || !strcmp(this_char, MNTOPT_BSDGROUPS)) { mp->m_flags |= XFS_MOUNT_GRPID; @@ -281,23 +286,25 @@ xfs_parseargs( !strcmp(this_char, MNTOPT_SYSVGROUPS)) { mp->m_flags &= ~XFS_MOUNT_GRPID; } else if (!strcmp(this_char, MNTOPT_WSYNC)) { - args->flags |= XFSMNT_WSYNC; + mp->m_flags |= XFS_MOUNT_WSYNC; } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) { - args->flags |= XFSMNT_OSYNCISOSYNC; + mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) { - args->flags |= XFSMNT_NORECOVERY; + mp->m_flags |= XFS_MOUNT_NORECOVERY; } else if (!strcmp(this_char, MNTOPT_INO64)) { - args->flags |= XFSMNT_INO64; -#if !XFS_BIG_INUMS +#if XFS_BIG_INUMS + mp->m_flags |= XFS_MOUNT_INO64; + mp->m_inoadd = XFS_INO64_OFFSET; +#else cmn_err(CE_WARN, "XFS: %s option not allowed on this system", this_char); return EINVAL; #endif } else if (!strcmp(this_char, MNTOPT_NOALIGN)) { - args->flags |= XFSMNT_NOALIGN; + mp->m_flags |= XFS_MOUNT_NOALIGN; } else if (!strcmp(this_char, MNTOPT_SWALLOC)) { - args->flags |= XFSMNT_SWALLOC; + mp->m_flags |= XFS_MOUNT_SWALLOC; } else if (!strcmp(this_char, MNTOPT_SUNIT)) { if (!value || !*value) { cmn_err(CE_WARN, @@ -315,7 +322,7 @@ xfs_parseargs( } dswidth = simple_strtoul(value, &eov, 10); } else if (!strcmp(this_char, MNTOPT_64BITINODE)) { - args->flags &= ~XFSMNT_32BITINODES; + mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS; #if !XFS_BIG_INUMS cmn_err(CE_WARN, "XFS: %s option not allowed on this system", @@ -323,56 +330,61 @@ xfs_parseargs( return EINVAL; #endif } else if (!strcmp(this_char, MNTOPT_NOUUID)) { - args->flags |= XFSMNT_NOUUID; + mp->m_flags |= XFS_MOUNT_NOUUID; } else if (!strcmp(this_char, MNTOPT_BARRIER)) { - args->flags |= XFSMNT_BARRIER; + mp->m_flags |= XFS_MOUNT_BARRIER; } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) { - args->flags &= ~XFSMNT_BARRIER; + mp->m_flags &= ~XFS_MOUNT_BARRIER; } else if (!strcmp(this_char, MNTOPT_IKEEP)) { - args->flags |= XFSMNT_IKEEP; + mp->m_flags |= XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) { dmapi_implies_ikeep = 0; - args->flags &= ~XFSMNT_IKEEP; + mp->m_flags &= ~XFS_MOUNT_IKEEP; } else if (!strcmp(this_char, MNTOPT_LARGEIO)) { - args->flags2 &= ~XFSMNT2_COMPAT_IOSIZE; + mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE; } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) { - args->flags2 |= XFSMNT2_COMPAT_IOSIZE; + mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; } else if (!strcmp(this_char, MNTOPT_ATTR2)) { - args->flags |= XFSMNT_ATTR2; + mp->m_flags |= XFS_MOUNT_ATTR2; } else if (!strcmp(this_char, MNTOPT_NOATTR2)) { - args->flags &= ~XFSMNT_ATTR2; - args->flags |= XFSMNT_NOATTR2; + mp->m_flags &= ~XFS_MOUNT_ATTR2; + mp->m_flags |= XFS_MOUNT_NOATTR2; } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) { - args->flags2 |= XFSMNT2_FILESTREAMS; + mp->m_flags |= XFS_MOUNT_FILESTREAMS; } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) { - args->flags &= ~(XFSMNT_UQUOTAENF|XFSMNT_UQUOTA); - args->flags &= ~(XFSMNT_GQUOTAENF|XFSMNT_GQUOTA); + mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTA) || !strcmp(this_char, MNTOPT_UQUOTA) || !strcmp(this_char, MNTOPT_USRQUOTA)) { - args->flags |= XFSMNT_UQUOTA | XFSMNT_UQUOTAENF; + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE | + XFS_UQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) || !strcmp(this_char, MNTOPT_UQUOTANOENF)) { - args->flags |= XFSMNT_UQUOTA; - args->flags &= ~XFSMNT_UQUOTAENF; + mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_UQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_PQUOTA) || !strcmp(this_char, MNTOPT_PRJQUOTA)) { - args->flags |= XFSMNT_PQUOTA | XFSMNT_PQUOTAENF; + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) { - args->flags |= XFSMNT_PQUOTA; - args->flags &= ~XFSMNT_PQUOTAENF; + mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_GQUOTA) || !strcmp(this_char, MNTOPT_GRPQUOTA)) { - args->flags |= XFSMNT_GQUOTA | XFSMNT_GQUOTAENF; + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE | + XFS_OQUOTA_ENFD); } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) { - args->flags |= XFSMNT_GQUOTA; - args->flags &= ~XFSMNT_GQUOTAENF; + mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); + mp->m_qflags &= ~XFS_OQUOTA_ENFD; } else if (!strcmp(this_char, MNTOPT_DMAPI)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_XDSM)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, MNTOPT_DMI)) { - args->flags |= XFSMNT_DMAPI; + mp->m_flags |= XFS_MOUNT_DMAPI; } else if (!strcmp(this_char, "ihashsize")) { cmn_err(CE_WARN, "XFS: ihashsize no longer used, option is deprecated."); @@ -390,27 +402,29 @@ xfs_parseargs( } } - if (args->flags & XFSMNT_NORECOVERY) { - if ((mp->m_flags & XFS_MOUNT_RDONLY) == 0) { - cmn_err(CE_WARN, - "XFS: no-recovery mounts must be read-only."); - return EINVAL; - } + /* + * no recovery flag requires a read-only mount + */ + if ((mp->m_flags & XFS_MOUNT_NORECOVERY) && + !(mp->m_flags & XFS_MOUNT_RDONLY)) { + cmn_err(CE_WARN, "XFS: no-recovery mounts must be read-only."); + return EINVAL; } - if ((args->flags & XFSMNT_NOALIGN) && (dsunit || dswidth)) { + if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) { cmn_err(CE_WARN, "XFS: sunit and swidth options incompatible with the noalign option"); return EINVAL; } - if ((args->flags & XFSMNT_GQUOTA) && (args->flags & XFSMNT_PQUOTA)) { + if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) && + (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) { cmn_err(CE_WARN, "XFS: cannot mount with both project and group quota"); return EINVAL; } - if ((args->flags & XFSMNT_DMAPI) && *args->mtpt == '\0') { + if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) { printk("XFS: %s option needs the mount point option as well\n", MNTOPT_DMAPI); return EINVAL; @@ -438,27 +452,66 @@ xfs_parseargs( * Note that if "ikeep" or "noikeep" mount options are * supplied, then they are honored. */ - if ((args->flags & XFSMNT_DMAPI) && dmapi_implies_ikeep) - args->flags |= XFSMNT_IKEEP; + if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep) + mp->m_flags |= XFS_MOUNT_IKEEP; - if ((args->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { +done: + if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) { + /* + * At this point the superblock has not been read + * in, therefore we do not know the block size. + * Before the mount call ends we will convert + * these to FSBs. + */ if (dsunit) { - args->sunit = dsunit; - args->flags |= XFSMNT_RETERR; - } else { - args->sunit = vol_dsunit; + mp->m_dalign = dsunit; + mp->m_flags |= XFS_MOUNT_RETERR; } - dswidth ? (args->swidth = dswidth) : - (args->swidth = vol_dswidth); - } else { - args->sunit = args->swidth = 0; + + if (dswidth) + mp->m_swidth = dswidth; + } + + if (mp->m_logbufs != -1 && + mp->m_logbufs != 0 && + (mp->m_logbufs < XLOG_MIN_ICLOGS || + mp->m_logbufs > XLOG_MAX_ICLOGS)) { + cmn_err(CE_WARN, + "XFS: invalid logbufs value: %d [not %d-%d]", + mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); + return XFS_ERROR(EINVAL); + } + if (mp->m_logbsize != -1 && + mp->m_logbsize != 0 && + (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE || + mp->m_logbsize > XLOG_MAX_RECORD_BSIZE || + !is_power_of_2(mp->m_logbsize))) { + cmn_err(CE_WARN, + "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", + mp->m_logbsize); + return XFS_ERROR(EINVAL); + } + + mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL); + if (!mp->m_fsname) + return ENOMEM; + mp->m_fsname_len = strlen(mp->m_fsname) + 1; + + if (iosizelog) { + if (iosizelog > XFS_MAX_IO_LOG || + iosizelog < XFS_MIN_IO_LOG) { + cmn_err(CE_WARN, + "XFS: invalid log iosize: %d [not %d-%d]", + iosizelog, XFS_MIN_IO_LOG, + XFS_MAX_IO_LOG); + return XFS_ERROR(EINVAL); + } + + mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; + mp->m_readio_log = iosizelog; + mp->m_writeio_log = iosizelog; } -done: - if (args->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_SMALL_INUMS; - if (args->flags2) - args->flags |= XFSMNT_FLAGS2; return 0; } @@ -704,8 +757,7 @@ xfs_close_devices( */ STATIC int xfs_open_devices( - struct xfs_mount *mp, - struct xfs_mount_args *args) + struct xfs_mount *mp) { struct block_device *ddev = mp->m_super->s_bdev; struct block_device *logdev = NULL, *rtdev = NULL; @@ -714,14 +766,14 @@ xfs_open_devices( /* * Open real time and log devices - order is important. */ - if (args->logname[0]) { - error = xfs_blkdev_get(mp, args->logname, &logdev); + if (mp->m_logname) { + error = xfs_blkdev_get(mp, mp->m_logname, &logdev); if (error) goto out; } - if (args->rtname[0]) { - error = xfs_blkdev_get(mp, args->rtname, &rtdev); + if (mp->m_rtname) { + error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev); if (error) goto out_close_logdev; @@ -813,18 +865,18 @@ xfs_setup_devices( */ void xfsaild_wakeup( - xfs_mount_t *mp, + struct xfs_ail *ailp, xfs_lsn_t threshold_lsn) { - mp->m_ail.xa_target = threshold_lsn; - wake_up_process(mp->m_ail.xa_task); + ailp->xa_target = threshold_lsn; + wake_up_process(ailp->xa_task); } int xfsaild( void *data) { - xfs_mount_t *mp = (xfs_mount_t *)data; + struct xfs_ail *ailp = data; xfs_lsn_t last_pushed_lsn = 0; long tout = 0; @@ -836,11 +888,11 @@ xfsaild( /* swsusp */ try_to_freeze(); - ASSERT(mp->m_log); - if (XFS_FORCED_SHUTDOWN(mp)) + ASSERT(ailp->xa_mount->m_log); + if (XFS_FORCED_SHUTDOWN(ailp->xa_mount)) continue; - tout = xfsaild_push(mp, &last_pushed_lsn); + tout = xfsaild_push(ailp, &last_pushed_lsn); } return 0; @@ -848,43 +900,82 @@ xfsaild( int xfsaild_start( - xfs_mount_t *mp) + struct xfs_ail *ailp) { - mp->m_ail.xa_target = 0; - mp->m_ail.xa_task = kthread_run(xfsaild, mp, "xfsaild"); - if (IS_ERR(mp->m_ail.xa_task)) - return -PTR_ERR(mp->m_ail.xa_task); + ailp->xa_target = 0; + ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild"); + if (IS_ERR(ailp->xa_task)) + return -PTR_ERR(ailp->xa_task); return 0; } void xfsaild_stop( - xfs_mount_t *mp) + struct xfs_ail *ailp) { - kthread_stop(mp->m_ail.xa_task); + kthread_stop(ailp->xa_task); } - +/* Catch misguided souls that try to use this interface on XFS */ STATIC struct inode * xfs_fs_alloc_inode( struct super_block *sb) { - return kmem_zone_alloc(xfs_vnode_zone, KM_SLEEP); + BUG(); + return NULL; } +/* + * Now that the generic code is guaranteed not to be accessing + * the linux inode, we can reclaim the inode. + */ STATIC void xfs_fs_destroy_inode( - struct inode *inode) + struct inode *inode) { - kmem_zone_free(xfs_vnode_zone, inode); + xfs_inode_t *ip = XFS_I(inode); + + XFS_STATS_INC(vn_reclaim); + if (xfs_reclaim(ip)) + panic("%s: cannot reclaim 0x%p\n", __func__, inode); } +/* + * Slab object creation initialisation for the XFS inode. + * This covers only the idempotent fields in the XFS inode; + * all other fields need to be initialised on allocation + * from the slab. This avoids the need to repeatedly intialise + * fields in the xfs inode that left in the initialise state + * when freeing the inode. + */ STATIC void xfs_fs_inode_init_once( - void *vnode) + void *inode) { - inode_init_once((struct inode *)vnode); + struct xfs_inode *ip = inode; + + memset(ip, 0, sizeof(struct xfs_inode)); + + /* vfs inode */ + inode_init_once(VFS_I(ip)); + + /* xfs inode */ + atomic_set(&ip->i_iocount, 0); + atomic_set(&ip->i_pincount, 0); + spin_lock_init(&ip->i_flags_lock); + init_waitqueue_head(&ip->i_ipin_wait); + /* + * Because we want to use a counting completion, complete + * the flush completion once to allow a single access to + * the flush completion without blocking. + */ + init_completion(&ip->i_flush); + complete(&ip->i_flush); + + mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER, + "xfsino", ip->i_ino); + mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino); } /* @@ -898,21 +989,26 @@ xfs_fs_write_inode( struct inode *inode, int sync) { + struct xfs_inode *ip = XFS_I(inode); int error = 0; int flags = 0; - xfs_itrace_entry(XFS_I(inode)); + xfs_itrace_entry(ip); if (sync) { - filemap_fdatawait(inode->i_mapping); + error = xfs_wait_on_pages(ip, 0, -1); + if (error) + goto out_error; flags |= FLUSH_SYNC; } - error = xfs_inode_flush(XFS_I(inode), flags); + error = xfs_inode_flush(ip, flags); + +out_error: /* * if we failed to write out the inode then mark * it dirty again so we'll try again later. */ if (error) - mark_inode_dirty_sync(inode); + xfs_mark_inode_dirty_sync(ip); return -error; } @@ -923,164 +1019,12 @@ xfs_fs_clear_inode( { xfs_inode_t *ip = XFS_I(inode); - /* - * ip can be null when xfs_iget_core calls xfs_idestroy if we - * find an inode with di_mode == 0 but without IGET_CREATE set. - */ - if (ip) { - xfs_itrace_entry(ip); - XFS_STATS_INC(vn_rele); - XFS_STATS_INC(vn_remove); - XFS_STATS_INC(vn_reclaim); - XFS_STATS_DEC(vn_active); - - xfs_inactive(ip); - xfs_iflags_clear(ip, XFS_IMODIFIED); - if (xfs_reclaim(ip)) - panic("%s: cannot reclaim 0x%p\n", __func__, inode); - } - - ASSERT(XFS_I(inode) == NULL); -} - -/* - * Enqueue a work item to be picked up by the vfs xfssyncd thread. - * Doing this has two advantages: - * - It saves on stack space, which is tight in certain situations - * - It can be used (with care) as a mechanism to avoid deadlocks. - * Flushing while allocating in a full filesystem requires both. - */ -STATIC void -xfs_syncd_queue_work( - struct xfs_mount *mp, - void *data, - void (*syncer)(struct xfs_mount *, void *)) -{ - struct bhv_vfs_sync_work *work; - - work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); - INIT_LIST_HEAD(&work->w_list); - work->w_syncer = syncer; - work->w_data = data; - work->w_mount = mp; - spin_lock(&mp->m_sync_lock); - list_add_tail(&work->w_list, &mp->m_sync_list); - spin_unlock(&mp->m_sync_lock); - wake_up_process(mp->m_sync_task); -} - -/* - * Flush delayed allocate data, attempting to free up reserved space - * from existing allocations. At this point a new allocation attempt - * has failed with ENOSPC and we are in the process of scratching our - * heads, looking about for more room... - */ -STATIC void -xfs_flush_inode_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - filemap_flush(inode->i_mapping); - iput(inode); -} - -void -xfs_flush_inode( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); - - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); - delay(msecs_to_jiffies(500)); -} - -/* - * This is the "bigger hammer" version of xfs_flush_inode_work... - * (IOW, "If at first you don't succeed, use a Bigger Hammer"). - */ -STATIC void -xfs_flush_device_work( - struct xfs_mount *mp, - void *arg) -{ - struct inode *inode = arg; - sync_blockdev(mp->m_super->s_bdev); - iput(inode); -} - -void -xfs_flush_device( - xfs_inode_t *ip) -{ - struct inode *inode = VFS_I(ip); + xfs_itrace_entry(ip); + XFS_STATS_INC(vn_rele); + XFS_STATS_INC(vn_remove); + XFS_STATS_DEC(vn_active); - igrab(inode); - xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); - delay(msecs_to_jiffies(500)); - xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); -} - -STATIC void -xfs_sync_worker( - struct xfs_mount *mp, - void *unused) -{ - int error; - - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) - error = xfs_sync(mp, SYNC_FSDATA | SYNC_BDFLUSH | SYNC_ATTR); - mp->m_sync_seq++; - wake_up(&mp->m_wait_single_sync_task); -} - -STATIC int -xfssyncd( - void *arg) -{ - struct xfs_mount *mp = arg; - long timeleft; - bhv_vfs_sync_work_t *work, *n; - LIST_HEAD (tmp); - - set_freezable(); - timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); - for (;;) { - timeleft = schedule_timeout_interruptible(timeleft); - /* swsusp */ - try_to_freeze(); - if (kthread_should_stop() && list_empty(&mp->m_sync_list)) - break; - - spin_lock(&mp->m_sync_lock); - /* - * We can get woken by laptop mode, to do a sync - - * that's the (only!) case where the list would be - * empty with time remaining. - */ - if (!timeleft || list_empty(&mp->m_sync_list)) { - if (!timeleft) - timeleft = xfs_syncd_centisecs * - msecs_to_jiffies(10); - INIT_LIST_HEAD(&mp->m_sync_work.w_list); - list_add_tail(&mp->m_sync_work.w_list, - &mp->m_sync_list); - } - list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) - list_move(&work->w_list, &tmp); - spin_unlock(&mp->m_sync_lock); - - list_for_each_entry_safe(work, n, &tmp, w_list) { - (*work->w_syncer)(mp, work->w_data); - list_del(&work->w_list); - if (work == &mp->m_sync_work) - continue; - kmem_free(work); - } - } - - return 0; + xfs_inactive(ip); } STATIC void @@ -1099,11 +1043,9 @@ xfs_fs_put_super( struct xfs_mount *mp = XFS_M(sb); struct xfs_inode *rip = mp->m_rootip; int unmount_event_flags = 0; - int error; - - kthread_stop(mp->m_sync_task); - xfs_sync(mp, SYNC_ATTR | SYNC_DELWRI); + xfs_syncd_stop(mp); + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_DELWRI); #ifdef HAVE_DMAPI if (mp->m_flags & XFS_MOUNT_DMAPI) { @@ -1128,18 +1070,6 @@ xfs_fs_put_super( xfs_filestream_unmount(mp); XFS_bflush(mp->m_ddev_targp); - error = xfs_unmount_flush(mp, 0); - WARN_ON(error); - - /* - * If we're forcing a shutdown, typically because of a media error, - * we want to make sure we invalidate dirty pages that belong to - * referenced vnodes as well. - */ - if (XFS_FORCED_SHUTDOWN(mp)) { - error = xfs_sync(mp, SYNC_WAIT | SYNC_CLOSE); - ASSERT(error != EFSCORRUPTED); - } if (mp->m_flags & XFS_MOUNT_DMAPI) { XFS_SEND_UNMOUNT(mp, rip, DM_RIGHT_NULL, 0, 0, @@ -1161,7 +1091,7 @@ xfs_fs_write_super( struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) - xfs_sync(XFS_M(sb), SYNC_FSDATA); + xfs_sync_fsdata(XFS_M(sb), 0); sb->s_dirt = 0; } @@ -1172,7 +1102,6 @@ xfs_fs_sync_super( { struct xfs_mount *mp = XFS_M(sb); int error; - int flags; /* * Treat a sync operation like a freeze. This is to work @@ -1186,20 +1115,10 @@ xfs_fs_sync_super( * dirty the Linux inode until after the transaction I/O * completes. */ - if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) { - /* - * First stage of freeze - no more writers will make progress - * now we are here, so we flush delwri and delalloc buffers - * here, then wait for all I/O to complete. Data is frozen at - * that point. Metadata is not frozen, transactions can still - * occur here so don't bother flushing the buftarg (i.e - * SYNC_QUIESCE) because it'll just get dirty again. - */ - flags = SYNC_DATA_QUIESCE; - } else - flags = SYNC_FSDATA; - - error = xfs_sync(mp, flags); + if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) + error = xfs_quiesce_data(mp); + else + error = xfs_sync_fsdata(mp, 0); sb->s_dirt = 0; if (unlikely(laptop_mode)) { @@ -1337,9 +1256,8 @@ xfs_fs_remount( /* rw -> ro */ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { - xfs_filestream_flush(mp); - xfs_sync(mp, SYNC_DATA_QUIESCE); - xfs_attr_quiesce(mp); + xfs_quiesce_data(mp); + xfs_quiesce_attr(mp); mp->m_flags |= XFS_MOUNT_RDONLY; } @@ -1348,17 +1266,17 @@ xfs_fs_remount( /* * Second stage of a freeze. The data is already frozen so we only - * need to take care of themetadata. Once that's done write a dummy + * need to take care of the metadata. Once that's done write a dummy * record to dirty the log in case of a crash while frozen. */ -STATIC void -xfs_fs_lockfs( +STATIC int +xfs_fs_freeze( struct super_block *sb) { struct xfs_mount *mp = XFS_M(sb); - xfs_attr_quiesce(mp); - xfs_fs_log_dummy(mp); + xfs_quiesce_attr(mp); + return -xfs_fs_log_dummy(mp); } STATIC int @@ -1420,177 +1338,30 @@ xfs_fs_setxquota( Q_XSETPQLIM), id, (caddr_t)fdq); } -/* - * This function fills in xfs_mount_t fields based on mount args. - * Note: the superblock has _not_ yet been read in. - */ -STATIC int -xfs_start_flags( - struct xfs_mount_args *ap, - struct xfs_mount *mp) -{ - int error; - - /* Values are in BBs */ - if ((ap->flags & XFSMNT_NOALIGN) != XFSMNT_NOALIGN) { - /* - * At this point the superblock has not been read - * in, therefore we do not know the block size. - * Before the mount call ends we will convert - * these to FSBs. - */ - mp->m_dalign = ap->sunit; - mp->m_swidth = ap->swidth; - } - - if (ap->logbufs != -1 && - ap->logbufs != 0 && - (ap->logbufs < XLOG_MIN_ICLOGS || - ap->logbufs > XLOG_MAX_ICLOGS)) { - cmn_err(CE_WARN, - "XFS: invalid logbufs value: %d [not %d-%d]", - ap->logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS); - return XFS_ERROR(EINVAL); - } - mp->m_logbufs = ap->logbufs; - if (ap->logbufsize != -1 && - ap->logbufsize != 0 && - (ap->logbufsize < XLOG_MIN_RECORD_BSIZE || - ap->logbufsize > XLOG_MAX_RECORD_BSIZE || - !is_power_of_2(ap->logbufsize))) { - cmn_err(CE_WARN, - "XFS: invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]", - ap->logbufsize); - return XFS_ERROR(EINVAL); - } - - error = ENOMEM; - - mp->m_logbsize = ap->logbufsize; - mp->m_fsname_len = strlen(ap->fsname) + 1; - - mp->m_fsname = kstrdup(ap->fsname, GFP_KERNEL); - if (!mp->m_fsname) - goto out; - - if (ap->rtname[0]) { - mp->m_rtname = kstrdup(ap->rtname, GFP_KERNEL); - if (!mp->m_rtname) - goto out_free_fsname; - - } - - if (ap->logname[0]) { - mp->m_logname = kstrdup(ap->logname, GFP_KERNEL); - if (!mp->m_logname) - goto out_free_rtname; - } - - if (ap->flags & XFSMNT_WSYNC) - mp->m_flags |= XFS_MOUNT_WSYNC; -#if XFS_BIG_INUMS - if (ap->flags & XFSMNT_INO64) { - mp->m_flags |= XFS_MOUNT_INO64; - mp->m_inoadd = XFS_INO64_OFFSET; - } -#endif - if (ap->flags & XFSMNT_RETERR) - mp->m_flags |= XFS_MOUNT_RETERR; - if (ap->flags & XFSMNT_NOALIGN) - mp->m_flags |= XFS_MOUNT_NOALIGN; - if (ap->flags & XFSMNT_SWALLOC) - mp->m_flags |= XFS_MOUNT_SWALLOC; - if (ap->flags & XFSMNT_OSYNCISOSYNC) - mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC; - if (ap->flags & XFSMNT_32BITINODES) - mp->m_flags |= XFS_MOUNT_32BITINODES; - - if (ap->flags & XFSMNT_IOSIZE) { - if (ap->iosizelog > XFS_MAX_IO_LOG || - ap->iosizelog < XFS_MIN_IO_LOG) { - cmn_err(CE_WARN, - "XFS: invalid log iosize: %d [not %d-%d]", - ap->iosizelog, XFS_MIN_IO_LOG, - XFS_MAX_IO_LOG); - return XFS_ERROR(EINVAL); - } - - mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE; - mp->m_readio_log = mp->m_writeio_log = ap->iosizelog; - } - - if (ap->flags & XFSMNT_IKEEP) - mp->m_flags |= XFS_MOUNT_IKEEP; - if (ap->flags & XFSMNT_DIRSYNC) - mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (ap->flags & XFSMNT_ATTR2) - mp->m_flags |= XFS_MOUNT_ATTR2; - if (ap->flags & XFSMNT_NOATTR2) - mp->m_flags |= XFS_MOUNT_NOATTR2; - - if (ap->flags2 & XFSMNT2_COMPAT_IOSIZE) - mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE; - - /* - * no recovery flag requires a read-only mount - */ - if (ap->flags & XFSMNT_NORECOVERY) { - if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { - cmn_err(CE_WARN, - "XFS: tried to mount a FS read-write without recovery!"); - return XFS_ERROR(EINVAL); - } - mp->m_flags |= XFS_MOUNT_NORECOVERY; - } - - if (ap->flags & XFSMNT_NOUUID) - mp->m_flags |= XFS_MOUNT_NOUUID; - if (ap->flags & XFSMNT_BARRIER) - mp->m_flags |= XFS_MOUNT_BARRIER; - else - mp->m_flags &= ~XFS_MOUNT_BARRIER; - - if (ap->flags2 & XFSMNT2_FILESTREAMS) - mp->m_flags |= XFS_MOUNT_FILESTREAMS; - - if (ap->flags & XFSMNT_DMAPI) - mp->m_flags |= XFS_MOUNT_DMAPI; - return 0; - - - out_free_rtname: - kfree(mp->m_rtname); - out_free_fsname: - kfree(mp->m_fsname); - out: - return error; -} - /* * This function fills in xfs_mount_t fields based on mount args. * Note: the superblock _has_ now been read in. */ STATIC int xfs_finish_flags( - struct xfs_mount_args *ap, struct xfs_mount *mp) { int ronly = (mp->m_flags & XFS_MOUNT_RDONLY); - /* Fail a mount where the logbuf is smaller then the log stripe */ + /* Fail a mount where the logbuf is smaller than the log stripe */ if (xfs_sb_version_haslogv2(&mp->m_sb)) { - if ((ap->logbufsize <= 0) && - (mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE)) { + if (mp->m_logbsize <= 0 && + mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) { mp->m_logbsize = mp->m_sb.sb_logsunit; - } else if (ap->logbufsize > 0 && - ap->logbufsize < mp->m_sb.sb_logsunit) { + } else if (mp->m_logbsize > 0 && + mp->m_logbsize < mp->m_sb.sb_logsunit) { cmn_err(CE_WARN, "XFS: logbuf size must be greater than or equal to log stripe size"); return XFS_ERROR(EINVAL); } } else { /* Fail a mount if the logbuf is larger than 32K */ - if (ap->logbufsize > XLOG_BIG_RECORD_BSIZE) { + if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) { cmn_err(CE_WARN, "XFS: logbuf size for version 1 logs must be 16K or 32K"); return XFS_ERROR(EINVAL); @@ -1602,7 +1373,7 @@ xfs_finish_flags( * told by noattr2 to turn it off */ if (xfs_sb_version_hasattr2(&mp->m_sb) && - !(ap->flags & XFSMNT_NOATTR2)) + !(mp->m_flags & XFS_MOUNT_NOATTR2)) mp->m_flags |= XFS_MOUNT_ATTR2; /* @@ -1614,48 +1385,6 @@ xfs_finish_flags( return XFS_ERROR(EROFS); } - /* - * check for shared mount. - */ - if (ap->flags & XFSMNT_SHARED) { - if (!xfs_sb_version_hasshared(&mp->m_sb)) - return XFS_ERROR(EINVAL); - - /* - * For IRIX 6.5, shared mounts must have the shared - * version bit set, have the persistent readonly - * field set, must be version 0 and can only be mounted - * read-only. - */ - if (!ronly || !(mp->m_sb.sb_flags & XFS_SBF_READONLY) || - (mp->m_sb.sb_shared_vn != 0)) - return XFS_ERROR(EINVAL); - - mp->m_flags |= XFS_MOUNT_SHARED; - - /* - * Shared XFS V0 can't deal with DMI. Return EINVAL. - */ - if (mp->m_sb.sb_shared_vn == 0 && (ap->flags & XFSMNT_DMAPI)) - return XFS_ERROR(EINVAL); - } - - if (ap->flags & XFSMNT_UQUOTA) { - mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE); - if (ap->flags & XFSMNT_UQUOTAENF) - mp->m_qflags |= XFS_UQUOTA_ENFD; - } - - if (ap->flags & XFSMNT_GQUOTA) { - mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE); - if (ap->flags & XFSMNT_GQUOTAENF) - mp->m_qflags |= XFS_OQUOTA_ENFD; - } else if (ap->flags & XFSMNT_PQUOTA) { - mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE); - if (ap->flags & XFSMNT_PQUOTAENF) - mp->m_qflags |= XFS_OQUOTA_ENFD; - } - return 0; } @@ -1667,19 +1396,14 @@ xfs_fs_fill_super( { struct inode *root; struct xfs_mount *mp = NULL; - struct xfs_mount_args *args; int flags = 0, error = ENOMEM; - - args = xfs_args_allocate(sb, silent); - if (!args) - return -ENOMEM; + char *mtpt = NULL; mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL); if (!mp) - goto out_free_args; + goto out; spin_lock_init(&mp->m_sb_lock); - mutex_init(&mp->m_ilock); mutex_init(&mp->m_growlock); atomic_set(&mp->m_active_trans, 0); INIT_LIST_HEAD(&mp->m_sync_list); @@ -1689,12 +1413,9 @@ xfs_fs_fill_super( mp->m_super = sb; sb->s_fs_info = mp; - if (sb->s_flags & MS_RDONLY) - mp->m_flags |= XFS_MOUNT_RDONLY; - - error = xfs_parseargs(mp, (char *)data, args, 0); + error = xfs_parseargs(mp, (char *)data, &mtpt); if (error) - goto out_free_mp; + goto out_free_fsname; sb_min_blocksize(sb, BBSIZE); sb->s_xattr = xfs_xattr_handlers; @@ -1702,33 +1423,28 @@ xfs_fs_fill_super( sb->s_qcop = &xfs_quotactl_operations; sb->s_op = &xfs_super_operations; - error = xfs_dmops_get(mp, args); + error = xfs_dmops_get(mp); if (error) - goto out_free_mp; - error = xfs_qmops_get(mp, args); + goto out_free_fsname; + error = xfs_qmops_get(mp); if (error) goto out_put_dmops; - if (args->flags & XFSMNT_QUIET) + if (silent) flags |= XFS_MFSI_QUIET; - error = xfs_open_devices(mp, args); + error = xfs_open_devices(mp); if (error) goto out_put_qmops; if (xfs_icsb_init_counters(mp)) mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; - /* - * Setup flags based on mount(2) options and then the superblock - */ - error = xfs_start_flags(args, mp); - if (error) - goto out_free_fsname; error = xfs_readsb(mp, flags); if (error) - goto out_free_fsname; - error = xfs_finish_flags(args, mp); + goto out_destroy_counters; + + error = xfs_finish_flags(mp); if (error) goto out_free_sb; @@ -1747,7 +1463,7 @@ xfs_fs_fill_super( if (error) goto out_filestream_unmount; - XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, args->mtpt, args->fsname); + XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname); sb->s_dirt = 1; sb->s_magic = XFS_SB_MAGIC; @@ -1772,35 +1488,31 @@ xfs_fs_fill_super( goto fail_vnrele; } - mp->m_sync_work.w_syncer = xfs_sync_worker; - mp->m_sync_work.w_mount = mp; - mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); - if (IS_ERR(mp->m_sync_task)) { - error = -PTR_ERR(mp->m_sync_task); + error = xfs_syncd_init(mp); + if (error) goto fail_vnrele; - } - xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); + kfree(mtpt); - kfree(args); + xfs_itrace_exit(XFS_I(sb->s_root->d_inode)); return 0; out_filestream_unmount: xfs_filestream_unmount(mp); out_free_sb: xfs_freesb(mp); - out_free_fsname: - xfs_free_fsname(mp); + out_destroy_counters: xfs_icsb_destroy_counters(mp); xfs_close_devices(mp); out_put_qmops: xfs_qmops_put(mp); out_put_dmops: xfs_dmops_put(mp); - out_free_mp: + out_free_fsname: + xfs_free_fsname(mp); + kfree(mtpt); kfree(mp); - out_free_args: - kfree(args); + out: return -error; fail_vnrele: @@ -1820,8 +1532,6 @@ xfs_fs_fill_super( xfs_filestream_unmount(mp); XFS_bflush(mp->m_ddev_targp); - error = xfs_unmount_flush(mp, 0); - WARN_ON(error); xfs_unmountfs(mp); goto out_free_sb; @@ -1847,7 +1557,7 @@ static struct super_operations xfs_super_operations = { .put_super = xfs_fs_put_super, .write_super = xfs_fs_write_super, .sync_fs = xfs_fs_sync_super, - .write_super_lockfs = xfs_fs_lockfs, + .freeze_fs = xfs_fs_freeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, .show_options = xfs_fs_show_options, @@ -1882,10 +1592,19 @@ xfs_alloc_trace_bufs(void) if (!xfs_bmap_trace_buf) goto out_free_alloc_trace; #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE + xfs_allocbt_trace_buf = ktrace_alloc(XFS_ALLOCBT_TRACE_SIZE, + KM_MAYFAIL); + if (!xfs_allocbt_trace_buf) + goto out_free_bmap_trace; + + xfs_inobt_trace_buf = ktrace_alloc(XFS_INOBT_TRACE_SIZE, KM_MAYFAIL); + if (!xfs_inobt_trace_buf) + goto out_free_allocbt_trace; + xfs_bmbt_trace_buf = ktrace_alloc(XFS_BMBT_TRACE_SIZE, KM_MAYFAIL); if (!xfs_bmbt_trace_buf) - goto out_free_bmap_trace; + goto out_free_inobt_trace; #endif #ifdef XFS_ATTR_TRACE xfs_attr_trace_buf = ktrace_alloc(XFS_ATTR_TRACE_SIZE, KM_MAYFAIL); @@ -1907,8 +1626,12 @@ xfs_alloc_trace_bufs(void) ktrace_free(xfs_attr_trace_buf); out_free_bmbt_trace: #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ktrace_free(xfs_bmbt_trace_buf); + out_free_inobt_trace: + ktrace_free(xfs_inobt_trace_buf); + out_free_allocbt_trace: + ktrace_free(xfs_allocbt_trace_buf); out_free_bmap_trace: #endif #ifdef XFS_BMAP_TRACE @@ -1931,8 +1654,10 @@ xfs_free_trace_bufs(void) #ifdef XFS_ATTR_TRACE ktrace_free(xfs_attr_trace_buf); #endif -#ifdef XFS_BMBT_TRACE +#ifdef XFS_BTREE_TRACE ktrace_free(xfs_bmbt_trace_buf); + ktrace_free(xfs_inobt_trace_buf); + ktrace_free(xfs_allocbt_trace_buf); #endif #ifdef XFS_BMAP_TRACE ktrace_free(xfs_bmap_trace_buf); @@ -1945,16 +1670,10 @@ xfs_free_trace_bufs(void) STATIC int __init xfs_init_zones(void) { - xfs_vnode_zone = kmem_zone_init_flags(sizeof(struct inode), "xfs_vnode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, - xfs_fs_inode_init_once); - if (!xfs_vnode_zone) - goto out; xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend"); if (!xfs_ioend_zone) - goto out_destroy_vnode_zone; + goto out; xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE, xfs_ioend_zone); @@ -1970,6 +1689,7 @@ xfs_init_zones(void) "xfs_bmap_free_item"); if (!xfs_bmap_free_item_zone) goto out_destroy_log_ticket_zone; + xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t), "xfs_btree_cur"); if (!xfs_btree_cur_zone) @@ -2017,8 +1737,8 @@ xfs_init_zones(void) xfs_inode_zone = kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode", - KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | - KM_ZONE_SPREAD, NULL); + KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD, + xfs_fs_inode_init_once); if (!xfs_inode_zone) goto out_destroy_efi_zone; @@ -2066,8 +1786,6 @@ xfs_init_zones(void) mempool_destroy(xfs_ioend_pool); out_destroy_ioend_zone: kmem_zone_destroy(xfs_ioend_zone); - out_destroy_vnode_zone: - kmem_zone_destroy(xfs_vnode_zone); out: return -ENOMEM; } @@ -2092,7 +1810,6 @@ xfs_destroy_zones(void) kmem_zone_destroy(xfs_log_ticket_zone); mempool_destroy(xfs_ioend_pool); kmem_zone_destroy(xfs_ioend_zone); - kmem_zone_destroy(xfs_vnode_zone); } @@ -2100,13 +1817,12 @@ STATIC int __init init_xfs_fs(void) { int error; - static char message[] __initdata = KERN_INFO \ - XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled\n"; - printk(message); + printk(KERN_INFO XFS_VERSION_STRING " with " + XFS_BUILD_OPTIONS " enabled\n"); ktrace_init(64); - vn_init(); + xfs_ioend_init(); xfs_dir_startup(); error = xfs_init_zones(); diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index fe2ef4e6a0f..d5d776d4cd6 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h @@ -20,24 +20,12 @@ #include -#ifdef CONFIG_XFS_DMAPI -# define vfs_insertdmapi(vfs) vfs_insertops(vfsp, &xfs_dmops) -# define vfs_initdmapi() dmapi_init() -# define vfs_exitdmapi() dmapi_uninit() -#else -# define vfs_insertdmapi(vfs) do { } while (0) -# define vfs_initdmapi() do { } while (0) -# define vfs_exitdmapi() do { } while (0) -#endif - #ifdef CONFIG_XFS_QUOTA -# define vfs_insertquota(vfs) vfs_insertops(vfsp, &xfs_qmops) extern void xfs_qm_init(void); extern void xfs_qm_exit(void); # define vfs_initquota() xfs_qm_init() # define vfs_exitquota() xfs_qm_exit() #else -# define vfs_insertquota(vfs) do { } while (0) # define vfs_initquota() do { } while (0) # define vfs_exitquota() do { } while (0) #endif @@ -101,9 +89,6 @@ struct block_device; extern __uint64_t xfs_max_file_offset(unsigned int); -extern void xfs_flush_inode(struct xfs_inode *); -extern void xfs_flush_device(struct xfs_inode *); - extern void xfs_blkdev_issue_flush(struct xfs_buftarg *); extern const struct export_operations xfs_export_operations; diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c new file mode 100644 index 00000000000..2ed035354c2 --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -0,0 +1,762 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_types.h" +#include "xfs_bit.h" +#include "xfs_log.h" +#include "xfs_inum.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dmapi.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h" +#include "xfs_btree.h" +#include "xfs_dir2_sf.h" +#include "xfs_attr_sf.h" +#include "xfs_inode.h" +#include "xfs_dinode.h" +#include "xfs_error.h" +#include "xfs_mru_cache.h" +#include "xfs_filestream.h" +#include "xfs_vnodeops.h" +#include "xfs_utils.h" +#include "xfs_buf_item.h" +#include "xfs_inode_item.h" +#include "xfs_rw.h" + +#include +#include + +/* + * Sync all the inodes in the given AG according to the + * direction given by the flags. + */ +STATIC int +xfs_sync_inodes_ag( + xfs_mount_t *mp, + int ag, + int flags) +{ + xfs_perag_t *pag = &mp->m_perag[ag]; + int nr_found; + uint32_t first_index = 0; + int error = 0; + int last_error = 0; + int fflag = XFS_B_ASYNC; + + if (flags & SYNC_DELWRI) + fflag = XFS_B_DELWRI; + if (flags & SYNC_WAIT) + fflag = 0; /* synchronous overrides all */ + + do { + struct inode *inode; + xfs_inode_t *ip = NULL; + int lock_flags = XFS_ILOCK_SHARED; + + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, + (void**)&ip, first_index, 1); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* nothing to sync during shutdown */ + if (XFS_FORCED_SHUTDOWN(mp)) { + read_unlock(&pag->pag_ici_lock); + return 0; + } + + /* + * If we can't get a reference on the inode, it must be + * in reclaim. Leave it for the reclaim code to flush. + */ + inode = VFS_I(ip); + if (!igrab(inode)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + read_unlock(&pag->pag_ici_lock); + + /* avoid new or bad inodes */ + if (is_bad_inode(inode) || + xfs_iflags_test(ip, XFS_INEW)) { + IRELE(ip); + continue; + } + + /* + * If we have to flush data or wait for I/O completion + * we need to hold the iolock. + */ + if ((flags & SYNC_DELWRI) && VN_DIRTY(inode)) { + xfs_ilock(ip, XFS_IOLOCK_SHARED); + lock_flags |= XFS_IOLOCK_SHARED; + error = xfs_flush_pages(ip, 0, -1, fflag, FI_NONE); + if (flags & SYNC_IOWAIT) + xfs_ioend_wait(ip); + } + xfs_ilock(ip, XFS_ILOCK_SHARED); + + if ((flags & SYNC_ATTR) && !xfs_inode_clean(ip)) { + if (flags & SYNC_WAIT) { + xfs_iflock(ip); + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_SYNC); + else + xfs_ifunlock(ip); + } else if (xfs_iflock_nowait(ip)) { + if (!xfs_inode_clean(ip)) + error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); + else + xfs_ifunlock(ip); + } + } + xfs_iput(ip, lock_flags); + + if (error) + last_error = error; + /* + * bail out if the filesystem is corrupted. + */ + if (error == EFSCORRUPTED) + return XFS_ERROR(error); + + } while (nr_found); + + return last_error; +} + +int +xfs_sync_inodes( + xfs_mount_t *mp, + int flags) +{ + int error; + int last_error; + int i; + int lflags = XFS_LOG_FORCE; + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return 0; + error = 0; + last_error = 0; + + if (flags & SYNC_WAIT) + lflags |= XFS_LOG_SYNC; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + error = xfs_sync_inodes_ag(mp, i, flags); + if (error) + last_error = error; + if (error == EFSCORRUPTED) + break; + } + if (flags & SYNC_DELWRI) + xfs_log_force(mp, 0, lflags); + + return XFS_ERROR(last_error); +} + +STATIC int +xfs_commit_dummy_trans( + struct xfs_mount *mp, + uint log_flags) +{ + struct xfs_inode *ip = mp->m_rootip; + struct xfs_trans *tp; + int error; + + /* + * Put a dummy transaction in the log to tell recovery + * that all others are OK. + */ + tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); + error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + return error; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + xfs_trans_ihold(tp, ip); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + /* XXX(hch): ignoring the error here.. */ + error = xfs_trans_commit(tp, 0); + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + + xfs_log_force(mp, 0, log_flags); + return 0; +} + +int +xfs_sync_fsdata( + struct xfs_mount *mp, + int flags) +{ + struct xfs_buf *bp; + struct xfs_buf_log_item *bip; + int error = 0; + + /* + * If this is xfssyncd() then only sync the superblock if we can + * lock it without sleeping and it is not pinned. + */ + if (flags & SYNC_BDFLUSH) { + ASSERT(!(flags & SYNC_WAIT)); + + bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); + if (!bp) + goto out; + + bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *); + if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp)) + goto out_brelse; + } else { + bp = xfs_getsb(mp, 0); + + /* + * If the buffer is pinned then push on the log so we won't + * get stuck waiting in the write for someone, maybe + * ourselves, to flush the log. + * + * Even though we just pushed the log above, we did not have + * the superblock buffer locked at that point so it can + * become pinned in between there and here. + */ + if (XFS_BUF_ISPINNED(bp)) + xfs_log_force(mp, 0, XFS_LOG_FORCE); + } + + + if (flags & SYNC_WAIT) + XFS_BUF_UNASYNC(bp); + else + XFS_BUF_ASYNC(bp); + + return xfs_bwrite(mp, bp); + + out_brelse: + xfs_buf_relse(bp); + out: + return error; +} + +/* + * When remounting a filesystem read-only or freezing the filesystem, we have + * two phases to execute. This first phase is syncing the data before we + * quiesce the filesystem, and the second is flushing all the inodes out after + * we've waited for all the transactions created by the first phase to + * complete. The second phase ensures that the inodes are written to their + * location on disk rather than just existing in transactions in the log. This + * means after a quiesce there is no log replay required to write the inodes to + * disk (this is the main difference between a sync and a quiesce). + */ +/* + * First stage of freeze - no writers will make progress now we are here, + * so we flush delwri and delalloc buffers here, then wait for all I/O to + * complete. Data is frozen at that point. Metadata is not frozen, + * transactions can still occur here so don't bother flushing the buftarg + * because it'll just get dirty again. + */ +int +xfs_quiesce_data( + struct xfs_mount *mp) +{ + int error; + + /* push non-blocking */ + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_BDFLUSH); + XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + xfs_filestream_flush(mp); + + /* push and block */ + xfs_sync_inodes(mp, SYNC_DELWRI|SYNC_WAIT|SYNC_IOWAIT); + XFS_QM_DQSYNC(mp, SYNC_WAIT); + + /* write superblock and hoover up shutdown errors */ + error = xfs_sync_fsdata(mp, 0); + + /* flush data-only devices */ + if (mp->m_rtdev_targp) + XFS_bflush(mp->m_rtdev_targp); + + return error; +} + +STATIC void +xfs_quiesce_fs( + struct xfs_mount *mp) +{ + int count = 0, pincount; + + xfs_flush_buftarg(mp->m_ddev_targp, 0); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + + /* + * This loop must run at least twice. The first instance of the loop + * will flush most meta data but that will generate more meta data + * (typically directory updates). Which then must be flushed and + * logged before we can write the unmount record. + */ + do { + xfs_sync_inodes(mp, SYNC_ATTR|SYNC_WAIT); + pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); + if (!pincount) { + delay(50); + count++; + } + } while (count < 2); +} + +/* + * Second stage of a quiesce. The data is already synced, now we have to take + * care of the metadata. New transactions are already blocked, so we need to + * wait for any remaining transactions to drain out before proceding. + */ +void +xfs_quiesce_attr( + struct xfs_mount *mp) +{ + int error = 0; + + /* wait for all modifications to complete */ + while (atomic_read(&mp->m_active_trans) > 0) + delay(100); + + /* flush inodes and push all remaining buffers out to disk */ + xfs_quiesce_fs(mp); + + ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); + + /* Push the superblock and write an unmount record */ + error = xfs_log_sbcount(mp, 1); + if (error) + xfs_fs_cmn_err(CE_WARN, mp, + "xfs_attr_quiesce: failed to log sb changes. " + "Frozen image may not be consistent."); + xfs_log_unmount_write(mp); + xfs_unmountfs_writesb(mp); +} + +/* + * Enqueue a work item to be picked up by the vfs xfssyncd thread. + * Doing this has two advantages: + * - It saves on stack space, which is tight in certain situations + * - It can be used (with care) as a mechanism to avoid deadlocks. + * Flushing while allocating in a full filesystem requires both. + */ +STATIC void +xfs_syncd_queue_work( + struct xfs_mount *mp, + void *data, + void (*syncer)(struct xfs_mount *, void *)) +{ + struct bhv_vfs_sync_work *work; + + work = kmem_alloc(sizeof(struct bhv_vfs_sync_work), KM_SLEEP); + INIT_LIST_HEAD(&work->w_list); + work->w_syncer = syncer; + work->w_data = data; + work->w_mount = mp; + spin_lock(&mp->m_sync_lock); + list_add_tail(&work->w_list, &mp->m_sync_list); + spin_unlock(&mp->m_sync_lock); + wake_up_process(mp->m_sync_task); +} + +/* + * Flush delayed allocate data, attempting to free up reserved space + * from existing allocations. At this point a new allocation attempt + * has failed with ENOSPC and we are in the process of scratching our + * heads, looking about for more room... + */ +STATIC void +xfs_flush_inode_work( + struct xfs_mount *mp, + void *arg) +{ + struct inode *inode = arg; + filemap_flush(inode->i_mapping); + iput(inode); +} + +void +xfs_flush_inode( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + igrab(inode); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_inode_work); + delay(msecs_to_jiffies(500)); +} + +/* + * This is the "bigger hammer" version of xfs_flush_inode_work... + * (IOW, "If at first you don't succeed, use a Bigger Hammer"). + */ +STATIC void +xfs_flush_device_work( + struct xfs_mount *mp, + void *arg) +{ + struct inode *inode = arg; + sync_blockdev(mp->m_super->s_bdev); + iput(inode); +} + +void +xfs_flush_device( + xfs_inode_t *ip) +{ + struct inode *inode = VFS_I(ip); + + igrab(inode); + xfs_syncd_queue_work(ip->i_mount, inode, xfs_flush_device_work); + delay(msecs_to_jiffies(500)); + xfs_log_force(ip->i_mount, (xfs_lsn_t)0, XFS_LOG_FORCE|XFS_LOG_SYNC); +} + +/* + * Every sync period we need to unpin all items, reclaim inodes, sync + * quota and write out the superblock. We might need to cover the log + * to indicate it is idle. + */ +STATIC void +xfs_sync_worker( + struct xfs_mount *mp, + void *unused) +{ + int error; + + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); + xfs_reclaim_inodes(mp, 0, XFS_IFLUSH_DELWRI_ELSE_ASYNC); + /* dgc: errors ignored here */ + error = XFS_QM_DQSYNC(mp, SYNC_BDFLUSH); + error = xfs_sync_fsdata(mp, SYNC_BDFLUSH); + if (xfs_log_need_covered(mp)) + error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); + } + mp->m_sync_seq++; + wake_up(&mp->m_wait_single_sync_task); +} + +STATIC int +xfssyncd( + void *arg) +{ + struct xfs_mount *mp = arg; + long timeleft; + bhv_vfs_sync_work_t *work, *n; + LIST_HEAD (tmp); + + set_freezable(); + timeleft = xfs_syncd_centisecs * msecs_to_jiffies(10); + for (;;) { + timeleft = schedule_timeout_interruptible(timeleft); + /* swsusp */ + try_to_freeze(); + if (kthread_should_stop() && list_empty(&mp->m_sync_list)) + break; + + spin_lock(&mp->m_sync_lock); + /* + * We can get woken by laptop mode, to do a sync - + * that's the (only!) case where the list would be + * empty with time remaining. + */ + if (!timeleft || list_empty(&mp->m_sync_list)) { + if (!timeleft) + timeleft = xfs_syncd_centisecs * + msecs_to_jiffies(10); + INIT_LIST_HEAD(&mp->m_sync_work.w_list); + list_add_tail(&mp->m_sync_work.w_list, + &mp->m_sync_list); + } + list_for_each_entry_safe(work, n, &mp->m_sync_list, w_list) + list_move(&work->w_list, &tmp); + spin_unlock(&mp->m_sync_lock); + + list_for_each_entry_safe(work, n, &tmp, w_list) { + (*work->w_syncer)(mp, work->w_data); + list_del(&work->w_list); + if (work == &mp->m_sync_work) + continue; + kmem_free(work); + } + } + + return 0; +} + +int +xfs_syncd_init( + struct xfs_mount *mp) +{ + mp->m_sync_work.w_syncer = xfs_sync_worker; + mp->m_sync_work.w_mount = mp; + mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd"); + if (IS_ERR(mp->m_sync_task)) + return -PTR_ERR(mp->m_sync_task); + return 0; +} + +void +xfs_syncd_stop( + struct xfs_mount *mp) +{ + kthread_stop(mp->m_sync_task); +} + +int +xfs_reclaim_inode( + xfs_inode_t *ip, + int locked, + int sync_mode) +{ + xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino); + + /* The hash lock here protects a thread in xfs_iget_core from + * racing with us on linking the inode back with a vnode. + * Once we have the XFS_IRECLAIM flag set it will not touch + * us. + */ + write_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + if (__xfs_iflags_test(ip, XFS_IRECLAIM) || + !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) { + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + if (locked) { + xfs_ifunlock(ip); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } + return 1; + } + __xfs_iflags_set(ip, XFS_IRECLAIM); + spin_unlock(&ip->i_flags_lock); + write_unlock(&pag->pag_ici_lock); + xfs_put_perag(ip->i_mount, pag); + + /* + * If the inode is still dirty, then flush it out. If the inode + * is not in the AIL, then it will be OK to flush it delwri as + * long as xfs_iflush() does not keep any references to the inode. + * We leave that decision up to xfs_iflush() since it has the + * knowledge of whether it's OK to simply do a delwri flush of + * the inode or whether we need to wait until the inode is + * pulled from the AIL. + * We get the flush lock regardless, though, just to make sure + * we don't free it while it is being flushed. + */ + if (!locked) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_iflock(ip); + } + + /* + * In the case of a forced shutdown we rely on xfs_iflush() to + * wait for the inode to be unpinned before returning an error. + */ + if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) { + /* synchronize with xfs_iflush_done */ + xfs_iflock(ip); + xfs_ifunlock(ip); + } + + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_ireclaim(ip); + return 0; +} + +/* + * We set the inode flag atomically with the radix tree tag. + * Once we get tag lookups on the radix tree, this inode flag + * can go away. + */ +void +xfs_inode_set_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + radix_tree_tag_set(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); + __xfs_iflags_set(ip, XFS_IRECLAIMABLE); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + +void +__xfs_inode_clear_reclaim_tag( + xfs_mount_t *mp, + xfs_perag_t *pag, + xfs_inode_t *ip) +{ + radix_tree_tag_clear(&pag->pag_ici_root, + XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); +} + +void +xfs_inode_clear_reclaim_tag( + xfs_inode_t *ip) +{ + xfs_mount_t *mp = ip->i_mount; + xfs_perag_t *pag = xfs_get_perag(mp, ip->i_ino); + + read_lock(&pag->pag_ici_lock); + spin_lock(&ip->i_flags_lock); + __xfs_inode_clear_reclaim_tag(mp, pag, ip); + spin_unlock(&ip->i_flags_lock); + read_unlock(&pag->pag_ici_lock); + xfs_put_perag(mp, pag); +} + + +STATIC void +xfs_reclaim_inodes_ag( + xfs_mount_t *mp, + int ag, + int noblock, + int mode) +{ + xfs_inode_t *ip = NULL; + xfs_perag_t *pag = &mp->m_perag[ag]; + int nr_found; + uint32_t first_index; + int skipped; + +restart: + first_index = 0; + skipped = 0; + do { + /* + * use a gang lookup to find the next inode in the tree + * as the tree is sparse and a gang lookup walks to find + * the number of objects requested. + */ + read_lock(&pag->pag_ici_lock); + nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, + (void**)&ip, first_index, 1, + XFS_ICI_RECLAIM_TAG); + + if (!nr_found) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* + * Update the index for the next lookup. Catch overflows + * into the next AG range which can occur if we have inodes + * in the last block of the AG and we are currently + * pointing to the last inode. + */ + first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); + if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) { + read_unlock(&pag->pag_ici_lock); + break; + } + + /* ignore if already under reclaim */ + if (xfs_iflags_test(ip, XFS_IRECLAIM)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + + if (noblock) { + if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) { + read_unlock(&pag->pag_ici_lock); + continue; + } + if (xfs_ipincount(ip) || + !xfs_iflock_nowait(ip)) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + read_unlock(&pag->pag_ici_lock); + continue; + } + } + read_unlock(&pag->pag_ici_lock); + + /* + * hmmm - this is an inode already in reclaim. Do + * we even bother catching it here? + */ + if (xfs_reclaim_inode(ip, noblock, mode)) + skipped++; + } while (nr_found); + + if (skipped) { + delay(1); + goto restart; + } + return; + +} + +int +xfs_reclaim_inodes( + xfs_mount_t *mp, + int noblock, + int mode) +{ + int i; + + for (i = 0; i < mp->m_sb.sb_agcount; i++) { + if (!mp->m_perag[i].pag_ici_init) + continue; + xfs_reclaim_inodes_ag(mp, i, noblock, mode); + } + return 0; +} + + diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h new file mode 100644 index 00000000000..5f6de1efe1f --- /dev/null +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#ifndef XFS_SYNC_H +#define XFS_SYNC_H 1 + +struct xfs_mount; + +typedef struct bhv_vfs_sync_work { + struct list_head w_list; + struct xfs_mount *w_mount; + void *w_data; /* syncer routine argument */ + void (*w_syncer)(struct xfs_mount *, void *); +} bhv_vfs_sync_work_t; + +#define SYNC_ATTR 0x0001 /* sync attributes */ +#define SYNC_DELWRI 0x0002 /* look at delayed writes */ +#define SYNC_WAIT 0x0004 /* wait for i/o to complete */ +#define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ +#define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ + +int xfs_syncd_init(struct xfs_mount *mp); +void xfs_syncd_stop(struct xfs_mount *mp); + +int xfs_sync_inodes(struct xfs_mount *mp, int flags); +int xfs_sync_fsdata(struct xfs_mount *mp, int flags); + +int xfs_quiesce_data(struct xfs_mount *mp); +void xfs_quiesce_attr(struct xfs_mount *mp); + +void xfs_flush_inode(struct xfs_inode *ip); +void xfs_flush_device(struct xfs_inode *ip); + +int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); +int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); + +void xfs_inode_set_reclaim_tag(struct xfs_inode *ip); +void xfs_inode_clear_reclaim_tag(struct xfs_inode *ip); +void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, + struct xfs_inode *ip); +#endif diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c index 7dacb5bbde3..916c0ffb608 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.c +++ b/fs/xfs/linux-2.6/xfs_sysctl.c @@ -55,17 +55,6 @@ xfs_stats_clear_proc_handler( #endif /* CONFIG_PROC_FS */ static ctl_table xfs_table[] = { - { - .ctl_name = XFS_RESTRICT_CHOWN, - .procname = "restrict_chown", - .data = &xfs_params.restrict_chown.val, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &xfs_params.restrict_chown.min, - .extra2 = &xfs_params.restrict_chown.max - }, { .ctl_name = XFS_SGID_INHERIT, .procname = "irix_sgid_inherit", diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h index 4aadb8056c3..b9937d450f8 100644 --- a/fs/xfs/linux-2.6/xfs_sysctl.h +++ b/fs/xfs/linux-2.6/xfs_sysctl.h @@ -31,7 +31,6 @@ typedef struct xfs_sysctl_val { } xfs_sysctl_val_t; typedef struct xfs_param { - xfs_sysctl_val_t restrict_chown;/* Root/non-root can give away files.*/ xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is * not a member of parent dir GID. */ xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */ @@ -68,7 +67,7 @@ typedef struct xfs_param { enum { /* XFS_REFCACHE_SIZE = 1 */ /* XFS_REFCACHE_PURGE = 2 */ - XFS_RESTRICT_CHOWN = 3, + /* XFS_RESTRICT_CHOWN = 3 */ XFS_SGID_INHERIT = 4, XFS_SYMLINK_MODE = 5, XFS_PANIC_MASK = 6, diff --git a/fs/xfs/linux-2.6/xfs_vfs.h b/fs/xfs/linux-2.6/xfs_vfs.h deleted file mode 100644 index 7e60c7776b1..00000000000 --- a/fs/xfs/linux-2.6/xfs_vfs.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2000-2006 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#ifndef __XFS_VFS_H__ -#define __XFS_VFS_H__ - -#include -#include "xfs_fs.h" - -struct inode; - -struct fid; -struct cred; -struct seq_file; -struct super_block; -struct xfs_inode; -struct xfs_mount; -struct xfs_mount_args; - -typedef struct kstatfs bhv_statvfs_t; - -typedef struct bhv_vfs_sync_work { - struct list_head w_list; - struct xfs_mount *w_mount; - void *w_data; /* syncer routine argument */ - void (*w_syncer)(struct xfs_mount *, void *); -} bhv_vfs_sync_work_t; - -#define SYNC_ATTR 0x0001 /* sync attributes */ -#define SYNC_CLOSE 0x0002 /* close file system down */ -#define SYNC_DELWRI 0x0004 /* look at delayed writes */ -#define SYNC_WAIT 0x0008 /* wait for i/o to complete */ -#define SYNC_BDFLUSH 0x0010 /* BDFLUSH is calling -- don't block */ -#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */ -#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */ -#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */ -#define SYNC_IOWAIT 0x0100 /* wait for all I/O to complete */ - -/* - * When remounting a filesystem read-only or freezing the filesystem, - * we have two phases to execute. This first phase is syncing the data - * before we quiesce the fielsystem, and the second is flushing all the - * inodes out after we've waited for all the transactions created by - * the first phase to complete. The second phase uses SYNC_INODE_QUIESCE - * to ensure that the inodes are written to their location on disk - * rather than just existing in transactions in the log. This means - * after a quiesce there is no log replay required to write the inodes - * to disk (this is the main difference between a sync and a quiesce). - */ -#define SYNC_DATA_QUIESCE (SYNC_DELWRI|SYNC_FSDATA|SYNC_WAIT|SYNC_IOWAIT) -#define SYNC_INODE_QUIESCE (SYNC_REMOUNT|SYNC_ATTR|SYNC_WAIT) - -#define SHUTDOWN_META_IO_ERROR 0x0001 /* write attempt to metadata failed */ -#define SHUTDOWN_LOG_IO_ERROR 0x0002 /* write attempt to the log failed */ -#define SHUTDOWN_FORCE_UMOUNT 0x0004 /* shutdown from a forced unmount */ -#define SHUTDOWN_CORRUPT_INCORE 0x0008 /* corrupt in-memory data structures */ -#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */ -#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */ - -#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen) -#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l)) - -#endif /* __XFS_VFS_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.c b/fs/xfs/linux-2.6/xfs_vnode.c deleted file mode 100644 index b52528bbbff..00000000000 --- a/fs/xfs/linux-2.6/xfs_vnode.c +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ -#include "xfs.h" -#include "xfs_vnodeops.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h" - -/* - * And this gunk is needed for xfs_mount.h" - */ -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_sb.h" -#include "xfs_dmapi.h" -#include "xfs_inum.h" -#include "xfs_ag.h" -#include "xfs_mount.h" - - -/* - * Dedicated vnode inactive/reclaim sync wait queues. - * Prime number of hash buckets since address is used as the key. - */ -#define NVSYNC 37 -#define vptosync(v) (&vsync[((unsigned long)v) % NVSYNC]) -static wait_queue_head_t vsync[NVSYNC]; - -void __init -vn_init(void) -{ - int i; - - for (i = 0; i < NVSYNC; i++) - init_waitqueue_head(&vsync[i]); -} - -void -vn_iowait( - xfs_inode_t *ip) -{ - wait_queue_head_t *wq = vptosync(ip); - - wait_event(*wq, (atomic_read(&ip->i_iocount) == 0)); -} - -void -vn_iowake( - xfs_inode_t *ip) -{ - if (atomic_dec_and_test(&ip->i_iocount)) - wake_up(vptosync(ip)); -} - -/* - * Volume managers supporting multiple paths can send back ENODEV when the - * final path disappears. In this case continuing to fill the page cache - * with dirty data which cannot be written out is evil, so prevent that. - */ -void -vn_ioerror( - xfs_inode_t *ip, - int error, - char *f, - int l) -{ - if (unlikely(error == -ENODEV)) - xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ, f, l); -} - -#ifdef XFS_INODE_TRACE - -/* - * Reference count of Linux inode if present, -1 if the xfs_inode - * has no associated Linux inode. - */ -static inline int xfs_icount(struct xfs_inode *ip) -{ - struct inode *vp = VFS_I(ip); - - if (vp) - return vn_count(vp); - return -1; -} - -#define KTRACE_ENTER(ip, vk, s, line, ra) \ - ktrace_enter( (ip)->i_trace, \ -/* 0 */ (void *)(__psint_t)(vk), \ -/* 1 */ (void *)(s), \ -/* 2 */ (void *)(__psint_t) line, \ -/* 3 */ (void *)(__psint_t)xfs_icount(ip), \ -/* 4 */ (void *)(ra), \ -/* 5 */ NULL, \ -/* 6 */ (void *)(__psint_t)current_cpu(), \ -/* 7 */ (void *)(__psint_t)current_pid(), \ -/* 8 */ (void *)__return_address, \ -/* 9 */ NULL, NULL, NULL, NULL, NULL, NULL, NULL) - -/* - * Vnode tracing code. - */ -void -_xfs_itrace_entry(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_ENTRY, func, 0, ra); -} - -void -_xfs_itrace_exit(xfs_inode_t *ip, const char *func, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_EXIT, func, 0, ra); -} - -void -xfs_itrace_hold(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_HOLD, file, line, ra); -} - -void -_xfs_itrace_ref(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_REF, file, line, ra); -} - -void -xfs_itrace_rele(xfs_inode_t *ip, char *file, int line, inst_t *ra) -{ - KTRACE_ENTER(ip, INODE_KTRACE_RELE, file, line, ra); -} -#endif /* XFS_INODE_TRACE */ diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h index 683ce16210f..f65983a230d 100644 --- a/fs/xfs/linux-2.6/xfs_vnode.h +++ b/fs/xfs/linux-2.6/xfs_vnode.h @@ -18,7 +18,10 @@ #ifndef __XFS_VNODE_H__ #define __XFS_VNODE_H__ +#include "xfs_fs.h" + struct file; +struct xfs_inode; struct xfs_iomap; struct attrlist_cursor_kern; @@ -51,40 +54,6 @@ struct attrlist_cursor_kern; Prevent VM access to the pages until the operation completes. */ - -extern void vn_init(void); - -/* - * Yeah, these don't take vnode anymore at all, all this should be - * cleaned up at some point. - */ -extern void vn_iowait(struct xfs_inode *ip); -extern void vn_iowake(struct xfs_inode *ip); -extern void vn_ioerror(struct xfs_inode *ip, int error, char *f, int l); - -static inline int vn_count(struct inode *vp) -{ - return atomic_read(&vp->i_count); -} - -#define IHOLD(ip) \ -do { \ - ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ - atomic_inc(&(VFS_I(ip)->i_count)); \ - xfs_itrace_hold((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ -} while (0) - -#define IRELE(ip) \ -do { \ - xfs_itrace_rele((ip), __FILE__, __LINE__, (inst_t *)__return_address); \ - iput(VFS_I(ip)); \ -} while (0) - -static inline struct inode *vn_grab(struct inode *vp) -{ - return igrab(vp); -} - /* * Dealing with bad inodes */ @@ -121,39 +90,4 @@ static inline void vn_atime_to_time_t(struct inode *vp, time_t *tt) PAGECACHE_TAG_DIRTY) -/* - * Tracking vnode activity. - */ -#if defined(XFS_INODE_TRACE) - -#define INODE_TRACE_SIZE 16 /* number of trace entries */ -#define INODE_KTRACE_ENTRY 1 -#define INODE_KTRACE_EXIT 2 -#define INODE_KTRACE_HOLD 3 -#define INODE_KTRACE_REF 4 -#define INODE_KTRACE_RELE 5 - -extern void _xfs_itrace_entry(struct xfs_inode *, const char *, inst_t *); -extern void _xfs_itrace_exit(struct xfs_inode *, const char *, inst_t *); -extern void xfs_itrace_hold(struct xfs_inode *, char *, int, inst_t *); -extern void _xfs_itrace_ref(struct xfs_inode *, char *, int, inst_t *); -extern void xfs_itrace_rele(struct xfs_inode *, char *, int, inst_t *); -#define xfs_itrace_entry(ip) \ - _xfs_itrace_entry(ip, __func__, (inst_t *)__return_address) -#define xfs_itrace_exit(ip) \ - _xfs_itrace_exit(ip, __func__, (inst_t *)__return_address) -#define xfs_itrace_exit_tag(ip, tag) \ - _xfs_itrace_exit(ip, tag, (inst_t *)__return_address) -#define xfs_itrace_ref(ip) \ - _xfs_itrace_ref(ip, __FILE__, __LINE__, (inst_t *)__return_address) - -#else -#define xfs_itrace_entry(a) -#define xfs_itrace_exit(a) -#define xfs_itrace_exit_tag(a, b) -#define xfs_itrace_hold(a, b, c, d) -#define xfs_itrace_ref(a) -#define xfs_itrace_rele(a, b, c, d) -#endif - #endif /* __XFS_VNODE_H__ */ -- cgit v1.2.3