From a8d770d987ee20b59fba6c37d7f0f2a351913c4b Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 6 Apr 2009 18:44:54 +0200 Subject: xfs: use xfs_sync_inodes() for device flushing Currently xfs_device_flush calls sync_blockdev() which is a no-op for XFS as all it's metadata is held in a different address to the one sync_blockdev() works on. Call xfs_sync_inodes() instead to flush all the delayed allocation blocks out. To do this as efficiently as possible, do it via two passes - one to do an async flush of all the dirty blocks and a second to wait for all the IO to complete. This requires some modification to the xfs-sync_inodes_ag() flush code to do efficiently. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.h') diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 04f058c848a..ec95e264805 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -21,18 +21,19 @@ struct xfs_mount; struct xfs_perag; -typedef struct bhv_vfs_sync_work { +typedef struct xfs_sync_work { struct list_head w_list; struct xfs_mount *w_mount; void *w_data; /* syncer routine argument */ void (*w_syncer)(struct xfs_mount *, void *); -} bhv_vfs_sync_work_t; +} xfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ #define SYNC_DELWRI 0x0002 /* look at delayed writes */ #define SYNC_WAIT 0x0004 /* wait for i/o to complete */ #define SYNC_BDFLUSH 0x0008 /* BDFLUSH is calling -- don't block */ #define SYNC_IOWAIT 0x0010 /* wait for all I/O to complete */ +#define SYNC_TRYLOCK 0x0020 /* only try to lock inodes */ int xfs_syncd_init(struct xfs_mount *mp); void xfs_syncd_stop(struct xfs_mount *mp); @@ -44,7 +45,7 @@ int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); void xfs_flush_inode(struct xfs_inode *ip); -void xfs_flush_device(struct xfs_inode *ip); +void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); int xfs_reclaim_inodes(struct xfs_mount *mp, int noblock, int mode); -- cgit v1.2.3 From 5825294edd3364cbba6514f70d88debec4f6cec7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 6 Apr 2009 18:45:44 +0200 Subject: xfs: make inode flush at ENOSPC synchronous When we are writing to a single file and hit ENOSPC, we trigger a background flush of the inode and try again. Because we hold page locks and the iolock, the flush won't proceed until after we release these locks. This occurs once we've given up and ENOSPC has been reported. Hence if this one is the only dirty inode in the system, we'll get an ENOSPC prematurely. To fix this, remove the async flush from the allocation routines and move it to the top of the write path where we can do a synchronous flush and retry the write again. Only retry once as a second ENOSPC indicates that we really are ENOSPC. This avoids a page cache deadlock when trying to do this flush synchronously in the allocation layer that was identified by Mikulas Patocka. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/xfs/linux-2.6/xfs_sync.h') diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index ec95e264805..6e83a35626e 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -44,7 +44,6 @@ int xfs_sync_fsdata(struct xfs_mount *mp, int flags); int xfs_quiesce_data(struct xfs_mount *mp); void xfs_quiesce_attr(struct xfs_mount *mp); -void xfs_flush_inode(struct xfs_inode *ip); void xfs_flush_inodes(struct xfs_inode *ip); int xfs_reclaim_inode(struct xfs_inode *ip, int locked, int sync_mode); -- cgit v1.2.3 From e43afd72d2455defd63a3f94f22fa09b586e58ed Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 6 Apr 2009 18:47:27 +0200 Subject: xfs: block callers of xfs_flush_inodes() correctly xfs_flush_inodes() currently uses a magic timeout to wait for some inodes to be flushed before returning. This isn't really reliable but used to be the best that could be done due to deadlock potential of waiting for the entire flush. Now the inode flush is safe to execute while we hold page and inode locks, we can wait for all the inodes to flush synchronously. Convert the wait mechanism to a completion to do this efficiently. This should remove all remaining spurious ENOSPC errors from the delayed allocation reservation path. This is extracted almost line for line from a larger patch from Mikulas Patocka. Signed-off-by: Mikulas Patocka Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig --- fs/xfs/linux-2.6/xfs_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs/linux-2.6/xfs_sync.h') diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index 6e83a35626e..308d5bf6dfb 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h @@ -26,6 +26,7 @@ typedef struct xfs_sync_work { struct xfs_mount *w_mount; void *w_data; /* syncer routine argument */ void (*w_syncer)(struct xfs_mount *, void *); + struct completion *w_completion; } xfs_sync_work_t; #define SYNC_ATTR 0x0001 /* sync attributes */ -- cgit v1.2.3