From 5abc97aa25b2c41413b3a520faee83f2282d9f18 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 30 Mar 2006 15:16:46 +0200 Subject: [PATCH] splice: add support for SPLICE_F_MOVE flag This enables the caller to migrate pages from one address space page cache to another. In buzz word marketing, you can do zero-copy file copies! Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index b12e59c7575..75c7f55023a 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,6 +9,7 @@ struct pipe_buffer { struct page *page; unsigned int offset, len; struct pipe_buf_operations *ops; + unsigned int stolen; }; struct pipe_buf_operations { @@ -16,6 +17,7 @@ struct pipe_buf_operations { void * (*map)(struct file *, struct pipe_inode_info *, struct pipe_buffer *); void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); + int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); }; struct pipe_inode_info { @@ -53,4 +55,10 @@ void pipe_wait(struct inode * inode); struct inode* pipe_new(struct inode* inode); void free_pipe_info(struct inode* inode); +/* + * splice is tied to pipes as a transport (at least for now), so we'll just + * add the splice flags here. + */ +#define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ + #endif -- cgit v1.2.3 From 29e350944fdc2dfca102500790d8ad6d6ff4f69d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 2 Apr 2006 12:46:35 -0700 Subject: splice: add SPLICE_F_NONBLOCK flag It doesn't make the splice itself necessarily nonblocking (because the actual file descriptors that are spliced from/to may block unless they have the O_NONBLOCK flag set), but it makes the splice pipe operations nonblocking. Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 75c7f55023a..d218fc72931 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -60,5 +60,8 @@ void free_pipe_info(struct inode* inode); * add the splice flags here. */ #define SPLICE_F_MOVE (0x01) /* move pages instead of copying */ +#define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */ + /* we may still block on the fd we splice */ + /* from/to, of course */ #endif -- cgit v1.2.3 From 4f6f0bd2ffa4e31c3524f5e65c84a29b6ab73307 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:04:46 +0200 Subject: [PATCH] splice: improve writeback and clean up page stealing By cleaning up the writeback logic (killing write_one_page() and the manual set_page_dirty()), we can get rid of ->stolen inside the pipe_buffer and just keep it local in pipe_to_file(). This also adds dirty page balancing logic and O_SYNC handling. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index d218fc72931..32865004751 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,7 +9,6 @@ struct pipe_buffer { struct page *page; unsigned int offset, len; struct pipe_buf_operations *ops; - unsigned int stolen; }; struct pipe_buf_operations { -- cgit v1.2.3 From b2b39fa478db6db89b7ccafb0649973845b0eb75 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:05:41 +0200 Subject: [PATCH] splice: add a SPLICE_F_MORE flag This lets userspace indicate whether more data will be coming in a subsequent splice call. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 32865004751..70ae9332ec1 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -62,5 +62,6 @@ void free_pipe_info(struct inode* inode); #define SPLICE_F_NONBLOCK (0x02) /* don't block on the pipe splicing (but */ /* we may still block on the fd we splice */ /* from/to, of course */ +#define SPLICE_F_MORE (0x04) /* expect more data */ #endif -- cgit v1.2.3 From 3e7ee3e7b36fa4e2d88d8fb0a2577be95fc4636d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 2 Apr 2006 23:11:04 +0200 Subject: [PATCH] splice: fix page stealing LRU handling. Originally from Nick Piggin, just adapted to the newer branch. You can't check PageLRU without holding zone->lru_lock. The page release code can get away with it only because the page refcount is 0 at that point. Also, you can't reliably remove pages from the LRU unless the refcount is 0. Ever. Signed-off-by: Nick Piggin Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 70ae9332ec1..ec384958d50 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -5,10 +5,14 @@ #define PIPE_BUFFERS (16) +#define PIPE_BUF_FLAG_STOLEN 0x01 +#define PIPE_BUF_FLAG_LRU 0x02 + struct pipe_buffer { struct page *page; unsigned int offset, len; struct pipe_buf_operations *ops; + unsigned int flags; }; struct pipe_buf_operations { -- cgit v1.2.3 From 3a326a2ce88e71d00ac0d133e314a3342a7709f8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 10 Apr 2006 15:18:35 +0200 Subject: [PATCH] introduce a "kernel-internal pipe object" abstraction separate out the 'internal pipe object' abstraction, and make it usable to splice. This cleans up and fixes several aspects of the internal splice APIs and the pipe code: - pipes: the allocation and freeing of pipe_inode_info is now more symmetric and more streamlined with existing kernel practices. - splice: small micro-optimization: less pointer dereferencing in splice methods Signed-off-by: Ingo Molnar Update XFS for the ->splice_read/->splice_write changes. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index ec384958d50..9cf99cb34c1 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -36,6 +36,7 @@ struct pipe_inode_info { unsigned int w_counter; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; + struct inode *inode; }; /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual @@ -53,10 +54,10 @@ struct pipe_inode_info { #define PIPE_FASYNC_WRITERS(inode) (&((inode).i_pipe->fasync_writers)) /* Drop the inode semaphore and wait for a pipe event, atomically */ -void pipe_wait(struct inode * inode); +void pipe_wait(struct pipe_inode_info *pipe); -struct inode* pipe_new(struct inode* inode); -void free_pipe_info(struct inode* inode); +struct pipe_inode_info * alloc_pipe_info(struct inode * inode); +void free_pipe_info(struct inode * inode); /* * splice is tied to pipes as a transport (at least for now), so we'll just -- cgit v1.2.3 From b92ce55893745e011edae70830b8bc863be881f9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Apr 2006 13:52:07 +0200 Subject: [PATCH] splice: add direct fd <-> fd splicing support It's more efficient for sendfile() emulation. Basically we cache an internal private pipe and just use that as the intermediate area for pages. Direct splicing is not available from sys_splice(), it is only meant to be used for sendfile() emulation. Additional patch from Ingo Molnar to avoid the PIPE_BUFFERS loop at exit for the normal fast path. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 9cf99cb34c1..660e9d866e5 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -58,6 +58,7 @@ void pipe_wait(struct pipe_inode_info *pipe); struct pipe_inode_info * alloc_pipe_info(struct inode * inode); void free_pipe_info(struct inode * inode); +void __free_pipe_info(struct pipe_inode_info *); /* * splice is tied to pipes as a transport (at least for now), so we'll just -- cgit v1.2.3 From 9aeedfc4712ed58d9f7ae41596185c72b8dc97e8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 11 Apr 2006 13:53:10 +0200 Subject: [PATCH] get rid of the PIPE_*() macros get rid of the PIPE_*() macros. Scripted transformation. Signed-off-by: Ingo Molnar Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 660e9d866e5..123a7c24bc7 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -43,16 +43,6 @@ struct pipe_inode_info { memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE -#define PIPE_MUTEX(inode) (&(inode).i_mutex) -#define PIPE_WAIT(inode) (&(inode).i_pipe->wait) -#define PIPE_READERS(inode) ((inode).i_pipe->readers) -#define PIPE_WRITERS(inode) ((inode).i_pipe->writers) -#define PIPE_WAITING_WRITERS(inode) ((inode).i_pipe->waiting_writers) -#define PIPE_RCOUNTER(inode) ((inode).i_pipe->r_counter) -#define PIPE_WCOUNTER(inode) ((inode).i_pipe->w_counter) -#define PIPE_FASYNC_READERS(inode) (&((inode).i_pipe->fasync_readers)) -#define PIPE_FASYNC_WRITERS(inode) (&((inode).i_pipe->fasync_writers)) - /* Drop the inode semaphore and wait for a pipe event, atomically */ void pipe_wait(struct pipe_inode_info *pipe); -- cgit v1.2.3 From 70524490ee2ea1bbf6cee6c106597b3ac25a3fc2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 11 Apr 2006 15:51:17 +0200 Subject: [PATCH] splice: add support for sys_tee() Basically an in-kernel implementation of tee, which uses splice and the pipe buffers as an intelligent way to pass data around by reference. Where the user space tee consumes the input and produces a stdout and file output, this syscall merely duplicates the data inside a pipe to another pipe. No data is copied, the output just grabs a reference to the input pipe data. Signed-off-by: Jens Axboe --- include/linux/pipe_fs_i.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/pipe_fs_i.h') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 123a7c24bc7..ef7f33c0be1 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -21,6 +21,7 @@ struct pipe_buf_operations { void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *); void (*release)(struct pipe_inode_info *, struct pipe_buffer *); int (*steal)(struct pipe_inode_info *, struct pipe_buffer *); + void (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; struct pipe_inode_info { -- cgit v1.2.3