diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 227 |
1 files changed, 134 insertions, 93 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index d1060b8d3cd..7b48b2ad00e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -750,6 +750,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, read_unlock_irq(&mapping->tree_lock); return i; } +EXPORT_SYMBOL(find_get_pages_contig); /** * find_get_pages_tag - find and return pages that match @tag @@ -778,6 +779,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, read_unlock_irq(&mapping->tree_lock); return ret; } +EXPORT_SYMBOL(find_get_pages_tag); /** * grab_cache_page_nowait - returns locked page at given index in given cache @@ -868,6 +870,7 @@ void do_generic_mapping_read(struct address_space *mapping, unsigned long last_index; unsigned long next_index; unsigned long prev_index; + unsigned int prev_offset; loff_t isize; struct page *cached_page; int error; @@ -876,7 +879,8 @@ void do_generic_mapping_read(struct address_space *mapping, cached_page = NULL; index = *ppos >> PAGE_CACHE_SHIFT; next_index = index; - prev_index = ra.prev_page; + prev_index = ra.prev_index; + prev_offset = ra.prev_offset; last_index = (*ppos + desc->count + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT; offset = *ppos & ~PAGE_CACHE_MASK; @@ -924,10 +928,10 @@ page_ok: flush_dcache_page(page); /* - * When (part of) the same page is read multiple times - * in succession, only mark it as accessed the first time. + * When a sequential read accesses a page several times, + * only mark it as accessed the first time. */ - if (prev_index != index) + if (prev_index != index || offset != prev_offset) mark_page_accessed(page); prev_index = index; @@ -945,6 +949,8 @@ page_ok: offset += ret; index += offset >> PAGE_CACHE_SHIFT; offset &= ~PAGE_CACHE_MASK; + prev_offset = offset; + ra.prev_offset = offset; page_cache_release(page); if (ret == nr && desc->count) @@ -1106,6 +1112,45 @@ success: return size; } +/* + * Performs necessary checks before doing a write + * @iov: io vector request + * @nr_segs: number of segments in the iovec + * @count: number of bytes to write + * @access_flags: type of access: %VERIFY_READ or %VERIFY_WRITE + * + * Adjust number of segments and amount of bytes to write (nr_segs should be + * properly initialized first). Returns appropriate error code that caller + * should return or zero in case that write should be allowed. + */ +int generic_segment_checks(const struct iovec *iov, + unsigned long *nr_segs, size_t *count, int access_flags) +{ + unsigned long seg; + size_t cnt = 0; + for (seg = 0; seg < *nr_segs; seg++) { + const struct iovec *iv = &iov[seg]; + + /* + * If any segment has a negative length, or the cumulative + * length ever wraps negative then return -EINVAL. + */ + cnt += iv->iov_len; + if (unlikely((ssize_t)(cnt|iv->iov_len) < 0)) + return -EINVAL; + if (access_ok(access_flags, iv->iov_base, iv->iov_len)) + continue; + if (seg == 0) + return -EFAULT; + *nr_segs = seg; + cnt -= iv->iov_len; /* This segment is no good */ + break; + } + *count = cnt; + return 0; +} +EXPORT_SYMBOL(generic_segment_checks); + /** * generic_file_aio_read - generic filesystem read routine * @iocb: kernel I/O control block @@ -1127,24 +1172,9 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, loff_t *ppos = &iocb->ki_pos; count = 0; - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - count += iv->iov_len; - if (unlikely((ssize_t)(count|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - nr_segs = seg; - count -= iv->iov_len; /* This segment is no good */ - break; - } + retval = generic_segment_checks(iov, &nr_segs, &count, VERIFY_WRITE); + if (retval) + return retval; /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (filp->f_flags & O_DIRECT) { @@ -1446,30 +1476,6 @@ page_not_uptodate: majmin = VM_FAULT_MAJOR; count_vm_event(PGMAJFAULT); } - lock_page(page); - - /* Did it get unhashed while we waited for it? */ - if (!page->mapping) { - unlock_page(page); - page_cache_release(page); - goto retry_all; - } - - /* Did somebody else get it up-to-date? */ - if (PageUptodate(page)) { - unlock_page(page); - goto success; - } - - error = mapping->a_ops->readpage(file, page); - if (!error) { - wait_on_page_locked(page); - if (PageUptodate(page)) - goto success; - } else if (error == AOP_TRUNCATED_PAGE) { - page_cache_release(page); - goto retry_find; - } /* * Umm, take care of errors if the page isn't up-to-date. @@ -1726,7 +1732,7 @@ int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma) EXPORT_SYMBOL(generic_file_mmap); EXPORT_SYMBOL(generic_file_readonly_mmap); -static inline struct page *__read_cache_page(struct address_space *mapping, +static struct page *__read_cache_page(struct address_space *mapping, unsigned long index, int (*filler)(void *,struct page*), void *data) @@ -1763,17 +1769,11 @@ repeat: return page; } -/** - * read_cache_page - read into page cache, fill it if needed - * @mapping: the page's address_space - * @index: the page index - * @filler: function to perform the read - * @data: destination for read data - * - * Read into the page cache. If a page already exists, - * and PageUptodate() is not set, try to fill the page. +/* + * Same as read_cache_page, but don't wait for page to become unlocked + * after submitting it to the filler. */ -struct page *read_cache_page(struct address_space *mapping, +struct page *read_cache_page_async(struct address_space *mapping, unsigned long index, int (*filler)(void *,struct page*), void *data) @@ -1784,7 +1784,7 @@ struct page *read_cache_page(struct address_space *mapping, retry: page = __read_cache_page(mapping, index, filler, data); if (IS_ERR(page)) - goto out; + return page; mark_page_accessed(page); if (PageUptodate(page)) goto out; @@ -1802,7 +1802,40 @@ retry: err = filler(data, page); if (err < 0) { page_cache_release(page); - page = ERR_PTR(err); + return ERR_PTR(err); + } +out: + mark_page_accessed(page); + return page; +} +EXPORT_SYMBOL(read_cache_page_async); + +/** + * read_cache_page - read into page cache, fill it if needed + * @mapping: the page's address_space + * @index: the page index + * @filler: function to perform the read + * @data: destination for read data + * + * Read into the page cache. If a page already exists, and PageUptodate() is + * not set, try to fill the page then wait for it to become unlocked. + * + * If the page does not get brought uptodate, return -EIO. + */ +struct page *read_cache_page(struct address_space *mapping, + unsigned long index, + int (*filler)(void *,struct page*), + void *data) +{ + struct page *page; + + page = read_cache_page_async(mapping, index, filler, data); + if (IS_ERR(page)) + goto out; + wait_on_page_locked(page); + if (!PageUptodate(page)) { + page_cache_release(page); + page = ERR_PTR(-EIO); } out: return page; @@ -2211,30 +2244,14 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, size_t ocount; /* original count */ size_t count; /* after file limit checks */ struct inode *inode = mapping->host; - unsigned long seg; loff_t pos; ssize_t written; ssize_t err; ocount = 0; - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *iv = &iov[seg]; - - /* - * If any segment has a negative length, or the cumulative - * length ever wraps negative then return -EINVAL. - */ - ocount += iv->iov_len; - if (unlikely((ssize_t)(ocount|iv->iov_len) < 0)) - return -EINVAL; - if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len)) - continue; - if (seg == 0) - return -EFAULT; - nr_segs = seg; - ocount -= iv->iov_len; /* This segment is no good */ - break; - } + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); + if (err) + return err; count = ocount; pos = *ppos; @@ -2294,10 +2311,10 @@ __generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, * semantics. */ endbyte = pos + written_buffered - written - 1; - err = do_sync_file_range(file, pos, endbyte, - SYNC_FILE_RANGE_WAIT_BEFORE| - SYNC_FILE_RANGE_WRITE| - SYNC_FILE_RANGE_WAIT_AFTER); + err = do_sync_mapping_range(file->f_mapping, pos, endbyte, + SYNC_FILE_RANGE_WAIT_BEFORE| + SYNC_FILE_RANGE_WRITE| + SYNC_FILE_RANGE_WAIT_AFTER); if (err == 0) { written = written_buffered; invalidate_mapping_pages(mapping, @@ -2379,7 +2396,8 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; ssize_t retval; - size_t write_len = 0; + size_t write_len; + pgoff_t end = 0; /* silence gcc */ /* * If it's a write, unmap all mmappings of the file up-front. This @@ -2388,23 +2406,46 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, */ if (rw == WRITE) { write_len = iov_length(iov, nr_segs); + end = (offset + write_len - 1) >> PAGE_CACHE_SHIFT; if (mapping_mapped(mapping)) unmap_mapping_range(mapping, offset, write_len, 0); } retval = filemap_write_and_wait(mapping); - if (retval == 0) { - retval = mapping->a_ops->direct_IO(rw, iocb, iov, - offset, nr_segs); - if (rw == WRITE && mapping->nrpages) { - pgoff_t end = (offset + write_len - 1) - >> PAGE_CACHE_SHIFT; - int err = invalidate_inode_pages2_range(mapping, + if (retval) + goto out; + + /* + * After a write we want buffered reads to be sure to go to disk to get + * the new data. We invalidate clean cached page from the region we're + * about to write. We do this *before* the write so that we can return + * -EIO without clobbering -EIOCBQUEUED from ->direct_IO(). + */ + if (rw == WRITE && mapping->nrpages) { + retval = invalidate_inode_pages2_range(mapping, offset >> PAGE_CACHE_SHIFT, end); - if (err) - retval = err; - } + if (retval) + goto out; } + + retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs); + if (retval) + goto out; + + /* + * Finally, try again to invalidate clean pages which might have been + * faulted in by get_user_pages() if the source of the write was an + * mmap()ed region of the file we're writing. That's a pretty crazy + * thing to do, so we don't support it 100%. If this invalidation + * fails and we have -EIOCBQUEUED we ignore the failure. + */ + if (rw == WRITE && mapping->nrpages) { + int err = invalidate_inode_pages2_range(mapping, + offset >> PAGE_CACHE_SHIFT, end); + if (err && retval >= 0) + retval = err; + } +out: return retval; } |