From b079fa7baa86b47579f3f60f86d03d21c76159b8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Dec 2005 16:13:52 -0500 Subject: RPC: Do not block on skb allocation If we get something like the following, [ 125.300636] [] schedule_timeout+0x54/0xa5 [ 125.305931] [] io_schedule_timeout+0x29/0x33 [ 125.311495] [] blk_congestion_wait+0x70/0x85 [ 125.317058] [] throttle_vm_writeout+0x69/0x7d [ 125.322720] [] shrink_zone+0xe0/0xfa [ 125.327560] [] shrink_caches+0x6d/0x6f [ 125.332581] [] try_to_free_pages+0xd0/0x1b5 [ 125.338056] [] __alloc_pages+0x135/0x2e8 [ 125.343258] [] tcp_sendmsg+0xaa0/0xb78 [ 125.348281] [] inet_sendmsg+0x48/0x53 [ 125.353212] [] sock_sendmsg+0xb8/0xd3 [ 125.358147] [] kernel_sendmsg+0x42/0x4f [ 125.363259] [] sock_no_sendpage+0x5e/0x77 [ 125.368556] [] xs_tcp_send_request+0x2af/0x375 then the socket is blocked until memory is reclaimed, and no progress can ever be made. Try to access the emergency pools by using GFP_ATOMIC. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0a51fd46a84..77e8800d412 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -990,6 +990,7 @@ static void xs_udp_connect_worker(void *args) sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; xprt_set_connected(xprt); @@ -1074,6 +1075,7 @@ static void xs_tcp_connect_worker(void *args) sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; + sk->sk_allocation = GFP_ATOMIC; /* socket options */ sk->sk_userlocks |= SOCK_BINDPORT_LOCK; -- cgit v1.2.3 From 29884df0d89c1df0dec3449405bc41569bb44800 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 13 Dec 2005 16:13:54 -0500 Subject: NFS: Fix another O_DIRECT race Ensure we call unmap_mapping_range() and sync dirty pages to disk before doing an NFS direct write. Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 24 ++++++------------------ fs/nfs/file.c | 23 ++++------------------- fs/nfs/inode.c | 28 +++++++++++++++++++++++----- include/linux/nfs_fs.h | 1 + 4 files changed, 34 insertions(+), 42 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b497c71384e..07922881760 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -678,15 +678,9 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t if (!count) goto out; - if (mapping->nrpages) { - retval = filemap_fdatawrite(mapping); - if (retval == 0) - retval = nfs_wb_all(inode); - if (retval == 0) - retval = filemap_fdatawait(mapping); - if (retval) - goto out; - } + retval = nfs_sync_mapping(mapping); + if (retval) + goto out; retval = nfs_direct_read(inode, ctx, &iov, pos, 1); if (retval > 0) @@ -764,15 +758,9 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, if (!count) goto out; - if (mapping->nrpages) { - retval = filemap_fdatawrite(mapping); - if (retval == 0) - retval = nfs_wb_all(inode); - if (retval == 0) - retval = filemap_fdatawait(mapping); - if (retval) - goto out; - } + retval = nfs_sync_mapping(mapping); + if (retval) + goto out; retval = nfs_direct_write(inode, ctx, &iov, pos, 1); if (mapping->nrpages) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 57d3e77d97e..eb5cd4c3bbf 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -433,11 +433,7 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) * Flush all pending writes before doing anything * with locks.. */ - filemap_fdatawrite(filp->f_mapping); - down(&inode->i_sem); - nfs_wb_all(inode); - up(&inode->i_sem); - filemap_fdatawait(filp->f_mapping); + nfs_sync_mapping(filp->f_mapping); /* NOTE: special case * If we're signalled while cleaning up locks on process exit, we @@ -465,15 +461,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) * Flush all pending writes before doing anything * with locks.. */ - status = filemap_fdatawrite(filp->f_mapping); - if (status == 0) { - down(&inode->i_sem); - status = nfs_wb_all(inode); - up(&inode->i_sem); - if (status == 0) - status = filemap_fdatawait(filp->f_mapping); - } - if (status < 0) + status = nfs_sync_mapping(filp->f_mapping); + if (status != 0) goto out; lock_kernel(); @@ -497,11 +486,7 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) * Make sure we clear the cache whenever we try to get the lock. * This makes locking act as a cache coherency point. */ - filemap_fdatawrite(filp->f_mapping); - down(&inode->i_sem); - nfs_wb_all(inode); /* we may have slept */ - up(&inode->i_sem); - filemap_fdatawait(filp->f_mapping); + nfs_sync_mapping(filp->f_mapping); nfs_zap_caches(inode); out: rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index afd75d0463f..432f41cd75e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -640,6 +640,27 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) return 0; } +/** + * nfs_sync_mapping - helper to flush all mmapped dirty data to disk + */ +int nfs_sync_mapping(struct address_space *mapping) +{ + int ret; + + if (mapping->nrpages == 0) + return 0; + unmap_mapping_range(mapping, 0, 0, 0); + ret = filemap_fdatawrite(mapping); + if (ret != 0) + goto out; + ret = filemap_fdatawait(mapping); + if (ret != 0) + goto out; + ret = nfs_wb_all(mapping->host); +out: + return ret; +} + /* * Invalidate the local caches */ @@ -1179,11 +1200,8 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) struct nfs_inode *nfsi = NFS_I(inode); if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { - if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(mapping) == 0) - filemap_fdatawait(mapping); - nfs_wb_all(inode); - } + if (S_ISREG(inode->i_mode)) + nfs_sync_mapping(mapping); invalidate_inode_pages2(mapping); spin_lock(&inode->i_lock); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 12787a9b025..2516adeccec 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -291,6 +291,7 @@ static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long /* * linux/fs/nfs/inode.c */ +extern int nfs_sync_mapping(struct address_space *mapping); extern void nfs_zap_caches(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); -- cgit v1.2.3 From 48e49187753ec3b4fa84a7165c9b7a59f3875b56 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Dec 2005 17:11:22 -0500 Subject: SUNRPC: Fix "EPIPE" error on mount of rpcsec_gss-protected partitions gss_create_upcall() should not error just because rpc.gssd closed the pipe on its end. Instead, it should requeue the pending requests and then retry. Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 6 ++++-- net/sunrpc/rpc_pipe.c | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index f44f46f1d8e..8d782282ec1 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -638,7 +638,7 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) gss_msg); atomic_inc(&gss_msg->count); gss_unhash_msg(gss_msg); - if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) { + if (msg->errno == -ETIMEDOUT) { unsigned long now = jiffies; if (time_after(now, ratelimit)) { printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n" @@ -786,7 +786,9 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags) cred->gc_flags = 0; cred->gc_base.cr_ops = &gss_credops; cred->gc_service = gss_auth->service; - err = gss_create_upcall(gss_auth, cred); + do { + err = gss_create_upcall(gss_auth, cred); + } while (err == -EAGAIN); if (err < 0) goto out_err; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index c76ea221798..16a2458f38f 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -174,7 +174,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) goto out; msg = (struct rpc_pipe_msg *)filp->private_data; if (msg != NULL) { - msg->errno = -EPIPE; + msg->errno = -EAGAIN; list_del_init(&msg->list); rpci->ops->destroy_msg(msg); } @@ -183,7 +183,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp) if (filp->f_mode & FMODE_READ) rpci->nreaders --; if (!rpci->nreaders) - __rpc_purge_upcall(inode, -EPIPE); + __rpc_purge_upcall(inode, -EAGAIN); if (rpci->ops->release_pipe) rpci->ops->release_pipe(inode); out: -- cgit v1.2.3 From 9b5b1f5bf9dcdb6f23abf65977a675eb4deba3c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Dec 2005 17:11:25 -0500 Subject: NLM: Fix Oops in nlmclnt_mark_reclaim() When mixing -olock and -onolock mounts on the same client, we have to check that fl->fl_u.nfs_fl.owner is set before dereferencing it. Signed-off-by: Trond Myklebust --- fs/lockd/clntlock.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 006bb9e1457..3eaf6e70108 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -157,6 +157,8 @@ void nlmclnt_mark_reclaim(struct nlm_host *host) inode = fl->fl_file->f_dentry->d_inode; if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) continue; + if (fl->fl_u.nfs_fl.owner == NULL) + continue; if (fl->fl_u.nfs_fl.owner->host != host) continue; if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) @@ -226,6 +228,8 @@ restart: inode = fl->fl_file->f_dentry->d_inode; if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) continue; + if (fl->fl_u.nfs_fl.owner == NULL) + continue; if (fl->fl_u.nfs_fl.owner->host != host) continue; if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM)) -- cgit v1.2.3