From eb9b55ab4d73280597fd183b367d50452f4d7846 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Mar 2009 20:33:16 -0400 Subject: SUNRPC: Tighten up the task locking rules in __rpc_execute() We should probably not be testing any flags after we've cleared the RPC_TASK_RUNNING flag, since rpc_make_runnable() is then free to assign the rpc_task to another workqueue, which may then destroy it. We can fix any races with rpc_make_runnable() by ensuring that we only clear the RPC_TASK_RUNNING flag while holding the rpc_wait_queue->lock that the task is supposed to be sleeping on (and then checking whether or not the task really is sleeping). Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 385f427beda..ff50a054686 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -293,11 +293,6 @@ static void rpc_make_runnable(struct rpc_task *task) rpc_clear_queued(task); if (rpc_test_and_set_running(task)) return; - /* We might have raced */ - if (RPC_IS_QUEUED(task)) { - rpc_clear_running(task); - return; - } if (RPC_IS_ASYNC(task)) { int status; @@ -607,7 +602,9 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) */ static void __rpc_execute(struct rpc_task *task) { - int status = 0; + struct rpc_wait_queue *queue; + int task_is_async = RPC_IS_ASYNC(task); + int status = 0; dprintk("RPC: %5u __rpc_execute flags=0x%x\n", task->tk_pid, task->tk_flags); @@ -647,15 +644,25 @@ static void __rpc_execute(struct rpc_task *task) */ if (!RPC_IS_QUEUED(task)) continue; - rpc_clear_running(task); - if (RPC_IS_ASYNC(task)) { - /* Careful! we may have raced... */ - if (RPC_IS_QUEUED(task)) - return; - if (rpc_test_and_set_running(task)) - return; + /* + * The queue->lock protects against races with + * rpc_make_runnable(). + * + * Note that once we clear RPC_TASK_RUNNING on an asynchronous + * rpc_task, rpc_make_runnable() can assign it to a + * different workqueue. We therefore cannot assume that the + * rpc_task pointer may still be dereferenced. + */ + queue = task->tk_waitqueue; + spin_lock_bh(&queue->lock); + if (!RPC_IS_QUEUED(task)) { + spin_unlock_bh(&queue->lock); continue; } + rpc_clear_running(task); + spin_unlock_bh(&queue->lock); + if (task_is_async) + return; /* sync task: sleep here */ dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); -- cgit v1.2.3 From fba91afbec2c004e2c8733ae9e0ca6998e962c64 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:06:41 -0400 Subject: SUNRPC: Fix an Oops due to socket not set up yet... We can Oops in both xs_udp_send_request() and xs_tcp_send_request() if the call to xs_sendpages() returns an error due to the socket not yet being set up. Deal with that situation by returning a new error: ENOTSOCK, so that we know to avoid dereferencing transport->sock. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5cbb404c4cd..a71fefd6191 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -467,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, int err, sent = 0; if (unlikely(!sock)) - return -ENOTCONN; + return -ENOTSOCK; clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); if (base != 0) { @@ -594,6 +594,10 @@ static int xs_udp_send_request(struct rpc_task *task) } switch (status) { + case -ENOTSOCK: + status = -ENOTCONN; + /* Should we call xs_close() here? */ + break; case -EAGAIN: xs_nospace(task); break; @@ -693,6 +697,10 @@ static int xs_tcp_send_request(struct rpc_task *task) } switch (status) { + case -ENOTSOCK: + status = -ENOTCONN; + /* Should we call xs_close() here? */ + break; case -EAGAIN: xs_nospace(task); break; -- cgit v1.2.3 From 01d37c428ae080563c0a3bb8bdfa88c65a6891d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:09:39 -0400 Subject: SUNRPC: xprt_connect() don't abort the task if the transport isn't bound If the transport isn't bound, then we should just return ENOTCONN, letting call_connect_status() and/or call_status() deal with retrying. Currently, we appear to abort all pending tasks with an EIO error. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 29e401bb612..62098d101a1 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -663,7 +663,7 @@ void xprt_connect(struct rpc_task *task) xprt, (xprt_connected(xprt) ? "is" : "is not")); if (!xprt_bound(xprt)) { - task->tk_status = -EIO; + task->tk_status = -EAGAIN; return; } if (!xprt_lock_write(xprt, task)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a71fefd6191..29c71e645b2 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -577,6 +577,8 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); + if (!xprt_bound(xprt)) + return -ENOTCONN; status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, @@ -1531,7 +1533,7 @@ static void xs_udp_connect_worker4(struct work_struct *work) struct socket *sock = transport->sock; int err, status = -EIO; - if (xprt->shutdown || !xprt_bound(xprt)) + if (xprt->shutdown) goto out; /* Start by resetting any existing state */ @@ -1572,7 +1574,7 @@ static void xs_udp_connect_worker6(struct work_struct *work) struct socket *sock = transport->sock; int err, status = -EIO; - if (xprt->shutdown || !xprt_bound(xprt)) + if (xprt->shutdown) goto out; /* Start by resetting any existing state */ @@ -1656,6 +1658,9 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) write_unlock_bh(&sk->sk_callback_lock); } + if (!xprt_bound(xprt)) + return -ENOTCONN; + /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; @@ -1676,7 +1681,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) struct socket *sock = transport->sock; int err, status = -EIO; - if (xprt->shutdown || !xprt_bound(xprt)) + if (xprt->shutdown) goto out; if (!sock) { @@ -1736,7 +1741,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) struct socket *sock = transport->sock; int err, status = -EIO; - if (xprt->shutdown || !xprt_bound(xprt)) + if (xprt->shutdown) goto out; if (!sock) { -- cgit v1.2.3