aboutsummaryrefslogtreecommitdiff
path: root/fs/ceph
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2010-02-25 12:40:45 -0800
committer: Sage Weil <sage@newdream.net> 2010-02-25 12:40:45 -0800
commit: e80a52d14f868059e8ec790c9fae88cdb8a1df98 (patch)
tree: 8353f33245509d7c3bd7fb25f5c254df6db8db9b /fs/ceph
parent: 161fd65ac934608345aed35226fc889ea3b0b500 (diff)
ceph: fix connection fault STANDBY check
Move any out_sent messages to out_queue _before_ checking if out_queue is empty and going to STANDBY, or else we may drop something that was never acked.

And clean up the code a bit (less goto).

Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- fs/ceph/messenger.c | 31
1 files changed, 13 insertions, 18 deletions
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 9ea7b763c8d..0ddc2c75f6b 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -1853,32 +1853,27 @@ static void ceph_fault(struct ceph_connection *con)
con->in_msg = NULL;
}
+ /* Requeue anything that hasn't been acked */
+ list_splice_init(&con->out_sent, &con->out_queue);
/* If there are no messages in the queue, place the connection
* in a STANDBY state (i.e., don't try to reconnect just yet). */
if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
dout("fault setting STANDBY\n");
set_bit(STANDBY, &con->state);
- mutex_unlock(&con->mutex);
- goto out;
+ } else {
+ /* retry after a delay. */
+ if (con->delay == 0)
+ con->delay = BASE_DELAY_INTERVAL;
+ else if (con->delay < MAX_DELAY_INTERVAL)
+ con->delay *= 2;
+ dout("fault queueing %p delay %lu\n", con, con->delay);
+ con->ops->get(con);
+ if (queue_delayed_work(ceph_msgr_wq, &con->work,
+ round_jiffies_relative(con->delay)) == 0)
+ con->ops->put(con);
}
- /* Requeue anything that hasn't been acked, and retry after a
- * delay. */
- list_splice_init(&con->out_sent, &con->out_queue);
-
- if (con->delay == 0)
- con->delay = BASE_DELAY_INTERVAL;
- else if (con->delay < MAX_DELAY_INTERVAL)
- con->delay *= 2;
-
- /* explicitly schedule work to try to reconnect again later. */
- dout("fault queueing %p delay %lu\n", con, con->delay);
- con->ops->get(con);
- if (queue_delayed_work(ceph_msgr_wq, &con->work,
- round_jiffies_relative(con->delay)) == 0)
- con->ops->put(con);
-
out_unlock:
mutex_unlock(&con->mutex);
out: