From e2faea4ce340f199c1957986c4c3dc2de76f5746 Mon Sep 17 00:00:00 2001 From: Kurt Hackel Date: Thu, 12 Jan 2006 14:24:55 -0800 Subject: [PATCH] ocfs2/dlm: fixes * fix a hang which can occur during shutdown migration * do not allow nodes to join during recovery * when restarting lock mastery, do not ignore nodes which come up * more than one node could become recovery master, fix this * sleep to allow some time for heartbeat state to catch up to network * extra debug info for bad recovery state problems * make DLM_RECO_NODE_DATA_DONE a valid state for non-master recovery nodes * prune all locks from dead nodes on $RECOVERY lock resources * do NOT automatically add new nodes to mle nodemaps until they have properly joined the domain * make sure dlm_pick_recovery_master only exits when all nodes have synced * properly handle dlmunlock errors in dlm_pick_recovery_master * do not propagate network errors in dlm_send_begin_reco_message * dead nodes were not being put in the recovery map sometimes, fix this * dlmunlock was failing to clear the unlock actions on DLM_DENIED Signed-off-by: Kurt Hackel Signed-off-by: Mark Fasheh --- fs/ocfs2/dlm/dlmmaster.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs/ocfs2/dlm/dlmmaster.c') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 27e984f7e4c..a3194fe173d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -1050,17 +1050,10 @@ static int dlm_restart_lock_mastery(struct dlm_ctxt *dlm, node = dlm_bitmap_diff_iter_next(&bdi, &sc); while (node >= 0) { if (sc == NODE_UP) { - /* a node came up. easy. might not even need - * to talk to it if its node number is higher - * or if we are already blocked. */ - mlog(0, "node up! %d\n", node); - if (blocked) - goto next; - - if (node > dlm->node_num) { - mlog(0, "node > this node. skipping.\n"); - goto next; - } + /* a node came up. clear any old vote from + * the response map and set it in the vote map + * then restart the mastery. */ + mlog(ML_NOTICE, "node %d up while restarting\n", node); /* redo the master request, but only for the new node */ mlog(0, "sending request to new node\n"); @@ -2005,6 +1998,15 @@ fail: break; mlog(0, "timed out during migration\n"); + /* avoid hang during shutdown when migrating lockres + * to a node which also goes down */ + if (dlm_is_node_dead(dlm, target)) { + mlog(0, "%s:%.*s: expected migration target %u " + "is no longer up. restarting.\n", + dlm->name, res->lockname.len, + res->lockname.name, target); + ret = -ERESTARTSYS; + } } if (ret == -ERESTARTSYS) { /* migration failed, detach and clean up mle */ -- cgit v1.2.3