From 8b0e7b2cf35aa827ed5efb508c1879481b970496 Mon Sep 17 00:00:00 2001 From: David Teigland Date: Fri, 18 May 2007 09:03:35 -0500 Subject: [DLM] wait for config check during join [6/6] Joining the lockspace should wait for the initial round of inter-node config checks to complete before returning. This way, if there's a configuration mismatch between the joining node and the existing nodes, the join can fail and return an error to the application. Signed-off-by: David Teigland Signed-off-by: Steven Whitehouse --- fs/dlm/dlm_internal.h | 2 ++ fs/dlm/lockspace.c | 30 ++++++++++++++++++++++++++++++ fs/dlm/member.c | 6 ++++++ fs/dlm/rcom.c | 4 ++-- 4 files changed, 40 insertions(+), 2 deletions(-) (limited to 'fs/dlm') diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index a8d6e993697..03ba6c4fd5c 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -472,6 +472,8 @@ struct dlm_ls { wait_queue_head_t ls_uevent_wait; /* user part of join/leave */ int ls_uevent_result; + struct completion ls_members_done; + int ls_members_result; struct miscdevice ls_device; diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index a3a50e67e4d..c8f0c15ac16 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in) else kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); + log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving"); + + /* dlm_controld will see the uevent, do the necessary group management + and then write to sysfs to wake us */ + error = wait_event_interruptible(ls->ls_uevent_wait, test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); + + log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result); + if (error) goto out; error = ls->ls_uevent_result; out: + if (error) + log_error(ls, "group %s failed %d %d", in ? "join" : "leave", + error, ls->ls_uevent_result); return error; } @@ -490,6 +501,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace, init_waitqueue_head(&ls->ls_uevent_wait); ls->ls_uevent_result = 0; + init_completion(&ls->ls_members_done); + ls->ls_members_result = -1; ls->ls_recoverd_task = NULL; mutex_init(&ls->ls_recoverd_active); @@ -540,10 +553,21 @@ static int new_lockspace(char *name, int namelen, void **lockspace, /* let kobject handle freeing of ls if there's an error */ do_unreg = 1; + /* This uevent triggers dlm_controld in userspace to add us to the + group of nodes that are members of this lockspace (managed by the + cluster infrastructure.) Once it's done that, it tells us who the + current lockspace members are (via configfs) and then tells the + lockspace to start running (via sysfs) in dlm_ls_start(). */ + error = do_uevent(ls, 1); if (error) goto out_stop; + wait_for_completion(&ls->ls_members_done); + error = ls->ls_members_result; + if (error) + goto out_members; + dlm_create_debug_file(ls); log_debug(ls, "join complete"); @@ -551,6 +575,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace, *lockspace = ls; return 0; + out_members: + do_uevent(ls, 0); + dlm_clear_members(ls); + kfree(ls->ls_node_array); out_stop: dlm_recoverd_stop(ls); out_delist: @@ -588,6 +616,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace, error = new_lockspace(name, namelen, lockspace, flags, lvblen); if (!error) ls_count++; + else if (!ls_count) + threads_stop(); out: mutex_unlock(&ls_lock); return error; diff --git a/fs/dlm/member.c b/fs/dlm/member.c index f08faec3d85..073599dced2 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) *neg_out = neg; error = ping_members(ls); + if (!error || error == -EPROTO) { + /* new_lockspace() may be waiting to know if the config + is good or bad */ + ls->ls_members_result = error; + complete(&ls->ls_members_done); + } if (error) goto out; diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 6bfbd615380..f71c23542f0 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) log_error(ls, "version mismatch: %x nodeid %d: %x", DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid, rc->rc_header.h_version); - return -EINVAL; + return -EPROTO; } if (rf->rf_lvblen != ls->ls_lvblen || @@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid) log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x", ls->ls_lvblen, ls->ls_exflags, nodeid, rf->rf_lvblen, rf->rf_lsflags); - return -EINVAL; + return -EPROTO; } return 0; } -- cgit v1.2.3