From d4400156d415540086c34a06e5d233122d6bf56a Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Tue, 31 Oct 2006 11:55:56 -0600
Subject: [DLM] fix requestqueue race

Red Hat BZ 211914

There's a race between dlm_recoverd (1) enabling locking and (2) clearing
out the requestqueue, and dlm_recvd (1) checking if locking is enabled and
(2) adding a message to the requestqueue.  An ordering of recoverd(1),
recvd(1), recvd(2), recoverd(2) will result in a message being left on the
requestqueue.  The fix is to have dlm_recvd check whether dlm_recoverd has
enabled locking after taking the requestqueue mutex and, if it has, to
process the message instead of queueing it.

Signed-off-by: David Teigland
Signed-off-by: Steven Whitehouse
---
 fs/dlm/requestqueue.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

(limited to 'fs/dlm/requestqueue.c')

diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 7b2b089634a..0226d2a0a0f 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -30,26 +30,39 @@ struct rq_entry {
  * lockspace is enabled on some while still suspended on others.
  */
 
-void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
+int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
 {
 	struct rq_entry *e;
 	int length = hd->h_length;
+	int rv = 0;
 
 	if (dlm_is_removed(ls, nodeid))
-		return;
+		return 0;
 
 	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory\n");
-		return;
+		return 0;
 	}
 
 	e->nodeid = nodeid;
 	memcpy(e->request, hd, length);
 
+	/* We need to check dlm_locking_stopped() after taking the mutex to
+	   avoid a race where dlm_recoverd enables locking and runs
+	   process_requestqueue between our earlier dlm_locking_stopped check
+	   and this addition to the requestqueue. */
+
 	mutex_lock(&ls->ls_requestqueue_mutex);
-	list_add_tail(&e->list, &ls->ls_requestqueue);
+	if (dlm_locking_stopped(ls))
+		list_add_tail(&e->list, &ls->ls_requestqueue);
+	else {
+		log_debug(ls, "dlm_add_requestqueue skip from %d", nodeid);
+		kfree(e);
+		rv = -EAGAIN;
+	}
 	mutex_unlock(&ls->ls_requestqueue_mutex);
+	return rv;
 }
 
 int dlm_process_requestqueue(struct dlm_ls *ls)
--
cgit v1.2.3
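The new int return changes the contract with the receive path: 0 means the
message was queued (or intentionally dropped), while -EAGAIN tells the caller
that recovery re-enabled locking in the meantime and the message should be
processed normally.  The caller itself lives outside this file and is not part
of the patch above; the fragment below is only a sketch of that retry logic,
and the function name receive_sketch is made up for illustration.

static void receive_sketch(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
{
	if (dlm_locking_stopped(ls)) {
		/* recovery appears to be running: try to save the message
		   for dlm_process_requestqueue() to replay later */
		if (!dlm_add_requestqueue(ls, nodeid, hd))
			return;

		/* -EAGAIN: dlm_recoverd re-enabled locking between the
		   check above and the queue attempt, so fall through and
		   handle the message as if locking had never stopped */
	}

	/* normal message processing would go here */
}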
From 2896ee37ccc1f9acb244c9b02becb74a43661009 Mon Sep 17 00:00:00 2001
From: David Teigland
Date: Mon, 27 Nov 2006 11:31:22 -0600
Subject: [DLM] fix add_requestqueue checking nodes list

Requests that arrive after recovery has started are saved in the
requestqueue and processed after recovery is done.  Some of these requests
are purged during recovery if they are from nodes that have been removed.

We move the purging of the requests (dlm_purge_requestqueue) to later in
the recovery sequence, which allows the routine saving requests
(dlm_add_requestqueue) to stop filtering out requests by nodeid, since the
same filtering is done by the purge.  The current code has
add_requestqueue filtering by nodeid but doesn't hold any locks when
accessing the list of current nodes.  This also means that we need to call
the purge routine when the lockspace is being shut down, since the add
routine will no longer be rejecting requests itself.

Signed-off-by: David Teigland
Signed-off-by: Steven Whitehouse
---
 fs/dlm/requestqueue.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs/dlm/requestqueue.c')

diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index 0226d2a0a0f..65008d79c96 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -36,9 +36,6 @@ int dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_header *hd)
 	int length = hd->h_length;
 	int rv = 0;
 
-	if (dlm_is_removed(ls, nodeid))
-		return 0;
-
 	e = kmalloc(sizeof(struct rq_entry) + length, GFP_KERNEL);
 	if (!e) {
 		log_print("dlm_add_requestqueue: out of memory\n");
@@ -133,6 +130,10 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
 {
 	uint32_t type = ms->m_type;
 
+	/* the ls is being cleaned up and freed by release_lockspace */
+	if (!ls->ls_count)
+		return 1;
+
 	if (dlm_is_removed(ls, nodeid))
 		return 1;
 
--
cgit v1.2.3
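With the nodeid filter gone from dlm_add_requestqueue(), everything depends on
the purge pass walking the saved entries and asking purge_request() about each
one, both after recovery and now also at shutdown.  That loop is not shown in
the diff above; the version below is only a sketch of what
dlm_purge_requestqueue() in this file roughly looks like, written from the
surrounding code rather than copied from it, and the _sketch name is
illustrative.

void purge_requestqueue_sketch(struct dlm_ls *ls)
{
	struct dlm_message *ms;
	struct rq_entry *e, *safe;

	mutex_lock(&ls->ls_requestqueue_mutex);
	list_for_each_entry_safe(e, safe, &ls->ls_requestqueue, list) {
		ms = (struct dlm_message *) e->request;

		/* purge_request() decides whether this saved message should
		   be dropped: the node was removed, the lockspace is going
		   away, or the request is not worth replaying after recovery */
		if (purge_request(ls, ms, e->nodeid)) {
			list_del(&e->list);
			kfree(e);
		}
	}
	mutex_unlock(&ls->ls_requestqueue_mutex);
}

The !ls->ls_count test added above is what makes the shutdown call safe: by the
time release_lockspace runs the purge, the count has dropped and every saved
entry is simply discarded.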