From 393d2cc354d150b8b4bb888a9da7db4c935e12bd Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 00:59:54 -0800
Subject: [PATCH] ipmi: use refcount in message handler

This patch is rather large, but it really can't be done in smaller chunks
easily and I believe it is an important change.  This has been out and tested
for a while in the latest IPMI driver release.  There are no functional
changes, just changes as necessary to convert the locking over (and a few
minor style updates).

The IPMI driver uses read/write locks to ensure that things exist while they
are in use.  This is bad from a number of points of view.  This patch removes
the rwlocks and uses refcounts and RCU lists to manage what the locks did.

Signed-off-by: Corey Minyard <minyard@acm.org>
Cc: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_msghandler.c | 953 +++++++++++++++++++-----------------
 1 file changed, 498 insertions(+), 455 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 32fa82c78c7..320d7f035bf 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -38,13 +38,13 @@
 #include <linux/sched.h>
 #include <linux/poll.h>
 #include <linux/spinlock.h>
-#include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/ipmi.h>
 #include <linux/ipmi_smi.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
 #include <linux/proc_fs.h>
+#include <linux/rcupdate.h>
 
 #define PFX "IPMI message handler: "
 
@@ -65,10 +65,19 @@ struct proc_dir_entry *proc_ipmi_root = NULL;
    the max message timer.  This is in milliseconds. */
 #define MAX_MSG_TIMEOUT		60000
 
+
+/*
+ * The main "user" data structure.
+ */
 struct ipmi_user
 {
 	struct list_head link;
 
+	/* Set to "0" when the user is destroyed. */
+	int valid;
+
+	struct kref refcount;
+
 	/* The upper layer that handles receive messages. */
 	struct ipmi_user_hndl *handler;
 	void             *handler_data;
@@ -87,6 +96,15 @@ struct cmd_rcvr
 	ipmi_user_t   user;
 	unsigned char netfn;
 	unsigned char cmd;
+
+	/*
+	 * This is used to form a linked list during mass deletion.
+	 * Since this is in an RCU list, we cannot use the link above
+	 * or change any data until the RCU period completes.  So we
+	 * use this next variable during mass deletion so we can have
+	 * a list and don't have to wait and restart the search on
+	 * every individual deletion of a command. */
+	struct cmd_rcvr *next;
 };
 
 struct seq_table
@@ -150,13 +168,11 @@ struct ipmi_smi
 	/* What interface number are we? */
 	int intf_num;
 
-	/* The list of upper layers that are using me.  We read-lock
-           this when delivering messages to the upper layer to keep
-           the user from going away while we are processing the
-           message.  This means that you cannot add or delete a user
-           from the receive callback. */
-	rwlock_t                users_lock;
-	struct list_head        users;
+	struct kref refcount;
+
+	/* The list of upper layers that are using me.  seq_lock
+	 * protects this. */
+	struct list_head users;
 
 	/* Used for wake ups at startup. */
 	wait_queue_head_t waitq;
@@ -193,7 +209,7 @@ struct ipmi_smi
 
 	/* The list of command receivers that are registered for commands
 	   on this interface. */
-	rwlock_t	 cmd_rcvr_lock;
+	spinlock_t       cmd_rcvrs_lock;
 	struct list_head cmd_rcvrs;
 
 	/* Events that were queues because no one was there to receive
@@ -296,16 +312,17 @@ struct ipmi_smi
 	unsigned int events;
 };
 
+/* Used to mark an interface entry that cannot be used but is not a
+ * free entry, either, primarily used at creation and deletion time so
+ * a slot doesn't get reused too quickly. */
+#define IPMI_INVALID_INTERFACE_ENTRY ((ipmi_smi_t) ((long) 1))
+#define IPMI_INVALID_INTERFACE(i) (((i) == NULL) \
+				   || ((i) == IPMI_INVALID_INTERFACE_ENTRY))
+
 #define MAX_IPMI_INTERFACES 4
 static ipmi_smi_t ipmi_interfaces[MAX_IPMI_INTERFACES];
 
-/* Used to keep interfaces from going away while operations are
-   operating on interfaces.  Grab read if you are not modifying the
-   interfaces, write if you are. */
-static DECLARE_RWSEM(interfaces_sem);
-
-/* Directly protects the ipmi_interfaces data structure.  This is
-   claimed in the timer interrupt. */
+/* Directly protects the ipmi_interfaces data structure. */
 static DEFINE_SPINLOCK(interfaces_lock);
 
 /* List of watchers that want to know when smi's are added and
@@ -313,20 +330,73 @@ static DEFINE_SPINLOCK(interfaces_lock);
 static struct list_head smi_watchers = LIST_HEAD_INIT(smi_watchers);
 static DECLARE_RWSEM(smi_watchers_sem);
 
+
+static void free_recv_msg_list(struct list_head *q)
+{
+	struct ipmi_recv_msg *msg, *msg2;
+
+	list_for_each_entry_safe(msg, msg2, q, link) {
+		list_del(&msg->link);
+		ipmi_free_recv_msg(msg);
+	}
+}
+
+static void clean_up_interface_data(ipmi_smi_t intf)
+{
+	int              i;
+	struct cmd_rcvr  *rcvr, *rcvr2;
+	unsigned long    flags;
+	struct list_head list;
+
+	free_recv_msg_list(&intf->waiting_msgs);
+	free_recv_msg_list(&intf->waiting_events);
+
+	/* Wholesale remove all the entries from the list in the
+	 * interface and wait for RCU to know that none are in use. */
+	spin_lock_irqsave(&intf->cmd_rcvrs_lock, flags);
+	list_add_rcu(&list, &intf->cmd_rcvrs);
+	list_del_rcu(&intf->cmd_rcvrs);
+	spin_unlock_irqrestore(&intf->cmd_rcvrs_lock, flags);
+	synchronize_rcu();
+
+	list_for_each_entry_safe(rcvr, rcvr2, &list, link)
+		kfree(rcvr);
+
+	for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) {
+		if ((intf->seq_table[i].inuse)
+		    && (intf->seq_table[i].recv_msg))
+		{
+			ipmi_free_recv_msg(intf->seq_table[i].recv_msg);
+		}
+	}
+}
+
+static void intf_free(struct kref *ref)
+{
+	ipmi_smi_t intf = container_of(ref, struct ipmi_smi, refcount);
+
+	clean_up_interface_data(intf);
+	kfree(intf);
+}
+
 int ipmi_smi_watcher_register(struct ipmi_smi_watcher *watcher)
 {
-	int i;
+	int           i;
+	unsigned long flags;
 
-	down_read(&interfaces_sem);
 	down_write(&smi_watchers_sem);
 	list_add(&(watcher->link), &smi_watchers);
+	up_write(&smi_watchers_sem);
+	spin_lock_irqsave(&interfaces_lock, flags);
 	for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
-		if (ipmi_interfaces[i] != NULL) {
-			watcher->new_smi(i);
-		}
+		ipmi_smi_t intf = ipmi_interfaces[i];
+		if (IPMI_INVALID_INTERFACE(intf))
+			continue;
+		spin_unlock_irqrestore(&interfaces_lock, flags);
+		watcher->new_smi(i);
+		spin_lock_irqsave(&interfaces_lock, flags);
 	}
-	up_write(&smi_watchers_sem);
-	up_read(&interfaces_sem);
+	spin_unlock_irqrestore(&interfaces_lock, flags);
 	return 0;
 }
 
@@ -471,8 +541,8 @@ static void deliver_response(struct ipmi_recv_msg *msg)
 		}
 		ipmi_free_recv_msg(msg);
 	} else {
-		msg->user->handler->ipmi_recv_hndl(msg,
-						   msg->user->handler_data);
+		ipmi_user_t user = msg->user;
+		user->handler->ipmi_recv_hndl(msg, user->handler_data);
 	}
 }
 
@@ -662,15 +732,18 @@ int ipmi_create_user(unsigned int          if_num,
 	if (! new_user)
 		return -ENOMEM;
 
-	down_read(&interfaces_sem);
-	if ((if_num >= MAX_IPMI_INTERFACES) || ipmi_interfaces[if_num] == NULL)
-	{
-		rv = -EINVAL;
-		goto out_unlock;
+	spin_lock_irqsave(&interfaces_lock, flags);
+	intf = (if_num < MAX_IPMI_INTERFACES) ? ipmi_interfaces[if_num] : NULL;
+	if (IPMI_INVALID_INTERFACE(intf)) {	/* also catches a bad if_num */
+		spin_unlock_irqrestore(&interfaces_lock, flags);
+		kfree(new_user);	/* don't leak the new user on failure */
+		return -EINVAL;
 	}
 
-	intf = ipmi_interfaces[if_num];
+	kref_get(&intf->refcount);	/* each user holds an interface ref */
+	spin_unlock_irqrestore(&interfaces_lock, flags);
 
+	kref_init(&new_user->refcount);
 	new_user->handler = handler;
 	new_user->handler_data = handler_data;
 	new_user->intf = intf;
@@ -678,98 +751,92 @@ int ipmi_create_user(unsigned int          if_num,
 
 	if (!try_module_get(intf->handlers->owner)) {
 		rv = -ENODEV;
-		goto out_unlock;
+		goto out_err;
 	}
 
 	if (intf->handlers->inc_usecount) {
 		rv = intf->handlers->inc_usecount(intf->send_info);
 		if (rv) {
 			module_put(intf->handlers->owner);
-			goto out_unlock;
+			goto out_err;
 		}
 	}
 
-	write_lock_irqsave(&intf->users_lock, flags);
-	list_add_tail(&new_user->link, &intf->users);
-	write_unlock_irqrestore(&intf->users_lock, flags);
-
- out_unlock:	
-	if (rv) {
-		kfree(new_user);
-	} else {
-		*user = new_user;
-	}
+	new_user->valid = 1;
+	spin_lock_irqsave(&intf->seq_lock, flags);
+	list_add_rcu(&new_user->link, &intf->users);
+	spin_unlock_irqrestore(&intf->seq_lock, flags);
+	*user = new_user;
+	return 0;
 
-	up_read(&interfaces_sem);
+ out_err:
+	kfree(new_user);
+	kref_put(&intf->refcount, intf_free);
 	return rv;
 }
 
-static int ipmi_destroy_user_nolock(ipmi_user_t user)
+static void free_user(struct kref *ref)
+{
+	ipmi_user_t user = container_of(ref, struct ipmi_user, refcount);
+	kfree(user);
+}
+
+int ipmi_destroy_user(ipmi_user_t user)
 {
-	int              rv = -ENODEV;
-	ipmi_user_t      t_user;
-	struct cmd_rcvr  *rcvr, *rcvr2;
+	int              rv = 0;
+	ipmi_smi_t       intf = user->intf;
 	int              i;
 	unsigned long    flags;
+	struct cmd_rcvr  *rcvr;
+	struct list_head *entry1, *entry2;
+	struct cmd_rcvr  *rcvrs = NULL;
 
-	/* Find the user and delete them from the list. */
-	list_for_each_entry(t_user, &(user->intf->users), link) {
-		if (t_user == user) {
-			list_del(&t_user->link);
-			rv = 0;
-			break;
-		}
-	}
+	user->valid = 0;	/* Mark destroyed so late responses are dropped. */
 
-	if (rv) {
-		goto out_unlock;
-	}
+	/* Remove the user from the interface's sequence table. */
+	spin_lock_irqsave(&intf->seq_lock, flags);
+	list_del_rcu(&user->link);
 
-	/* Remove the user from the interfaces sequence table. */
-	spin_lock_irqsave(&(user->intf->seq_lock), flags);
 	for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) {
-		if (user->intf->seq_table[i].inuse
-		    && (user->intf->seq_table[i].recv_msg->user == user))
+		if (intf->seq_table[i].inuse
+		    && (intf->seq_table[i].recv_msg->user == user))
 		{
-			user->intf->seq_table[i].inuse = 0;
+			intf->seq_table[i].inuse = 0;
 		}
 	}
-	spin_unlock_irqrestore(&(user->intf->seq_lock), flags);
-
-	/* Remove the user from the command receiver's table. */
-	write_lock_irqsave(&(user->intf->cmd_rcvr_lock), flags);
-	list_for_each_entry_safe(rcvr, rcvr2, &(user->intf->cmd_rcvrs), link) {
+	spin_unlock_irqrestore(&intf->seq_lock, flags);
+
+	/*
+	 * Remove the user from the command receiver's table.  First
+	 * we build a list of everything (not using the standard link,
+	 * since other things may be using it till we do
+	 * synchronize_rcu()) then free everything in that list.
+	 */
+	spin_lock_irqsave(&intf->cmd_rcvrs_lock, flags);
+	list_for_each_safe_rcu(entry1, entry2, &intf->cmd_rcvrs) {
+		rcvr = list_entry(entry1, struct cmd_rcvr, link);
 		if (rcvr->user == user) {
-			list_del(&rcvr->link);
-			kfree(rcvr);
+			list_del_rcu(&rcvr->link);
+			rcvr->next = rcvrs;
+			rcvrs = rcvr;
 		}
 	}
-	write_unlock_irqrestore(&(user->intf->cmd_rcvr_lock), flags);
+	spin_unlock_irqrestore(&intf->cmd_rcvrs_lock, flags);
+	synchronize_rcu();
+	while (rcvrs) {
+		rcvr = rcvrs;
+		rcvrs = rcvr->next;
+		kfree(rcvr);
+	}
 
-	kfree(user);
+	module_put(intf->handlers->owner);
+	if (intf->handlers->dec_usecount)
+		intf->handlers->dec_usecount(intf->send_info);
 
- out_unlock:
+	kref_put(&intf->refcount, intf_free);
 
-	return rv;
-}
-
-int ipmi_destroy_user(ipmi_user_t user)
-{
-	int           rv;
-	ipmi_smi_t    intf = user->intf;
-	unsigned long flags;
+	kref_put(&user->refcount, free_user);
 
-	down_read(&interfaces_sem);
-	write_lock_irqsave(&intf->users_lock, flags);
-	rv = ipmi_destroy_user_nolock(user);
-	if (!rv) {
-		module_put(intf->handlers->owner);
-		if (intf->handlers->dec_usecount)
-			intf->handlers->dec_usecount(intf->send_info);
-	}
-		
-	write_unlock_irqrestore(&intf->users_lock, flags);
-	up_read(&interfaces_sem);
 	return rv;
 }
 
 
@@ -823,62 +890,78 @@ int ipmi_get_my_LUN(ipmi_user_t   user,
 
 int ipmi_set_gets_events(ipmi_user_t user, int val)
 {
-	unsigned long         flags;
-	struct ipmi_recv_msg  *msg, *msg2;
+	unsigned long        flags;
+	ipmi_smi_t           intf = user->intf;
+	struct ipmi_recv_msg *msg, *msg2;
+	struct list_head     msgs;
 
-	read_lock(&(user->intf->users_lock));
-	spin_lock_irqsave(&(user->intf->events_lock), flags);
+	INIT_LIST_HEAD(&msgs);
+
+	spin_lock_irqsave(&intf->events_lock, flags);
 	user->gets_events = val;
 
 	if (val) {
 		/* Deliver any queued events. */
-		list_for_each_entry_safe(msg, msg2, &(user->intf->waiting_events), link) {
+		list_for_each_entry_safe(msg, msg2, &intf->waiting_events, link) {
 			list_del(&msg->link);
-			msg->user = user;
-			deliver_response(msg);
+			list_add_tail(&msg->link, &msgs);
 		}
 	}
-	
-	spin_unlock_irqrestore(&(user->intf->events_lock), flags);
-	read_unlock(&(user->intf->users_lock));
+
+	/* Hold the events lock while doing this to preserve order. */
+	list_for_each_entry_safe(msg, msg2, &msgs, link) {
+		msg->user = user;
+		kref_get(&user->refcount);
+		deliver_response(msg);
+	}
+
+	spin_unlock_irqrestore(&intf->events_lock, flags);
 
 	return 0;
 }
 
+static struct cmd_rcvr *find_cmd_rcvr(ipmi_smi_t    intf,
+				      unsigned char netfn,
+				      unsigned char cmd)
+{
+	struct cmd_rcvr *rcvr;
+
+	list_for_each_entry_rcu(rcvr, &intf->cmd_rcvrs, link) {
+		if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd))
+			return rcvr;
+	}
+	return NULL;
+}
+
 int ipmi_register_for_cmd(ipmi_user_t   user,
 			  unsigned char netfn,
 			  unsigned char cmd)
 {
-	struct cmd_rcvr  *cmp;
-	unsigned long    flags;
-	struct cmd_rcvr  *rcvr;
-	int              rv = 0;
+	ipmi_smi_t      intf = user->intf;
+	struct cmd_rcvr *rcvr;
+	struct cmd_rcvr *entry;
+	int             rv = 0;
 
 
 	rcvr = kmalloc(sizeof(*rcvr), GFP_KERNEL);
 	if (! rcvr)
 		return -ENOMEM;
+	rcvr->cmd = cmd;
+	rcvr->netfn = netfn;
+	rcvr->user = user;
 
-	read_lock(&(user->intf->users_lock));
-	write_lock_irqsave(&(user->intf->cmd_rcvr_lock), flags);
+	spin_lock_irq(&intf->cmd_rcvrs_lock);
 	/* Make sure the command/netfn is not already registered. */
-	list_for_each_entry(cmp, &(user->intf->cmd_rcvrs), link) {
-		if ((cmp->netfn == netfn) && (cmp->cmd == cmd)) {
-			rv = -EBUSY;
-			break;
-		}
-	}
-
-	if (! rv) {
-		rcvr->cmd = cmd;
-		rcvr->netfn = netfn;
-		rcvr->user = user;
-		list_add_tail(&(rcvr->link), &(user->intf->cmd_rcvrs));
+	entry = find_cmd_rcvr(intf, netfn, cmd);
+	if (entry) {
+		rv = -EBUSY;
+		goto out_unlock;
 	}
 
-	write_unlock_irqrestore(&(user->intf->cmd_rcvr_lock), flags);
-	read_unlock(&(user->intf->users_lock));
+	list_add_rcu(&rcvr->link, &intf->cmd_rcvrs);
 
+ out_unlock:
+	spin_unlock_irq(&intf->cmd_rcvrs_lock);
 	if (rv)
 		kfree(rcvr);
 
@@ -889,31 +972,28 @@ int ipmi_unregister_for_cmd(ipmi_user_t   user,
 			    unsigned char netfn,
 			    unsigned char cmd)
 {
-	unsigned long    flags;
-	struct cmd_rcvr  *rcvr;
-	int              rv = -ENOENT;
+	ipmi_smi_t      intf = user->intf;
+	struct cmd_rcvr *rcvr;
 
-	read_lock(&(user->intf->users_lock));
-	write_lock_irqsave(&(user->intf->cmd_rcvr_lock), flags);
+	spin_lock_irq(&intf->cmd_rcvrs_lock);
 	/* Make sure the command/netfn is not already registered. */
-	list_for_each_entry(rcvr, &(user->intf->cmd_rcvrs), link) {
-		if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd)) {
-			rv = 0;
-			list_del(&rcvr->link);
-			kfree(rcvr);
-			break;
-		}
+	rcvr = find_cmd_rcvr(intf, netfn, cmd);
+	if ((rcvr) && (rcvr->user == user)) {
+		list_del_rcu(&rcvr->link);
+		spin_unlock_irq(&intf->cmd_rcvrs_lock);
+		synchronize_rcu();
+		kfree(rcvr);
+		return 0;
+	} else {
+		spin_unlock_irq(&intf->cmd_rcvrs_lock);
+		return -ENOENT;
 	}
-	write_unlock_irqrestore(&(user->intf->cmd_rcvr_lock), flags);
-	read_unlock(&(user->intf->users_lock));
-
-	return rv;
 }
 
 void ipmi_user_set_run_to_completion(ipmi_user_t user, int val)
 {
-	user->intf->handlers->set_run_to_completion(user->intf->send_info,
-						    val);
+	ipmi_smi_t intf = user->intf;
+	intf->handlers->set_run_to_completion(intf->send_info, val);
 }
 
 static unsigned char
@@ -1010,19 +1090,19 @@ static inline void format_lan_msg(struct ipmi_smi_msg   *smi_msg,
    supplied in certain circumstances (mainly at panic time).  If
    messages are supplied, they will be freed, even if an error
    occurs. */
-static inline int i_ipmi_request(ipmi_user_t          user,
-				 ipmi_smi_t           intf,
-				 struct ipmi_addr     *addr,
-				 long                 msgid,
-				 struct kernel_ipmi_msg *msg,
-				 void                 *user_msg_data,
-				 void                 *supplied_smi,
-				 struct ipmi_recv_msg *supplied_recv,
-				 int                  priority,
-				 unsigned char        source_address,
-				 unsigned char        source_lun,
-				 int                  retries,
-				 unsigned int         retry_time_ms)
+static int i_ipmi_request(ipmi_user_t          user,
+			  ipmi_smi_t           intf,
+			  struct ipmi_addr     *addr,
+			  long                 msgid,
+			  struct kernel_ipmi_msg *msg,
+			  void                 *user_msg_data,
+			  void                 *supplied_smi,
+			  struct ipmi_recv_msg *supplied_recv,
+			  int                  priority,
+			  unsigned char        source_address,
+			  unsigned char        source_lun,
+			  int                  retries,
+			  unsigned int         retry_time_ms)
 {
 	int                  rv = 0;
 	struct ipmi_smi_msg  *smi_msg;
@@ -1051,6 +1131,8 @@ static inline int i_ipmi_request(ipmi_user_t          user,
 	}
 
 	recv_msg->user = user;
+	if (user)
+		kref_get(&user->refcount);
 	recv_msg->msgid = msgid;
 	/* Store the message to send in the receive message so timeout
 	   responses can get the proper response data. */
@@ -1725,11 +1807,11 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers,
 		      unsigned char            version_major,
 		      unsigned char            version_minor,
 		      unsigned char            slave_addr,
-		      ipmi_smi_t               *intf)
+		      ipmi_smi_t               *new_intf)
 {
 	int              i, j;
 	int              rv;
-	ipmi_smi_t       new_intf;
+	ipmi_smi_t       intf;
 	unsigned long    flags;
 
 
@@ -1745,189 +1827,142 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers,
 			return -ENODEV;
 	}
 
-	new_intf = kmalloc(sizeof(*new_intf), GFP_KERNEL);
-	if (!new_intf)
+	intf = kmalloc(sizeof(*intf), GFP_KERNEL);
+	if (!intf)
 		return -ENOMEM;
-	memset(new_intf, 0, sizeof(*new_intf));
-
-	new_intf->proc_dir = NULL;
+	memset(intf, 0, sizeof(*intf));
+	intf->intf_num = -1;
+	kref_init(&intf->refcount);
+	intf->version_major = version_major;
+	intf->version_minor = version_minor;
+	for (j = 0; j < IPMI_MAX_CHANNELS; j++) {
+		intf->channels[j].address = IPMI_BMC_SLAVE_ADDR;
+		intf->channels[j].lun = 2;
+	}
+	if (slave_addr != 0)
+		intf->channels[0].address = slave_addr;
+	INIT_LIST_HEAD(&intf->users);
+	intf->handlers = handlers;
+	intf->send_info = send_info;
+	spin_lock_init(&intf->seq_lock);
+	for (j = 0; j < IPMI_IPMB_NUM_SEQ; j++) {
+		intf->seq_table[j].inuse = 0;
+		intf->seq_table[j].seqid = 0;
+	}
+	intf->curr_seq = 0;
+#ifdef CONFIG_PROC_FS
+	spin_lock_init(&intf->proc_entry_lock);
+#endif
+	spin_lock_init(&intf->waiting_msgs_lock);
+	INIT_LIST_HEAD(&intf->waiting_msgs);
+	spin_lock_init(&intf->events_lock);
+	INIT_LIST_HEAD(&intf->waiting_events);
+	intf->waiting_events_count = 0;
+	spin_lock_init(&intf->cmd_rcvrs_lock);
+	INIT_LIST_HEAD(&intf->cmd_rcvrs);
+	init_waitqueue_head(&intf->waitq);
+
+	spin_lock_init(&intf->counter_lock);
+	intf->proc_dir = NULL;
 
 	rv = -ENOMEM;
-
-	down_write(&interfaces_sem);
+	spin_lock_irqsave(&interfaces_lock, flags);
 	for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
 		if (ipmi_interfaces[i] == NULL) {
-			new_intf->intf_num = i;
-			new_intf->version_major = version_major;
-			new_intf->version_minor = version_minor;
-			for (j = 0; j < IPMI_MAX_CHANNELS; j++) {
-				new_intf->channels[j].address
-					= IPMI_BMC_SLAVE_ADDR;
-				new_intf->channels[j].lun = 2;
-			}
-			if (slave_addr != 0)
-				new_intf->channels[0].address = slave_addr;
-			rwlock_init(&(new_intf->users_lock));
-			INIT_LIST_HEAD(&(new_intf->users));
-			new_intf->handlers = handlers;
-			new_intf->send_info = send_info;
-			spin_lock_init(&(new_intf->seq_lock));
-			for (j = 0; j < IPMI_IPMB_NUM_SEQ; j++) {
-				new_intf->seq_table[j].inuse = 0;
-				new_intf->seq_table[j].seqid = 0;
-			}
-			new_intf->curr_seq = 0;
-#ifdef CONFIG_PROC_FS
-			spin_lock_init(&(new_intf->proc_entry_lock));
-#endif
-			spin_lock_init(&(new_intf->waiting_msgs_lock));
-			INIT_LIST_HEAD(&(new_intf->waiting_msgs));
-			spin_lock_init(&(new_intf->events_lock));
-			INIT_LIST_HEAD(&(new_intf->waiting_events));
-			new_intf->waiting_events_count = 0;
-			rwlock_init(&(new_intf->cmd_rcvr_lock));
-			init_waitqueue_head(&new_intf->waitq);
-			INIT_LIST_HEAD(&(new_intf->cmd_rcvrs));
-
-			spin_lock_init(&(new_intf->counter_lock));
-
-			spin_lock_irqsave(&interfaces_lock, flags);
-			ipmi_interfaces[i] = new_intf;
-			spin_unlock_irqrestore(&interfaces_lock, flags);
-
+			intf->intf_num = i;
+			/* Reserve the entry till we are done. */
+			ipmi_interfaces[i] = IPMI_INVALID_INTERFACE_ENTRY;
 			rv = 0;
-			*intf = new_intf;
 			break;
 		}
 	}
+	spin_unlock_irqrestore(&interfaces_lock, flags);
+	if (rv)
+		goto out;
 
-	downgrade_write(&interfaces_sem);
-
-	if (rv == 0)
-		rv = add_proc_entries(*intf, i);
-
-	if (rv == 0) {
-		if ((version_major > 1)
-		    || ((version_major == 1) && (version_minor >= 5)))
-		{
-			/* Start scanning the channels to see what is
-			   available. */
-			(*intf)->null_user_handler = channel_handler;
-			(*intf)->curr_channel = 0;
-			rv = send_channel_info_cmd(*intf, 0);
-			if (rv)
-				goto out;
+	/* FIXME - this is an ugly kludge, this sets the intf for the
+	   caller before sending any messages with it. */
+	*new_intf = intf;
 
-			/* Wait for the channel info to be read. */
-			up_read(&interfaces_sem);
-			wait_event((*intf)->waitq,
-				   ((*intf)->curr_channel>=IPMI_MAX_CHANNELS));
-			down_read(&interfaces_sem);
+	if ((version_major > 1)
+	    || ((version_major == 1) && (version_minor >= 5)))
+	{
+		/* Start scanning the channels to see what is
+		   available. */
+		intf->null_user_handler = channel_handler;
+		intf->curr_channel = 0;
+		rv = send_channel_info_cmd(intf, 0);
+		if (rv)
+			goto out;
 
-			if (ipmi_interfaces[i] != new_intf)
-				/* Well, it went away.  Just return. */
-				goto out;
-		} else {
-			/* Assume a single IPMB channel at zero. */
-			(*intf)->channels[0].medium = IPMI_CHANNEL_MEDIUM_IPMB;
-			(*intf)->channels[0].protocol
-				= IPMI_CHANNEL_PROTOCOL_IPMB;
-  		}
-
-		/* Call all the watcher interfaces to tell
-		   them that a new interface is available. */
-		call_smi_watchers(i);
+		/* Wait for the channel info to be read. */
+		wait_event(intf->waitq,
+			   intf->curr_channel >= IPMI_MAX_CHANNELS);
+	} else {
+		/* Assume a single IPMB channel at zero. */
+		intf->channels[0].medium = IPMI_CHANNEL_MEDIUM_IPMB;
+		intf->channels[0].protocol = IPMI_CHANNEL_PROTOCOL_IPMB;
 	}
 
- out:
-	up_read(&interfaces_sem);
+	if (rv == 0)
+		rv = add_proc_entries(intf, i);
 
+ out:
 	if (rv) {
-		if (new_intf->proc_dir)
-			remove_proc_entries(new_intf);
-		kfree(new_intf);
+		if (intf->proc_dir)
+			remove_proc_entries(intf);
+		kref_put(&intf->refcount, intf_free);
+		if (i < MAX_IPMI_INTERFACES) {
+			spin_lock_irqsave(&interfaces_lock, flags);
+			ipmi_interfaces[i] = NULL;
+			spin_unlock_irqrestore(&interfaces_lock, flags);
+		}
+	} else {
+		spin_lock_irqsave(&interfaces_lock, flags);
+		ipmi_interfaces[i] = intf;
+		spin_unlock_irqrestore(&interfaces_lock, flags);
+		call_smi_watchers(i);
 	}
 
 	return rv;
 }
 
-static void free_recv_msg_list(struct list_head *q)
-{
-	struct ipmi_recv_msg *msg, *msg2;
-
-	list_for_each_entry_safe(msg, msg2, q, link) {
-		list_del(&msg->link);
-		ipmi_free_recv_msg(msg);
-	}
-}
-
-static void free_cmd_rcvr_list(struct list_head *q)
-{
-	struct cmd_rcvr  *rcvr, *rcvr2;
-
-	list_for_each_entry_safe(rcvr, rcvr2, q, link) {
-		list_del(&rcvr->link);
-		kfree(rcvr);
-	}
-}
-
-static void clean_up_interface_data(ipmi_smi_t intf)
-{
-	int i;
-
-	free_recv_msg_list(&(intf->waiting_msgs));
-	free_recv_msg_list(&(intf->waiting_events));
-	free_cmd_rcvr_list(&(intf->cmd_rcvrs));
-
-	for (i = 0; i < IPMI_IPMB_NUM_SEQ; i++) {
-		if ((intf->seq_table[i].inuse)
-		    && (intf->seq_table[i].recv_msg))
-		{
-			ipmi_free_recv_msg(intf->seq_table[i].recv_msg);
-		}	
-	}
-}
-
 int ipmi_unregister_smi(ipmi_smi_t intf)
 {
-	int                     rv = -ENODEV;
 	int                     i;
 	struct ipmi_smi_watcher *w;
 	unsigned long           flags;
 
-	down_write(&interfaces_sem);
-	if (list_empty(&(intf->users)))
-	{
-		for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
-			if (ipmi_interfaces[i] == intf) {
-				remove_proc_entries(intf);
-				spin_lock_irqsave(&interfaces_lock, flags);
-				ipmi_interfaces[i] = NULL;
-				clean_up_interface_data(intf);
-				spin_unlock_irqrestore(&interfaces_lock,flags);
-				kfree(intf);
-				rv = 0;
-				goto out_call_watcher;
-			}
+	spin_lock_irqsave(&interfaces_lock, flags);
+	for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
+		if (ipmi_interfaces[i] == intf) {
+			/* Set the interface number reserved until we
+			 * are done. */
+			ipmi_interfaces[i] = IPMI_INVALID_INTERFACE_ENTRY;
+			intf->intf_num = -1;
+			break;
 		}
-	} else {
-		rv = -EBUSY;
 	}
-	up_write(&interfaces_sem);
+	spin_unlock_irqrestore(&interfaces_lock,flags);
 
-	return rv;
+	if (i == MAX_IPMI_INTERFACES)
+		return -ENODEV;
 
- out_call_watcher:
-	downgrade_write(&interfaces_sem);
+	remove_proc_entries(intf);
 
 	/* Call all the watcher interfaces to tell them that
 	   an interface is gone. */
 	down_read(&smi_watchers_sem);
-	list_for_each_entry(w, &smi_watchers, link) {
+	list_for_each_entry(w, &smi_watchers, link)
 		w->smi_gone(i);
-	}
 	up_read(&smi_watchers_sem);
-	up_read(&interfaces_sem);
+
+	/* Allow the entry to be reused now. */
+	spin_lock_irqsave(&interfaces_lock, flags);
+	ipmi_interfaces[i] = NULL;
+	spin_unlock_irqrestore(&interfaces_lock,flags);
+
+	kref_put(&intf->refcount, intf_free);
 	return 0;
 }
 
@@ -1998,14 +2033,14 @@ static int handle_ipmb_get_msg_rsp(ipmi_smi_t          intf,
 static int handle_ipmb_get_msg_cmd(ipmi_smi_t          intf,
 				   struct ipmi_smi_msg *msg)
 {
-	struct cmd_rcvr       *rcvr;
-	int                   rv = 0;
-	unsigned char         netfn;
-	unsigned char         cmd;
-	ipmi_user_t           user = NULL;
-	struct ipmi_ipmb_addr *ipmb_addr;
-	struct ipmi_recv_msg  *recv_msg;
-	unsigned long         flags;
+	struct cmd_rcvr          *rcvr;
+	int                      rv = 0;
+	unsigned char            netfn;
+	unsigned char            cmd;
+	ipmi_user_t              user = NULL;
+	struct ipmi_ipmb_addr    *ipmb_addr;
+	struct ipmi_recv_msg     *recv_msg;
+	unsigned long            flags;
 
 	if (msg->rsp_size < 10) {
 		/* Message not big enough, just ignore it. */
@@ -2023,16 +2058,14 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t          intf,
 	netfn = msg->rsp[4] >> 2;
 	cmd = msg->rsp[8];
 
-	read_lock(&(intf->cmd_rcvr_lock));
-	
-	/* Find the command/netfn. */
-	list_for_each_entry(rcvr, &(intf->cmd_rcvrs), link) {
-		if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd)) {
-			user = rcvr->user;
-			break;
-		}
-	}
-	read_unlock(&(intf->cmd_rcvr_lock));
+	spin_lock_irqsave(&intf->cmd_rcvrs_lock, flags);
+	rcvr = find_cmd_rcvr(intf, netfn, cmd);
+	if (rcvr) {
+		user = rcvr->user;
+		kref_get(&user->refcount);
+	} else
+		user = NULL;
+	spin_unlock_irqrestore(&intf->cmd_rcvrs_lock, flags);
 
 	if (user == NULL) {
 		/* We didn't find a user, deliver an error response. */
@@ -2079,6 +2112,7 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t          intf,
                            message, so requeue it for handling
                            later. */
 			rv = 1;
+			kref_put(&user->refcount, free_user);
 		} else {
 			/* Extract the source address from the data. */
 			ipmb_addr = (struct ipmi_ipmb_addr *) &recv_msg->addr;
@@ -2179,14 +2213,14 @@ static int handle_lan_get_msg_rsp(ipmi_smi_t          intf,
 static int handle_lan_get_msg_cmd(ipmi_smi_t          intf,
 				  struct ipmi_smi_msg *msg)
 {
-	struct cmd_rcvr       *rcvr;
-	int                   rv = 0;
-	unsigned char         netfn;
-	unsigned char         cmd;
-	ipmi_user_t           user = NULL;
-	struct ipmi_lan_addr  *lan_addr;
-	struct ipmi_recv_msg  *recv_msg;
-	unsigned long         flags;
+	struct cmd_rcvr          *rcvr;
+	int                      rv = 0;
+	unsigned char            netfn;
+	unsigned char            cmd;
+	ipmi_user_t              user = NULL;
+	struct ipmi_lan_addr     *lan_addr;
+	struct ipmi_recv_msg     *recv_msg;
+	unsigned long            flags;
 
 	if (msg->rsp_size < 12) {
 		/* Message not big enough, just ignore it. */
@@ -2204,19 +2238,17 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t          intf,
 	netfn = msg->rsp[6] >> 2;
 	cmd = msg->rsp[10];
 
-	read_lock(&(intf->cmd_rcvr_lock));
-
-	/* Find the command/netfn. */
-	list_for_each_entry(rcvr, &(intf->cmd_rcvrs), link) {
-		if ((rcvr->netfn == netfn) && (rcvr->cmd == cmd)) {
-			user = rcvr->user;
-			break;
-		}
-	}
-	read_unlock(&(intf->cmd_rcvr_lock));
+	spin_lock_irqsave(&intf->cmd_rcvrs_lock, flags);
+	rcvr = find_cmd_rcvr(intf, netfn, cmd);
+	if (rcvr) {
+		user = rcvr->user;
+		kref_get(&user->refcount);
+	} else
+		user = NULL;
+	spin_unlock_irqrestore(&intf->cmd_rcvrs_lock, flags);
 
 	if (user == NULL) {
-		/* We didn't find a user, deliver an error response. */
+		/* We didn't find a user, just give up. */
 		spin_lock_irqsave(&intf->counter_lock, flags);
 		intf->unhandled_commands++;
 		spin_unlock_irqrestore(&intf->counter_lock, flags);
@@ -2235,6 +2267,7 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t          intf,
                            message, so requeue it for handling
                            later. */
 			rv = 1;
+			kref_put(&user->refcount, free_user);
 		} else {
 			/* Extract the source address from the data. */
 			lan_addr = (struct ipmi_lan_addr *) &recv_msg->addr;
@@ -2286,8 +2319,6 @@ static void copy_event_into_recv_msg(struct ipmi_recv_msg *recv_msg,
 	recv_msg->msg.data_len = msg->rsp_size - 3;
 }
 
-/* This will be called with the intf->users_lock read-locked, so no need
-   to do that here. */
 static int handle_read_event_rsp(ipmi_smi_t          intf,
 				 struct ipmi_smi_msg *msg)
 {
@@ -2313,7 +2344,7 @@ static int handle_read_event_rsp(ipmi_smi_t          intf,
 
 	INIT_LIST_HEAD(&msgs);
 
-	spin_lock_irqsave(&(intf->events_lock), flags);
+	spin_lock_irqsave(&intf->events_lock, flags);
 
 	spin_lock(&intf->counter_lock);
 	intf->events++;
@@ -2321,12 +2352,14 @@ static int handle_read_event_rsp(ipmi_smi_t          intf,
 
 	/* Allocate and fill in one message for every user that is getting
 	   events. */
-	list_for_each_entry(user, &(intf->users), link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(user, &intf->users, link) {
 		if (! user->gets_events)
 			continue;
 
 		recv_msg = ipmi_alloc_recv_msg();
 		if (! recv_msg) {
+			rcu_read_unlock();
 			list_for_each_entry_safe(recv_msg, recv_msg2, &msgs, link) {
 				list_del(&recv_msg->link);
 				ipmi_free_recv_msg(recv_msg);
@@ -2342,8 +2375,10 @@ static int handle_read_event_rsp(ipmi_smi_t          intf,
 
 		copy_event_into_recv_msg(recv_msg, msg);
 		recv_msg->user = user;
+		kref_get(&user->refcount);
 		list_add_tail(&(recv_msg->link), &msgs);
 	}
+	rcu_read_unlock();
 
 	if (deliver_count) {
 		/* Now deliver all the messages. */
@@ -2382,9 +2417,8 @@ static int handle_bmc_rsp(ipmi_smi_t          intf,
 			  struct ipmi_smi_msg *msg)
 {
 	struct ipmi_recv_msg *recv_msg;
-	int                  found = 0;
-	struct ipmi_user     *user;
 	unsigned long        flags;
+	struct ipmi_user     *user;
 
 	recv_msg = (struct ipmi_recv_msg *) msg->user_data;
 	if (recv_msg == NULL)
@@ -2396,16 +2430,9 @@ static int handle_bmc_rsp(ipmi_smi_t          intf,
 		return 0;
 	}
 
+	user = recv_msg->user;
 	/* Make sure the user still exists. */
-	list_for_each_entry(user, &(intf->users), link) {
-		if (user == recv_msg->user) {
-			/* Found it, so we can deliver it */
-			found = 1;
-			break;
-		}
-	}
-
-	if ((! found) && recv_msg->user) {
+	if (user && !user->valid) {
 		/* The user for the message went away, so give up. */
 		spin_lock_irqsave(&intf->counter_lock, flags);
 		intf->unhandled_local_responses++;
@@ -2486,7 +2513,7 @@ static int handle_new_recv_msg(ipmi_smi_t          intf,
 	{
 		/* It's a response to a response we sent.  For this we
 		   deliver a send message response to the user. */
-		struct ipmi_recv_msg *recv_msg = msg->user_data;
+		struct ipmi_recv_msg     *recv_msg = msg->user_data;
 
 		requeue = 0;
 		if (msg->rsp_size < 2)
@@ -2498,13 +2525,18 @@ static int handle_new_recv_msg(ipmi_smi_t          intf,
 			/* Invalid channel number */
 			goto out;
 
-		if (recv_msg) {
-			recv_msg->recv_type = IPMI_RESPONSE_RESPONSE_TYPE;
-			recv_msg->msg.data = recv_msg->msg_data;
-			recv_msg->msg.data_len = 1;
-			recv_msg->msg_data[0] = msg->rsp[2];
-			deliver_response(recv_msg);
-		}
+		if (!recv_msg)
+			goto out;
+
+		/* Make sure the user still exists. */
+		if (!recv_msg->user || !recv_msg->user->valid)
+			goto out;
+
+		recv_msg->recv_type = IPMI_RESPONSE_RESPONSE_TYPE;
+		recv_msg->msg.data = recv_msg->msg_data;
+		recv_msg->msg.data_len = 1;
+		recv_msg->msg_data[0] = msg->rsp[2];
+		deliver_response(recv_msg);
 	} else if ((msg->rsp[0] == ((IPMI_NETFN_APP_REQUEST|1) << 2))
 		   && (msg->rsp[1] == IPMI_GET_MSG_CMD))
 	{
@@ -2570,14 +2602,11 @@ void ipmi_smi_msg_received(ipmi_smi_t          intf,
 	int           rv;
 
 
-	/* Lock the user lock so the user can't go away while we are
-	   working on it. */
-	read_lock(&(intf->users_lock));
-
 	if ((msg->data_size >= 2)
 	    && (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
 	    && (msg->data[1] == IPMI_SEND_MSG_CMD)
-	    && (msg->user_data == NULL)) {
+	    && (msg->user_data == NULL))
+	{
 		/* This is the local response to a command send, start
                    the timer for these.  The user_data will not be
                    NULL if this is a response send, and we will let
@@ -2612,46 +2641,46 @@ void ipmi_smi_msg_received(ipmi_smi_t          intf,
 		}
 
 		ipmi_free_smi_msg(msg);
-		goto out_unlock;
+		goto out;
 	}
 
 	/* To preserve message order, if the list is not empty, we
            tack this message onto the end of the list. */
-	spin_lock_irqsave(&(intf->waiting_msgs_lock), flags);
-	if (!list_empty(&(intf->waiting_msgs))) {
-		list_add_tail(&(msg->link), &(intf->waiting_msgs));
-		spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);
-		goto out_unlock;
+	spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
+	if (!list_empty(&intf->waiting_msgs)) {
+		list_add_tail(&msg->link, &intf->waiting_msgs);
+		spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
+		goto out;
 	}
-	spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);
+	spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
 		
 	rv = handle_new_recv_msg(intf, msg);
 	if (rv > 0) {
 		/* Could not handle the message now, just add it to a
                    list to handle later. */
-		spin_lock_irqsave(&(intf->waiting_msgs_lock), flags);
-		list_add_tail(&(msg->link), &(intf->waiting_msgs));
-		spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);
+		spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
+		list_add_tail(&msg->link, &intf->waiting_msgs);
+		spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
 	} else if (rv == 0) {
 		ipmi_free_smi_msg(msg);
 	}
 
- out_unlock:
-	read_unlock(&(intf->users_lock));
+ out:
+	return;
 }
 
 void ipmi_smi_watchdog_pretimeout(ipmi_smi_t intf)
 {
 	ipmi_user_t user;
 
-	read_lock(&(intf->users_lock));
-	list_for_each_entry(user, &(intf->users), link) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(user, &intf->users, link) {
 		if (! user->handler->ipmi_watchdog_pretimeout)
 			continue;
 
 		user->handler->ipmi_watchdog_pretimeout(user->handler_data);
 	}
-	read_unlock(&(intf->users_lock));
+	rcu_read_unlock();
 }
 
 static void
@@ -2691,8 +2720,65 @@ smi_from_recv_msg(ipmi_smi_t intf, struct ipmi_recv_msg *recv_msg,
 	return smi_msg;
 }
 
-static void
-ipmi_timeout_handler(long timeout_period)
+static void check_msg_timeout(ipmi_smi_t intf, struct seq_table *ent,
+			      struct list_head *timeouts, long timeout_period,
+			      int slot, unsigned long *flags)
+{
+	struct ipmi_recv_msg *msg;
+
+	if (!ent->inuse)
+		return;
+
+	ent->timeout -= timeout_period;
+	if (ent->timeout > 0)
+		return;
+
+	if (ent->retries_left == 0) {
+		/* The message has used all its retries. */
+		ent->inuse = 0;
+		msg = ent->recv_msg;
+		list_add_tail(&msg->link, timeouts);
+		spin_lock(&intf->counter_lock);
+		if (ent->broadcast)
+			intf->timed_out_ipmb_broadcasts++;
+		else if (ent->recv_msg->addr.addr_type == IPMI_LAN_ADDR_TYPE)
+			intf->timed_out_lan_commands++;
+		else
+			intf->timed_out_ipmb_commands++;
+		spin_unlock(&intf->counter_lock);
+	} else {
+		struct ipmi_smi_msg *smi_msg;
+		/* More retries, send again. */
+
+		/* Start with the max timer, set to normal
+		   timer after the message is sent. */
+		ent->timeout = MAX_MSG_TIMEOUT;
+		ent->retries_left--;
+		spin_lock(&intf->counter_lock);
+		if (ent->recv_msg->addr.addr_type == IPMI_LAN_ADDR_TYPE)
+			intf->retransmitted_lan_commands++;
+		else
+			intf->retransmitted_ipmb_commands++;
+		spin_unlock(&intf->counter_lock);
+
+		smi_msg = smi_from_recv_msg(intf, ent->recv_msg, slot,
+					    ent->seqid);
+		if (! smi_msg)
+			return;
+
+		spin_unlock_irqrestore(&intf->seq_lock, *flags);
+		/* Send the new message.  We send with a zero
+		 * priority.  It timed out, I doubt time is
+		 * that critical now, and high priority
+		 * messages are really only for messages to the
+		 * local MC, which don't get resent. */
+		intf->handlers->sender(intf->send_info,
+				       smi_msg, 0);
+		spin_lock_irqsave(&intf->seq_lock, *flags);
+	}
+}
+
+static void ipmi_timeout_handler(long timeout_period)
 {
 	ipmi_smi_t           intf;
 	struct list_head     timeouts;
@@ -2706,14 +2792,14 @@ ipmi_timeout_handler(long timeout_period)
 	spin_lock(&interfaces_lock);
 	for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
 		intf = ipmi_interfaces[i];
-		if (intf == NULL)
+		if (IPMI_INVALID_INTERFACE(intf))
 			continue;
-
-		read_lock(&(intf->users_lock));
+		kref_get(&intf->refcount);
+		spin_unlock(&interfaces_lock);
 
 		/* See if any waiting messages need to be processed. */
-		spin_lock_irqsave(&(intf->waiting_msgs_lock), flags);
-		list_for_each_entry_safe(smi_msg, smi_msg2, &(intf->waiting_msgs), link) {
+		spin_lock_irqsave(&intf->waiting_msgs_lock, flags);
+		list_for_each_entry_safe(smi_msg, smi_msg2, &intf->waiting_msgs, link) {
 			if (! handle_new_recv_msg(intf, smi_msg)) {
 				list_del(&smi_msg->link);
 				ipmi_free_smi_msg(smi_msg);
@@ -2723,73 +2809,23 @@ ipmi_timeout_handler(long timeout_period)
 				break;
 			}
 		}
-		spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags);
+		spin_unlock_irqrestore(&intf->waiting_msgs_lock, flags);
 
 		/* Go through the seq table and find any messages that
 		   have timed out, putting them in the timeouts
 		   list. */
-		spin_lock_irqsave(&(intf->seq_lock), flags);
-		for (j = 0; j < IPMI_IPMB_NUM_SEQ; j++) {
-			struct seq_table *ent = &(intf->seq_table[j]);
-			if (!ent->inuse)
-				continue;
-
-			ent->timeout -= timeout_period;
-			if (ent->timeout > 0)
-				continue;
-
-			if (ent->retries_left == 0) {
-				/* The message has used all its retries. */
-				ent->inuse = 0;
-				msg = ent->recv_msg;
-				list_add_tail(&(msg->link), &timeouts);
-				spin_lock(&intf->counter_lock);
-				if (ent->broadcast)
-					intf->timed_out_ipmb_broadcasts++;
-				else if (ent->recv_msg->addr.addr_type
-					 == IPMI_LAN_ADDR_TYPE)
-					intf->timed_out_lan_commands++;
-				else
-					intf->timed_out_ipmb_commands++;
-				spin_unlock(&intf->counter_lock);
-			} else {
-				struct ipmi_smi_msg *smi_msg;
-				/* More retries, send again. */
-
-				/* Start with the max timer, set to normal
-				   timer after the message is sent. */
-				ent->timeout = MAX_MSG_TIMEOUT;
-				ent->retries_left--;
-				spin_lock(&intf->counter_lock);
-				if (ent->recv_msg->addr.addr_type
-				    == IPMI_LAN_ADDR_TYPE)
-					intf->retransmitted_lan_commands++;
-				else
-					intf->retransmitted_ipmb_commands++;
-				spin_unlock(&intf->counter_lock);
-				smi_msg = smi_from_recv_msg(intf,
-						ent->recv_msg, j, ent->seqid);
-				if (! smi_msg)
-					continue;
-
-				spin_unlock_irqrestore(&(intf->seq_lock),flags);
-				/* Send the new message.  We send with a zero
-				 * priority.  It timed out, I doubt time is
-				 * that critical now, and high priority
-				 * messages are really only for messages to the
-				 * local MC, which don't get resent. */
-				intf->handlers->sender(intf->send_info,
-							smi_msg, 0);
-				spin_lock_irqsave(&(intf->seq_lock), flags);
-			}
-		}
-		spin_unlock_irqrestore(&(intf->seq_lock), flags);
-
-		list_for_each_entry_safe(msg, msg2, &timeouts, link) {
+		spin_lock_irqsave(&intf->seq_lock, flags);
+		for (j = 0; j < IPMI_IPMB_NUM_SEQ; j++)
+			check_msg_timeout(intf, &(intf->seq_table[j]),
+					  &timeouts, timeout_period, j,
+					  &flags);
+		spin_unlock_irqrestore(&intf->seq_lock, flags);
+
+		list_for_each_entry_safe(msg, msg2, &timeouts, link)
 			handle_msg_timeout(msg);
-		}
 
-		read_unlock(&(intf->users_lock));
+		kref_put(&intf->refcount, intf_free);
+		spin_lock(&interfaces_lock);
 	}
 	spin_unlock(&interfaces_lock);
 }
@@ -2802,7 +2838,7 @@ static void ipmi_request_event(void)
 	spin_lock(&interfaces_lock);
 	for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
 		intf = ipmi_interfaces[i];
-		if (intf == NULL)
+		if (IPMI_INVALID_INTERFACE(intf))
 			continue;
 
 		intf->handlers->request_events(intf->send_info);
@@ -2884,6 +2920,13 @@ struct ipmi_recv_msg *ipmi_alloc_recv_msg(void)
 	return rv;
 }
 
+void ipmi_free_recv_msg(struct ipmi_recv_msg *msg)
+{
+	if (msg->user)
+		kref_put(&msg->user->refcount, free_user);
+	msg->done(msg);
+}
+
 #ifdef CONFIG_IPMI_PANIC_EVENT
 
 static void dummy_smi_done_handler(struct ipmi_smi_msg *msg)
@@ -2964,7 +3007,7 @@ static void send_panic_events(char *str)
 	/* For every registered interface, send the event. */
 	for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
 		intf = ipmi_interfaces[i];
-		if (intf == NULL)
+		if (IPMI_INVALID_INTERFACE(intf))
 			continue;
 
 		/* Send the event announcing the panic. */
@@ -2995,7 +3038,7 @@ static void send_panic_events(char *str)
 		int                   j;
 
 		intf = ipmi_interfaces[i];
-		if (intf == NULL)
+		if (IPMI_INVALID_INTERFACE(intf))
 			continue;
 
 		/* First job here is to figure out where to send the
@@ -3131,7 +3174,7 @@ static int panic_event(struct notifier_block *this,
 	/* For every registered interface, set it to run to completion. */
 	for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
 		intf = ipmi_interfaces[i];
-		if (intf == NULL)
+		if (IPMI_INVALID_INTERFACE(intf))
 			continue;
 
 		intf->handlers->set_run_to_completion(intf->send_info, 1);
@@ -3160,9 +3203,8 @@ static int ipmi_init_msghandler(void)
 	printk(KERN_INFO "ipmi message handler version "
 	       IPMI_DRIVER_VERSION "\n");
 
-	for (i = 0; i < MAX_IPMI_INTERFACES; i++) {
+	for (i = 0; i < MAX_IPMI_INTERFACES; i++)
 		ipmi_interfaces[i] = NULL;
-	}
 
 #ifdef CONFIG_PROC_FS
 	proc_ipmi_root = proc_mkdir("ipmi", NULL);
@@ -3258,3 +3300,4 @@ EXPORT_SYMBOL(ipmi_get_my_LUN);
 EXPORT_SYMBOL(ipmi_smi_add_proc_entry);
 EXPORT_SYMBOL(proc_ipmi_root);
 EXPORT_SYMBOL(ipmi_user_set_run_to_completion);
+EXPORT_SYMBOL(ipmi_free_recv_msg);
-- 
cgit v1.2.3


From c4edff1c19ef23e15aae64ca03f32c6719822d54 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 00:59:56 -0800
Subject: [PATCH] ipmi: various si cleanup

A number of small changes for the various system interface drivers,
consolidated from a number of patches from Matt Domsch.

Clear B2H_ATN and drain the BMC message buffer on command timeout.  This
prevents further commands from failing after a timeout.

Add bt_debug and smic_debug module parameters, expose them in sysfs.  This
lets you enable and disable debugging messages at runtime.

Unsigned jiffies math in ipmi_si_intf.c causes a too-large value to be passed
to ->event() after jiffies wrap-around.  The BT driver had caught this, but
didn't know how to fix it.  Now all calls to ->event() use a sane value for
time.

Increase timeout for commands handed to the BT driver from 2 seconds to 5
seconds.  This is necessary particularly when the previous command was a
"Clear SEL", as that command completes, yet the BMC isn't really ready to
handle another command yet.

Silence BT debugging messages which were being printed on the console.

Increase SMIC timeout from 1/10s to 2s.  This is needed on Dell PowerEdge 2650
and PowerEdge 750 with ERA/O cards to allow commands to complete without
timing out.

Adds kcs_debug module param, to match behavior of BT and SMIC.  This also
prevents messages from being sent to the console unless explicitly requested.

Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_bt_sm.c   | 34 +++++++++++++---------------------
 drivers/char/ipmi/ipmi_kcs_sm.c  | 40 +++++++++++++++++++++++++++-------------
 drivers/char/ipmi/ipmi_si_intf.c |  4 ++--
 drivers/char/ipmi/ipmi_smic_sm.c |  6 +++++-
 4 files changed, 47 insertions(+), 37 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index 33862670e28..7c4a195dfc9 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -28,6 +28,8 @@
 
 #include <linux/kernel.h> /* For printk. */
 #include <linux/string.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/ipmi_msgdefs.h>		/* for completion codes */
 #include "ipmi_si_sm.h"
 
@@ -36,6 +38,8 @@ static int bt_debug = 0x00;	/* Production value 0, see following flags */
 #define	BT_DEBUG_ENABLE	1
 #define BT_DEBUG_MSG	2
 #define BT_DEBUG_STATES	4
+module_param(bt_debug, int, 0644);
+MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states");
 
 /* Typical "Get BT Capabilities" values are 2-3 retries, 5-10 seconds,
    and 64 byte buffers.  However, one HP implementation wants 255 bytes of
@@ -43,7 +47,7 @@ static int bt_debug = 0x00;	/* Production value 0, see following flags */
    Since the Open IPMI architecture is single-message oriented at this
    stage, the queue depth of BT is of no concern. */
 
-#define BT_NORMAL_TIMEOUT	2000000	/* seconds in microseconds */
+#define BT_NORMAL_TIMEOUT	5000000	/* seconds in microseconds */
 #define BT_RETRY_LIMIT		2
 #define BT_RESET_DELAY		6000000	/* 6 seconds after warm reset */
 
@@ -202,7 +206,7 @@ static int bt_get_result(struct si_sm_data *bt,
 	msg_len = bt->read_count - 2;		/* account for length & seq */
 	/* Always NetFn, Cmd, cCode */
 	if (msg_len < 3 || msg_len > IPMI_MAX_MSG_LENGTH) {
-		printk(KERN_WARNING "BT results: bad msg_len = %d\n", msg_len);
+		printk(KERN_DEBUG "BT results: bad msg_len = %d\n", msg_len);
 		data[0] = bt->write_data[1] | 0x4;	/* Kludge a response */
 		data[1] = bt->write_data[3];
 		data[2] = IPMI_ERR_UNSPECIFIED;
@@ -240,7 +244,7 @@ static void reset_flags(struct si_sm_data *bt)
 	       BT_CONTROL(BT_B_BUSY);
 	BT_CONTROL(BT_CLR_WR_PTR);
 	BT_CONTROL(BT_SMS_ATN);
-#ifdef DEVELOPMENT_ONLY_NOT_FOR_PRODUCTION
+
 	if (BT_STATUS & BT_B2H_ATN) {
 		int i;
 		BT_CONTROL(BT_H_BUSY);
@@ -250,7 +254,6 @@ static void reset_flags(struct si_sm_data *bt)
 		       BMC2HOST;
 		BT_CONTROL(BT_H_BUSY);
 	}
-#endif
 }
 
 static inline void write_all_bytes(struct si_sm_data *bt)
@@ -295,7 +298,7 @@ static inline int read_all_bytes(struct si_sm_data *bt)
 	    	printk ("\n");
 	}
 	if (bt->seq != bt->write_data[2])	/* idiot check */
-		printk(KERN_WARNING "BT: internal error: sequence mismatch\n");
+		printk(KERN_DEBUG "BT: internal error: sequence mismatch\n");
 
 	/* per the spec, the (NetFn, Seq, Cmd) tuples should match */
 	if ((bt->read_data[3] == bt->write_data[3]) &&		/* Cmd */
@@ -321,18 +324,18 @@ static void error_recovery(struct si_sm_data *bt, char *reason)
 	bt->timeout = BT_NORMAL_TIMEOUT; /* various places want to retry */
 
 	status = BT_STATUS;
-	printk(KERN_WARNING "BT: %s in %s %s ", reason, STATE2TXT,
+	printk(KERN_DEBUG "BT: %s in %s %s\n", reason, STATE2TXT,
 	       STATUS2TXT(buf));
 
 	(bt->error_retries)++;
 	if (bt->error_retries > BT_RETRY_LIMIT) {
-		printk("retry limit (%d) exceeded\n", BT_RETRY_LIMIT);
+		printk(KERN_DEBUG "retry limit (%d) exceeded\n", BT_RETRY_LIMIT);
 		bt->state = BT_STATE_HOSED;
 		if (!bt->nonzero_status)
 			printk(KERN_ERR "IPMI: BT stuck, try power cycle\n");
 		else if (bt->seq == FIRST_SEQ + BT_RETRY_LIMIT) {
 			/* most likely during insmod */
-			printk(KERN_WARNING "IPMI: BT reset (takes 5 secs)\n");
+			printk(KERN_DEBUG "IPMI: BT reset (takes 5 secs)\n");
         		bt->state = BT_STATE_RESET1;
 		}
 	return;
@@ -340,11 +343,11 @@ static void error_recovery(struct si_sm_data *bt, char *reason)
 
 	/* Sometimes the BMC queues get in an "off-by-one" state...*/
 	if ((bt->state == BT_STATE_B2H_WAIT) && (status & BT_B2H_ATN)) {
-    		printk("retry B2H_WAIT\n");
+    		printk(KERN_DEBUG "retry B2H_WAIT\n");
 		return;
 	}
 
-	printk("restart command\n");
+	printk(KERN_DEBUG "restart command\n");
 	bt->state = BT_STATE_RESTART;
 }
 
@@ -372,17 +375,6 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
 	       return SI_SM_HOSED;
 
 	if (bt->state != BT_STATE_IDLE) {	/* do timeout test */
-
-		/* Certain states, on error conditions, can lock up a CPU
-		   because they are effectively in an infinite loop with
-		   CALL_WITHOUT_DELAY (right back here with time == 0).
-		   Prevent infinite lockup by ALWAYS decrementing timeout. */
-
-    	/* FIXME: bt_event is sometimes called with time > BT_NORMAL_TIMEOUT
-              (noticed in ipmi_smic_sm.c January 2004) */
-
-		if ((time <= 0) || (time >= BT_NORMAL_TIMEOUT))
-		       time = 100;
 		bt->timeout -= time;
 		if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) {
 			error_recovery(bt, "timed out");
diff --git a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c
index d21853a594a..dc83365ede4 100644
--- a/drivers/char/ipmi/ipmi_kcs_sm.c
+++ b/drivers/char/ipmi/ipmi_kcs_sm.c
@@ -38,16 +38,24 @@
  */
 
 #include <linux/kernel.h> /* For printk. */
+#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/string.h>
 #include <linux/ipmi_msgdefs.h>		/* for completion codes */
 #include "ipmi_si_sm.h"
 
-/* Set this if you want a printout of why the state machine was hosed
-   when it gets hosed. */
-#define DEBUG_HOSED_REASON
+/* kcs_debug is a bit-field
+ *	KCS_DEBUG_ENABLE -	print why the state machine got hosed
+ *	KCS_DEBUG_MSG    -	commands and their responses
+ *	KCS_DEBUG_STATES -	state machine
+ */
+#define KCS_DEBUG_STATES	4
+#define KCS_DEBUG_MSG		2
+#define	KCS_DEBUG_ENABLE	1
 
-/* Print the state machine state on entry every time. */
-#undef DEBUG_STATE
+static int kcs_debug;
+module_param(kcs_debug, int, 0644);
+MODULE_PARM_DESC(kcs_debug, "debug bitmask, 1=enable, 2=messages, 4=states");
 
 /* The states the KCS driver may be in. */
 enum kcs_states {
@@ -175,9 +183,8 @@ static inline void start_error_recovery(struct si_sm_data *kcs, char *reason)
 {
 	(kcs->error_retries)++;
 	if (kcs->error_retries > MAX_ERROR_RETRIES) {
-#ifdef DEBUG_HOSED_REASON
-		printk("ipmi_kcs_sm: kcs hosed: %s\n", reason);
-#endif
+		if (kcs_debug & KCS_DEBUG_ENABLE)
+			printk(KERN_DEBUG "ipmi_kcs_sm: kcs hosed: %s\n", reason);
 		kcs->state = KCS_HOSED;
 	} else {
 		kcs->state = KCS_ERROR0;
@@ -248,14 +255,21 @@ static void restart_kcs_transaction(struct si_sm_data *kcs)
 static int start_kcs_transaction(struct si_sm_data *kcs, unsigned char *data,
 				 unsigned int size)
 {
+	unsigned int i;
+
 	if ((size < 2) || (size > MAX_KCS_WRITE_SIZE)) {
 		return -1;
 	}
-
 	if ((kcs->state != KCS_IDLE) && (kcs->state != KCS_HOSED)) {
 		return -2;
 	}
-
+	if (kcs_debug & KCS_DEBUG_MSG) {
+		printk(KERN_DEBUG "start_kcs_transaction -");
+		for (i = 0; i < size; i ++) {
+			printk(" %02x", (unsigned char) (data [i]));
+		}
+		printk ("\n");
+	}
 	kcs->error_retries = 0;
 	memcpy(kcs->write_data, data, size);
 	kcs->write_count = size;
@@ -305,9 +319,9 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time)
 
 	status = read_status(kcs);
 
-#ifdef DEBUG_STATE
-	printk("  State = %d, %x\n", kcs->state, status);
-#endif
+	if (kcs_debug & KCS_DEBUG_STATES)
+		printk(KERN_DEBUG "KCS: State = %d, %x\n", kcs->state, status);
+
 	/* All states wait for ibf, so just do it here. */
 	if (!check_ibf(kcs, status, time))
 		return SI_SM_CALL_WITH_DELAY;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index b6e5cbfb09f..204e2e987e9 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -819,7 +819,7 @@ static void smi_timeout(unsigned long data)
 	enum si_sm_result smi_result;
 	unsigned long     flags;
 	unsigned long     jiffies_now;
-	unsigned long     time_diff;
+	long              time_diff;
 #ifdef DEBUG_TIMING
 	struct timeval    t;
 #endif
@@ -835,7 +835,7 @@ static void smi_timeout(unsigned long data)
 	printk("**Timer: %d.%9.9d\n", t.tv_sec, t.tv_usec);
 #endif
 	jiffies_now = jiffies;
-	time_diff = ((jiffies_now - smi_info->last_timeout_jiffies)
+	time_diff = ((long)(jiffies_now - smi_info->last_timeout_jiffies)
 		     * SI_USEC_PER_JIFFY);
 	smi_result = smi_event_handler(smi_info, time_diff);
 
diff --git a/drivers/char/ipmi/ipmi_smic_sm.c b/drivers/char/ipmi/ipmi_smic_sm.c
index add2aa2732f..f17043da9dd 100644
--- a/drivers/char/ipmi/ipmi_smic_sm.c
+++ b/drivers/char/ipmi/ipmi_smic_sm.c
@@ -43,6 +43,8 @@
 
 #include <linux/kernel.h> /* For printk. */
 #include <linux/string.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/ipmi_msgdefs.h>		/* for completion codes */
 #include "ipmi_si_sm.h"
 
@@ -56,6 +58,8 @@
 #define	SMIC_DEBUG_ENABLE	1
 
 static int smic_debug = 1;
+module_param(smic_debug, int, 0644);
+MODULE_PARM_DESC(smic_debug, "debug bitmask, 1=enable, 2=messages, 4=states");
 
 enum smic_states {
 	SMIC_IDLE,
@@ -76,7 +80,7 @@ enum smic_states {
 #define SMIC_MAX_ERROR_RETRIES 3
 
 /* Timeouts in microseconds. */
-#define SMIC_RETRY_TIMEOUT 100000
+#define SMIC_RETRY_TIMEOUT 2000000
 
 /* SMIC Flags Register Bits */
 #define SMIC_RX_DATA_READY	0x80
-- 
cgit v1.2.3


From cc4673eecdcc4a918e4d8796295d798b5e98d602 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 00:59:57 -0800
Subject: [PATCH] ipmi: watchdog parms in sysfs

Modify the IPMI watchdog parameters (the ones that make sense) to be exported
from sysfs.  This is somewhat complicated because these parameters have
side-effects that must be handled.

Signed-off-by: Corey Minyard <minyard@acm.org>
Cc: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_watchdog.c | 246 ++++++++++++++++++++++++++++++--------
 1 file changed, 196 insertions(+), 50 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 2da64bf7469..405697a9f4d 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -47,6 +47,8 @@
 #include <linux/reboot.h>
 #include <linux/wait.h>
 #include <linux/poll.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/apic.h>
 #endif
@@ -158,27 +160,120 @@ static struct fasync_struct *fasync_q = NULL;
 static char pretimeout_since_last_heartbeat = 0;
 static char expect_close;
 
+static DECLARE_RWSEM(register_sem);
+
+/* Parameters to ipmi_set_timeout */
+#define IPMI_SET_TIMEOUT_NO_HB			0
+#define IPMI_SET_TIMEOUT_HB_IF_NECESSARY	1
+#define IPMI_SET_TIMEOUT_FORCE_HB		2
+
+static int ipmi_set_timeout(int do_heartbeat);
+
 /* If true, the driver will start running as soon as it is configured
    and ready. */
 static int start_now = 0;
 
-module_param(timeout, int, 0);
+static int set_param_int(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int  l;
+	int  rv = 0;
+
+	if (!val)
+		return -EINVAL;
+	l = simple_strtoul(val, &endp, 0);
+	if (endp == val)
+		return -EINVAL;
+
+	down_read(&register_sem);
+	*((int *)kp->arg) = l;
+	if (watchdog_user)
+		rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
+	up_read(&register_sem);
+
+	return rv;
+}
+
+static int get_param_int(char *buffer, struct kernel_param *kp)
+{
+	return sprintf(buffer, "%i", *((int *)kp->arg));
+}
+
+typedef int (*action_fn)(const char *intval, char *outval);
+
+static int action_op(const char *inval, char *outval);
+static int preaction_op(const char *inval, char *outval);
+static int preop_op(const char *inval, char *outval);
+static void check_parms(void);
+
+static int set_param_str(const char *val, struct kernel_param *kp)
+{
+	action_fn  fn = (action_fn) kp->arg;
+	int        rv = 0;
+	const char *end;
+	char       valcp[16];
+	int        len;
+
+	/* Truncate leading and trailing spaces. */
+	while (isspace(*val))
+		val++;
+	end = val + strlen(val) - 1;
+	while ((end >= val) && isspace(*end))
+		end--;
+	len = end - val + 1;
+	if (len > sizeof(valcp) - 1)
+		return -EINVAL;
+	memcpy(valcp, val, len);
+	valcp[len] = '\0';
+
+	down_read(&register_sem);
+	rv = fn(valcp, NULL);
+	if (rv)
+		goto out_unlock;
+
+	check_parms();
+	if (watchdog_user)
+		rv = ipmi_set_timeout(IPMI_SET_TIMEOUT_HB_IF_NECESSARY);
+
+ out_unlock:
+	up_read(&register_sem);
+	return rv;
+}
+
+static int get_param_str(char *buffer, struct kernel_param *kp)
+{
+	action_fn fn = (action_fn) kp->arg;
+	int       rv;
+
+	rv = fn(NULL, buffer);
+	if (rv)
+		return rv;
+	return strlen(buffer);
+}
+
+module_param_call(timeout, set_param_int, get_param_int, &timeout, 0644);
 MODULE_PARM_DESC(timeout, "Timeout value in seconds.");
-module_param(pretimeout, int, 0);
+
+module_param_call(pretimeout, set_param_int, get_param_int, &pretimeout, 0644);
 MODULE_PARM_DESC(pretimeout, "Pretimeout value in seconds.");
-module_param_string(action, action, sizeof(action), 0);
+
+module_param_call(action, set_param_str, get_param_str, action_op, 0644);
 MODULE_PARM_DESC(action, "Timeout action. One of: "
 		 "reset, none, power_cycle, power_off.");
-module_param_string(preaction, preaction, sizeof(preaction), 0);
+
+module_param_call(preaction, set_param_str, get_param_str, preaction_op, 0644);
 MODULE_PARM_DESC(preaction, "Pretimeout action.  One of: "
 		 "pre_none, pre_smi, pre_nmi, pre_int.");
-module_param_string(preop, preop, sizeof(preop), 0);
+
+module_param_call(preop, set_param_str, get_param_str, preop_op, 0644);
 MODULE_PARM_DESC(preop, "Pretimeout driver operation.  One of: "
 		 "preop_none, preop_panic, preop_give_data.");
+
 module_param(start_now, int, 0);
 MODULE_PARM_DESC(start_now, "Set to 1 to start the watchdog as"
 		 "soon as the driver is loaded.");
-module_param(nowayout, int, 0);
+
+module_param(nowayout, int, 0644);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default=CONFIG_WATCHDOG_NOWAYOUT)");
 
 /* Default state of the timer. */
@@ -294,11 +389,6 @@ static int i_ipmi_set_timeout(struct ipmi_smi_msg  *smi_msg,
 	return rv;
 }
 
-/* Parameters to ipmi_set_timeout */
-#define IPMI_SET_TIMEOUT_NO_HB			0
-#define IPMI_SET_TIMEOUT_HB_IF_NECESSARY	1
-#define IPMI_SET_TIMEOUT_FORCE_HB		2
-
 static int ipmi_set_timeout(int do_heartbeat)
 {
 	int send_heartbeat_now;
@@ -732,8 +822,6 @@ static struct miscdevice ipmi_wdog_miscdev = {
 	.fops		= &ipmi_wdog_fops
 };
 
-static DECLARE_RWSEM(register_sem);
-
 static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
 				  void                 *handler_data)
 {
@@ -839,6 +927,7 @@ static struct nmi_handler ipmi_nmi_handler =
 	.handler  = ipmi_nmi,
 	.priority = 0, /* Call us last. */
 };
+static int nmi_handler_registered;
 #endif
 
 static int wdog_reboot_handler(struct notifier_block *this,
@@ -921,59 +1010,86 @@ static struct ipmi_smi_watcher smi_watcher =
 	.smi_gone = ipmi_smi_gone
 };
 
-static int __init ipmi_wdog_init(void)
+static int action_op(const char *inval, char *outval)
 {
-	int rv;
+	if (outval)
+		strcpy(outval, action);
+
+	if (!inval)
+		return 0;
 
-	if (strcmp(action, "reset") == 0) {
+	if (strcmp(inval, "reset") == 0)
 		action_val = WDOG_TIMEOUT_RESET;
-	} else if (strcmp(action, "none") == 0) {
+	else if (strcmp(inval, "none") == 0)
 		action_val = WDOG_TIMEOUT_NONE;
-	} else if (strcmp(action, "power_cycle") == 0) {
+	else if (strcmp(inval, "power_cycle") == 0)
 		action_val = WDOG_TIMEOUT_POWER_CYCLE;
-	} else if (strcmp(action, "power_off") == 0) {
+	else if (strcmp(inval, "power_off") == 0)
 		action_val = WDOG_TIMEOUT_POWER_DOWN;
-	} else {
-		action_val = WDOG_TIMEOUT_RESET;
-		printk(KERN_INFO PFX "Unknown action '%s', defaulting to"
-		       " reset\n", action);
-	}
+	else
+		return -EINVAL;
+	strcpy(action, inval);
+	return 0;
+}
+
+static int preaction_op(const char *inval, char *outval)
+{
+	if (outval)
+		strcpy(outval, preaction);
 
-	if (strcmp(preaction, "pre_none") == 0) {
+	if (!inval)
+		return 0;
+
+	if (strcmp(inval, "pre_none") == 0)
 		preaction_val = WDOG_PRETIMEOUT_NONE;
-	} else if (strcmp(preaction, "pre_smi") == 0) {
+	else if (strcmp(inval, "pre_smi") == 0)
 		preaction_val = WDOG_PRETIMEOUT_SMI;
 #ifdef HAVE_NMI_HANDLER
-	} else if (strcmp(preaction, "pre_nmi") == 0) {
+	else if (strcmp(inval, "pre_nmi") == 0)
 		preaction_val = WDOG_PRETIMEOUT_NMI;
 #endif
-	} else if (strcmp(preaction, "pre_int") == 0) {
+	else if (strcmp(inval, "pre_int") == 0)
 		preaction_val = WDOG_PRETIMEOUT_MSG_INT;
-	} else {
-		preaction_val = WDOG_PRETIMEOUT_NONE;
-		printk(KERN_INFO PFX "Unknown preaction '%s', defaulting to"
-		       " none\n", preaction);
-	}
+	else
+		return -EINVAL;
+	strcpy(preaction, inval);
+	return 0;
+}
+
+static int preop_op(const char *inval, char *outval)
+{
+	if (outval)
+		strcpy(outval, preop);
 
-	if (strcmp(preop, "preop_none") == 0) {
+	if (!inval)
+		return 0;
+
+	if (strcmp(inval, "preop_none") == 0)
 		preop_val = WDOG_PREOP_NONE;
-	} else if (strcmp(preop, "preop_panic") == 0) {
+	else if (strcmp(inval, "preop_panic") == 0)
 		preop_val = WDOG_PREOP_PANIC;
-	} else if (strcmp(preop, "preop_give_data") == 0) {
+	else if (strcmp(inval, "preop_give_data") == 0)
 		preop_val = WDOG_PREOP_GIVE_DATA;
-	} else {
-		preop_val = WDOG_PREOP_NONE;
-		printk(KERN_INFO PFX "Unknown preop '%s', defaulting to"
-		       " none\n", preop);
-	}
+	else
+		return -EINVAL;
+	strcpy(preop, inval);
+	return 0;
+}
 
+static void check_parms(void)
+{
 #ifdef HAVE_NMI_HANDLER
+	int do_nmi = 0;
+	int rv;
+
 	if (preaction_val == WDOG_PRETIMEOUT_NMI) {
+		do_nmi = 1;
 		if (preop_val == WDOG_PREOP_GIVE_DATA) {
 			printk(KERN_WARNING PFX "Pretimeout op is to give data"
 			       " but NMI pretimeout is enabled, setting"
 			       " pretimeout op to none\n");
-			preop_val = WDOG_PREOP_NONE;
+			preop_op("preop_none", NULL);
+			do_nmi = 0;
 		}
 #ifdef CONFIG_X86_LOCAL_APIC
 		if (nmi_watchdog == NMI_IO_APIC) {
@@ -983,18 +1099,48 @@ static int __init ipmi_wdog_init(void)
 			       " Disabling IPMI nmi pretimeout.\n",
 			       nmi_watchdog);
 			preaction_val = WDOG_PRETIMEOUT_NONE;
-		} else {
+			do_nmi = 0;
+		}
 #endif
+	}
+	if (do_nmi && !nmi_handler_registered) {
 		rv = request_nmi(&ipmi_nmi_handler);
 		if (rv) {
-			printk(KERN_WARNING PFX "Can't register nmi handler\n");
-			return rv;
-		}
-#ifdef CONFIG_X86_LOCAL_APIC
-		}
-#endif
+			printk(KERN_WARNING PFX
+			       "Can't register nmi handler\n");
+			return;
+		} else
+			nmi_handler_registered = 1;
+	} else if (!do_nmi && nmi_handler_registered) {
+		release_nmi(&ipmi_nmi_handler);
+		nmi_handler_registered = 0;
 	}
 #endif
+}
+
+static int __init ipmi_wdog_init(void)
+{
+	int rv;
+
+	if (action_op(action, NULL)) {
+		action_op("reset", NULL);
+		printk(KERN_INFO PFX "Unknown action '%s', defaulting to"
+		       " reset\n", action);
+	}
+
+	if (preaction_op(preaction, NULL)) {
+		preaction_op("pre_none", NULL);
+		printk(KERN_INFO PFX "Unknown preaction '%s', defaulting to"
+		       " none\n", preaction);
+	}
+
+	if (preop_op(preop, NULL)) {
+		preop_op("preop_none", NULL);
+		printk(KERN_INFO PFX "Unknown preop '%s', defaulting to"
+		       " none\n", preop);
+	}
+
+	check_parms();
 
 	rv = ipmi_smi_watcher_register(&smi_watcher);
 	if (rv) {
@@ -1021,7 +1167,7 @@ static __exit void ipmi_unregister_watchdog(void)
 	down_write(&register_sem);
 
 #ifdef HAVE_NMI_HANDLER
-	if (preaction_val == WDOG_PRETIMEOUT_NMI)
+	if (nmi_handler_registered)
 		release_nmi(&ipmi_nmi_handler);
 #endif
 
-- 
cgit v1.2.3


From 21d6c542153c680f689a9badf5534bf27704350b Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 00:59:57 -0800
Subject: [PATCH] ipmi: poweroff cleanups

Make module_param and MODULE_PARAM_DESC agree on poweroff_powercycle name.

There was an extraneous ifdef in the IPMI poweroff code that prevented it from
working if PROC_FS was disabled.

Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_poweroff.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_poweroff.c b/drivers/char/ipmi/ipmi_poweroff.c
index f66947722e1..e053eade036 100644
--- a/drivers/char/ipmi/ipmi_poweroff.c
+++ b/drivers/char/ipmi/ipmi_poweroff.c
@@ -56,7 +56,7 @@ static int poweroff_powercycle;
 
 /* parameter definition to allow user to flag power cycle */
 module_param(poweroff_powercycle, int, 0644);
-MODULE_PARM_DESC(poweroff_powercycles, " Set to non-zero to enable power cycle instead of power down. Power cycle is contingent on hardware support, otherwise it defaults back to power down.");
+MODULE_PARM_DESC(poweroff_powercycle, " Set to non-zero to enable power cycle instead of power down. Power cycle is contingent on hardware support, otherwise it defaults back to power down.");
 
 /* Stuff from the get device id command. */
 static unsigned int mfg_id;
@@ -611,9 +611,7 @@ static int ipmi_poweroff_init (void)
 	}
 #endif
 
-#ifdef CONFIG_PROC_FS
 	rv = ipmi_smi_watcher_register(&smi_watcher);
-#endif
 	if (rv) {
 		unregister_sysctl_table(ipmi_table_header);
 		printk(KERN_ERR PFX "Unable to register SMI watcher: %d\n", rv);
-- 
cgit v1.2.3


From d5a2b89a4943b423b5b0a07783fee4e08424b0b2 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 00:59:58 -0800
Subject: [PATCH] ipmi: more dell fixes

Make SMIC driver ignore EVT_AVAIL and SMS_ATN bits in flags register, as
they're used by systems management interrupts, not the host OS.

Make the OEM0 Data Available handler work for pre-IPMI 1.5 systems from Dell
too.

Without these two fixes, PowerEdge 2650 and other similar systems with SMIC
may hang a process (modprobe or anything using /dev/ipmi0).

Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_si_intf.c | 23 ++++++++++++++++-------
 drivers/char/ipmi/ipmi_smic_sm.c |  9 +++++++--
 2 files changed, 23 insertions(+), 9 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 204e2e987e9..df7dbbff57a 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -2052,6 +2052,9 @@ static int oem_data_avail_to_receive_msg_avail(struct smi_info *smi_info)
  * IPMI Version = 0x51             IPMI 1.5
  * Manufacturer ID = A2 02 00      Dell IANA
  *
+ * Additionally, PowerEdge systems with IPMI < 1.5 may also assert
+ * OEM0_DATA_AVAIL and needs to be treated as RECEIVE_MSG_AVAIL.
+ *
  */
 #define DELL_POWEREDGE_8G_BMC_DEVICE_ID  0x20
 #define DELL_POWEREDGE_8G_BMC_DEVICE_REV 0x80
@@ -2061,13 +2064,19 @@ static void setup_dell_poweredge_oem_data_handler(struct smi_info *smi_info)
 {
 	struct ipmi_device_id *id = &smi_info->device_id;
 	const char mfr[3]=DELL_IANA_MFR_ID;
-	if (! memcmp(mfr, id->manufacturer_id, sizeof(mfr))
-	    && (id->device_id       == DELL_POWEREDGE_8G_BMC_DEVICE_ID)
-	    && (id->device_revision == DELL_POWEREDGE_8G_BMC_DEVICE_REV)
-	    && (id->ipmi_version    == DELL_POWEREDGE_8G_BMC_IPMI_VERSION))
-	{
-		smi_info->oem_data_avail_handler =
-			oem_data_avail_to_receive_msg_avail;
+	if (! memcmp(mfr, id->manufacturer_id, sizeof(mfr))) {
+		if (id->device_id       == DELL_POWEREDGE_8G_BMC_DEVICE_ID  &&
+		    id->device_revision == DELL_POWEREDGE_8G_BMC_DEVICE_REV &&
+		    id->ipmi_version    == DELL_POWEREDGE_8G_BMC_IPMI_VERSION) {
+			smi_info->oem_data_avail_handler =
+				oem_data_avail_to_receive_msg_avail;
+		}
+		else if (ipmi_version_major(id) < 1 ||
+			 (ipmi_version_major(id) == 1 &&
+			  ipmi_version_minor(id) < 5)) {
+			smi_info->oem_data_avail_handler =
+				oem_data_avail_to_receive_msg_avail;
+		}
 	}
 }
 
diff --git a/drivers/char/ipmi/ipmi_smic_sm.c b/drivers/char/ipmi/ipmi_smic_sm.c
index f17043da9dd..39d7e5ef1a2 100644
--- a/drivers/char/ipmi/ipmi_smic_sm.c
+++ b/drivers/char/ipmi/ipmi_smic_sm.c
@@ -85,6 +85,12 @@ enum smic_states {
 /* SMIC Flags Register Bits */
 #define SMIC_RX_DATA_READY	0x80
 #define SMIC_TX_DATA_READY	0x40
+/*
+ * SMIC_SMI and SMIC_EVM_DATA_AVAIL are only used by
+ * a few systems, and then only by Systems Management
+ * Interrupts, not by the OS.  Always ignore these bits.
+ *
+ */
 #define SMIC_SMI		0x10
 #define SMIC_EVM_DATA_AVAIL	0x08
 #define SMIC_SMS_DATA_AVAIL	0x04
@@ -368,8 +374,7 @@ static enum si_sm_result smic_event (struct si_sm_data *smic, long time)
 	switch (smic->state) {
 	case SMIC_IDLE:
 		/* in IDLE we check for available messages */
-		if (flags & (SMIC_SMI |
-			     SMIC_EVM_DATA_AVAIL | SMIC_SMS_DATA_AVAIL))
+		if (flags & SMIC_SMS_DATA_AVAIL)
 		{
 			return SI_SM_ATTN;
 		}
-- 
cgit v1.2.3


From ea94027b92dd0d02d238d5984cd9089343c1d6cc Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 00:59:59 -0800
Subject: [PATCH] ipmi: si start transaction hook

Some commands, on some system BMCs, don't respond at all.  This is seen on
Dell PowerEdge x6xx and x7xx systems with IPMI 1.0 BT controllers when a "Get
SDR" command is issued, with a length field of 0x3A, which happens to be the
length of about SDR entries.  If another length is passed, this command
succeeds.

This patch adds general infrastructure for receiving commands before they're
passed down to the low-level drivers, such that they can be completed
immediately, or modified, prior to being sent to ->start_transaction().

Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_si_intf.c | 84 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index df7dbbff57a..2ace62b1d32 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -51,6 +51,7 @@
 #include <linux/list.h>
 #include <linux/pci.h>
 #include <linux/ioport.h>
+#include <linux/notifier.h>
 #include <asm/irq.h>
 #ifdef CONFIG_HIGH_RES_TIMERS
 #include <linux/hrtime.h>
@@ -222,6 +223,12 @@ struct smi_info
 	unsigned long incoming_messages;
 };
 
+static struct notifier_block *xaction_notifier_list;
+static int register_xaction_notifier(struct notifier_block * nb)
+{
+	return notifier_chain_register(&xaction_notifier_list, nb);
+}
+
 static void si_restart_short_timer(struct smi_info *smi_info);
 
 static void deliver_recv_msg(struct smi_info *smi_info,
@@ -281,6 +288,11 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
 		do_gettimeofday(&t);
 		printk("**Start2: %d.%9.9d\n", t.tv_sec, t.tv_usec);
 #endif
+		err = notifier_call_chain(&xaction_notifier_list, 0, smi_info);
+		if (err & NOTIFY_STOP_MASK) {
+			rv = SI_SM_CALL_WITHOUT_DELAY;
+			goto out;
+		}
 		err = smi_info->handlers->start_transaction(
 			smi_info->si_sm,
 			smi_info->curr_msg->data,
@@ -291,6 +303,7 @@ static enum si_sm_result start_next_msg(struct smi_info *smi_info)
 
 		rv = SI_SM_CALL_WITHOUT_DELAY;
 	}
+	out:
 	spin_unlock(&(smi_info->msg_lock));
 
 	return rv;
@@ -2080,6 +2093,71 @@ static void setup_dell_poweredge_oem_data_handler(struct smi_info *smi_info)
 	}
 }
 
+#define CANNOT_RETURN_REQUESTED_LENGTH 0xCA
+static void return_hosed_msg_badsize(struct smi_info *smi_info)
+{
+	struct ipmi_smi_msg *msg = smi_info->curr_msg;
+
+	/* Make it a reponse */
+	msg->rsp[0] = msg->data[0] | 4;
+	msg->rsp[1] = msg->data[1];
+	msg->rsp[2] = CANNOT_RETURN_REQUESTED_LENGTH;
+	msg->rsp_size = 3;
+	smi_info->curr_msg = NULL;
+	deliver_recv_msg(smi_info, msg);
+}
+
+/*
+ * dell_poweredge_bt_xaction_handler
+ * @info - smi_info.device_id must be populated
+ *
+ * Dell PowerEdge servers with the BT interface (x6xx and 1750) will
+ * not respond to a Get SDR command if the length of the data
+ * requested is exactly 0x3A, which leads to command timeouts and no
+ * data returned.  This intercepts such commands, and causes userspace
+ * callers to try again with a different-sized buffer, which succeeds.
+ */
+
+#define STORAGE_NETFN 0x0A
+#define STORAGE_CMD_GET_SDR 0x23
+static int dell_poweredge_bt_xaction_handler(struct notifier_block *self,
+					     unsigned long unused,
+					     void *in)
+{
+	struct smi_info *smi_info = in;
+	unsigned char *data = smi_info->curr_msg->data;
+	unsigned int size   = smi_info->curr_msg->data_size;
+	if (size >= 8 &&
+	    (data[0]>>2) == STORAGE_NETFN &&
+	    data[1] == STORAGE_CMD_GET_SDR &&
+	    data[7] == 0x3A) {
+		return_hosed_msg_badsize(smi_info);
+		return NOTIFY_STOP;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block dell_poweredge_bt_xaction_notifier = {
+	.notifier_call	= dell_poweredge_bt_xaction_handler,
+};
+
+/*
+ * setup_dell_poweredge_bt_xaction_handler
+ * @info - smi_info.device_id must be filled in already
+ *
+ * Fills in smi_info.device_id.start_transaction_pre_hook
+ * when we know what function to use there.
+ */
+static void
+setup_dell_poweredge_bt_xaction_handler(struct smi_info *smi_info)
+{
+	struct ipmi_device_id *id = &smi_info->device_id;
+	const char mfr[3]=DELL_IANA_MFR_ID;
+ 	if (! memcmp(mfr, id->manufacturer_id, sizeof(mfr)) &&
+	    smi_info->si_type == SI_BT)
+		register_xaction_notifier(&dell_poweredge_bt_xaction_notifier);
+}
+
 /*
  * setup_oem_data_handler
  * @info - smi_info.device_id must be filled in already
@@ -2093,6 +2171,11 @@ static void setup_oem_data_handler(struct smi_info *smi_info)
 	setup_dell_poweredge_oem_data_handler(smi_info);
 }
 
+static void setup_xaction_handlers(struct smi_info *smi_info)
+{
+	setup_dell_poweredge_bt_xaction_handler(smi_info);
+}
+
 /* Returns 0 if initialized, or negative on an error. */
 static int init_one_smi(int intf_num, struct smi_info **smi)
 {
@@ -2188,6 +2271,7 @@ static int init_one_smi(int intf_num, struct smi_info **smi)
 		goto out_err;
 
 	setup_oem_data_handler(new_smi);
+	setup_xaction_handlers(new_smi);
 
 	/* Try to claim any interrupts. */
 	new_smi->irq_setup(new_smi);
-- 
cgit v1.2.3


From 21dcd300b15f87ce10df8773d029708f27499aa7 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 01:00:01 -0800
Subject: [PATCH] ipmi: bt restart reset fixes

The current BT retry/reset mechanism fails to succeed on a PowerEdge 1650,
when the controller is wedged with B2H_ATN asserted at XACTION_START.  If this
occurs, no further commands will ever succeed unless the state of the
controller is first cleared out.

Furthermore, the soft reset would only occur if the first command after insmod
was the one that timed out, not if a later command timed out.

This patch changes the retry/reset mechanism to be as follows:

Before retrying a command, clear the state of the BT controller such that the
flags represent ready for a new transaction.  This increases the chance of
success of the restarted transaction.

After 2 retries, issue a soft reset and retry one more time before giving up
and reporting back a failure.

Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Acked-by: Rocky Craig <rocky.craig@hp.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_bt_sm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c
index 7c4a195dfc9..58dcdee1cd7 100644
--- a/drivers/char/ipmi/ipmi_bt_sm.c
+++ b/drivers/char/ipmi/ipmi_bt_sm.c
@@ -333,8 +333,7 @@ static void error_recovery(struct si_sm_data *bt, char *reason)
 		bt->state = BT_STATE_HOSED;
 		if (!bt->nonzero_status)
 			printk(KERN_ERR "IPMI: BT stuck, try power cycle\n");
-		else if (bt->seq == FIRST_SEQ + BT_RETRY_LIMIT) {
-			/* most likely during insmod */
+		else if (bt->error_retries <= BT_RETRY_LIMIT + 1) {
 			printk(KERN_DEBUG "IPMI: BT reset (takes 5 secs)\n");
         		bt->state = BT_STATE_RESET1;
 		}
@@ -475,6 +474,7 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time)
 		break;
 
 	case BT_STATE_RESTART:		/* don't reset retries! */
+		reset_flags(bt);
 		bt->write_data[2] = ++bt->seq;
 		bt->read_count = 0;
 		bt->nonzero_status = 0;
-- 
cgit v1.2.3


From c3e7e7916ec61cf58c88af12f4db17f28cffd83a Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 01:00:02 -0800
Subject: [PATCH] ipmi: kcs error0 delay

BMCs can get into ERROR0 state while flashing new firmware, particularly while
the BMC is erasing the next flash block, which may take just under 2 seconds
on a Dell PowerEdge 2800 (1.75 seconds typical), during which time the
single-threaded firmware may not be able to process new commands.  In
particular, clearing OBF may not take effect immediately.

We want it to delay in ERROR0 after clearing OBF a bit waiting for OBF to
actually be clear before proceeding.

This introduces a new return value from the LLDD's event loop,
SI_SM_CALL_WITH_TICK_DELAY.  This means the calling thread/timer should
schedule_timeout() at least 1 tick, rather than busy-wait.  This is a longer
delay than SI_SM_CALL_WITH_DELAY, which is typically a 250us busy-wait.

Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_kcs_sm.c  | 8 ++++++++
 drivers/char/ipmi/ipmi_si_intf.c | 3 ++-
 drivers/char/ipmi/ipmi_si_sm.h   | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_kcs_sm.c b/drivers/char/ipmi/ipmi_kcs_sm.c
index dc83365ede4..da1554194d3 100644
--- a/drivers/char/ipmi/ipmi_kcs_sm.c
+++ b/drivers/char/ipmi/ipmi_kcs_sm.c
@@ -41,6 +41,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/string.h>
+#include <linux/jiffies.h>
 #include <linux/ipmi_msgdefs.h>		/* for completion codes */
 #include "ipmi_si_sm.h"
 
@@ -99,6 +100,7 @@ enum kcs_states {
 #define IBF_RETRY_TIMEOUT 1000000
 #define OBF_RETRY_TIMEOUT 1000000
 #define MAX_ERROR_RETRIES 10
+#define ERROR0_OBF_WAIT_JIFFIES (2*HZ)
 
 struct si_sm_data
 {
@@ -115,6 +117,7 @@ struct si_sm_data
 	unsigned int  error_retries;
 	long          ibf_timeout;
 	long          obf_timeout;
+	unsigned long  error0_timeout;
 };
 
 static unsigned int init_kcs_data(struct si_sm_data *kcs,
@@ -187,6 +190,7 @@ static inline void start_error_recovery(struct si_sm_data *kcs, char *reason)
 			printk(KERN_DEBUG "ipmi_kcs_sm: kcs hosed: %s\n", reason);
 		kcs->state = KCS_HOSED;
 	} else {
+		kcs->error0_timeout = jiffies + ERROR0_OBF_WAIT_JIFFIES;
 		kcs->state = KCS_ERROR0;
 	}
 }
@@ -423,6 +427,10 @@ static enum si_sm_result kcs_event(struct si_sm_data *kcs, long time)
 
 	case KCS_ERROR0:
 		clear_obf(kcs, status);
+		status = read_status(kcs);
+		if  (GET_STATUS_OBF(status)) /* controller isn't responding */
+			if (time_before(jiffies, kcs->error0_timeout))
+				return SI_SM_CALL_WITH_TICK_DELAY;
 		write_cmd(kcs, KCS_GET_STATUS_ABORT);
 		kcs->state = KCS_ERROR1;
 		break;
diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index 2ace62b1d32..d514df7c728 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -1932,7 +1932,8 @@ static int try_get_dev_id(struct smi_info *smi_info)
 	smi_result = smi_info->handlers->event(smi_info->si_sm, 0);
 	for (;;)
 	{
-		if (smi_result == SI_SM_CALL_WITH_DELAY) {
+		if (smi_result == SI_SM_CALL_WITH_DELAY ||
+		    smi_result == SI_SM_CALL_WITH_TICK_DELAY) {
 			schedule_timeout_uninterruptible(1);
 			smi_result = smi_info->handlers->event(
 				smi_info->si_sm, 100);
diff --git a/drivers/char/ipmi/ipmi_si_sm.h b/drivers/char/ipmi/ipmi_si_sm.h
index 62791dd4298..bf3d4962d6a 100644
--- a/drivers/char/ipmi/ipmi_si_sm.h
+++ b/drivers/char/ipmi/ipmi_si_sm.h
@@ -62,6 +62,7 @@ enum si_sm_result
 {
 	SI_SM_CALL_WITHOUT_DELAY, /* Call the driver again immediately */
 	SI_SM_CALL_WITH_DELAY,	/* Delay some before calling again. */
+	SI_SM_CALL_WITH_TICK_DELAY,	/* Delay at least 1 tick before calling again. */
 	SI_SM_TRANSACTION_COMPLETE, /* A transaction is finished. */
 	SI_SM_IDLE,		/* The SM is in idle state. */
 	SI_SM_HOSED,		/* The hardware violated the state machine. */
-- 
cgit v1.2.3


From a9a2c44ff0a1350f8bfe3a162ecf71b1c9ce5cc2 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 01:00:03 -0800
Subject: [PATCH] ipmi: add timer thread

We must poll for responses to commands when interrupts aren't in use.  The
default poll interval is based on using a kernel timer, which varies with HZ.
For character-based interfaces like KCS and SMIC though, that can be way too
slow (>15 minutes to flash a new firmware with KCS, >20 seconds to retrieve
the sensor list).

This creates a low-priority kernel thread to poll more often.  If the state
machine is idle, so is the kernel thread.  But if there's an active command,
it polls quite rapidly.  This decreases firmware flash time from 15 minutes
to 1.5 minutes, and the sensor list time to 4.5 seconds, on a Dell PowerEdge
x8x system.

The timer-based polling remains, to ensure some amount of responsiveness even
under high user process CPU load.

Checking for a stopped timer at rmmod now uses atomics and del_timer_sync() to
ensure safe stoppage.

Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Signed-off-by: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_si_intf.c | 84 ++++++++++++++++++++++++++++++----------
 1 file changed, 63 insertions(+), 21 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index d514df7c728..fa3be622ca9 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -126,6 +126,7 @@ struct ipmi_device_id {
 
 struct smi_info
 {
+	int                    intf_num;
 	ipmi_smi_t             intf;
 	struct si_sm_data      *si_sm;
 	struct si_sm_handlers  *handlers;
@@ -193,8 +194,7 @@ struct smi_info
 	unsigned long       last_timeout_jiffies;
 
 	/* Used to gracefully stop the timer without race conditions. */
-	volatile int        stop_operation;
-	volatile int        timer_stopped;
+	atomic_t            stop_operation;
 
 	/* The driver will disable interrupts when it gets into a
 	   situation where it cannot handle messages due to lack of
@@ -221,6 +221,9 @@ struct smi_info
 	unsigned long events;
 	unsigned long watchdog_pretimeouts;
 	unsigned long incoming_messages;
+
+        struct completion exiting;
+        long              thread_pid;
 };
 
 static struct notifier_block *xaction_notifier_list;
@@ -779,6 +782,38 @@ static void set_run_to_completion(void *send_info, int i_run_to_completion)
 	spin_unlock_irqrestore(&(smi_info->si_lock), flags);
 }
 
+static int ipmi_thread(void *data)
+{
+	struct smi_info *smi_info = data;
+	unsigned long flags, last=1;
+	enum si_sm_result smi_result;
+
+	daemonize("kipmi%d", smi_info->intf_num);
+	allow_signal(SIGKILL);
+	set_user_nice(current, 19);
+	while (!atomic_read(&smi_info->stop_operation)) {
+		schedule_timeout(last);
+		spin_lock_irqsave(&(smi_info->si_lock), flags);
+		smi_result=smi_event_handler(smi_info, 0);
+		spin_unlock_irqrestore(&(smi_info->si_lock), flags);
+		if (smi_result == SI_SM_CALL_WITHOUT_DELAY)
+			last = 0;
+		else if (smi_result == SI_SM_CALL_WITH_DELAY) {
+			udelay(1);
+			last = 0;
+		}
+		else {
+			/* System is idle; go to sleep */
+			last = 1;
+			current->state = TASK_INTERRUPTIBLE;
+		}
+	}
+	smi_info->thread_pid = 0;
+	complete_and_exit(&(smi_info->exiting), 0);
+	return 0;
+}
+
+
 static void poll(void *send_info)
 {
 	struct smi_info *smi_info = send_info;
@@ -837,10 +872,8 @@ static void smi_timeout(unsigned long data)
 	struct timeval    t;
 #endif
 
-	if (smi_info->stop_operation) {
-		smi_info->timer_stopped = 1;
+	if (atomic_read(&smi_info->stop_operation))
 		return;
-	}
 
 	spin_lock_irqsave(&(smi_info->si_lock), flags);
 #ifdef DEBUG_TIMING
@@ -913,7 +946,7 @@ static irqreturn_t si_irq_handler(int irq, void *data, struct pt_regs *regs)
 	smi_info->interrupts++;
 	spin_unlock(&smi_info->count_lock);
 
-	if (smi_info->stop_operation)
+	if (atomic_read(&smi_info->stop_operation))
 		goto out;
 
 #ifdef DEBUG_TIMING
@@ -1432,7 +1465,7 @@ static u32 ipmi_acpi_gpe(void *context)
 	smi_info->interrupts++;
 	spin_unlock(&smi_info->count_lock);
 
-	if (smi_info->stop_operation)
+	if (atomic_read(&smi_info->stop_operation))
 		goto out;
 
 #ifdef DEBUG_TIMING
@@ -2177,6 +2210,16 @@ static void setup_xaction_handlers(struct smi_info *smi_info)
 	setup_dell_poweredge_bt_xaction_handler(smi_info);
 }
 
+static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
+{
+	if (smi_info->thread_pid > 0) {
+		/* wake the potentially sleeping thread */
+		kill_proc(smi_info->thread_pid, SIGKILL, 0);
+		wait_for_completion(&(smi_info->exiting));
+	}
+	del_timer_sync(&smi_info->si_timer);
+}
+
 /* Returns 0 if initialized, or negative on an error. */
 static int init_one_smi(int intf_num, struct smi_info **smi)
 {
@@ -2284,8 +2327,8 @@ static int init_one_smi(int intf_num, struct smi_info **smi)
 	new_smi->run_to_completion = 0;
 
 	new_smi->interrupt_disabled = 0;
-	new_smi->timer_stopped = 0;
-	new_smi->stop_operation = 0;
+	atomic_set(&new_smi->stop_operation, 0);
+	new_smi->intf_num = intf_num;
 
 	/* Start clearing the flags before we enable interrupts or the
 	   timer to avoid racing with the timer. */
@@ -2303,7 +2346,14 @@ static int init_one_smi(int intf_num, struct smi_info **smi)
 	new_smi->si_timer.function = smi_timeout;
 	new_smi->last_timeout_jiffies = jiffies;
 	new_smi->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES;
+
 	add_timer(&(new_smi->si_timer));
+ 	if (new_smi->si_type != SI_BT) {
+		init_completion(&(new_smi->exiting));
+		new_smi->thread_pid = kernel_thread(ipmi_thread, new_smi,
+						    CLONE_FS|CLONE_FILES|
+						    CLONE_SIGHAND);
+	}
 
 	rv = ipmi_register_smi(&handlers,
 			       new_smi,
@@ -2345,12 +2395,8 @@ static int init_one_smi(int intf_num, struct smi_info **smi)
 	return 0;
 
  out_err_stop_timer:
-	new_smi->stop_operation = 1;
-
-	/* Wait for the timer to stop.  This avoids problems with race
-	   conditions removing the timer here. */
-	while (!new_smi->timer_stopped)
-		schedule_timeout_uninterruptible(1);
+	atomic_inc(&new_smi->stop_operation);
+	wait_for_timer_and_thread(new_smi);
 
  out_err:
 	if (new_smi->intf)
@@ -2456,8 +2502,7 @@ static void __exit cleanup_one_si(struct smi_info *to_clean)
 	spin_lock_irqsave(&(to_clean->si_lock), flags);
 	spin_lock(&(to_clean->msg_lock));
 
-	to_clean->stop_operation = 1;
-
+	atomic_inc(&to_clean->stop_operation);
 	to_clean->irq_cleanup(to_clean);
 
 	spin_unlock(&(to_clean->msg_lock));
@@ -2468,10 +2513,7 @@ static void __exit cleanup_one_si(struct smi_info *to_clean)
 	   interrupt. */
 	synchronize_sched();
 
-	/* Wait for the timer to stop.  This avoids problems with race
-	   conditions removing the timer here. */
-	while (!to_clean->timer_stopped)
-		schedule_timeout_uninterruptible(1);
+	wait_for_timer_and_thread(to_clean);
 
 	/* Interrupts and timeouts are stopped, now make sure the
 	   interface is in a clean state. */
-- 
cgit v1.2.3


From e9a705a0a0ed99833cfef40d509f63a052638f00 Mon Sep 17 00:00:00 2001
From: Matt Domsch <Matt_Domsch@dell.com>
Date: Mon, 7 Nov 2005 01:00:04 -0800
Subject: [PATCH] ipmi: use kthread API

Convert ipmi driver thread to kthread API, only sleep when interface is
idle.

Signed-off-by: Matt Domsch <Matt_Domsch@dell.com>
Cc: Corey Minyard <minyard@acm.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_si_intf.c | 45 ++++++++++++++--------------------------
 1 file changed, 15 insertions(+), 30 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
index fa3be622ca9..ea89dca3dbb 100644
--- a/drivers/char/ipmi/ipmi_si_intf.c
+++ b/drivers/char/ipmi/ipmi_si_intf.c
@@ -52,6 +52,7 @@
 #include <linux/pci.h>
 #include <linux/ioport.h>
 #include <linux/notifier.h>
+#include <linux/kthread.h>
 #include <asm/irq.h>
 #ifdef CONFIG_HIGH_RES_TIMERS
 #include <linux/hrtime.h>
@@ -222,8 +223,7 @@ struct smi_info
 	unsigned long watchdog_pretimeouts;
 	unsigned long incoming_messages;
 
-        struct completion exiting;
-        long              thread_pid;
+        struct task_struct *thread;
 };
 
 static struct notifier_block *xaction_notifier_list;
@@ -785,31 +785,22 @@ static void set_run_to_completion(void *send_info, int i_run_to_completion)
 static int ipmi_thread(void *data)
 {
 	struct smi_info *smi_info = data;
-	unsigned long flags, last=1;
+	unsigned long flags;
 	enum si_sm_result smi_result;
 
-	daemonize("kipmi%d", smi_info->intf_num);
-	allow_signal(SIGKILL);
 	set_user_nice(current, 19);
-	while (!atomic_read(&smi_info->stop_operation)) {
-		schedule_timeout(last);
+	while (!kthread_should_stop()) {
 		spin_lock_irqsave(&(smi_info->si_lock), flags);
 		smi_result=smi_event_handler(smi_info, 0);
 		spin_unlock_irqrestore(&(smi_info->si_lock), flags);
-		if (smi_result == SI_SM_CALL_WITHOUT_DELAY)
-			last = 0;
-		else if (smi_result == SI_SM_CALL_WITH_DELAY) {
-			udelay(1);
-			last = 0;
-		}
-		else {
-			/* System is idle; go to sleep */
-			last = 1;
-			current->state = TASK_INTERRUPTIBLE;
+		if (smi_result == SI_SM_CALL_WITHOUT_DELAY) {
+			/* do nothing */
 		}
+		else if (smi_result == SI_SM_CALL_WITH_DELAY)
+			udelay(1);
+		else
+			schedule_timeout_interruptible(1);
 	}
-	smi_info->thread_pid = 0;
-	complete_and_exit(&(smi_info->exiting), 0);
 	return 0;
 }
 
@@ -2212,11 +2203,8 @@ static void setup_xaction_handlers(struct smi_info *smi_info)
 
 static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
 {
-	if (smi_info->thread_pid > 0) {
-		/* wake the potentially sleeping thread */
-		kill_proc(smi_info->thread_pid, SIGKILL, 0);
-		wait_for_completion(&(smi_info->exiting));
-	}
+	if (smi_info->thread != ERR_PTR(-ENOMEM))
+		kthread_stop(smi_info->thread);
 	del_timer_sync(&smi_info->si_timer);
 }
 
@@ -2348,12 +2336,9 @@ static int init_one_smi(int intf_num, struct smi_info **smi)
 	new_smi->si_timer.expires = jiffies + SI_TIMEOUT_JIFFIES;
 
 	add_timer(&(new_smi->si_timer));
- 	if (new_smi->si_type != SI_BT) {
-		init_completion(&(new_smi->exiting));
-		new_smi->thread_pid = kernel_thread(ipmi_thread, new_smi,
-						    CLONE_FS|CLONE_FILES|
-						    CLONE_SIGHAND);
-	}
+ 	if (new_smi->si_type != SI_BT)
+		new_smi->thread = kthread_run(ipmi_thread, new_smi,
+					      "kipmi%d", new_smi->intf_num);
 
 	rv = ipmi_register_smi(&handlers,
 			       new_smi,
-- 
cgit v1.2.3


From e61fb5b65150ace7192d1f0ce840fe4cd6285e73 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 01:00:05 -0800
Subject: [PATCH] ipmi: use rcu lock for using command receivers

Use rcu_read_lock for the cmd_rcvrs list, since that was what was intended,
intended, anyway.  This means that all the users of the cmd_rcvrs_lock are
tasks, so the irq disables are no longer required for that lock and it can
become a semaphore.

Signed-off-by: Corey Minyard <minyard@acm.org>
Acked-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_msghandler.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 320d7f035bf..c1d06ba449b 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -209,7 +209,7 @@ struct ipmi_smi
 
 	/* The list of command receivers that are registered for commands
 	   on this interface. */
-	spinlock_t       cmd_rcvrs_lock;
+	struct semaphore cmd_rcvrs_lock;
 	struct list_head cmd_rcvrs;
 
 	/* Events that were queues because no one was there to receive
@@ -345,7 +345,6 @@ static void clean_up_interface_data(ipmi_smi_t intf)
 {
 	int              i;
 	struct cmd_rcvr  *rcvr, *rcvr2;
-	unsigned long    flags;
 	struct list_head list;
 
 	free_recv_msg_list(&intf->waiting_msgs);
@@ -353,10 +352,10 @@ static void clean_up_interface_data(ipmi_smi_t intf)
 
 	/* Wholesale remove all the entries from the list in the
 	 * interface and wait for RCU to know that none are in use. */
-	spin_lock_irqsave(&intf->cmd_rcvrs_lock, flags);
+	down(&intf->cmd_rcvrs_lock);
 	list_add_rcu(&list, &intf->cmd_rcvrs);
 	list_del_rcu(&intf->cmd_rcvrs);
-	spin_unlock_irqrestore(&intf->cmd_rcvrs_lock, flags);
+	up(&intf->cmd_rcvrs_lock);
 	synchronize_rcu();
 
 	list_for_each_entry_safe(rcvr, rcvr2, &list, link)
@@ -812,7 +811,7 @@ int ipmi_destroy_user(ipmi_user_t user)
 	 * since other things may be using it till we do
 	 * synchronize_rcu()) then free everything in that list.
 	 */
-	spin_lock_irqsave(&intf->cmd_rcvrs_lock, flags);
+	down(&intf->cmd_rcvrs_lock);
 	list_for_each_safe_rcu(entry1, entry2, &intf->cmd_rcvrs) {
 		rcvr = list_entry(entry1, struct cmd_rcvr, link);
 		if (rcvr->user == user) {
@@ -821,7 +820,7 @@ int ipmi_destroy_user(ipmi_user_t user)
 			rcvrs = rcvr;
 		}
 	}
-	spin_unlock_irqrestore(&intf->cmd_rcvrs_lock, flags);
+	up(&intf->cmd_rcvrs_lock);
 	synchronize_rcu();
 	while (rcvrs) {
 		rcvr = rcvrs;
@@ -950,7 +949,7 @@ int ipmi_register_for_cmd(ipmi_user_t   user,
 	rcvr->netfn = netfn;
 	rcvr->user = user;
 
-	spin_lock_irq(&intf->cmd_rcvrs_lock);
+	down(&intf->cmd_rcvrs_lock);
 	/* Make sure the command/netfn is not already registered. */
 	entry = find_cmd_rcvr(intf, netfn, cmd);
 	if (entry) {
@@ -961,7 +960,7 @@ int ipmi_register_for_cmd(ipmi_user_t   user,
 	list_add_rcu(&rcvr->link, &intf->cmd_rcvrs);
 
  out_unlock:
-	spin_unlock_irq(&intf->cmd_rcvrs_lock);
+	up(&intf->cmd_rcvrs_lock);
 	if (rv)
 		kfree(rcvr);
 
@@ -975,17 +974,17 @@ int ipmi_unregister_for_cmd(ipmi_user_t   user,
 	ipmi_smi_t      intf = user->intf;
 	struct cmd_rcvr *rcvr;
 
-	spin_lock_irq(&intf->cmd_rcvrs_lock);
+	down(&intf->cmd_rcvrs_lock);
 	/* Make sure the command/netfn is not already registered. */
 	rcvr = find_cmd_rcvr(intf, netfn, cmd);
 	if ((rcvr) && (rcvr->user == user)) {
 		list_del_rcu(&rcvr->link);
-		spin_unlock_irq(&intf->cmd_rcvrs_lock);
+		up(&intf->cmd_rcvrs_lock);
 		synchronize_rcu();
 		kfree(rcvr);
 		return 0;
 	} else {
-		spin_unlock_irq(&intf->cmd_rcvrs_lock);
+		up(&intf->cmd_rcvrs_lock);
 		return -ENOENT;
 	}
 }
@@ -1858,7 +1857,7 @@ int ipmi_register_smi(struct ipmi_smi_handlers *handlers,
 	spin_lock_init(&intf->events_lock);
 	INIT_LIST_HEAD(&intf->waiting_events);
 	intf->waiting_events_count = 0;
-	spin_lock_init(&intf->cmd_rcvrs_lock);
+	init_MUTEX(&intf->cmd_rcvrs_lock);
 	INIT_LIST_HEAD(&intf->cmd_rcvrs);
 	init_waitqueue_head(&intf->waitq);
 
@@ -2058,14 +2057,14 @@ static int handle_ipmb_get_msg_cmd(ipmi_smi_t          intf,
 	netfn = msg->rsp[4] >> 2;
 	cmd = msg->rsp[8];
 
-	spin_lock_irqsave(&intf->cmd_rcvrs_lock, flags);
+	rcu_read_lock();
 	rcvr = find_cmd_rcvr(intf, netfn, cmd);
 	if (rcvr) {
 		user = rcvr->user;
 		kref_get(&user->refcount);
 	} else
 		user = NULL;
-	spin_unlock_irqrestore(&intf->cmd_rcvrs_lock, flags);
+	rcu_read_unlock();
 
 	if (user == NULL) {
 		/* We didn't find a user, deliver an error response. */
@@ -2238,14 +2237,14 @@ static int handle_lan_get_msg_cmd(ipmi_smi_t          intf,
 	netfn = msg->rsp[6] >> 2;
 	cmd = msg->rsp[10];
 
-	spin_lock_irqsave(&intf->cmd_rcvrs_lock, flags);
+	rcu_read_lock();
 	rcvr = find_cmd_rcvr(intf, netfn, cmd);
 	if (rcvr) {
 		user = rcvr->user;
 		kref_get(&user->refcount);
 	} else
 		user = NULL;
-	spin_unlock_irqrestore(&intf->cmd_rcvrs_lock, flags);
+	rcu_read_unlock();
 
 	if (user == NULL) {
 		/* We didn't find a user, just give up. */
-- 
cgit v1.2.3


From b385676b355549afc9a7507ce09c7df47f166521 Mon Sep 17 00:00:00 2001
From: Corey Minyard <minyard@acm.org>
Date: Mon, 7 Nov 2005 01:00:05 -0800
Subject: [PATCH] ipmi: fix watchdog timeout panic handling

If a panic came from the IPMI watchdog pretimeout and that was reported via
an NMI, it would also be reported via the standard IPMI flags, which would
get picked up when reporting panic events and cause another panic.  This
adds an atomic to avoid calling panic twice.

Signed-off-by: Corey Minyard <cminyard@mvista.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/char/ipmi/ipmi_watchdog.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'drivers/char/ipmi')

diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c
index 405697a9f4d..1f3159eb1ed 100644
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -49,6 +49,7 @@
 #include <linux/poll.h>
 #include <linux/string.h>
 #include <linux/ctype.h>
+#include <asm/atomic.h>
 #ifdef CONFIG_X86_LOCAL_APIC
 #include <asm/apic.h>
 #endif
@@ -295,6 +296,8 @@ static int ipmi_start_timer_on_heartbeat = 0;
 static unsigned char ipmi_version_major;
 static unsigned char ipmi_version_minor;
 
+/* If a pretimeout occurs, this is used to allow only one panic to happen. */
+static atomic_t preop_panic_excl = ATOMIC_INIT(-1);
 
 static int ipmi_heartbeat(void);
 static void panic_halt_ipmi_heartbeat(void);
@@ -837,9 +840,10 @@ static void ipmi_wdog_msg_handler(struct ipmi_recv_msg *msg,
 static void ipmi_wdog_pretimeout_handler(void *handler_data)
 {
 	if (preaction_val != WDOG_PRETIMEOUT_NONE) {
-		if (preop_val == WDOG_PREOP_PANIC)
-			panic("Watchdog pre-timeout");
-		else if (preop_val == WDOG_PREOP_GIVE_DATA) {
+		if (preop_val == WDOG_PREOP_PANIC) {
+			if (atomic_inc_and_test(&preop_panic_excl))
+				panic("Watchdog pre-timeout");
+		} else if (preop_val == WDOG_PREOP_GIVE_DATA) {
 			spin_lock(&ipmi_read_lock);
 			data_to_read = 1;
 			wake_up_interruptible(&read_q);
@@ -913,7 +917,8 @@ ipmi_nmi(void *dev_id, struct pt_regs *regs, int cpu, int handled)
 		   an error and not work unless we re-enable
 		   the timer.   So do so. */
 		pretimeout_since_last_heartbeat = 1;
-		panic(PFX "pre-timeout");
+		if (atomic_inc_and_test(&preop_panic_excl))
+			panic(PFX "pre-timeout");
 	}
 
 	return NOTIFY_DONE;
-- 
cgit v1.2.3