From c4edff1c19ef23e15aae64ca03f32c6719822d54 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 7 Nov 2005 00:59:56 -0800 Subject: [PATCH] ipmi: various si cleanup A number of small changes for the various system interface drivers, consolidated from a number of patches from Matt Domsch. Clear B2H_ATN and drain the BMC message buffer on command timeout. This prevents further commands from failing after a timeout. Add bt_debug and smic_debug module parameters, expose them in sysfs. This lets you enable and disable debugging messages at runtime. Unsigned jiffies math in ipmi_si_intf.c causes a too-large value to be passed to ->event() after jiffies wrap-around. The BT driver had caught this, but didn't know how to fix it. Now all calls to ->event() use a sane value for time. Increase timeout for commands handed to the BT driver from 2 seconds to 5 seconds. This is necessary particularly when the previous command was a "Clear SEL", as that command completes, yet the BMC isn't really ready to handle another command yet. Silence BT debugging messages which were being printed on the console. Increase SMIC timeout form 1/10s to 2s. This is needed on Dell PowerEdge 2650 and PowerEdge 750 with ERA/O cards to allow commands to complete without timing out. Adds kcs_debug module param, to match behavior of BT and SMIC. This also prevents messages from being sent to the console unless explicitly requested. Signed-off-by: Matt Domsch Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_bt_sm.c | 34 +++++++++++++--------------------- 1 file changed, 13 insertions(+), 21 deletions(-) (limited to 'drivers/char/ipmi/ipmi_bt_sm.c') diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c index 33862670e28..7c4a195dfc9 100644 --- a/drivers/char/ipmi/ipmi_bt_sm.c +++ b/drivers/char/ipmi/ipmi_bt_sm.c @@ -28,6 +28,8 @@ #include /* For printk. */ #include +#include +#include #include /* for completion codes */ #include "ipmi_si_sm.h" @@ -36,6 +38,8 @@ static int bt_debug = 0x00; /* Production value 0, see following flags */ #define BT_DEBUG_ENABLE 1 #define BT_DEBUG_MSG 2 #define BT_DEBUG_STATES 4 +module_param(bt_debug, int, 0644); +MODULE_PARM_DESC(bt_debug, "debug bitmask, 1=enable, 2=messages, 4=states"); /* Typical "Get BT Capabilities" values are 2-3 retries, 5-10 seconds, and 64 byte buffers. However, one HP implementation wants 255 bytes of @@ -43,7 +47,7 @@ static int bt_debug = 0x00; /* Production value 0, see following flags */ Since the Open IPMI architecture is single-message oriented at this stage, the queue depth of BT is of no concern. */ -#define BT_NORMAL_TIMEOUT 2000000 /* seconds in microseconds */ +#define BT_NORMAL_TIMEOUT 5000000 /* seconds in microseconds */ #define BT_RETRY_LIMIT 2 #define BT_RESET_DELAY 6000000 /* 6 seconds after warm reset */ @@ -202,7 +206,7 @@ static int bt_get_result(struct si_sm_data *bt, msg_len = bt->read_count - 2; /* account for length & seq */ /* Always NetFn, Cmd, cCode */ if (msg_len < 3 || msg_len > IPMI_MAX_MSG_LENGTH) { - printk(KERN_WARNING "BT results: bad msg_len = %d\n", msg_len); + printk(KERN_DEBUG "BT results: bad msg_len = %d\n", msg_len); data[0] = bt->write_data[1] | 0x4; /* Kludge a response */ data[1] = bt->write_data[3]; data[2] = IPMI_ERR_UNSPECIFIED; @@ -240,7 +244,7 @@ static void reset_flags(struct si_sm_data *bt) BT_CONTROL(BT_B_BUSY); BT_CONTROL(BT_CLR_WR_PTR); BT_CONTROL(BT_SMS_ATN); -#ifdef DEVELOPMENT_ONLY_NOT_FOR_PRODUCTION + if (BT_STATUS & BT_B2H_ATN) { int i; BT_CONTROL(BT_H_BUSY); @@ -250,7 +254,6 @@ static void reset_flags(struct si_sm_data *bt) BMC2HOST; BT_CONTROL(BT_H_BUSY); } -#endif } static inline void write_all_bytes(struct si_sm_data *bt) @@ -295,7 +298,7 @@ static inline int read_all_bytes(struct si_sm_data *bt) printk ("\n"); } if (bt->seq != bt->write_data[2]) /* idiot check */ - printk(KERN_WARNING "BT: internal error: sequence mismatch\n"); + printk(KERN_DEBUG "BT: internal error: sequence mismatch\n"); /* per the spec, the (NetFn, Seq, Cmd) tuples should match */ if ((bt->read_data[3] == bt->write_data[3]) && /* Cmd */ @@ -321,18 +324,18 @@ static void error_recovery(struct si_sm_data *bt, char *reason) bt->timeout = BT_NORMAL_TIMEOUT; /* various places want to retry */ status = BT_STATUS; - printk(KERN_WARNING "BT: %s in %s %s ", reason, STATE2TXT, + printk(KERN_DEBUG "BT: %s in %s %s\n", reason, STATE2TXT, STATUS2TXT(buf)); (bt->error_retries)++; if (bt->error_retries > BT_RETRY_LIMIT) { - printk("retry limit (%d) exceeded\n", BT_RETRY_LIMIT); + printk(KERN_DEBUG "retry limit (%d) exceeded\n", BT_RETRY_LIMIT); bt->state = BT_STATE_HOSED; if (!bt->nonzero_status) printk(KERN_ERR "IPMI: BT stuck, try power cycle\n"); else if (bt->seq == FIRST_SEQ + BT_RETRY_LIMIT) { /* most likely during insmod */ - printk(KERN_WARNING "IPMI: BT reset (takes 5 secs)\n"); + printk(KERN_DEBUG "IPMI: BT reset (takes 5 secs)\n"); bt->state = BT_STATE_RESET1; } return; @@ -340,11 +343,11 @@ static void error_recovery(struct si_sm_data *bt, char *reason) /* Sometimes the BMC queues get in an "off-by-one" state...*/ if ((bt->state == BT_STATE_B2H_WAIT) && (status & BT_B2H_ATN)) { - printk("retry B2H_WAIT\n"); + printk(KERN_DEBUG "retry B2H_WAIT\n"); return; } - printk("restart command\n"); + printk(KERN_DEBUG "restart command\n"); bt->state = BT_STATE_RESTART; } @@ -372,17 +375,6 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) return SI_SM_HOSED; if (bt->state != BT_STATE_IDLE) { /* do timeout test */ - - /* Certain states, on error conditions, can lock up a CPU - because they are effectively in an infinite loop with - CALL_WITHOUT_DELAY (right back here with time == 0). - Prevent infinite lockup by ALWAYS decrementing timeout. */ - - /* FIXME: bt_event is sometimes called with time > BT_NORMAL_TIMEOUT - (noticed in ipmi_smic_sm.c January 2004) */ - - if ((time <= 0) || (time >= BT_NORMAL_TIMEOUT)) - time = 100; bt->timeout -= time; if ((bt->timeout < 0) && (bt->state < BT_STATE_RESET1)) { error_recovery(bt, "timed out"); -- cgit v1.2.3 From 21dcd300b15f87ce10df8773d029708f27499aa7 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 7 Nov 2005 01:00:01 -0800 Subject: [PATCH] ipmi: bt restart reset fixes The current BT retry/reset mechanism fails to succeed on a PowerEdge 1650, when the controller is wedged with B2H_ATN asserted at XACTION_START. If this occurs, no further commands will ever succeed unless the state of the controller is first cleared out. Furthermore, the soft reset would only occur if the first command after insmod was the one that timed out, not if a later command timed out. This patch changes the retry/reset mechanism to be as follows: Before retrying a command, clear the state of the BT controller such that the flags represent ready for a new transaction. This increases the chance of success of the restarted transaction. After 2 retries, issue a soft reset and retry one more time before giving up and reporting back a failure. Signed-off-by: Matt Domsch Acked-by: Rocky Craig Signed-off-by: Corey Minyard Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/ipmi/ipmi_bt_sm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/char/ipmi/ipmi_bt_sm.c') diff --git a/drivers/char/ipmi/ipmi_bt_sm.c b/drivers/char/ipmi/ipmi_bt_sm.c index 7c4a195dfc9..58dcdee1cd7 100644 --- a/drivers/char/ipmi/ipmi_bt_sm.c +++ b/drivers/char/ipmi/ipmi_bt_sm.c @@ -333,8 +333,7 @@ static void error_recovery(struct si_sm_data *bt, char *reason) bt->state = BT_STATE_HOSED; if (!bt->nonzero_status) printk(KERN_ERR "IPMI: BT stuck, try power cycle\n"); - else if (bt->seq == FIRST_SEQ + BT_RETRY_LIMIT) { - /* most likely during insmod */ + else if (bt->error_retries <= BT_RETRY_LIMIT + 1) { printk(KERN_DEBUG "IPMI: BT reset (takes 5 secs)\n"); bt->state = BT_STATE_RESET1; } @@ -475,6 +474,7 @@ static enum si_sm_result bt_event(struct si_sm_data *bt, long time) break; case BT_STATE_RESTART: /* don't reset retries! */ + reset_flags(bt); bt->write_data[2] = ++bt->seq; bt->read_count = 0; bt->nonzero_status = 0; -- cgit v1.2.3