aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/memory-barriers.txt348
-rw-r--r--arch/powerpc/kernel/prom_init.c10
-rw-r--r--arch/powerpc/kernel/signal_32.c11
-rw-r--r--arch/powerpc/kernel/signal_64.c2
-rw-r--r--arch/powerpc/platforms/cell/setup.c11
-rw-r--r--arch/powerpc/platforms/pseries/setup.c8
-rw-r--r--drivers/acpi/processor_perflib.c5
-rw-r--r--drivers/char/Makefile2
-rw-r--r--drivers/message/i2o/exec-osm.c72
-rw-r--r--drivers/message/i2o/iop.c4
-rw-r--r--include/linux/i2o.h5
11 files changed, 352 insertions, 126 deletions
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index c61d8b876fd..4710845dbac 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -19,6 +19,7 @@ Contents:
- Control dependencies.
- SMP barrier pairing.
- Examples of memory barrier sequences.
+ - Read memory barriers vs load speculation.
(*) Explicit kernel barriers.
@@ -248,7 +249,7 @@ And there are a number of things that _must_ or _must_not_ be assumed:
we may get either of:
STORE *A = X; Y = LOAD *A;
- STORE *A = Y;
+ STORE *A = Y = X;
=========================
@@ -344,9 +345,12 @@ Memory barriers come in four basic varieties:
(4) General memory barriers.
- A general memory barrier is a combination of both a read memory barrier
- and a write memory barrier. It is a partial ordering over both loads and
- stores.
+ A general memory barrier gives a guarantee that all the LOAD and STORE
+ operations specified before the barrier will appear to happen before all
+ the LOAD and STORE operations specified after the barrier with respect to
+ the other components of the system.
+
+ A general memory barrier is a partial ordering over both loads and stores.
General memory barriers imply both read and write memory barriers, and so
can substitute for either.
@@ -546,9 +550,9 @@ write barrier, though, again, a general barrier is viable:
=============== ===============
a = 1;
<write barrier>
- b = 2; x = a;
+ b = 2; x = b;
<read barrier>
- y = b;
+ y = a;
Or:
@@ -563,6 +567,18 @@ Or:
Basically, the read barrier always has to be there, even though it can be of
the "weaker" type.
+[!] Note that the stores before the write barrier would normally be expected to
+match the loads after the read barrier or data dependency barrier, and vice
+versa:
+
+ CPU 1 CPU 2
+ =============== ===============
+ a = 1; }---- --->{ v = c
+ b = 2; } \ / { w = d
+ <write barrier> \ <read barrier>
+ c = 3; } / \ { x = a;
+ d = 4; }---- --->{ y = b;
+
EXAMPLES OF MEMORY BARRIER SEQUENCES
------------------------------------
@@ -600,8 +616,8 @@ STORE B, STORE C } all occuring before the unordered set of { STORE D, STORE E
| | +------+
+-------+ : :
|
- | Sequence in which stores committed to memory system
- | by CPU 1
+ | Sequence in which stores are committed to the
+ | memory system by CPU 1
V
@@ -683,14 +699,12 @@ then the following will occur:
| : : | |
| : : | CPU 2 |
| +-------+ | |
- \ | X->9 |------>| |
- \ +-------+ | |
- ----->| B->2 | | |
- +-------+ | |
- Makes sure all effects ---> ddddddddddddddddd | |
- prior to the store of C +-------+ | |
- are perceptible to | B->2 |------>| |
- successive loads +-------+ | |
+ | | X->9 |------>| |
+ | +-------+ | |
+ Makes sure all effects ---> \ ddddddddddddddddd | |
+ prior to the store of C \ +-------+ | |
+ are perceptible to ----->| B->2 |------>| |
+ subsequent loads +-------+ | |
: : +-------+
@@ -699,73 +713,239 @@ following sequence of events:
CPU 1 CPU 2
======================= =======================
+ { A = 0, B = 9 }
STORE A=1
- STORE B=2
- STORE C=3
<write barrier>
- STORE D=4
- STORE E=5
- LOAD A
+ STORE B=2
LOAD B
- LOAD C
- LOAD D
- LOAD E
+ LOAD A
Without intervention, CPU 2 may then choose to perceive the events on CPU 1 in
some effectively random order, despite the write barrier issued by CPU 1:
- +-------+ : :
- | | +------+
- | |------>| C=3 | }
- | | : +------+ }
- | | : | A=1 | }
- | | : +------+ }
- | CPU 1 | : | B=2 | }---
- | | +------+ } \
- | | wwwwwwwwwwwww} \
- | | +------+ } \ : : +-------+
- | | : | E=5 | } \ +-------+ | |
- | | : +------+ } \ { | C->3 |------>| |
- | |------>| D=4 | } \ { +-------+ : | |
- | | +------+ \ { | E->5 | : | |
- +-------+ : : \ { +-------+ : | |
- Transfer -->{ | A->1 | : | CPU 2 |
- from CPU 1 { +-------+ : | |
- to CPU 2 { | D->4 | : | |
- { +-------+ : | |
- { | B->2 |------>| |
- +-------+ | |
- : : +-------+
-
-
-If, however, a read barrier were to be placed between the load of C and the
-load of D on CPU 2, then the partial ordering imposed by CPU 1 will be
-perceived correctly by CPU 2.
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | | A->0 |------>| |
+ | +-------+ | |
+ | : : +-------+
+ \ : :
+ \ +-------+
+ ---->| A->1 |
+ +-------+
+ : :
- +-------+ : :
- | | +------+
- | |------>| C=3 | }
- | | : +------+ }
- | | : | A=1 | }---
- | | : +------+ } \
- | CPU 1 | : | B=2 | } \
- | | +------+ \
- | | wwwwwwwwwwwwwwww \
- | | +------+ \ : : +-------+
- | | : | E=5 | } \ +-------+ | |
- | | : +------+ }--- \ { | C->3 |------>| |
- | |------>| D=4 | } \ \ { +-------+ : | |
- | | +------+ \ -->{ | B->2 | : | |
- +-------+ : : \ { +-------+ : | |
- \ { | A->1 | : | CPU 2 |
- \ +-------+ | |
- At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
- barrier causes all effects \ +-------+ | |
- prior to the storage of C \ { | E->5 | : | |
- to be perceptible to CPU 2 -->{ +-------+ : | |
- { | D->4 |------>| |
- +-------+ | |
- : : +-------+
+
+If, however, a read barrier were to be placed between the load of B and the
+load of A on CPU 2:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { A = 0, B = 9 }
+ STORE A=1
+ <write barrier>
+ STORE B=2
+ LOAD B
+ <read barrier>
+ LOAD A
+
+then the partial ordering imposed by CPU 1 will be perceived correctly by CPU
+2:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ | : : | |
+ At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
+ barrier causes all effects \ +-------+ | |
+ prior to the storage of B ---->| A->1 |------>| |
+ to be perceptible to CPU 2 +-------+ | |
+ : : +-------+
+
+
+To illustrate this more completely, consider what could happen if the code
+contained a load of A either side of the read barrier:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ { A = 0, B = 9 }
+ STORE A=1
+ <write barrier>
+ STORE B=2
+ LOAD B
+ LOAD A [first load of A]
+ <read barrier>
+ LOAD A [second load of A]
+
+Even though the two loads of A both occur after the load of B, they may come
+up with different values:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ | : : | |
+ | +-------+ | |
+ | | A->0 |------>| 1st |
+ | +-------+ | |
+ At this point the read ----> \ rrrrrrrrrrrrrrrrr | |
+ barrier causes all effects \ +-------+ | |
+ prior to the storage of B ---->| A->1 |------>| 2nd |
+ to be perceptible to CPU 2 +-------+ | |
+ : : +-------+
+
+
+But it may be that the update to A from CPU 1 becomes perceptible to CPU 2
+before the read barrier completes anyway:
+
+ +-------+ : : : :
+ | | +------+ +-------+
+ | |------>| A=1 |------ --->| A->0 |
+ | | +------+ \ +-------+
+ | CPU 1 | wwwwwwwwwwwwwwww \ --->| B->9 |
+ | | +------+ | +-------+
+ | |------>| B=2 |--- | : :
+ | | +------+ \ | : : +-------+
+ +-------+ : : \ | +-------+ | |
+ ---------->| B->2 |------>| |
+ | +-------+ | CPU 2 |
+ | : : | |
+ \ : : | |
+ \ +-------+ | |
+ ---->| A->1 |------>| 1st |
+ +-------+ | |
+ rrrrrrrrrrrrrrrrr | |
+ +-------+ | |
+ | A->1 |------>| 2nd |
+ +-------+ | |
+ : : +-------+
+
+
+The guarantee is that the second load will always come up with A == 1 if the
+load of B came up with B == 2. No such guarantee exists for the first load of
+A; that may come up with either A == 0 or A == 1.
+
+
+READ MEMORY BARRIERS VS LOAD SPECULATION
+----------------------------------------
+
+Many CPUs speculate with loads: that is, they see that they will need to load
+an item from memory, and they find a time where they're not using the bus for
+any other loads, and so do the load in advance - even though they haven't
+actually got to that point in the instruction execution flow yet. This permits
+the actual load instruction to potentially complete immediately because the CPU
+already has the value to hand.
+
+It may turn out that the CPU didn't actually need the value - perhaps because a
+branch circumvented the load - in which case it can discard the value or just
+cache it for later use.
+
+Consider:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ LOAD B
+ DIVIDE } Divide instructions generally
+ DIVIDE } take a long time to perform
+ LOAD A
+
+Which might appear as this:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ The CPU being busy doing a ---> --->| A->0 |~~~~ | |
+ division speculates on the +-------+ ~ | |
+ LOAD of A : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ Once the divisions are complete --> : : ~-->| |
+ the CPU can then perform the : : | |
+ LOAD with immediate effect : : +-------+
+
+
+Placing a read barrier or a data dependency barrier just before the second
+load:
+
+ CPU 1 CPU 2
+ ======================= =======================
+ LOAD B
+ DIVIDE
+ DIVIDE
+ <read barrier>
+ LOAD A
+
+will force any value speculatively obtained to be reconsidered to an extent
+dependent on the type of barrier used. If there was no change made to the
+speculated memory location, then the speculated value will just be used:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ The CPU being busy doing a ---> --->| A->0 |~~~~ | |
+ division speculates on the +-------+ ~ | |
+ LOAD of A : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ : : ~ | |
+ rrrrrrrrrrrrrrrr~ | |
+ : : ~ | |
+ : : ~-->| |
+ : : | |
+ : : +-------+
+
+
+but if there was an update or an invalidation from another CPU pending, then
+the speculation will be cancelled and the value reloaded:
+
+ : : +-------+
+ +-------+ | |
+ --->| B->2 |------>| |
+ +-------+ | CPU 2 |
+ : :DIVIDE | |
+ +-------+ | |
+ The CPU being busy doing a ---> --->| A->0 |~~~~ | |
+ division speculates on the +-------+ ~ | |
+ LOAD of A : : ~ | |
+ : :DIVIDE | |
+ : : ~ | |
+ : : ~ | |
+ rrrrrrrrrrrrrrrrr | |
+ +-------+ | |
+ The speculation is discarded ---> --->| A->1 |------>| |
+ and an updated value is +-------+ | |
+ retrieved : : +-------+
========================
@@ -901,7 +1081,7 @@ IMPLICIT KERNEL MEMORY BARRIERS
===============================
Some of the other functions in the linux kernel imply memory barriers, amongst
-which are locking, scheduling and memory allocation functions.
+which are locking and scheduling functions.
This specification is a _minimum_ guarantee; any particular architecture may
provide more substantial guarantees, but these may not be relied upon outside
@@ -966,6 +1146,20 @@ equivalent to a full barrier, but a LOCK followed by an UNLOCK is not.
barriers is that the effects instructions outside of a critical section may
seep into the inside of the critical section.
+A LOCK followed by an UNLOCK may not be assumed to be a full memory barrier
+because it is possible for an access preceding the LOCK to happen after the
+LOCK, and an access following the UNLOCK to happen before the UNLOCK, and the
+two accesses can themselves then cross:
+
+ *A = a;
+ LOCK
+ UNLOCK
+ *B = b;
+
+may occur as:
+
+ LOCK, STORE *B, STORE *A, UNLOCK
+
Locks and semaphores may not provide any guarantee of ordering on UP compiled
systems, and so cannot be counted on in such a situation to actually achieve
anything at all - especially with respect to I/O accesses - unless combined
@@ -1016,8 +1210,6 @@ Other functions that imply barriers:
(*) schedule() and similar imply full memory barriers.
- (*) Memory allocation and release functions imply full memory barriers.
-
=================================
INTER-CPU LOCKING BARRIER EFFECTS
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 41e9ab40cd5..f70bd090dac 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -822,6 +822,7 @@ static void __init prom_send_capabilities(void)
/* try calling the ibm,client-architecture-support method */
if (call_prom_ret("call-method", 3, 2, &ret,
ADDR("ibm,client-architecture-support"),
+ root,
ADDR(ibm_architecture_vec)) == 0) {
/* the call exists... */
if (ret)
@@ -1622,6 +1623,15 @@ static int __init prom_find_machine_type(void)
if (strstr(p, RELOC("Power Macintosh")) ||
strstr(p, RELOC("MacRISC")))
return PLATFORM_POWERMAC;
+#ifdef CONFIG_PPC64
+ /* We must make sure we don't detect the IBM Cell
+ * blades as pSeries due to some firmware issues,
+ * so we do it here.
+ */
+ if (strstr(p, RELOC("IBM,CBEA")) ||
+ strstr(p, RELOC("IBM,CPBW-1.0")))
+ return PLATFORM_GENERIC;
+#endif /* CONFIG_PPC64 */
i += sl + 1;
}
}
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 01e3c08cb55..8fdeca2d459 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -803,10 +803,13 @@ static int do_setcontext(struct ucontext __user *ucp, struct pt_regs *regs, int
if (__get_user(cmcp, &ucp->uc_regs))
return -EFAULT;
mcp = (struct mcontext __user *)(u64)cmcp;
+ /* no need to check access_ok(mcp), since mcp < 4GB */
}
#else
if (__get_user(mcp, &ucp->uc_regs))
return -EFAULT;
+ if (!access_ok(VERIFY_READ, mcp, sizeof(*mcp)))
+ return -EFAULT;
#endif
restore_sigmask(&set);
if (restore_user_regs(regs, mcp, sig))
@@ -908,13 +911,14 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
{
struct sig_dbg_op op;
int i;
+ unsigned char tmp;
unsigned long new_msr = regs->msr;
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
unsigned long new_dbcr0 = current->thread.dbcr0;
#endif
for (i=0; i<ndbg; i++) {
- if (__copy_from_user(&op, dbg, sizeof(op)))
+ if (copy_from_user(&op, dbg + i, sizeof(op)))
return -EFAULT;
switch (op.dbg_type) {
case SIG_DBG_SINGLE_STEPPING:
@@ -959,6 +963,11 @@ int sys_debug_setcontext(struct ucontext __user *ctx,
current->thread.dbcr0 = new_dbcr0;
#endif
+ if (!access_ok(VERIFY_READ, ctx, sizeof(*ctx))
+ || __get_user(tmp, (u8 __user *) ctx)
+ || __get_user(tmp, (u8 __user *) (ctx + 1) - 1))
+ return -EFAULT;
+
/*
* If we get a fault copying the context into the kernel's
* image of the user's registers, we can't just return -EFAULT
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 27f65b95184..c2db642f4cd 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -182,6 +182,8 @@ static long restore_sigcontext(struct pt_regs *regs, sigset_t *set, int sig,
err |= __get_user(msr, &sc->gp_regs[PT_MSR]);
if (err)
return err;
+ if (v_regs && !access_ok(VERIFY_READ, v_regs, 34 * sizeof(vector128)))
+ return -EFAULT;
/* Copy 33 vec registers (vr0..31 and vscr) from the stack */
if (v_regs != 0 && (msr & MSR_VEC) != 0)
err |= __copy_from_user(current->thread.vr, v_regs,
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index 6574b22b3cf..fd3e5609e3e 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -125,14 +125,13 @@ static void __init cell_init_early(void)
static int __init cell_probe(void)
{
- /* XXX This is temporary, the Cell maintainer will come up with
- * more appropriate detection logic
- */
unsigned long root = of_get_flat_dt_root();
- if (!of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
- return 0;
- return 1;
+ if (of_flat_dt_is_compatible(root, "IBM,CBEA") ||
+ of_flat_dt_is_compatible(root, "IBM,CPBW-1.0"))
+ return 1;
+
+ return 0;
}
/*
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 5f79f01c44f..3ba87835757 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -389,6 +389,7 @@ static int __init pSeries_probe_hypertas(unsigned long node,
static int __init pSeries_probe(void)
{
+ unsigned long root = of_get_flat_dt_root();
char *dtype = of_get_flat_dt_prop(of_get_flat_dt_root(),
"device_type", NULL);
if (dtype == NULL)
@@ -396,6 +397,13 @@ static int __init pSeries_probe(void)
if (strcmp(dtype, "chrp"))
return 0;
+ /* Cell blades firmware claims to be chrp while it's not. Until this
+ * is fixed, we need to avoid those here.
+ */
+ if (of_flat_dt_is_compatible(root, "IBM,CPBW-1.0") ||
+ of_flat_dt_is_compatible(root, "IBM,CBEA"))
+ return 0;
+
DBG("pSeries detected, looking for LPAR capability...\n");
/* Now try to figure out if we are running on LPAR */
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index abbdb37a7f5..f36db22ce1a 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -577,6 +577,8 @@ acpi_processor_register_performance(struct acpi_processor_performance
return_VALUE(-EBUSY);
}
+ WARN_ON(!performance);
+
pr->performance = performance;
if (acpi_processor_get_performance_info(pr)) {
@@ -609,7 +611,8 @@ acpi_processor_unregister_performance(struct acpi_processor_performance
return_VOID;
}
- kfree(pr->performance->states);
+ if (pr->performance)
+ kfree(pr->performance->states);
pr->performance = NULL;
acpi_cpufreq_remove_file(pr);
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index f5b01c6d498..fb919bfb282 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -41,9 +41,9 @@ obj-$(CONFIG_N_HDLC) += n_hdlc.o
obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
obj-$(CONFIG_SX) += sx.o generic_serial.o
obj-$(CONFIG_RIO) += rio/ generic_serial.o
-obj-$(CONFIG_HVC_DRIVER) += hvc_console.o
obj-$(CONFIG_HVC_CONSOLE) += hvc_vio.o hvsi.o
obj-$(CONFIG_HVC_RTAS) += hvc_rtas.o
+obj-$(CONFIG_HVC_DRIVER) += hvc_console.o
obj-$(CONFIG_RAW_DRIVER) += raw.o
obj-$(CONFIG_SGI_SNSC) += snsc.o snsc_event.o
obj-$(CONFIG_MMTIMER) += mmtimer.o
diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c
index 5ea133c59af..7bd4d85d0b4 100644
--- a/drivers/message/i2o/exec-osm.c
+++ b/drivers/message/i2o/exec-osm.c
@@ -55,6 +55,7 @@ struct i2o_exec_wait {
u32 m; /* message id */
struct i2o_message *msg; /* pointer to the reply message */
struct list_head list; /* node in global wait list */
+ spinlock_t lock; /* lock before modifying */
};
/* Work struct needed to handle LCT NOTIFY replies */
@@ -87,6 +88,7 @@ static struct i2o_exec_wait *i2o_exec_wait_alloc(void)
return NULL;
INIT_LIST_HEAD(&wait->list);
+ spin_lock_init(&wait->lock);
return wait;
};
@@ -125,6 +127,7 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
DECLARE_WAIT_QUEUE_HEAD(wq);
struct i2o_exec_wait *wait;
static u32 tcntxt = 0x80000000;
+ long flags;
int rc = 0;
wait = i2o_exec_wait_alloc();
@@ -146,33 +149,28 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
wait->tcntxt = tcntxt++;
msg->u.s.tcntxt = cpu_to_le32(wait->tcntxt);
+ wait->wq = &wq;
+ /*
+ * we add elements to the head, because if a entry in the list will
+ * never be removed, we have to iterate over it every time
+ */
+ list_add(&wait->list, &i2o_exec_wait_list);
+
/*
* Post the message to the controller. At some point later it will
* return. If we time out before it returns then complete will be zero.
*/
i2o_msg_post(c, msg);
- if (!wait->complete) {
- wait->wq = &wq;
- /*
- * we add elements add the head, because if a entry in the list
- * will never be removed, we have to iterate over it every time
- */
- list_add(&wait->list, &i2o_exec_wait_list);
-
- wait_event_interruptible_timeout(wq, wait->complete,
- timeout * HZ);
+ wait_event_interruptible_timeout(wq, wait->complete, timeout * HZ);
- wait->wq = NULL;
- }
+ spin_lock_irqsave(&wait->lock, flags);
- barrier();
+ wait->wq = NULL;
- if (wait->complete) {
+ if (wait->complete)
rc = le32_to_cpu(wait->msg->body[0]) >> 24;
- i2o_flush_reply(c, wait->m);
- i2o_exec_wait_free(wait);
- } else {
+ else {
/*
* We cannot remove it now. This is important. When it does
* terminate (which it must do if the controller has not
@@ -186,6 +184,13 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
rc = -ETIMEDOUT;
}
+ spin_unlock_irqrestore(&wait->lock, flags);
+
+ if (rc != -ETIMEDOUT) {
+ i2o_flush_reply(c, wait->m);
+ i2o_exec_wait_free(wait);
+ }
+
return rc;
};
@@ -213,7 +218,6 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
{
struct i2o_exec_wait *wait, *tmp;
unsigned long flags;
- static spinlock_t lock = SPIN_LOCK_UNLOCKED;
int rc = 1;
/*
@@ -223,23 +227,24 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
* already expired. Not much we can do about that except log it for
* debug purposes, increase timeout, and recompile.
*/
- spin_lock_irqsave(&lock, flags);
list_for_each_entry_safe(wait, tmp, &i2o_exec_wait_list, list) {
if (wait->tcntxt == context) {
- list_del(&wait->list);
+ spin_lock_irqsave(&wait->lock, flags);
- spin_unlock_irqrestore(&lock, flags);
+ list_del(&wait->list);
wait->m = m;
wait->msg = msg;
wait->complete = 1;
- barrier();
-
- if (wait->wq) {
- wake_up_interruptible(wait->wq);
+ if (wait->wq)
rc = 0;
- } else {
+ else
+ rc = -1;
+
+ spin_unlock_irqrestore(&wait->lock, flags);
+
+ if (rc) {
struct device *dev;
dev = &c->pdev->dev;
@@ -248,15 +253,13 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
c->name);
i2o_dma_free(dev, &wait->dma);
i2o_exec_wait_free(wait);
- rc = -1;
- }
+ } else
+ wake_up_interruptible(wait->wq);
return rc;
}
}
- spin_unlock_irqrestore(&lock, flags);
-
osm_warn("%s: Bogus reply in POST WAIT (tr-context: %08x)!\n", c->name,
context);
@@ -322,14 +325,9 @@ static DEVICE_ATTR(product_id, S_IRUGO, i2o_exec_show_product_id, NULL);
static int i2o_exec_probe(struct device *dev)
{
struct i2o_device *i2o_dev = to_i2o_device(dev);
- struct i2o_controller *c = i2o_dev->iop;
i2o_event_register(i2o_dev, &i2o_exec_driver, 0, 0xffffffff);
- c->exec = i2o_dev;
-
- i2o_exec_lct_notify(c, c->lct->change_ind + 1);
-
device_create_file(dev, &dev_attr_vendor_id);
device_create_file(dev, &dev_attr_product_id);
@@ -523,6 +521,8 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind)
struct device *dev;
struct i2o_message *msg;
+ down(&c->lct_lock);
+
dev = &c->pdev->dev;
if (i2o_dma_realloc
@@ -545,6 +545,8 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind)
i2o_msg_post(c, msg);
+ up(&c->lct_lock);
+
return 0;
};
diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c
index 49216744693..febbdd4e060 100644
--- a/drivers/message/i2o/iop.c
+++ b/drivers/message/i2o/iop.c
@@ -804,8 +804,6 @@ void i2o_iop_remove(struct i2o_controller *c)
/* Ask the IOP to switch to RESET state */
i2o_iop_reset(c);
-
- put_device(&c->device);
}
/**
@@ -1059,7 +1057,7 @@ struct i2o_controller *i2o_iop_alloc(void)
snprintf(poolname, sizeof(poolname), "i2o_%s_msg_inpool", c->name);
if (i2o_pool_alloc
- (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4,
+ (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4 + sizeof(u32),
I2O_MSG_INPOOL_MIN)) {
kfree(c);
return ERR_PTR(-ENOMEM);
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index dd7d627bf66..c115e9e840b 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -1114,8 +1114,11 @@ static inline struct i2o_message *i2o_msg_get(struct i2o_controller *c)
mmsg->mfa = readl(c->in_port);
if (unlikely(mmsg->mfa >= c->in_queue.len)) {
+ u32 mfa = mmsg->mfa;
+
mempool_free(mmsg, c->in_msg.mempool);
- if(mmsg->mfa == I2O_QUEUE_EMPTY)
+
+ if (mfa == I2O_QUEUE_EMPTY)
return ERR_PTR(-EBUSY);
return ERR_PTR(-EFAULT);
}