From 592a607bbc053bc6f614a0e619326009f4b3829e Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Thu, 7 Feb 2008 14:29:43 +1100
Subject: [POWERPC] Disable G5 NAP mode during SMU commands on U3

It appears that with the U3 northbridge, if the processor is in NAP
mode the whole time while waiting for an SMU command to complete,
then the SMU will fail.  It could be related to the weird backward
mechanism the SMU uses to get to system memory via i2c to the
northbridge that doesn't operate properly when the said bridge is
in napping along with the CPU.  That is on U3 at least, U4 doesn't
seem to be affected.

This didn't show before NO_HZ as the timer wakeup was enough to make
it work it seems, but that is no longer the case.

This fixes it by disabling NAP mode on those machines while
an SMU command is in flight.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/powermac/feature.c | 11 ++++++++++-
 drivers/macintosh/smu.c                   | 25 ++++++++++++++++++++++++-
 include/asm-powerpc/pmac_feature.h        |  8 ++++++++
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index ba931be2175..5169ecc3712 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -2565,6 +2565,8 @@ static void __init probe_uninorth(void)
 
 	/* Locate core99 Uni-N */
 	uninorth_node = of_find_node_by_name(NULL, "uni-n");
+	uninorth_maj = 1;
+
 	/* Locate G5 u3 */
 	if (uninorth_node == NULL) {
 		uninorth_node = of_find_node_by_name(NULL, "u3");
@@ -2575,8 +2577,10 @@ static void __init probe_uninorth(void)
 		uninorth_node = of_find_node_by_name(NULL, "u4");
 		uninorth_maj = 4;
 	}
-	if (uninorth_node == NULL)
+	if (uninorth_node == NULL) {
+		uninorth_maj = 0;
 		return;
+	}
 
 	addrp = of_get_property(uninorth_node, "reg", NULL);
 	if (addrp == NULL)
@@ -3029,3 +3033,8 @@ void pmac_resume_agp_for_card(struct pci_dev *dev)
 	pmac_agp_resume(pmac_agp_bridge);
 }
 EXPORT_SYMBOL(pmac_resume_agp_for_card);
+
+int pmac_get_uninorth_variant(void)
+{
+	return uninorth_maj;
+}
diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c
index 8ba49385c3f..77ad192962c 100644
--- a/drivers/macintosh/smu.c
+++ b/drivers/macintosh/smu.c
@@ -85,6 +85,7 @@ struct smu_device {
 	u32			cmd_buf_abs;	/* command buffer absolute */
 	struct list_head	cmd_list;
 	struct smu_cmd		*cmd_cur;	/* pending command */
+	int			broken_nap;
 	struct list_head	cmd_i2c_list;
 	struct smu_i2c_cmd	*cmd_i2c_cur;	/* pending i2c command */
 	struct timer_list	i2c_timer;
@@ -135,6 +136,19 @@ static void smu_start_cmd(void)
 	fend = faddr + smu->cmd_buf->length + 2;
 	flush_inval_dcache_range(faddr, fend);
 
+
+	/* We also disable NAP mode for the duration of the command
+	 * on U3 based machines.
+	 * This is slightly racy as it can be written back to 1 by a sysctl
+	 * but that never happens in practice. There seem to be an issue with
+	 * U3 based machines such as the iMac G5 where napping for the
+	 * whole duration of the command prevents the SMU from fetching it
+	 * from memory. This might be related to the strange i2c based
+	 * mechanism the SMU uses to access memory.
+	 */
+	if (smu->broken_nap)
+		powersave_nap = 0;
+
 	/* This isn't exactly a DMA mapping here, I suspect
 	 * the SMU is actually communicating with us via i2c to the
 	 * northbridge or the CPU to access RAM.
@@ -211,6 +225,10 @@ static irqreturn_t smu_db_intr(int irq, void *arg)
 	misc = cmd->misc;
 	mb();
 	cmd->status = rc;
+
+	/* Re-enable NAP mode */
+	if (smu->broken_nap)
+		powersave_nap = 1;
  bail:
 	/* Start next command if any */
 	smu_start_cmd();
@@ -461,7 +479,7 @@ int __init smu_init (void)
         if (np == NULL)
 		return -ENODEV;
 
-	printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR);
+	printk(KERN_INFO "SMU: Driver %s %s\n", VERSION, AUTHOR);
 
 	if (smu_cmdbuf_abs == 0) {
 		printk(KERN_ERR "SMU: Command buffer not allocated !\n");
@@ -533,6 +551,11 @@ int __init smu_init (void)
 		goto fail;
 	}
 
+	/* U3 has an issue with NAP mode when issuing SMU commands */
+	smu->broken_nap = pmac_get_uninorth_variant() < 4;
+	if (smu->broken_nap)
+		printk(KERN_INFO "SMU: using NAP mode workaround\n");
+
 	sys_ctrler = SYS_CTRLER_SMU;
 	return 0;
 
diff --git a/include/asm-powerpc/pmac_feature.h b/include/asm-powerpc/pmac_feature.h
index 26bcb0aa164..877c35a4356 100644
--- a/include/asm-powerpc/pmac_feature.h
+++ b/include/asm-powerpc/pmac_feature.h
@@ -392,6 +392,14 @@ extern u32 __iomem *uninorth_base;
 #define UN_BIS(r,v)	(UN_OUT((r), UN_IN(r) | (v)))
 #define UN_BIC(r,v)	(UN_OUT((r), UN_IN(r) & ~(v)))
 
+/* Uninorth variant:
+ *
+ * 0 = not uninorth
+ * 1 = U1.x or U2.x
+ * 3 = U3
+ * 4 = U4
+ */
+extern int pmac_get_uninorth_variant(void);
 
 #endif /* __ASM_POWERPC_PMAC_FEATURE_H */
 #endif /* __KERNEL__ */
-- 
cgit v1.2.3


From eebead5b8ff89340dc18ceec996157d0eb7d0287 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 8 Feb 2008 15:50:41 +1100
Subject: [POWERPC] spufs: Fix state_mutex leaks

Fix various state_mutex leaks.  The worst one was introduced by the
interrutible state_mutex conversion but there've been a few before
too.  Notably spufs_wait now returns without the state_mutex held
when returning an error, which actually cleans up some code.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Luke Browning <lukebrowning@us.ibm.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/spufs/fault.c | 12 +++-----
 arch/powerpc/platforms/cell/spufs/file.c  | 49 ++++++++++++++++++-------------
 arch/powerpc/platforms/cell/spufs/run.c   |  9 +++++-
 arch/powerpc/platforms/cell/spufs/spufs.h |  5 +++-
 4 files changed, 45 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c
index eff4d291ba8..e46d300e21a 100644
--- a/arch/powerpc/platforms/cell/spufs/fault.c
+++ b/arch/powerpc/platforms/cell/spufs/fault.c
@@ -108,7 +108,7 @@ int spufs_handle_class1(struct spu_context *ctx)
 	u64 ea, dsisr, access;
 	unsigned long flags;
 	unsigned flt = 0;
-	int ret, ret2;
+	int ret;
 
 	/*
 	 * dar and dsisr get passed from the registers
@@ -148,13 +148,10 @@ int spufs_handle_class1(struct spu_context *ctx)
 		ret = spu_handle_mm_fault(current->mm, ea, dsisr, &flt);
 
 	/*
-	 * If spu_acquire fails due to a pending signal we just want to return
-	 * EINTR to userspace even if that means missing the dma restart or
-	 * updating the page fault statistics.
+	 * This is nasty: we need the state_mutex for all the bookkeeping even
+	 * if the syscall was interrupted by a signal. ewww.
 	 */
-	ret2 = spu_acquire(ctx);
-	if (ret2)
-		goto out;
+	mutex_lock(&ctx->state_mutex);
 
 	/*
 	 * Clear dsisr under ctxt lock after handling the fault, so that
@@ -185,7 +182,6 @@ int spufs_handle_class1(struct spu_context *ctx)
 	} else
 		spufs_handle_event(ctx, ea, SPE_EVENT_SPE_DATA_STORAGE);
 
- out:
 	spuctx_switch_state(ctx, SPU_UTIL_SYSTEM);
 	return ret;
 }
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 1018acd1746..e4ab9d5a86f 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -358,6 +358,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
 {
 	struct spu_context *ctx = vma->vm_file->private_data;
 	unsigned long area, offset = address - vma->vm_start;
+	int ret = 0;
 
 	spu_context_nospu_trace(spufs_ps_nopfn__enter, ctx);
 
@@ -379,7 +380,7 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
 	if (ctx->state == SPU_STATE_SAVED) {
 		up_read(&current->mm->mmap_sem);
 		spu_context_nospu_trace(spufs_ps_nopfn__sleep, ctx);
-		spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
+		ret = spufs_wait(ctx->run_wq, ctx->state == SPU_STATE_RUNNABLE);
 		spu_context_trace(spufs_ps_nopfn__wake, ctx, ctx->spu);
 		down_read(&current->mm->mmap_sem);
 	} else {
@@ -388,7 +389,8 @@ static unsigned long spufs_ps_nopfn(struct vm_area_struct *vma,
 		spu_context_trace(spufs_ps_nopfn__insert, ctx, ctx->spu);
 	}
 
-	spu_release(ctx);
+	if (!ret)
+		spu_release(ctx);
 	return NOPFN_REFAULT;
 }
 
@@ -755,23 +757,25 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
 
 	count = spu_acquire(ctx);
 	if (count)
-		return count;
+		goto out;
 
 	/* wait only for the first element */
 	count = 0;
 	if (file->f_flags & O_NONBLOCK) {
-		if (!spu_ibox_read(ctx, &ibox_data))
+		if (!spu_ibox_read(ctx, &ibox_data)) {
 			count = -EAGAIN;
+			goto out_unlock;
+		}
 	} else {
 		count = spufs_wait(ctx->ibox_wq, spu_ibox_read(ctx, &ibox_data));
+		if (count)
+			goto out;
 	}
-	if (count)
-		goto out;
 
 	/* if we can't write at all, return -EFAULT */
 	count = __put_user(ibox_data, udata);
 	if (count)
-		goto out;
+		goto out_unlock;
 
 	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
 		int ret;
@@ -788,9 +792,9 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf,
 			break;
 	}
 
-out:
+out_unlock:
 	spu_release(ctx);
-
+out:
 	return count;
 }
 
@@ -905,7 +909,7 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
 
 	count = spu_acquire(ctx);
 	if (count)
-		return count;
+		goto out;
 
 	/*
 	 * make sure we can at least write one element, by waiting
@@ -913,14 +917,16 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
 	 */
 	count = 0;
 	if (file->f_flags & O_NONBLOCK) {
-		if (!spu_wbox_write(ctx, wbox_data))
+		if (!spu_wbox_write(ctx, wbox_data)) {
 			count = -EAGAIN;
+			goto out_unlock;
+		}
 	} else {
 		count = spufs_wait(ctx->wbox_wq, spu_wbox_write(ctx, wbox_data));
+		if (count)
+			goto out;
 	}
 
-	if (count)
-		goto out;
 
 	/* write as much as possible */
 	for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) {
@@ -934,8 +940,9 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf,
 			break;
 	}
 
-out:
+out_unlock:
 	spu_release(ctx);
+out:
 	return count;
 }
 
@@ -1598,12 +1605,11 @@ static ssize_t spufs_mfc_read(struct file *file, char __user *buffer,
 	} else {
 		ret = spufs_wait(ctx->mfc_wq,
 			   spufs_read_mfc_tagstatus(ctx, &status));
+		if (ret)
+			goto out;
 	}
 	spu_release(ctx);
 
-	if (ret)
-		goto out;
-
 	ret = 4;
 	if (copy_to_user(buffer, &status, 4))
 		ret = -EFAULT;
@@ -1732,6 +1738,8 @@ static ssize_t spufs_mfc_write(struct file *file, const char __user *buffer,
 		int status;
 		ret = spufs_wait(ctx->mfc_wq,
 				 spu_send_mfc_command(ctx, cmd, &status));
+		if (ret)
+			goto out;
 		if (status)
 			ret = status;
 	}
@@ -1785,7 +1793,7 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
 
 	ret = spu_acquire(ctx);
 	if (ret)
-		return ret;
+		goto out;
 #if 0
 /* this currently hangs */
 	ret = spufs_wait(ctx->mfc_wq,
@@ -1794,12 +1802,13 @@ static int spufs_mfc_flush(struct file *file, fl_owner_t id)
 		goto out;
 	ret = spufs_wait(ctx->mfc_wq,
 			 ctx->ops->read_mfc_tagstatus(ctx) == ctx->tagwait);
-out:
+	if (ret)
+		goto out;
 #else
 	ret = 0;
 #endif
 	spu_release(ctx);
-
+out:
 	return ret;
 }
 
diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index b4814c740d8..e9c61a1a8f9 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -354,8 +354,15 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
 
 	do {
 		ret = spufs_wait(ctx->stop_wq, spu_stopped(ctx, &status));
-		if (unlikely(ret))
+		if (unlikely(ret)) {
+			/*
+			 * This is nasty: we need the state_mutex for all the
+			 * bookkeeping even if the syscall was interrupted by
+			 * a signal. ewww.
+			 */
+			mutex_lock(&ctx->state_mutex);
 			break;
+		}
 		spu = ctx->spu;
 		if (unlikely(test_and_clear_bit(SPU_SCHED_NOTIFY_ACTIVE,
 						&ctx->sched_flags))) {
diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h
index 795a1b52538..2c2fe3c07d7 100644
--- a/arch/powerpc/platforms/cell/spufs/spufs.h
+++ b/arch/powerpc/platforms/cell/spufs/spufs.h
@@ -268,6 +268,9 @@ extern char *isolated_loader;
  *	Same as wait_event_interruptible(), except that here
  *	we need to call spu_release(ctx) before sleeping, and
  *	then spu_acquire(ctx) when awoken.
+ *
+ * 	Returns with state_mutex re-acquired when successfull or
+ * 	with -ERESTARTSYS and the state_mutex dropped when interrupted.
  */
 
 #define spufs_wait(wq, condition)					\
@@ -278,11 +281,11 @@ extern char *isolated_loader;
 		prepare_to_wait(&(wq), &__wait, TASK_INTERRUPTIBLE);	\
 		if (condition)						\
 			break;						\
+		spu_release(ctx);					\
 		if (signal_pending(current)) {				\
 			__ret = -ERESTARTSYS;				\
 			break;						\
 		}							\
-		spu_release(ctx);					\
 		schedule();						\
 		__ret = spu_acquire(ctx);				\
 		if (__ret)						\
-- 
cgit v1.2.3


From 732377c5f5335e227171c76532613f45b4067f25 Mon Sep 17 00:00:00 2001
From: Masato Noguchi <Masato.Noguchi@jp.sony.com>
Date: Fri, 8 Feb 2008 15:50:41 +1100
Subject: [POWERPC] spufs: Update SPU_Status[CISHP] in backing runcntl write

Currently, the kernel may fail to restart a SPE context which
has stopped and been swapped out.

This changes spu_backing_runcntl_write to emulate the real
SPU_Status register exactly.  When the SPU Run Control register
is written with SPU_RunCntl[Run] set to '1', the physical SPU
automatically sets SPU_Status[R] and clears SPU_Status[CISHP].

Signed-off-by: Masato Noguchi <Masato.Noguchi@jp.sony.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/spufs/backing_ops.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/platforms/cell/spufs/backing_ops.c b/arch/powerpc/platforms/cell/spufs/backing_ops.c
index 50d98a154aa..64eb15b2204 100644
--- a/arch/powerpc/platforms/cell/spufs/backing_ops.c
+++ b/arch/powerpc/platforms/cell/spufs/backing_ops.c
@@ -288,6 +288,12 @@ static void spu_backing_runcntl_write(struct spu_context *ctx, u32 val)
 	spin_lock(&ctx->csa.register_lock);
 	ctx->csa.prob.spu_runcntl_RW = val;
 	if (val & SPU_RUNCNTL_RUNNABLE) {
+		ctx->csa.prob.spu_status_R &=
+			~SPU_STATUS_STOPPED_BY_STOP &
+			~SPU_STATUS_STOPPED_BY_HALT &
+			~SPU_STATUS_SINGLE_STEP &
+			~SPU_STATUS_INVALID_INSTR &
+			~SPU_STATUS_INVALID_CH;
 		ctx->csa.prob.spu_status_R |= SPU_STATUS_RUNNING;
 	} else {
 		ctx->csa.prob.spu_status_R &= ~SPU_STATUS_RUNNING;
-- 
cgit v1.2.3


From e66686b414f10f1ef2cd0aa77a03a67e17304773 Mon Sep 17 00:00:00 2001
From: Luke Browning <lukebr@linux.vnet.ibm.com>
Date: Fri, 8 Feb 2008 15:50:41 +1100
Subject: [POWERPC] spufs: No need to have a runnable SPU for libassist update

We don't need to update the libassist statistic with the context in a
runnable state, so do it after spu_disable_spu().

Signed-off-by: Luke Browning <lukebrowning@us.ibm.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/spufs/run.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index e9c61a1a8f9..f401e51a797 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -395,16 +395,14 @@ long spufs_run_spu(struct spu_context *ctx, u32 *npc, u32 *event)
 				      SPU_STATUS_STOPPED_BY_HALT |
 				       SPU_STATUS_SINGLE_STEP)));
 
-	if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
-	    (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100) &&
-	    (ctx->state == SPU_STATE_RUNNABLE))
-		ctx->stats.libassist++;
-
-
 	spu_disable_spu(ctx);
 	ret = spu_run_fini(ctx, npc, &status);
 	spu_yield(ctx);
 
+	if ((status & SPU_STATUS_STOPPED_BY_STOP) &&
+	    (((status >> SPU_STOP_STATUS_SHIFT) & 0x3f00) == 0x2100))
+		ctx->stats.libassist++;
+
 	if ((ret == 0) ||
 	    ((ret == -ERESTARTSYS) &&
 	     ((status & SPU_STATUS_STOPPED_BY_HALT) ||
-- 
cgit v1.2.3


From 85687ff2b4eab47f4d637a0d3a482bb955d3cbd4 Mon Sep 17 00:00:00 2001
From: Luke Browning <lukebr@linux.vnet.ibm.com>
Date: Fri, 8 Feb 2008 15:50:41 +1100
Subject: [POWERPC] spufs: Fix timing dependent false return from spufs_run_spu

Stop bits are only valid when the running bit is not set.  Status bits
carry over from one invocation of spufs_run_spu() to another, so the
RUNNING bit gets added to the previous state of the register which may
have been a remote library call.  In this case, it looks like another
library routine should be invoked, but the spe is actually running.

This fixes a problem with a testcase that exercises the scheduler.

Signed-off-by: Luke Browning <lukebrowning@us.ibm.com>
Signed-off-by: Jeremy Kerr <jk@ozlabs.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/spufs/run.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c
index f401e51a797..fca22e18069 100644
--- a/arch/powerpc/platforms/cell/spufs/run.c
+++ b/arch/powerpc/platforms/cell/spufs/run.c
@@ -53,7 +53,7 @@ int spu_stopped(struct spu_context *ctx, u32 *stat)
 
 	stopped = SPU_STATUS_INVALID_INSTR | SPU_STATUS_SINGLE_STEP |
 		SPU_STATUS_STOPPED_BY_HALT | SPU_STATUS_STOPPED_BY_STOP;
-	if (*stat & stopped)
+	if (!(*stat & SPU_STATUS_RUNNING) && (*stat & stopped))
 		return 1;
 
 	dsisr = ctx->csa.dsisr;
-- 
cgit v1.2.3


From ccd05d086f82dba2ab117dcaf4a38cbb2863a439 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 8 Feb 2008 16:37:02 +1100
Subject: [POWERPC] Fix cell IOMMU null pointer explosion on old firmwares

The cell IOMMU fixed mapping support has a null pointer bug if you run
it on older firmwares that don't contain the "dma-ranges" properties.
Fix it and convert to using of_get_next_parent() while we're there.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/iommu.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index df330666ccc..a276064471b 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -26,6 +26,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
+#include <linux/of.h>
 #include <linux/of_platform.h>
 
 #include <asm/prom.h>
@@ -789,18 +790,16 @@ static int __init cell_iommu_init_disabled(void)
 static u64 cell_iommu_get_fixed_address(struct device *dev)
 {
 	u64 cpu_addr, size, best_size, pci_addr = OF_BAD_ADDR;
-	struct device_node *tmp, *np;
+	struct device_node *np;
 	const u32 *ranges = NULL;
 	int i, len, best;
 
-	np = dev->archdata.of_node;
-	of_node_get(np);
-	ranges = of_get_property(np, "dma-ranges", &len);
-	while (!ranges && np) {
-		tmp = of_get_parent(np);
-		of_node_put(np);
-		np = tmp;
+	np = of_node_get(dev->archdata.of_node);
+	while (np) {
 		ranges = of_get_property(np, "dma-ranges", &len);
+		if (ranges)
+			break;
+		np = of_get_next_parent(np);
 	}
 
 	if (!ranges) {
-- 
cgit v1.2.3


From 0e0b47abb71a2c4aed5895c01f41827dbd8a981c Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 8 Feb 2008 16:37:03 +1100
Subject: [POWERPC] Don't enable cell IOMMU fixed mapping if there are no
 dma-ranges

In order for the cell IOMMU fixed mapping to work we need "dma-ranges"
properties in the device tree. If there are none then there's no point
enabling the fixed mapping support.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/iommu.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index a276064471b..1f7b2547408 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -917,6 +917,18 @@ static int __init cell_iommu_fixed_mapping_init(void)
 		return -1;
 	}
 
+	/* We must have dma-ranges properties for fixed mapping to work */
+	for (np = NULL; (np = of_find_all_nodes(np));) {
+		if (of_find_property(np, "dma-ranges", NULL))
+			break;
+	}
+	of_node_put(np);
+
+	if (!np) {
+		pr_debug("iommu: no dma-ranges found, no fixed mapping\n");
+		return -1;
+	}
+
 	/* The default setup is to have the fixed mapping sit after the
 	 * dynamic region, so find the top of the largest IOMMU window
 	 * on any axon, then add the size of RAM and that's our max value.
-- 
cgit v1.2.3


From 4a8df1507eaeefc9739e3762db606caa08edba98 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 8 Feb 2008 16:37:04 +1100
Subject: [POWERPC] Fix potential cell IOMMU bug when switching back to default
 DMA ops

If we get a 64-bit dma mask we switch to the fixed ops and call
cell_dma_dev_setup().  If the driver then switches back to a 32-bit dma
mask for any reason we don't call cell_dma_dev_setup() again, which
has the potential to leave bogus data in dev->archdata.dma_data.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/iommu.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 1f7b2547408..5cdcd363825 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -841,19 +841,18 @@ static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask)
 	if (!dev->dma_mask || !dma_supported(dev, dma_mask))
 		return -EIO;
 
-	if (dma_mask == DMA_BIT_MASK(64)) {
-		if (cell_iommu_get_fixed_address(dev) == OF_BAD_ADDR)
-			dev_dbg(dev, "iommu: 64-bit OK, but bad addr\n");
-		else {
-			dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
-			set_dma_ops(dev, &dma_iommu_fixed_ops);
-			cell_dma_dev_setup(dev);
-		}
+	if (dma_mask == DMA_BIT_MASK(64) &&
+		cell_iommu_get_fixed_address(dev) != OF_BAD_ADDR)
+	{
+		dev_dbg(dev, "iommu: 64-bit OK, using fixed ops\n");
+		set_dma_ops(dev, &dma_iommu_fixed_ops);
 	} else {
 		dev_dbg(dev, "iommu: not 64-bit, using default ops\n");
 		set_dma_ops(dev, get_pci_dma_ops());
 	}
 
+	cell_dma_dev_setup(dev);
+
 	*dev->dma_mask = dma_mask;
 
 	return 0;
-- 
cgit v1.2.3


From 44621be4b563fbce32007ebfac91dfe8f5692743 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 8 Feb 2008 16:37:04 +1100
Subject: [POWERPC] Make cell IOMMU fixed mapping printk more useful

Currently the cell IOMMU fixed mapping just printks that it's been setup,
which is not particularly useful.  Much more interesting is the address
ranges for the different windows.  This adds one line to dmesg on a blade.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/platforms/cell/iommu.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c
index 5cdcd363825..edab631a8dc 100644
--- a/arch/powerpc/platforms/cell/iommu.c
+++ b/arch/powerpc/platforms/cell/iommu.c
@@ -991,8 +991,8 @@ static int __init cell_iommu_fixed_mapping_init(void)
 			dsize = htab_size_bytes;
 		}
 
-		pr_debug("iommu: setting up %d, dynamic window %lx-%lx " \
-			 "fixed window %lx-%lx\n", iommu->nid, dbase,
+		printk(KERN_DEBUG "iommu: node %d, dynamic window 0x%lx-0x%lx "
+			"fixed window 0x%lx-0x%lx\n", iommu->nid, dbase,
 			 dbase + dsize, fbase, fbase + fsize);
 
 		cell_iommu_setup_page_tables(iommu, dbase, dsize, fbase, fsize);
@@ -1008,8 +1008,6 @@ static int __init cell_iommu_fixed_mapping_init(void)
 	dma_iommu_ops.set_dma_mask = dma_set_mask_and_switch;
 	set_pci_dma_ops(&dma_iommu_ops);
 
-	printk(KERN_DEBUG "IOMMU fixed mapping established.\n");
-
 	return 0;
 }
 
-- 
cgit v1.2.3


From aa620abe756207222d234f785d41435fac486e06 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Tue, 5 Feb 2008 00:10:16 -0800
Subject: [POWERPC] Add remove_memory() for 64-bit powerpc

Supply remove_memory() function for 64-bit powerpc.  This is still
not quite complete as it needs to do some more arch-specific stuff,
which will be added in a later patch.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/mm/mem.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 93a5c53e342..f0a1fd268b7 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -129,6 +129,22 @@ int __devinit arch_add_memory(int nid, u64 start, u64 size)
 	return __add_pages(zone, start_pfn, nr_pages);
 }
 
+#ifdef CONFIG_MEMORY_HOTREMOVE
+int remove_memory(u64 start, u64 size)
+{
+	unsigned long start_pfn, end_pfn;
+	int ret;
+
+	start_pfn = start >> PAGE_SHIFT;
+	end_pfn = start_pfn + (size >> PAGE_SHIFT);
+	ret = offline_pages(start_pfn, end_pfn, 120 * HZ);
+	if (ret)
+		goto out;
+	/* Arch-specific calls go here - next patch */
+out:
+	return ret;
+}
+#endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 void show_mem(void)
-- 
cgit v1.2.3


From 1482471d19e77d794012dbacaa65c44ceaae37bb Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Tue, 5 Feb 2008 00:10:17 -0800
Subject: [POWERPC] Enable hotplug memory remove for 64-bit powerpc

Enable hotplug memory remove for ppc64.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@gate.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8dcac0b22d6..d55063c021d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -272,6 +272,9 @@ config HOTPLUG_CPU
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+	def_bool y
+
 config KEXEC
 	bool "kexec system call (EXPERIMENTAL)"
 	depends on (PPC_PRPMC2800 || PPC_MULTIPLATFORM) && EXPERIMENTAL
-- 
cgit v1.2.3


From a99824f327c748b2753f4fa570eb1fefcd6a9c4d Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Tue, 5 Feb 2008 00:10:18 -0800
Subject: [POWERPC] Add arch-specific walk_memory_remove() for 64-bit powerpc

walk_memory_resource() verifies if there are holes in a given memory
range, by checking against /proc/iomem.  On x86/ia64 system memory is
represented in /proc/iomem.  On powerpc, we don't show system memory as
IO resource in /proc/iomem - instead it's maintained in
/proc/device-tree.

This provides a way for an architecture to provide its own
walk_memory_resource() function.  On powerpc, the memory region is
small (16MB), contiguous and non-overlapping.  So extra checking
against the device-tree is not needed.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Kumar Gala <galak@gate.crashing.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Kconfig  |  3 +++
 arch/powerpc/mm/mem.c | 17 +++++++++++++++++
 kernel/resource.c     |  2 +-
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index d55063c021d..26b963c33c8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -272,6 +272,9 @@ config HOTPLUG_CPU
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 
+config ARCH_HAS_WALK_MEMORY
+	def_bool y
+
 config ARCH_ENABLE_MEMORY_HOTREMOVE
 	def_bool y
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f0a1fd268b7..be5c506779a 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -145,6 +145,23 @@ out:
 	return ret;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
+
+/*
+ * walk_memory_resource() needs to make sure there is no holes in a given
+ * memory range. On PPC64, since this range comes from /sysfs, the range
+ * is guaranteed to be valid, non-overlapping and can not contain any
+ * holes. By the time we get here (memory add or remove), /proc/device-tree
+ * is updated and correct. Only reason we need to check against device-tree
+ * would be if we allow user-land to specify a memory range through a
+ * system call/ioctl etc. instead of doing offline/online through /sysfs.
+ */
+int
+walk_memory_resource(unsigned long start_pfn, unsigned long nr_pages, void *arg,
+			int (*func)(unsigned long, unsigned long, void *))
+{
+	return  (*func)(start_pfn, nr_pages, arg);
+}
+
 #endif /* CONFIG_MEMORY_HOTPLUG */
 
 void show_mem(void)
diff --git a/kernel/resource.c b/kernel/resource.c
index 2eb553d9b51..82aea814d40 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -228,7 +228,7 @@ int release_resource(struct resource *old)
 
 EXPORT_SYMBOL(release_resource);
 
-#ifdef CONFIG_MEMORY_HOTPLUG
+#if defined(CONFIG_MEMORY_HOTPLUG) && !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
 /*
  * Finds the lowest memory reosurce exists within [res->start.res->end)
  * the caller must specify res->start, res->end, res->flags.
-- 
cgit v1.2.3