From 3044646148cdfa83a311bf1c146a70e550280159 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 9 Nov 2008 10:07:58 -0800
Subject: x86: move iomap.h to the new include location

a new file was accidentally added to include/asm-x86;
move it to the new arch/x86/include/asm location

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
---
 arch/x86/include/asm/iomap.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 arch/x86/include/asm/iomap.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h
new file mode 100644
index 00000000000..c1f06289b14
--- /dev/null
+++ b/arch/x86/include/asm/iomap.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2008 Ingo Molnar
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/pgtable.h>
+#include <asm/tlbflush.h>
+
+void *
+iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void
+iounmap_atomic(void *kvaddr, enum km_type type);
-- 
cgit v1.2.3


From 6c2e94033df5ca11149e52dd179b8dde3172e9bf Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 7 Nov 2008 12:33:49 +0100
Subject: x86: apic honour irq affinity which was set in early boot

setup_ioapic_dest() is called after the non boot cpus have been
brought up. It sets the irq affinity of all already configured
interrupts to all cpus and ignores affinity settings which were
done by the early bootup code.

If the IRQ_NO_BALANCING or IRQ_AFFINITY_SET flags are set then use the
affinity mask from the irq descriptor and not TARGET_CPUS.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7a3f2028e2e..988ee89467d 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3761,7 +3761,9 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
 void __init setup_ioapic_dest(void)
 {
 	int pin, ioapic, irq, irq_entry;
+	struct irq_desc *desc;
 	struct irq_cfg *cfg;
+	cpumask_t mask;
 
 	if (skip_ioapic_setup == 1)
 		return;
@@ -3778,16 +3780,30 @@ void __init setup_ioapic_dest(void)
 			 * cpu is online.
 			 */
 			cfg = irq_cfg(irq);
-			if (!cfg->vector)
+			if (!cfg->vector) {
 				setup_IO_APIC_irq(ioapic, pin, irq,
 						  irq_trigger(irq_entry),
 						  irq_polarity(irq_entry));
+				continue;
+
+			}
+
+			/*
+			 * Honour affinities which have been set in early boot
+			 */
+			desc = irq_to_desc(irq);
+			if (desc->status &
+			    (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+				mask = desc->affinity;
+			else
+				mask = TARGET_CPUS;
+
 #ifdef CONFIG_INTR_REMAP
-			else if (intr_remapping_enabled)
-				set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
-#endif
+			if (intr_remapping_enabled)
+				set_ir_ioapic_affinity_irq(irq, mask);
 			else
-				set_ioapic_affinity_irq(irq, TARGET_CPUS);
+#endif
+				set_ioapic_affinity_irq(irq, mask);
 		}
 
 	}
-- 
cgit v1.2.3


From 1de5b0854623d30d01d72cd4ea323eb5f39d1f16 Mon Sep 17 00:00:00 2001
From: Matt Fleming <mjf@gentoo.org>
Date: Sun, 2 Nov 2008 16:04:18 +0000
Subject: x86: HPET: convert WARN_ON to WARN_ON_ONCE

It is possible to flood the console with call traces if the WARN_ON
condition is true because of the frequency with which this function is
called.

Signed-off-by: Matt Fleming <mjf@gentoo.org>
Cc: mingo@elte.hu
Cc: venkatesh.pallipadi@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 77017e834cf..f10f9461a43 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta,
 	 * what we wrote hit the chip before we compare it to the
 	 * counter.
 	 */
-	WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt);
+	WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt);
 
 	return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
-- 
cgit v1.2.3


From 89d77a1eb60be916d85d9394bedbfa2037af89c5 Mon Sep 17 00:00:00 2001
From: Matt Fleming <mjf@gentoo.org>
Date: Sun, 2 Nov 2008 16:04:20 +0000
Subject: x86: HPET: read from HPET_Tn_CMP() not HPET_T0_CMP

In hpet_next_event() we check that the value we just wrote to
HPET_Tn_CMP(timer) has reached the chip. Currently, we're checking that
the value we wrote to HPET_Tn_CMP(timer) is in HPET_T0_CMP, which, if
timer is anything other than timer 0, is likely to fail.

Signed-off-by: Matt Fleming <mjf@gentoo.org>
Cc: mingo@elte.hu
Cc: venkatesh.pallipadi@intel.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index f10f9461a43..cfe6aa56f71 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -322,7 +322,7 @@ static int hpet_next_event(unsigned long delta,
 	 * what we wrote hit the chip before we compare it to the
 	 * counter.
 	 */
-	WARN_ON_ONCE((u32)hpet_readl(HPET_T0_CMP) != cnt);
+	WARN_ON_ONCE((u32)hpet_readl(HPET_Tn_CMP(timer)) != cnt);
 
 	return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0;
 }
-- 
cgit v1.2.3


From 5ceb1a04187553e08c6ab60d30cee7c454ee139a Mon Sep 17 00:00:00 2001
From: Matt Fleming <mjf@gentoo.org>
Date: Sun, 2 Nov 2008 22:23:13 +0000
Subject: x86: HPET: enter hpet_interrupt_handler with interrupts disabled

Some functions that may be called from this handler require that
interrupts are disabled. Also, combining IRQF_DISABLED and
IRQF_SHARED does not reliably disable interrupts in a handler, so
remove IRQF_SHARED from the irq flags (this irq is not shared anyway).

Signed-off-by: Matt Fleming <mjf@gentoo.org>
Cc: mingo@elte.hu
Cc: venkatesh.pallipadi@intel.com
Cc: "Will Newton" <will.newton@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/hpet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index cfe6aa56f71..067d8de913f 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -445,7 +445,7 @@ static int hpet_setup_irq(struct hpet_dev *dev)
 {
 
 	if (request_irq(dev->irq, hpet_interrupt_handler,
-			IRQF_SHARED|IRQF_NOBALANCING, dev->name, dev))
+			IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev))
 		return -1;
 
 	disable_irq(dev->irq);
-- 
cgit v1.2.3


From 4694516d1987303dd83bfd0efdd36fa5b65d701b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 10 Nov 2008 21:52:47 +0100
Subject: x86: Make NUMA on 32-bit depend on BROKEN

While investigating the failure of hibernation on 32-bit x86 with
CONFIG_NUMA set, as described in this message
http://marc.info/?l=linux-kernel&m=122634118116226&w=4
I asked some people for help and I was told that it wasn't really
worth the effort, because CONFIG_NUMA was generally broken on 32-bit
x86 systems and it shouldn't be used in such configs.  For this
reason, make CONFIG_NUMA depend on BROKEN instead of EXPERIMENTAL on
x86-32.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Pavel Machek <pavel@suse.cz>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4cf0ab13d18..93224b56918 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -957,7 +957,7 @@ config ARCH_PHYS_ADDR_T_64BIT
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
 	depends on SMP
-	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
+	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN)
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
 	help
-- 
cgit v1.2.3


From 6cd10f8db385ba547811baa5b26f672fdff232e6 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 9 Nov 2008 11:53:14 -0600
Subject: x86, voyager: fix smp generic helper voyager breakage

Impact: build/boot fix for x86/Voyager

This change:

| commit 3d4422332711ef48ef0f132f1fcbfcbd56c7f3d1
| Author: Jens Axboe <jens.axboe@oracle.com>
| Date:   Thu Jun 26 11:21:34 2008 +0200
|
|     Add generic helpers for arch IPI function calls

didn't wire up the voyager smp call function correctly, so do that
here.  Also make CONFIG_USE_GENERIC_SMP_HELPERS a def_bool y again,
since we now use the generic helpers for every x86 architecture.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Jens Axboe <Jens.Axboe@oracle.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig                    |  5 ++++-
 arch/x86/mach-voyager/voyager_smp.c | 16 ++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4cf0ab13d18..ac22bb7719f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -167,9 +167,12 @@ config GENERIC_PENDING_IRQ
 config X86_SMP
 	bool
 	depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
-	select USE_GENERIC_SMP_HELPERS
 	default y
 
+config USE_GENERIC_SMP_HELPERS
+	def_bool y
+	depends on SMP
+
 config X86_32_SMP
 	def_bool y
 	depends on X86_32 && SMP
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 0e331652681..52145007bd7 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -7,6 +7,7 @@
  * This file provides all the same external entries as smp.c but uses
  * the voyager hal to provide the functionality
  */
+#include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
@@ -1790,6 +1791,17 @@ void __init smp_setup_processor_id(void)
 	x86_write_percpu(cpu_number, hard_smp_processor_id());
 }
 
+static void voyager_send_call_func(cpumask_t callmask)
+{
+	__u32 mask = cpus_addr(callmask)[0] & ~(1 << smp_processor_id());
+	send_CPI(mask, VIC_CALL_FUNCTION_CPI);
+}
+
+static void voyager_send_call_func_single(int cpu)
+{
+	send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI);
+}
+
 struct smp_ops smp_ops = {
 	.smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu,
 	.smp_prepare_cpus = voyager_smp_prepare_cpus,
@@ -1799,6 +1811,6 @@ struct smp_ops smp_ops = {
 	.smp_send_stop = voyager_smp_send_stop,
 	.smp_send_reschedule = voyager_smp_send_reschedule,
 
-	.send_call_func_ipi = native_send_call_func_ipi,
-	.send_call_func_single_ipi = native_send_call_func_single_ipi,
+	.send_call_func_ipi = voyager_send_call_func,
+	.send_call_func_single_ipi = voyager_send_call_func_single,
 };
-- 
cgit v1.2.3


From c41ef344de212bd918f7765af21b5008628c03e0 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 28 Oct 2008 18:16:58 -0200
Subject: KVM: MMU: increase per-vcpu rmap cache alloc size

The page fault path can use two rmap_desc structures, if:

- walk_addr's dirty pte update allocates one rmap_desc.
- mmu_lock is dropped, sptes are zapped resulting in rmap_desc being
freed.
- fetch->mmu_set_spte allocates another rmap_desc.

Increase to 4 for safety.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2a5e64881d9..f1983d9477c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -314,7 +314,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
-				   rmap_desc_cache, 1);
+				   rmap_desc_cache, 4);
 	if (r)
 		goto out;
 	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
-- 
cgit v1.2.3


From a29a2af378f3f6362b68e126e2541c8bde885ead Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Wed, 29 Oct 2008 14:13:39 -0700
Subject: x86: KVM guest: fix section mismatch warning in kvmclock.c

WARNING: arch/x86/kernel/built-in.o(.text+0x1722c): Section mismatch
in reference from the function kvm_setup_secondary_clock() to the
function .devinit.text:setup_secondary_APIC_clock()
The function kvm_setup_secondary_clock() references
the function __devinit setup_secondary_APIC_clock().
This is often because kvm_setup_secondary_clock lacks a __devinit
annotation or the annotation of setup_secondary_APIC_clock is wrong.

Signed-off-by: Md.Rakib H. Mullick <rakib.mullick@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kernel/kvmclock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 774ac499156..1c9cc431ea4 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static void kvm_setup_secondary_clock(void)
+static void __devinit kvm_setup_secondary_clock(void)
 {
 	/*
 	 * Now that the first cpu already had this clocksource initialized,
-- 
cgit v1.2.3


From ca93e992fdfdc6569ac2845d7560eeb5de4a4e0b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 4 Nov 2008 11:25:17 +0200
Subject: KVM: Require the PCI subsystem

PCI device assignment makes calls to pci code, so require it to be built
into the kernel.

Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ce3251ce550..b81125f0bde 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,8 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
+	# for device assignment:
+	depends on PCI
 	select PREEMPT_NOTIFIERS
 	select MMU_NOTIFIER
 	select ANON_INODES
-- 
cgit v1.2.3


From 928d4bf747e9c290b690ff515d8f81e8ee226d97 Mon Sep 17 00:00:00 2001
From: Sheng Yang <sheng@linux.intel.com>
Date: Thu, 6 Nov 2008 14:55:45 +0800
Subject: KVM: VMX: Set IGMT bit in EPT entry

There is a potential issue that, when guest using pagetable without vmexit when
EPT enabled, guest would use PAT/PCD/PWT bits to index PAT msr for it's memory,
which would be inconsistent with host side and would cause host MCE due to
inconsistent cache attribute.

The patch set IGMT bit in EPT entry to ignore guest PAT and use WB as default
memory type to protect host (notice that all memory mapped by KVM should be WB).

Signed-off-by: Sheng Yang <sheng@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 ++-
 arch/x86/kvm/vmx.h | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2643b430d83..d06b4dc0e2e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3564,7 +3564,8 @@ static int __init vmx_init(void)
 		bypass_guest_pf = 0;
 		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
 			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT |
+			VMX_EPT_IGMT_BIT);
 		kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull,
 				VMX_EPT_EXECUTABLE_MASK);
 		kvm_enable_tdp();
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 3e010d21fdd..ec5edc339da 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -352,6 +352,7 @@ enum vmcs_field {
 #define VMX_EPT_READABLE_MASK			0x1ull
 #define VMX_EPT_WRITABLE_MASK			0x2ull
 #define VMX_EPT_EXECUTABLE_MASK			0x4ull
+#define VMX_EPT_IGMT_BIT    			(1ull << 6)
 
 #define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
 
-- 
cgit v1.2.3


From e17d1dc0863767bab8fde4ba9be92c7f79e7fe50 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 11 Nov 2008 13:09:36 +0200
Subject: KVM: Fix pit memory leak if unable to allocate irq source id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reported-By: Daniel Marjamäki <danielm77@spray.se>
Signed-off-by: Avi Kivity <avi@qumranet.com>
---
 arch/x86/kvm/i8254.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 8772dc94682..59ebd37ad79 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -548,8 +548,10 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm)
 	mutex_lock(&kvm->lock);
 	pit->irq_source_id = kvm_request_irq_source_id(kvm);
 	mutex_unlock(&kvm->lock);
-	if (pit->irq_source_id < 0)
+	if (pit->irq_source_id < 0) {
+		kfree(pit);
 		return NULL;
+	}
 
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
-- 
cgit v1.2.3


From 32836259ff25ce97010569706cd33ba94de81d62 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bjorn.helgaas@hp.com>
Date: Wed, 5 Nov 2008 16:17:52 -0700
Subject: ACPI: pci_link: remove acpi_irq_balance_set() interface

This removes the acpi_irq_balance_set() interface from the PCI
interrupt link driver.

x86 used acpi_irq_balance_set() to tell the PCI interrupt link
driver to configure links to minimize IRQ sharing.  But the link
driver can easily figure out whether to turn on IRQ balancing
based on the IRQ model (PIC/IOAPIC/etc), so we can get rid of
that external interface.

It's better for the driver to figure this out at init-time.  If
we set it externally via the x86 code, the interface reduces
modularity, and we depend on the fact that acpi_process_madt()
happens before we process the kernel command line.

Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 arch/x86/include/asm/acpi.h | 1 -
 arch/x86/kernel/acpi/boot.c | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8d676d8ecde..9830681446a 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -113,7 +113,6 @@ static inline void acpi_disable_pci(void)
 	acpi_pci_disabled = 1;
 	acpi_noirq_set();
 }
-extern int acpi_irq_balance_set(char *str);
 
 /* routines for saving/restoring kernel state */
 extern int acpi_save_state_mem(void);
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8c1f76abae9..4c51a2f8fd3 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1343,7 +1343,6 @@ static void __init acpi_process_madt(void)
 			error = acpi_parse_madt_ioapic_entries();
 			if (!error) {
 				acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
-				acpi_irq_balance_set(NULL);
 				acpi_ioapic = 1;
 
 				smp_found_config = 1;
-- 
cgit v1.2.3


From 97a70e548bd97d5a46ae9d44f24aafcc013fd701 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 12 Nov 2008 23:22:35 +0100
Subject: x86, hibernate: fix breakage on x86_32 with CONFIG_NUMA set

Impact: fix crash during hibernation on 32-bit NUMA

The NUMA code on x86_32 creates special memory mapping that allows
each node's pgdat to be located in this node's memory.  For this
purpose it allocates a memory area at the end of each node's memory
and maps this area so that it is accessible with virtual addresses
belonging to low memory.  As a result, if there is high memory,
these NUMA-allocated areas are physically located in high memory,
although they are mapped to low memory addresses.

Our hibernation code does not take that into account and for this
reason hibernation fails on all x86_32 systems with CONFIG_NUMA=y and
with high memory present.  Fix this by adding a special mapping for
the NUMA-allocated memory areas to the temporary page tables created
during the last phase of resume.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/mmzone_32.h |  4 ++++
 arch/x86/mm/numa_32.c            | 35 +++++++++++++++++++++++++++++++++++
 arch/x86/power/hibernate_32.c    |  4 ++++
 3 files changed, 43 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h
index 485bdf059ff..07f1af494ca 100644
--- a/arch/x86/include/asm/mmzone_32.h
+++ b/arch/x86/include/asm/mmzone_32.h
@@ -34,10 +34,14 @@ static inline void get_memcfg_numa(void)
 
 extern int early_pfn_to_nid(unsigned long pfn);
 
+extern void resume_map_numa_kva(pgd_t *pgd);
+
 #else /* !CONFIG_NUMA */
 
 #define get_memcfg_numa get_memcfg_numa_flat
 
+static inline void resume_map_numa_kva(pgd_t *pgd) {}
+
 #endif /* CONFIG_NUMA */
 
 #ifdef CONFIG_DISCONTIGMEM
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c
index 847c164725f..8518c678d83 100644
--- a/arch/x86/mm/numa_32.c
+++ b/arch/x86/mm/numa_32.c
@@ -222,6 +222,41 @@ static void __init remap_numa_kva(void)
 	}
 }
 
+#ifdef CONFIG_HIBERNATION
+/**
+ * resume_map_numa_kva - add KVA mapping to the temporary page tables created
+ *                       during resume from hibernation
+ * @pgd_base - temporary resume page directory
+ */
+void resume_map_numa_kva(pgd_t *pgd_base)
+{
+	int node;
+
+	for_each_online_node(node) {
+		unsigned long start_va, start_pfn, size, pfn;
+
+		start_va = (unsigned long)node_remap_start_vaddr[node];
+		start_pfn = node_remap_start_pfn[node];
+		size = node_remap_size[node];
+
+		printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node);
+
+		for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) {
+			unsigned long vaddr = start_va + (pfn << PAGE_SHIFT);
+			pgd_t *pgd = pgd_base + pgd_index(vaddr);
+			pud_t *pud = pud_offset(pgd, vaddr);
+			pmd_t *pmd = pmd_offset(pud, vaddr);
+
+			set_pmd(pmd, pfn_pmd(start_pfn + pfn,
+						PAGE_KERNEL_LARGE_EXEC));
+
+			printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n",
+				__FUNCTION__, vaddr, start_pfn + pfn);
+		}
+	}
+}
+#endif
+
 static unsigned long calculate_numa_remap_pages(void)
 {
 	int nid;
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index f2b6e3f11bf..81197c62d5b 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -12,6 +12,7 @@
 #include <asm/system.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
+#include <asm/mmzone.h>
 
 /* Defined in hibernate_asm_32.S */
 extern int restore_image(void);
@@ -127,6 +128,9 @@ static int resume_physical_mapping_init(pgd_t *pgd_base)
 			}
 		}
 	}
+
+	resume_map_numa_kva(pgd_base);
+
 	return 0;
 }
 
-- 
cgit v1.2.3


From 604d20554883cf03f888440d58ea7c6d36899839 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Wed, 12 Nov 2008 23:26:14 +0100
Subject: x86: make NUMA on 32-bit depend on EXPERIMENTAL again

My previous patch to make CONFIG_NUMA on x86_32 depend on BROKEN
turned out to be unnecessary, after all, since the source of the
hibernation vs CONFIG_NUMA problem turned out to be the fact that
we didn't take the NUMA KVA remapping into account in the
hibernation code.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 93224b56918..4cf0ab13d18 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -957,7 +957,7 @@ config ARCH_PHYS_ADDR_T_64BIT
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)"
 	depends on SMP
-	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && BROKEN)
+	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL)
 	default n if X86_PC
 	default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
 	help
-- 
cgit v1.2.3


From 52168e60f7d86d83124903098ac8c2dba93cd1c4 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Fri, 14 Nov 2008 13:47:31 +0000
Subject: Revert "x86: blacklist DMAR on Intel G31/G33 chipsets"

This reverts commit e51af6630848406fc97adbd71443818cdcda297b, which was
wrongly hoovered up and submitted about a month after a better fix had
already been merged.

The better fix is commit cbda1ba898647aeb4ee770b803c922f595e97731
("PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets"), where we do
this blacklisting based on the DMI identification for the offending
motherboard, since sometimes this chipset (or at least a chipset with
the same PCI ID) apparently _does_ actually have an IOMMU.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/iommu.h   |  1 -
 arch/x86/kernel/early-quirks.c | 18 ------------------
 2 files changed, 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h
index e4a552d4446..0b500c5b644 100644
--- a/arch/x86/include/asm/iommu.h
+++ b/arch/x86/include/asm/iommu.h
@@ -6,7 +6,6 @@ extern void no_iommu_init(void);
 extern struct dma_mapping_ops nommu_dma_ops;
 extern int force_iommu, no_iommu;
 extern int iommu_detected;
-extern int dmar_disabled;
 
 extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len);
 
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 3ce029ffaa5..1b894b72c0f 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -188,20 +188,6 @@ static void __init ati_bugs_contd(int num, int slot, int func)
 }
 #endif
 
-#ifdef CONFIG_DMAR
-static void __init intel_g33_dmar(int num, int slot, int func)
-{
-	struct acpi_table_header *dmar_tbl;
-	acpi_status status;
-
-	status = acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_tbl);
-	if (ACPI_SUCCESS(status)) {
-		printk(KERN_INFO "BIOS BUG: DMAR advertised on Intel G31/G33 chipset -- ignoring\n");
-		dmar_disabled = 1;
-	}
-}
-#endif
-
 #define QFLAG_APPLY_ONCE 	0x1
 #define QFLAG_APPLIED		0x2
 #define QFLAG_DONE		(QFLAG_APPLY_ONCE|QFLAG_APPLIED)
@@ -225,10 +211,6 @@ static struct chipset early_qrk[] __initdata = {
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs },
 	{ PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_SBX00_SMBUS,
 	  PCI_CLASS_SERIAL_SMBUS, PCI_ANY_ID, 0, ati_bugs_contd },
-#ifdef CONFIG_DMAR
-	{ PCI_VENDOR_ID_INTEL, 0x29c0,
-	  PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, intel_g33_dmar },
-#endif
 	{}
 };
 
-- 
cgit v1.2.3


From d1f1e9c01006b4b050e090055c75278f80c2a5c5 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@googlemail.com>
Date: Sat, 15 Nov 2008 11:00:17 +0100
Subject: x86, bts: fix unlock problem in ds.c

Fix a problem where ds_request() returned an error without releasing the
ds lock.

Reported-by: Stephane Eranian <eranian@gmail.com>
Signed-off-by: Markus Metzger <markus.t.metzger@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ds.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 2b69994fd3a..ac1d5b0586b 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -384,8 +384,9 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
 
 	spin_lock(&ds_lock);
 
+	error = -EPERM;
 	if (!check_tracer(task))
-		return -EPERM;
+		goto out_unlock;
 
 	error = -ENOMEM;
 	context = ds_alloc_context(task);
-- 
cgit v1.2.3


From d3c6aa1e69f705ac3ab64584101b1d38435b1353 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Sun, 16 Nov 2008 00:49:31 -0800
Subject: x86: fix es7000 compiling

Impact: fix es7000 build

  CC      arch/x86/kernel/es7000_32.o
arch/x86/kernel/es7000_32.c: In function find_unisys_acpi_oem_table:
arch/x86/kernel/es7000_32.c:255: error: implicit declaration of function acpi_get_table_with_size
arch/x86/kernel/es7000_32.c:261: error: implicit declaration of function early_acpi_os_unmap_memory
arch/x86/kernel/es7000_32.c: In function unmap_unisys_acpi_oem_table:
arch/x86/kernel/es7000_32.c:277: error: implicit declaration of function __acpi_unmap_table
make[1]: *** [arch/x86/kernel/es7000_32.o] Error 1

we applied one patch out of order...

| commit a73aaedd95703bd49f4c3f9df06fb7b7373ba905
| Author: Yinghai Lu <yhlu.kernel@gmail.com>
| Date:   Sun Sep 14 02:33:14 2008 -0700
|
|    x86: check dsdt before find oem table for es7000, v2
|
|    v2: use __acpi_unmap_table()

that patch need:

	x86: use early_ioremap in __acpi_map_table
	x86: always explicitly map acpi memory
	acpi: remove final __acpi_map_table mapping before setting acpi_gbl_permanent_mmap
	acpi/x86: introduce __apci_map_table, v4

submitted to the ACPI tree but not upstream yet.

fix it until those patches applied, need to revert this one

Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/es7000_32.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index f454c78fcef..0aa2c443d60 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -250,31 +250,24 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
 {
 	struct acpi_table_header *header = NULL;
 	int i = 0;
-	acpi_size tbl_size;
 
-	while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
+	while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
 		if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
 			struct oem_table *t = (struct oem_table *)header;
 
 			oem_addrX = t->OEMTableAddr;
 			oem_size = t->OEMTableSize;
-			early_acpi_os_unmap_memory(header, tbl_size);
 
 			*oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
 								    oem_size);
 			return 0;
 		}
-		early_acpi_os_unmap_memory(header, tbl_size);
 	}
 	return -1;
 }
 
 void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
 {
-	if (!oem_addr)
-		return;
-
-	__acpi_unmap_table((char *)oem_addr, oem_size);
 }
 #endif
 
-- 
cgit v1.2.3


From a4a16beadea041ab601e65b264b568e8b6b4f68d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Mon, 10 Nov 2008 09:05:37 +0100
Subject: oprofile: fix an overflow in ppro code

reset_value was changed from long to u64 in commit
b99170288421c79f0c2efa8b33e26e65f4bb7fb8 (oprofile: Implement Intel
architectural perfmon support)

But dynamic allocation of this array use a wrong type (long instead of
u64)

Cc: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_ppro.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 3f1b81a83e2..716d26f0e5d 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -69,7 +69,7 @@ static void ppro_setup_ctrs(struct op_msrs const * const msrs)
 	int i;
 
 	if (!reset_value) {
-		reset_value = kmalloc(sizeof(unsigned) * num_counters,
+		reset_value = kmalloc(sizeof(reset_value[0]) * num_counters,
 					GFP_ATOMIC);
 		if (!reset_value)
 			return;
-- 
cgit v1.2.3


From 93ce99e849433ede4ce8b410b749dc0cad1100b2 Mon Sep 17 00:00:00 2001
From: Venki Pallipadi <venkatesh.pallipadi@intel.com>
Date: Mon, 17 Nov 2008 14:43:58 -0800
Subject: x86: add rdtsc barrier to TSC sync check

Impact: fix incorrectly marked unstable TSC clock

Patch (commit 0d12cdd "sched: improve sched_clock() performance") has
a regression on one of the test systems here.

With the patch, I see:

 checking TSC synchronization [CPU#0 -> CPU#1]:
 Measured 28 cycles TSC warp between CPUs, turning off TSC clock.
 Marking TSC unstable due to check_tsc_sync_source failed

Whereas, without the patch syncs pass fine on all CPUs:

 checking TSC synchronization [CPU#0 -> CPU#1]: passed.

Due to this, TSC is marked unstable, when it is not actually unstable.
This is because syncs in check_tsc_wrap() goes away due to this commit.

As per the discussion on this thread, correct way to fix this is to add
explicit syncs as below?

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/tsc_sync.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index 9ffb01c31c4..1c0dfbca87c 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -46,7 +46,9 @@ static __cpuinit void check_tsc_warp(void)
 	cycles_t start, now, prev, end;
 	int i;
 
+	rdtsc_barrier();
 	start = get_cycles();
+	rdtsc_barrier();
 	/*
 	 * The measurement runs for 20 msecs:
 	 */
@@ -61,7 +63,9 @@ static __cpuinit void check_tsc_warp(void)
 		 */
 		__raw_spin_lock(&sync_lock);
 		prev = last_tsc;
+		rdtsc_barrier();
 		now = get_cycles();
+		rdtsc_barrier();
 		last_tsc = now;
 		__raw_spin_unlock(&sync_lock);
 
-- 
cgit v1.2.3


From 10db4ef7b9a65b86e4d047671a1886f4c101a859 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 18 Nov 2008 15:23:08 +0100
Subject: x86, PEBS/DS: fix code flow in ds_request()

this compiler warning:

  arch/x86/kernel/ds.c: In function 'ds_request':
  arch/x86/kernel/ds.c:368: warning: 'context' may be used uninitialized in this function

Shows that the code flow in ds_request() is buggy - it goes into
the unlock+release-context path even when the context is not allocated
yet.

First allocate the context, then do the other checks.

Also, take care with GFP allocations under the ds_lock spinlock.

Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ds.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index ac1d5b0586b..d1a121443bd 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -236,17 +236,33 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 	struct ds_context *context = *p_context;
 
 	if (!context) {
+		spin_unlock(&ds_lock);
+
 		context = kzalloc(sizeof(*context), GFP_KERNEL);
 
-		if (!context)
+		if (!context) {
+			spin_lock(&ds_lock);
 			return NULL;
+		}
 
 		context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
 		if (!context->ds) {
 			kfree(context);
+			spin_lock(&ds_lock);
 			return NULL;
 		}
 
+		spin_lock(&ds_lock);
+		/*
+		 * Check for race - another CPU could have allocated
+		 * it meanwhile:
+		 */
+		if (*p_context) {
+			kfree(context->ds);
+			kfree(context);
+			return *p_context;
+		}
+
 		*p_context = context;
 
 		context->this = p_context;
@@ -384,15 +400,15 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
 
 	spin_lock(&ds_lock);
 
-	error = -EPERM;
-	if (!check_tracer(task))
-		goto out_unlock;
-
 	error = -ENOMEM;
 	context = ds_alloc_context(task);
 	if (!context)
 		goto out_unlock;
 
+	error = -EPERM;
+	if (!check_tracer(task))
+		goto out_unlock;
+
 	error = -EALREADY;
 	if (context->owner[qual] == current)
 		goto out_unlock;
-- 
cgit v1.2.3


From e5e1f606ecbf67e52ebe2df5d14f8b94ec6544d0 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 17 Nov 2008 15:07:17 +0100
Subject: AMD IOMMU: add parameter to disable device isolation

Impact: add a new AMD IOMMU kernel command line parameter

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu_init.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 0cdcda35a05..838a2e1d5bb 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1213,6 +1213,8 @@ static int __init parse_amd_iommu_options(char *str)
 	for (; *str; ++str) {
 		if (strncmp(str, "isolate", 7) == 0)
 			amd_iommu_isolate = 1;
+		if (strncmp(str, "share", 5) == 0)
+			amd_iommu_isolate = 0;
 		if (strncmp(str, "fullflush", 11) == 0)
 			amd_iommu_unmap_flush = true;
 	}
-- 
cgit v1.2.3


From 3ce1f93c6d53c3f91c3846cf66b018276c8ac2e7 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 17 Nov 2008 15:09:20 +0100
Subject: AMD IOMMU: enable device isolation per default

Impact: makes device isolation the default for AMD IOMMU

Some device drivers showed double-free bugs of DMA memory while testing
them with AMD IOMMU. If all devices share the same protection domain
this can lead to data corruption and data loss. Prevent this by putting
each device into its own protection domain per default.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 838a2e1d5bb..595edd2befc 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -121,7 +121,7 @@ u16 amd_iommu_last_bdf;			/* largest PCI device id we have
 LIST_HEAD(amd_iommu_unity_map);		/* a list of required unity mappings
 					   we find in ACPI */
 unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
-int amd_iommu_isolate;			/* if 1, device isolation is enabled */
+int amd_iommu_isolate = 1;		/* if 1, device isolation is enabled */
 bool amd_iommu_unmap_flush;		/* if true, flush on every unmap */
 
 LIST_HEAD(amd_iommu_list);		/* list of all AMD IOMMUs in the
-- 
cgit v1.2.3


From 695b5676c727d80921a2dc8737d5b3322222db85 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 17 Nov 2008 15:16:43 +0100
Subject: AMD IOMMU: fix fullflush comparison length

Impact: fix comparison length for 'fullflush'

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c
index 595edd2befc..30ae2701b3d 100644
--- a/arch/x86/kernel/amd_iommu_init.c
+++ b/arch/x86/kernel/amd_iommu_init.c
@@ -1215,7 +1215,7 @@ static int __init parse_amd_iommu_options(char *str)
 			amd_iommu_isolate = 1;
 		if (strncmp(str, "share", 5) == 0)
 			amd_iommu_isolate = 0;
-		if (strncmp(str, "fullflush", 11) == 0)
+		if (strncmp(str, "fullflush", 9) == 0)
 			amd_iommu_unmap_flush = true;
 	}
 
-- 
cgit v1.2.3


From 8501c45cc32c311ae755a2d5ac8c4a5f04908d42 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 17 Nov 2008 19:11:46 +0100
Subject: AMD IOMMU: check for next_bit also in unmapped area

Impact: fix possible use of stale IO/TLB entries

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 331b318304e..e4899e0e878 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -537,7 +537,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 	address >>= PAGE_SHIFT;
 	iommu_area_free(dom->bitmap, address, pages);
 
-	if (address + pages >= dom->next_bit)
+	if (address >= dom->next_bit)
 		dom->need_flush = true;
 }
 
-- 
cgit v1.2.3


From 0af40a4b1050c050e62eb1dc30b82d5ab22bf221 Mon Sep 17 00:00:00 2001
From: Philipp Kohlbecher <xt28@gmx.de>
Date: Sun, 16 Nov 2008 12:11:01 +0100
Subject: x86: more general identifier for Phoenix BIOS

Impact: widen the reach of the low-memory-protect DMI quirk

Phoenix BIOSes variously identify their vendor as "Phoenix Technologies,
LTD" or "Phoenix Technologies LTD" (without the comma.)

This patch makes the identification string in the bad_bios_dmi_table
more general (following a suggestion by Ingo Molnar), so that both
versions are handled.

Again, the patched file compiles cleanly and the patch has been tested
successfully on my machine.

Signed-off-by: Philipp Kohlbecher <xt28@gmx.de>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 0fa6790c1dd..9d5674f7b6c 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -764,7 +764,7 @@ static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
 		.callback = dmi_low_memory_corruption,
 		.ident = "Phoenix BIOS",
 		.matches = {
-			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies, LTD"),
+			DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
 		},
 	},
 #endif
-- 
cgit v1.2.3


From 093bac154c142fa1fb31a3ac69ae1bc08930231b Mon Sep 17 00:00:00 2001
From: Steve Conklin <sconklin@canonical.com>
Date: Fri, 14 Nov 2008 00:55:51 -0600
Subject: x86: quirk for reboot stalls on a Dell Optiplex 330

Dell Optiplex 330 appears to hang on reboot. This is resolved by adding
a quirk to set bios reboot.

Signed-off-by: Leann Ogasawara <leann.ogasawara@canonical.com>
Signed-off-by: Steve Conklin <steve.conklin@canonical.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/reboot.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 724adfc63cb..cc5a2545dd4 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -169,6 +169,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
 			DMI_MATCH(DMI_BOARD_NAME, "0KW626"),
 		},
 	},
+	{   /* Handle problems with rebooting on Dell Optiplex 330 with 0KP561 */
+		.callback = set_bios_reboot,
+		.ident = "Dell OptiPlex 330",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 330"),
+			DMI_MATCH(DMI_BOARD_NAME, "0KP561"),
+		},
+	},
 	{	/* Handle problems with rebooting on Dell 2400's */
 		.callback = set_bios_reboot,
 		.ident = "Dell PowerEdge 2400",
-- 
cgit v1.2.3


From 20a4a236c7de5c915551cdc562482aa53eaff40e Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Thu, 13 Nov 2008 18:06:04 -0800
Subject: x86: uaccess_64: fix return value in __copy_from_user()

__copy_from_user() will return invalid value 16 when it fails to
access user space and the size is 10.

Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/uaccess_64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 664f15280f1..f8cfd00db45 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,7 +46,7 @@ int __copy_from_user(void *dst, const void __user *src, unsigned size)
 		return ret;
 	case 10:
 		__get_user_asm(*(u64 *)dst, (u64 __user *)src,
-			       ret, "q", "", "=r", 16);
+			       ret, "q", "", "=r", 10);
 		if (unlikely(ret))
 			return ret;
 		__get_user_asm(*(u16 *)(8 + (char *)dst),
-- 
cgit v1.2.3


From de11defebf00007677fb7ee91d9b089b78786fbb Mon Sep 17 00:00:00 2001
From: Ulrich Drepper <drepper@redhat.com>
Date: Wed, 19 Nov 2008 15:36:14 -0800
Subject: reintroduce accept4

Introduce a new accept4() system call.  The addition of this system call
matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(),
inotify_init1(), epoll_create1(), pipe2()) which added new system calls
that differed from analogous traditional system calls in adding a flags
argument that can be used to access additional functionality.

The accept4() system call is exactly the same as accept(), except that
it adds a flags bit-mask argument.  Two flags are initially implemented.
(Most of the new system calls in 2.6.27 also had both of these flags.)

SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled
for the new file descriptor returned by accept4().  This is a useful
security feature to avoid leaking information in a multithreaded
program where one thread is doing an accept() at the same time as
another thread is doing a fork() plus exec().  More details here:
http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling",
Ulrich Drepper).

The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag
to be enabled on the new open file description created by accept4().
(This flag is merely a convenience, saving the use of additional calls
fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result.

Here's a test program.  Works on x86-32.  Should work on x86-64, but
I (mtk) don't have a system to hand to test with.

It tests accept4() with each of the four possible combinations of
SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies
that the appropriate flags are set on the file descriptor/open file
description returned by accept4().

I tested Ulrich's patch in this thread by applying against 2.6.28-rc2,
and it passes according to my test program.

/* test_accept4.c

  Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk
       <mtk.manpages@gmail.com>

  Licensed under the GNU GPLv2 or later.
*/
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

#define PORT_NUM 33333

#define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0)

/**********************************************************************/

/* The following is what we need until glibc gets a wrapper for
  accept4() */

/* Flags for socket(), socketpair(), accept4() */
#ifndef SOCK_CLOEXEC
#define SOCK_CLOEXEC    O_CLOEXEC
#endif
#ifndef SOCK_NONBLOCK
#define SOCK_NONBLOCK   O_NONBLOCK
#endif

#ifdef __x86_64__
#define SYS_accept4 288
#elif __i386__
#define USE_SOCKETCALL 1
#define SYS_ACCEPT4 18
#else
#error "Sorry -- don't know the syscall # on this architecture"
#endif

static int
accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags)
{
   printf("Calling accept4(): flags = %x", flags);
   if (flags != 0) {
       printf(" (");
       if (flags & SOCK_CLOEXEC)
           printf("SOCK_CLOEXEC");
       if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK))
           printf(" ");
       if (flags & SOCK_NONBLOCK)
           printf("SOCK_NONBLOCK");
       printf(")");
   }
   printf("\n");

#if USE_SOCKETCALL
   long args[6];

   args[0] = fd;
   args[1] = (long) sockaddr;
   args[2] = (long) addrlen;
   args[3] = flags;

   return syscall(SYS_socketcall, SYS_ACCEPT4, args);
#else
   return syscall(SYS_accept4, fd, sockaddr, addrlen, flags);
#endif
}

/**********************************************************************/

static int
do_test(int lfd, struct sockaddr_in *conn_addr,
       int closeonexec_flag, int nonblock_flag)
{
   int connfd, acceptfd;
   int fdf, flf, fdf_pass, flf_pass;
   struct sockaddr_in claddr;
   socklen_t addrlen;

   printf("=======================================\n");

   connfd = socket(AF_INET, SOCK_STREAM, 0);
   if (connfd == -1)
       die("socket");
   if (connect(connfd, (struct sockaddr *) conn_addr,
               sizeof(struct sockaddr_in)) == -1)
       die("connect");

   addrlen = sizeof(struct sockaddr_in);
   acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen,
                      closeonexec_flag | nonblock_flag);
   if (acceptfd == -1) {
       perror("accept4()");
       close(connfd);
       return 0;
   }

   fdf = fcntl(acceptfd, F_GETFD);
   if (fdf == -1)
       die("fcntl:F_GETFD");
   fdf_pass = ((fdf & FD_CLOEXEC) != 0) ==
              ((closeonexec_flag & SOCK_CLOEXEC) != 0);
   printf("Close-on-exec flag is %sset (%s); ",
           (fdf & FD_CLOEXEC) ? "" : "not ",
           fdf_pass ? "OK" : "failed");

   flf = fcntl(acceptfd, F_GETFL);
   if (flf == -1)
       die("fcntl:F_GETFD");
   flf_pass = ((flf & O_NONBLOCK) != 0) ==
              ((nonblock_flag & SOCK_NONBLOCK) !=0);
   printf("nonblock flag is %sset (%s)\n",
           (flf & O_NONBLOCK) ? "" : "not ",
           flf_pass ? "OK" : "failed");

   close(acceptfd);
   close(connfd);

   printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL");
   return fdf_pass && flf_pass;
}

static int
create_listening_socket(int port_num)
{
   struct sockaddr_in svaddr;
   int lfd;
   int optval;

   memset(&svaddr, 0, sizeof(struct sockaddr_in));
   svaddr.sin_family = AF_INET;
   svaddr.sin_addr.s_addr = htonl(INADDR_ANY);
   svaddr.sin_port = htons(port_num);

   lfd = socket(AF_INET, SOCK_STREAM, 0);
   if (lfd == -1)
       die("socket");

   optval = 1;
   if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval,
                  sizeof(optval)) == -1)
       die("setsockopt");

   if (bind(lfd, (struct sockaddr *) &svaddr,
            sizeof(struct sockaddr_in)) == -1)
       die("bind");

   if (listen(lfd, 5) == -1)
       die("listen");

   return lfd;
}

int
main(int argc, char *argv[])
{
   struct sockaddr_in conn_addr;
   int lfd;
   int port_num;
   int passed;

   passed = 1;

   port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM;

   memset(&conn_addr, 0, sizeof(struct sockaddr_in));
   conn_addr.sin_family = AF_INET;
   conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   conn_addr.sin_port = htons(port_num);

   lfd = create_listening_socket(port_num);

   if (!do_test(lfd, &conn_addr, 0, 0))
       passed = 0;
   if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0))
       passed = 0;
   if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK))
       passed = 0;
   if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK))
       passed = 0;

   close(lfd);

   exit(passed ? EXIT_SUCCESS : EXIT_FAILURE);
}

[mtk.manpages@gmail.com: rewrote changelog, updated test program]
Signed-off-by: Ulrich Drepper <drepper@redhat.com>
Tested-by: Michael Kerrisk <mtk.manpages@gmail.com>
Acked-by: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: <linux-api@vger.kernel.org>
Cc: <linux-arch@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/unistd_64.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
index 834b2c1d89f..d2e415e6666 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
@@ -639,8 +639,8 @@ __SYSCALL(__NR_fallocate, sys_fallocate)
 __SYSCALL(__NR_timerfd_settime, sys_timerfd_settime)
 #define __NR_timerfd_gettime			287
 __SYSCALL(__NR_timerfd_gettime, sys_timerfd_gettime)
-#define __NR_paccept				288
-__SYSCALL(__NR_paccept, sys_paccept)
+#define __NR_accept4				288
+__SYSCALL(__NR_accept4, sys_accept4)
 #define __NR_signalfd4				289
 __SYSCALL(__NR_signalfd4, sys_signalfd4)
 #define __NR_eventfd2				290
-- 
cgit v1.2.3


From 9bc646f163b136684390081262fab0fd8f5343ca Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Thu, 20 Nov 2008 19:08:45 +0600
Subject: x86: fix __cpuinit/__init tangle in init_thread_xstate()

Impact:	fix incorrect __init annotation

This patch removes the following section mismatch warning. A patch set
was send previously (http://lkml.org/lkml/2008/11/10/407). But
introduce some other problem, reported by Rufus
(http://lkml.org/lkml/2008/11/11/46). Then Ingo Molnar suggest that,
it's best to remove __init from xsave_cntxt_init(void). Which is the
second patch in this series. Now, this one removes the following
warning.

WARNING: arch/x86/kernel/built-in.o(.cpuinit.text+0x2237): Section
mismatch in reference from the function cpu_init() to the function
.init.text:init_thread_xstate()
The function __cpuinit cpu_init() references
a function __init init_thread_xstate().
If init_thread_xstate is only used by cpu_init then
annotate init_thread_xstate with a matching annotation.

Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/i387.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index 1f20608d4ca..b0f61f0dcd0 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -58,7 +58,7 @@ void __cpuinit mxcsr_feature_mask_init(void)
 	stts();
 }
 
-void __init init_thread_xstate(void)
+void __cpuinit init_thread_xstate(void)
 {
 	if (!HAVE_HWFP) {
 		xstate_size = sizeof(struct i387_soft_struct);
-- 
cgit v1.2.3


From bfe085f62f98a49e1b864e4950389c7205174e4f Mon Sep 17 00:00:00 2001
From: Rakib Mullick <rakib.mullick@gmail.com>
Date: Thu, 20 Nov 2008 19:12:50 +0600
Subject: x86: fixing __cpuinit/__init tangle, xsave_cntxt_init()

Annotate xsave_cntxt_init() as "can be called outside of __init".

Signed-off-by: Rakib Mullick <rakib.mullick@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/xsave.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index b13acb75e82..15c3e699918 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -310,7 +310,7 @@ static void __init setup_xstate_init(void)
 /*
  * Enable and initialize the xsave feature.
  */
-void __init xsave_cntxt_init(void)
+void __ref xsave_cntxt_init(void)
 {
 	unsigned int eax, ebx, ecx, edx;
 
-- 
cgit v1.2.3


From 0ca4b6b00113b064c080d26d803d0d7c80fb5dc8 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Thu, 20 Nov 2008 14:09:33 -0700
Subject: x86: Fix interrupt leak due to migration

When we migrate an interrupt from one CPU to another, we set the
move_in_progress flag and clean up the vectors later once they're not
being used.  If you're unlucky and call destroy_irq() before the vectors
become un-used, the move_in_progress flag is never cleared, which causes
the interrupt to become unusable.

This was discovered by Jesse Brandeburg for whom it manifested as an
MSI-X device refusing to use MSI-X mode when the driver was unloaded
and reloaded repeatedly.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/io_apic.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 7a3f2028e2e..c9513e1ff28 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1140,6 +1140,20 @@ static void __clear_irq_vector(int irq)
 
 	cfg->vector = 0;
 	cpus_clear(cfg->domain);
+
+	if (likely(!cfg->move_in_progress))
+		return;
+	cpus_and(mask, cfg->old_domain, cpu_online_map);
+	for_each_cpu_mask_nr(cpu, mask) {
+		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
+								vector++) {
+			if (per_cpu(vector_irq, cpu)[vector] != irq)
+				continue;
+			per_cpu(vector_irq, cpu)[vector] = -1;
+			break;
+		}
+	}
+	cfg->move_in_progress = 0;
 }
 
 void __setup_vector_irq(int cpu)
-- 
cgit v1.2.3


From a1967d64414dab500e86cbbddf8eae6ad2047903 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 21 Nov 2008 11:16:48 -0800
Subject: x86: revert irq number limitation

Impact: fix MSIx not enough irq numbers available regression

The manual revert of the sparse_irq patches missed to bring the number
of possible irqs back to the .27 status. This resulted in a regression
when two multichannel network cards were placed in a system with only
one IO_APIC - causing the networking driver to not have the right
IRQ and the device not coming up.

Remove the dynamic allocation logic leftovers and simply return
NR_IRQS in probe_nr_irqs() for now.

   Fixes: http://lkml.org/lkml/2008/11/19/354

Reported-by: Jesper Dangaard Brouer <hawk@diku.dk>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Jesper Dangaard Brouer <hawk@diku.dk>
Acked-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/io_apic.c | 22 +---------------------
 1 file changed, 1 insertion(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index c9513e1ff28..1fec0f9b150 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3608,27 +3608,7 @@ int __init io_apic_get_redir_entries (int ioapic)
 
 int __init probe_nr_irqs(void)
 {
-	int idx;
-	int nr = 0;
-#ifndef CONFIG_XEN
-	int nr_min = 32;
-#else
-	int nr_min = NR_IRQS;
-#endif
-
-	for (idx = 0; idx < nr_ioapics; idx++)
-		nr += io_apic_get_redir_entries(idx) + 1;
-
-	/* double it for hotplug and msi and nmi */
-	nr <<= 1;
-
-	/* something wrong ? */
-	if (nr < nr_min)
-		nr = nr_min;
-	if (WARN_ON(nr > NR_IRQS))
-		nr = NR_IRQS;
-
-	return nr;
+	return NR_IRQS;
 }
 
 /* --------------------------------------------------------------------------
-- 
cgit v1.2.3


From 86bbc2c235e500957b213e7e64ce2e0ccb8bc131 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Fri, 21 Nov 2008 10:21:33 +0000
Subject: xen: pin correct PGD on suspend

Impact: fix Xen guest boot failure

commit eefb47f6a1e855653d275cb90592a3587ea93a09 ("xen: use
spin_lock_nest_lock when pinning a pagetable") changed xen_pgd_walk to
walk over mm->pgd rather than taking pgd as an argument.

This breaks xen_mm_(un)pin_all() because it makes init_mm.pgd readonly
instead of the pgd we are interested in and therefore the pin subsequently
fails.

(XEN) mm.c:2280:d15 Bad type (saw 00000000e8000001 != exp 0000000060000000) for mfn bc464 (pfn 21ca7)
(XEN) mm.c:2665:d15 Error while pinning mfn bc464

[   14.586913] 1 multicall(s) failed: cpu 0
[   14.586926] Pid: 14, comm: kstop/0 Not tainted 2.6.28-rc5-x86_32p-xenU-00172-gee2f6cc #200
[   14.586940] Call Trace:
[   14.586955]  [<c030c17a>] ? printk+0x18/0x1e
[   14.586972]  [<c0103df3>] xen_mc_flush+0x163/0x1d0
[   14.586986]  [<c0104bc1>] __xen_pgd_pin+0xa1/0x110
[   14.587000]  [<c015a330>] ? stop_cpu+0x0/0xf0
[   14.587015]  [<c0104d7b>] xen_mm_pin_all+0x4b/0x70
[   14.587029]  [<c022bcb9>] xen_suspend+0x39/0xe0
[   14.587042]  [<c015a330>] ? stop_cpu+0x0/0xf0
[   14.587054]  [<c015a3cd>] stop_cpu+0x9d/0xf0
[   14.587067]  [<c01417cd>] run_workqueue+0x8d/0x150
[   14.587080]  [<c030e4b3>] ? _spin_unlock_irqrestore+0x23/0x40
[   14.587094]  [<c014558a>] ? prepare_to_wait+0x3a/0x70
[   14.587107]  [<c0141918>] worker_thread+0x88/0xf0
[   14.587120]  [<c01453c0>] ? autoremove_wake_function+0x0/0x50
[   14.587133]  [<c0141890>] ? worker_thread+0x0/0xf0
[   14.587146]  [<c014509c>] kthread+0x3c/0x70
[   14.587157]  [<c0145060>] ? kthread+0x0/0x70
[   14.587170]  [<c0109d1b>] kernel_thread_helper+0x7/0x10
[   14.587181]   call  1/3: op=14 arg=[c0415000] result=0
[   14.587192]   call  2/3: op=14 arg=[e1ca2000] result=0
[   14.587204]   call  3/3: op=26 arg=[c1808860] result=-22

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/xen/mmu.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 688936044dc..636ef4caa52 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -661,12 +661,11 @@ void xen_set_pgd(pgd_t *ptr, pgd_t val)
  * For 64-bit, we must skip the Xen hole in the middle of the address
  * space, just after the big x86-64 virtual hole.
  */
-static int xen_pgd_walk(struct mm_struct *mm,
-			int (*func)(struct mm_struct *mm, struct page *,
-				    enum pt_level),
-			unsigned long limit)
+static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
+			  int (*func)(struct mm_struct *mm, struct page *,
+				      enum pt_level),
+			  unsigned long limit)
 {
-	pgd_t *pgd = mm->pgd;
 	int flush = 0;
 	unsigned hole_low, hole_high;
 	unsigned pgdidx_limit, pudidx_limit, pmdidx_limit;
@@ -753,6 +752,14 @@ out:
 	return flush;
 }
 
+static int xen_pgd_walk(struct mm_struct *mm,
+			int (*func)(struct mm_struct *mm, struct page *,
+				    enum pt_level),
+			unsigned long limit)
+{
+	return __xen_pgd_walk(mm, mm->pgd, func, limit);
+}
+
 /* If we're using split pte locks, then take the page's lock and
    return a pointer to it.  Otherwise return NULL. */
 static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
@@ -854,7 +861,7 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
 
 	xen_mc_batch();
 
-	 if (xen_pgd_walk(mm, xen_pin_page, USER_LIMIT)) {
+	if (__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT)) {
 		/* re-enable interrupts for flushing */
 		xen_mc_issue(0);
 
@@ -998,7 +1005,7 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
 		       PT_PMD);
 #endif
 
-	xen_pgd_walk(mm, xen_unpin_page, USER_LIMIT);
+	__xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
 
 	xen_mc_issue(0);
 }
-- 
cgit v1.2.3


From bd2b3ca7686d9470b1b58df631daa03179486182 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 20 Nov 2008 11:47:18 +0200
Subject: KVM: VMX: Fix interrupt loss during race with NMI

If an interrupt cannot be injected for some reason (say, page fault
when fetching the IDT descriptor), the interrupt is marked for
reinjection.  However, if an NMI is queued at this time, the NMI
will be injected instead and the NMI will be lost.

Fix by deferring the NMI injection until the interrupt has been
injected successfully.

Analyzed by Jan Kiszka.

Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d06b4dc0e2e..a4018b01e1f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3149,7 +3149,9 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 
 	if (cpu_has_virtual_nmis()) {
 		if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
-			if (vmx_nmi_enabled(vcpu)) {
+			if (vcpu->arch.interrupt.pending) {
+				enable_nmi_window(vcpu);
+			} else if (vmx_nmi_enabled(vcpu)) {
 				vcpu->arch.nmi_pending = false;
 				vcpu->arch.nmi_injected = true;
 			} else {
-- 
cgit v1.2.3


From 0c0f40bdbe4ddb48ebecfb5c2b56eeb175a57c45 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 21 Nov 2008 19:13:58 +0100
Subject: KVM: MMU: fix sync of ptes addressed at owner pagetable

During page sync, if a pagetable contains a self referencing pte (that
points to the pagetable), the corresponding spte may be marked as
writable even though all mappings are supposed to be write protected.

Fix by clearing page unsync before syncing individual sptes.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f1983d9477c..410ddbc1aa2 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1038,13 +1038,13 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 	}
 
 	rmap_write_protect(vcpu->kvm, sp->gfn);
+	kvm_unlink_unsync_page(vcpu->kvm, sp);
 	if (vcpu->arch.mmu.sync_page(vcpu, sp)) {
 		kvm_mmu_zap_page(vcpu->kvm, sp);
 		return 1;
 	}
 
 	kvm_mmu_flush_tlb(vcpu);
-	kvm_unlink_unsync_page(vcpu->kvm, sp);
 	return 0;
 }
 
-- 
cgit v1.2.3


From 5cf02b7bafddb6c3c16ddfb23d3ce187f70528ba Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 25 Nov 2008 00:42:37 -0500
Subject: x86: use limited register constraint for setnz

Impact: build fix with certain compilers

GCC can decide to use %dil when "r" is used, which is not valid for
setnz.

This bug was brought out by Stephen Rothwell's merging of the
branch tracer into linux-next.

[ Thanks to Uros Bizjak for recommending 'q' over 'Q' ]

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/boot/tty.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c
index 0be77b39328..7e8e8b25f5f 100644
--- a/arch/x86/boot/tty.c
+++ b/arch/x86/boot/tty.c
@@ -74,7 +74,7 @@ static int kbd_pending(void)
 {
 	u8 pending;
 	asm volatile("int $0x16; setnz %0"
-		     : "=rm" (pending)
+		     : "=qm" (pending)
 		     : "a" (0x0100));
 	return pending;
 }
-- 
cgit v1.2.3


From eff79aee91dd07e944df65fa448c8baeee7709d8 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Tue, 25 Nov 2008 14:13:03 +0100
Subject: arch/x86/kernel/pci-calgary_64.c: change simple_strtol to
 simple_strtoul

Impact: fix theoretical option string parsing overflow

Since bridge is unsigned, it would seem better to use simple_strtoul that
simple_strtol.

A simplified version of the semantic patch that makes this change is as
follows: (http://www.emn.fr/x-info/coccinelle/)

// <smpl>
@r2@
long e;
position p;
@@

e = simple_strtol@p(...)

@@
position p != r2.p;
type T;
T e;
@@

e =
- simple_strtol@p
+ simple_strtoul
  (...)
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Cc: muli@il.ibm.com
Cc: jdmason@kudzu.us
Cc: discuss@x86-64.org
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-calgary_64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index e1e731d78f3..d28bbdc35e4 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -1567,7 +1567,7 @@ static int __init calgary_parse_options(char *p)
 				++p;
 			if (*p == '\0')
 				break;
-			bridge = simple_strtol(p, &endp, 0);
+			bridge = simple_strtoul(p, &endp, 0);
 			if (p == endp)
 				break;
 
-- 
cgit v1.2.3


From 292c669cd7087a090d6420e223eb1072f3e3c50b Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Tue, 25 Nov 2008 08:45:13 +0100
Subject: x86, bts: exclude ds.c from build when disabled

Impact: cleanup

Move the CONFIG guard from the .c file into the makefile.

Reported-by: Andi Kleen <andi-suse@firstfloor.org>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile | 2 +-
 arch/x86/kernel/ds.c     | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index e489ff9cb3e..b62a7667828 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
-obj-y				+= ds.o
+obj-$(CONFIG_X86_DS)		+= ds.o
 obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index d1a121443bd..4c8d57ec966 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -21,8 +21,6 @@
  */
 
 
-#ifdef CONFIG_X86_DS
-
 #include <asm/ds.h>
 
 #include <linux/errno.h>
@@ -878,4 +876,3 @@ void ds_free(struct ds_context *context)
 	while (leftovers--)
 		ds_put_context(context);
 }
-#endif /* CONFIG_X86_DS */
-- 
cgit v1.2.3


From e5e8ca633bbe972eff6f84e064a63c0c08ed6c3d Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Tue, 25 Nov 2008 08:47:19 +0100
Subject: x86, bts: turn macro into static inline function

Impact: cleanup

Replace a macro with a static inline function.

Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/ds.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
index 72c5a190bf4..a95008457ea 100644
--- a/arch/x86/include/asm/ds.h
+++ b/arch/x86/include/asm/ds.h
@@ -23,12 +23,13 @@
 #ifndef _ASM_X86_DS_H
 #define _ASM_X86_DS_H
 
-#ifdef CONFIG_X86_DS
 
 #include <linux/types.h>
 #include <linux/init.h>
 
 
+#ifdef CONFIG_X86_DS
+
 struct task_struct;
 
 /*
@@ -232,7 +233,8 @@ extern void ds_free(struct ds_context *context);
 
 #else /* CONFIG_X86_DS */
 
-#define ds_init_intel(config) do {} while (0)
+struct cpuinfo_x86;
+static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
 
 #endif /* CONFIG_X86_DS */
 #endif /* _ASM_X86_DS_H */
-- 
cgit v1.2.3


From c4858ffc8f2dc850cb1f609c679b1ac1ad36ef0c Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Tue, 25 Nov 2008 08:49:06 +0100
Subject: x86, pebs: fix PEBS record size configuration

Impact: fix DS hw enablement on 64-bit x86

Fix the PEBS record size in the DS configuration.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ds.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 4c8d57ec966..04e38ef646a 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -816,13 +816,21 @@ static const struct ds_configuration ds_cfg_var = {
 	.sizeof_ds    = sizeof(long) * 12,
 	.sizeof_field = sizeof(long),
 	.sizeof_rec[ds_bts]   = sizeof(long) * 3,
+#ifdef __i386__
 	.sizeof_rec[ds_pebs]  = sizeof(long) * 10
+#else
+	.sizeof_rec[ds_pebs]  = sizeof(long) * 18
+#endif
 };
 static const struct ds_configuration ds_cfg_64 = {
 	.sizeof_ds    = 8 * 12,
 	.sizeof_field = 8,
 	.sizeof_rec[ds_bts]   = 8 * 3,
+#ifdef __i386__
 	.sizeof_rec[ds_pebs]  = 8 * 10
+#else
+	.sizeof_rec[ds_pebs]  = 8 * 18
+#endif
 };
 
 static inline void
-- 
cgit v1.2.3


From de90add30e79261c3b5be68bb0f22d2ef98e8113 Mon Sep 17 00:00:00 2001
From: Markus Metzger <markus.t.metzger@intel.com>
Date: Tue, 25 Nov 2008 08:52:56 +0100
Subject: x86, bts: fix wrmsr and spinlock over kmalloc

Impact: fix sleeping-with-spinlock-held bugs/crashes

- Turn a wrmsr to write the DS_AREA MSR into a wrmsrl.
- Use irqsave variants of spinlocks.
- Do not allocate memory while holding spinlocks.

Reported-by: Stephane Eranian <eranian@googlemail.com>
Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Markus Metzger <markus.t.metzger@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/ds.c | 77 +++++++++++++++++++++++++++-------------------------
 1 file changed, 40 insertions(+), 37 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 04e38ef646a..a2d1176c38e 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -209,14 +209,15 @@ static DEFINE_PER_CPU(struct ds_context *, system_context);
 static inline struct ds_context *ds_get_context(struct task_struct *task)
 {
 	struct ds_context *context;
+	unsigned long irq;
 
-	spin_lock(&ds_lock);
+	spin_lock_irqsave(&ds_lock, irq);
 
 	context = (task ? task->thread.ds_ctx : this_system_context);
 	if (context)
 		context->count++;
 
-	spin_unlock(&ds_lock);
+	spin_unlock_irqrestore(&ds_lock, irq);
 
 	return context;
 }
@@ -224,55 +225,46 @@ static inline struct ds_context *ds_get_context(struct task_struct *task)
 /*
  * Same as ds_get_context, but allocates the context and it's DS
  * structure, if necessary; returns NULL; if out of memory.
- *
- * pre: requires ds_lock to be held
  */
 static inline struct ds_context *ds_alloc_context(struct task_struct *task)
 {
 	struct ds_context **p_context =
 		(task ? &task->thread.ds_ctx : &this_system_context);
 	struct ds_context *context = *p_context;
+	unsigned long irq;
 
 	if (!context) {
-		spin_unlock(&ds_lock);
-
 		context = kzalloc(sizeof(*context), GFP_KERNEL);
-
-		if (!context) {
-			spin_lock(&ds_lock);
+		if (!context)
 			return NULL;
-		}
 
 		context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL);
 		if (!context->ds) {
 			kfree(context);
-			spin_lock(&ds_lock);
 			return NULL;
 		}
 
-		spin_lock(&ds_lock);
-		/*
-		 * Check for race - another CPU could have allocated
-		 * it meanwhile:
-		 */
+		spin_lock_irqsave(&ds_lock, irq);
+
 		if (*p_context) {
 			kfree(context->ds);
 			kfree(context);
-			return *p_context;
-		}
-
-		*p_context = context;
 
-		context->this = p_context;
-		context->task = task;
+			context = *p_context;
+		} else {
+			*p_context = context;
 
-		if (task)
-			set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
+			context->this = p_context;
+			context->task = task;
 
-		if (!task || (task == current))
-			wrmsr(MSR_IA32_DS_AREA, (unsigned long)context->ds, 0);
+			if (task)
+				set_tsk_thread_flag(task, TIF_DS_AREA_MSR);
 
-		get_tracer(task);
+			if (!task || (task == current))
+				wrmsrl(MSR_IA32_DS_AREA,
+				       (unsigned long)context->ds);
+		}
+		spin_unlock_irqrestore(&ds_lock, irq);
 	}
 
 	context->count++;
@@ -286,10 +278,12 @@ static inline struct ds_context *ds_alloc_context(struct task_struct *task)
  */
 static inline void ds_put_context(struct ds_context *context)
 {
+	unsigned long irq;
+
 	if (!context)
 		return;
 
-	spin_lock(&ds_lock);
+	spin_lock_irqsave(&ds_lock, irq);
 
 	if (--context->count)
 		goto out;
@@ -311,7 +305,7 @@ static inline void ds_put_context(struct ds_context *context)
 	kfree(context->ds);
 	kfree(context);
  out:
-	spin_unlock(&ds_lock);
+	spin_unlock_irqrestore(&ds_lock, irq);
 }
 
 
@@ -382,6 +376,7 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
 	struct ds_context *context;
 	unsigned long buffer, adj;
 	const unsigned long alignment = (1 << 3);
+	unsigned long irq;
 	int error = 0;
 
 	if (!ds_cfg.sizeof_ds)
@@ -396,26 +391,27 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
 		return -EOPNOTSUPP;
 
 
-	spin_lock(&ds_lock);
-
-	error = -ENOMEM;
 	context = ds_alloc_context(task);
 	if (!context)
-		goto out_unlock;
+		return -ENOMEM;
+
+	spin_lock_irqsave(&ds_lock, irq);
 
 	error = -EPERM;
 	if (!check_tracer(task))
 		goto out_unlock;
 
+	get_tracer(task);
+
 	error = -EALREADY;
 	if (context->owner[qual] == current)
-		goto out_unlock;
+		goto out_put_tracer;
 	error = -EPERM;
 	if (context->owner[qual] != NULL)
-		goto out_unlock;
+		goto out_put_tracer;
 	context->owner[qual] = current;
 
-	spin_unlock(&ds_lock);
+	spin_unlock_irqrestore(&ds_lock, irq);
 
 
 	error = -ENOMEM;
@@ -463,10 +459,17 @@ static int ds_request(struct task_struct *task, void *base, size_t size,
  out_release:
 	context->owner[qual] = NULL;
 	ds_put_context(context);
+	put_tracer(task);
+	return error;
+
+ out_put_tracer:
+	spin_unlock_irqrestore(&ds_lock, irq);
+	ds_put_context(context);
+	put_tracer(task);
 	return error;
 
  out_unlock:
-	spin_unlock(&ds_lock);
+	spin_unlock_irqrestore(&ds_lock, irq);
 	ds_put_context(context);
 	return error;
 }
-- 
cgit v1.2.3


From a266d9f1253a38ec2d5655ebcd6846298b0554f4 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Fri, 21 Nov 2008 14:49:25 +0100
Subject: [CPUFREQ] powernow-k8: ignore out-of-range PstateStatus value

A workaround for AMD CPU family 11h erratum 311 might cause that the
P-state Status Register shows a "current P-state" which is larger than
the "current P-state limit" in P-state Current Limit Register. For the
wrong P-state value there is no ACPI _PSS object defined and
powernow-k8/cpufreq can't determine the proper CPU frequency for that
state.

As a consequence this can cause a panic during boot (potentially with
all recent kernel versions -- at least I have reproduced it with
various 2.6.27 kernels and with the current .28 series), as an
example:

powernow-k8: Found 1 AMD Turion(tm)X2 Ultra DualCore Mobile ZM-82 processors (2 \
)
powernow-k8:    0 : pstate 0 (2200 MHz)
powernow-k8:    1 : pstate 1 (1100 MHz)
powernow-k8:    2 : pstate 2 (600 MHz)
BUG: unable to handle kernel paging request at ffff88086e7528b8
IP: [<ffffffff80486361>] cpufreq_stats_update+0x4a/0x5f
PGD 202063 PUD 0
Oops: 0002 [#1] SMP
last sysfs file:
CPU 1
Modules linked in:
Pid: 1, comm: swapper Not tainted 2.6.28-rc3-dirty #16
RIP: 0010:[<ffffffff80486361>]  [<ffffffff80486361>] cpufreq_stats_update+0x4a/0\
f
Synaptics claims to have extended capabilities, but I'm not able to read them.<6\
6
RAX: 0000000000000000 RBX: 0000000000000001 RCX: ffff88006e7528c0
RDX: 00000000ffffffff RSI: ffff88006e54af00 RDI: ffffffff808f056c
RBP: 00000000fffee697 R08: 0000000000000003 R09: ffff88006e73f080
R10: 0000000000000001 R11: 00000000002191c0 R12: ffff88006fb83c10
R13: 00000000ffffffff R14: 0000000000000001 R15: 0000000000000000
FS:  0000000000000000(0000) GS:ffff88006fb50740(0000) knlGS:0000000000000000
Unable to initialize Synaptics hardware.
CS:  0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: ffff88086e7528b8 CR3: 0000000000201000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 1, threadinfo ffff88006fb82000, task ffff88006fb816d0)
Stack:
 ffff88006e74da50 0000000000000000 ffff88006e54af00 ffffffff804863c7
 ffff88006e74da50 0000000000000000 00000000ffffffff 0000000000000000
 ffff88006fb83c10 ffffffff8024b46c ffffffff808f0560 ffff88006fb83c10
Call Trace:
 [<ffffffff804863c7>] ? cpufreq_stat_notifier_trans+0x51/0x83
 [<ffffffff8024b46c>] ? notifier_call_chain+0x29/0x4c
 [<ffffffff8024b561>] ? __srcu_notifier_call_chain+0x46/0x61
 [<ffffffff8048496d>] ? cpufreq_notify_transition+0x93/0xa9
 [<ffffffff8021ab8d>] ? powernowk8_target+0x1e8/0x5f3
 [<ffffffff80486687>] ? cpufreq_governor_performance+0x1b/0x20
 [<ffffffff80484886>] ? __cpufreq_governor+0x71/0xa8
 [<ffffffff80484b21>] ? __cpufreq_set_policy+0x101/0x13e
 [<ffffffff80485bcd>] ? cpufreq_add_dev+0x3f0/0x4cd
 [<ffffffff8048577a>] ? handle_update+0x0/0x8
 [<ffffffff803c2062>] ? sysdev_driver_register+0xb6/0x10d
 [<ffffffff8056592c>] ? powernowk8_init+0x0/0x7e
 [<ffffffff8048604c>] ? cpufreq_register_driver+0x8f/0x140
 [<ffffffff80209056>] ? _stext+0x56/0x14f
 [<ffffffff802c2234>] ? proc_register+0x122/0x17d
 [<ffffffff802c23a0>] ? create_proc_entry+0x73/0x8a
 [<ffffffff8025c259>] ? register_irq_proc+0x92/0xaa
 [<ffffffff8025c2c8>] ? init_irq_proc+0x57/0x69
 [<ffffffff807fc85f>] ? kernel_init+0x116/0x169
 [<ffffffff8020cc79>] ? child_rip+0xa/0x11
 [<ffffffff807fc749>] ? kernel_init+0x0/0x169
 [<ffffffff8020cc6f>] ? child_rip+0x0/0x11
Code: 05 c5 83 36 00 48 c7 c2 48 5d 86 80 48 8b 04 d8 48 8b 40 08 48 8b 34 02 48\

RIP  [<ffffffff80486361>] cpufreq_stats_update+0x4a/0x5f
 RSP <ffff88006fb83b20>
CR2: ffff88086e7528b8
---[ end trace 0678bac75e67a2f7 ]---
Kernel panic - not syncing: Attempted to kill init!

In short, aftereffect of the wrong P-state is that
cpufreq_stats_update() uses "-1" as index for some array in

cpufreq_stats_update (unsigned int cpu)
{
...
     if (stat->time_in_state)
                stat->time_in_state[stat->last_index] =
                        cputime64_add(stat->time_in_state[stat->last_index],
                                      cputime_sub(cur_time, stat->last_time));
...
}

Fortunately, the wrong P-state value is returned only if the core is
in P-state 0. This fix solves the problem by detecting the
out-of-range P-state, ignoring it, and using "0" instead.

Cc: Mark Langsdorf <mark.langsdorf@amd.com>
Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 18 +++++++++++++++---
 arch/x86/kernel/cpu/cpufreq/powernow-k8.h | 17 ++++++++++++++++-
 2 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index d3dcd58b87c..7f05f44b97e 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -115,9 +115,20 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data)
 	u32 i = 0;
 
 	if (cpu_family == CPU_HW_PSTATE) {
-		rdmsr(MSR_PSTATE_STATUS, lo, hi);
-		i = lo & HW_PSTATE_MASK;
-		data->currpstate = i;
+		if (data->currpstate == HW_PSTATE_INVALID) {
+			/* read (initial) hw pstate if not yet set */
+			rdmsr(MSR_PSTATE_STATUS, lo, hi);
+			i = lo & HW_PSTATE_MASK;
+
+			/*
+			 * a workaround for family 11h erratum 311 might cause
+			 * an "out-of-range Pstate if the core is in Pstate-0
+			 */
+			if (i >= data->numps)
+				data->currpstate = HW_PSTATE_0;
+			else
+				data->currpstate = i;
+		}
 		return 0;
 	}
 	do {
@@ -1121,6 +1132,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
 	}
 
 	data->cpu = pol->cpu;
+	data->currpstate = HW_PSTATE_INVALID;
 
 	if (powernow_k8_cpu_init_acpi(data)) {
 		/*
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
index ab48cfed4d9..65cfb5d7f77 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h
@@ -5,6 +5,19 @@
  *  http://www.gnu.org/licenses/gpl.html
  */
 
+
+enum pstate {
+	HW_PSTATE_INVALID = 0xff,
+	HW_PSTATE_0 = 0,
+	HW_PSTATE_1 = 1,
+	HW_PSTATE_2 = 2,
+	HW_PSTATE_3 = 3,
+	HW_PSTATE_4 = 4,
+	HW_PSTATE_5 = 5,
+	HW_PSTATE_6 = 6,
+	HW_PSTATE_7 = 7,
+};
+
 struct powernow_k8_data {
 	unsigned int cpu;
 
@@ -23,7 +36,9 @@ struct powernow_k8_data {
         u32 exttype; /* extended interface = 1 */
 
 	/* keep track of the current fid / vid or pstate */
-	u32 currvid, currfid, currpstate;
+	u32 currvid;
+	u32 currfid;
+	enum pstate currpstate;
 
 	/* the powernow_table includes all frequency and vid/fid pairings:
 	 * fid are the lower 8 bits of the index, vid are the upper 8 bits.
-- 
cgit v1.2.3


From ffd565a8b817d1eb4b25184e8418e8d96c3f56f6 Mon Sep 17 00:00:00 2001
From: Andreas Herrmann <andreas.herrmann3@amd.com>
Date: Tue, 25 Nov 2008 17:18:03 +0100
Subject: x86: fixup config space size of CPU functions for AMD family 11h

Impact: extend allowed configuration space access on 11h CPUs from 256 to 4K

Signed-off-by: Andreas Herrmann <andreas.herrmann3@amd.com>
Acked-by: Jesse Barnes <jbarnes@virtuousgeek.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/pci/fixup.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
index 3c27a809393..2051dc96b8e 100644
--- a/arch/x86/pci/fixup.c
+++ b/arch/x86/pci/fixup.c
@@ -496,21 +496,24 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
 			  pci_siemens_interrupt_controller);
 
 /*
- * Regular PCI devices have 256 bytes, but AMD Family 10h Opteron ext config
- * have 4096 bytes.  Even if the device is capable, that doesn't mean we can
- * access it.  Maybe we don't have a way to generate extended config space
- * accesses.   So check it
+ * Regular PCI devices have 256 bytes, but AMD Family 10h/11h CPUs have
+ * 4096 bytes configuration space for each function of their processor
+ * configuration space.
  */
-static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
+static void amd_cpu_pci_cfg_space_size(struct pci_dev *dev)
 {
 	dev->cfg_size = pci_cfg_space_size_ext(dev);
 }
-
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, fam10h_pci_cfg_space_size);
-DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, fam10h_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1201, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1202, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1203, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1204, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1300, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1301, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1302, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1303, amd_cpu_pci_cfg_space_size);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1304, amd_cpu_pci_cfg_space_size);
 
 /*
  * SB600: Disable BAR1 on device 14.0 to avoid HPET resources from
-- 
cgit v1.2.3


From 6c475352e87224a8f0b8cc6f6cc96b30563dc5b4 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 25 Nov 2008 15:33:10 +0100
Subject: KVM: MMU: avoid creation of unreachable pages in the shadow

It is possible for a shadow page to have a parent link
pointing to a freed page. When zapping a high level table,
kvm_mmu_page_unlink_children fails to remove the parent_pte link.
For that to happen, the child must be unreachable via the shadow
tree, which can happen in shadow_walk_entry if the guest pte was
modified in between walk() and fetch(). Remove the parent pte
reference in such case.

Possible cause for oops in bug #2217430.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 613ec9aa674..84eee43bbe7 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -331,6 +331,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
 		r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
 					  &curr_pte, sizeof(curr_pte));
 		if (r || curr_pte != gw->ptes[level - 2]) {
+			kvm_mmu_put_page(shadow_page, sptep);
 			kvm_release_pfn_clean(sw->pfn);
 			sw->sptep = NULL;
 			return 1;
-- 
cgit v1.2.3


From b627c8b17ccacba38c975bc0f69a49fc4e5261c9 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 20 Nov 2008 20:49:56 +0100
Subject: x86: always define DECLARE_PCI_UNMAP* macros

Impact: fix boot crash on AMD IOMMU if CONFIG_GART_IOMMU is off

Currently these macros evaluate to a no-op except the kernel is compiled
with GART or Calgary support. But we also need these macros when we have
SWIOTLB, VT-d or AMD IOMMU in the kernel. Since we always compile at
least with SWIOTLB we can define these macros always.

This patch is also for stable backport for the same reason the SWIOTLB
default selection patch is.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Cc: <stable@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/pci_64.h | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
index 5b28995d664..d02d936840a 100644
--- a/arch/x86/include/asm/pci_64.h
+++ b/arch/x86/include/asm/pci_64.h
@@ -34,8 +34,6 @@ extern void pci_iommu_alloc(void);
  */
 #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
 
-#if defined(CONFIG_GART_IOMMU) || defined(CONFIG_CALGARY_IOMMU)
-
 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)	\
 	dma_addr_t ADDR_NAME;
 #define DECLARE_PCI_UNMAP_LEN(LEN_NAME)		\
@@ -49,18 +47,6 @@ extern void pci_iommu_alloc(void);
 #define pci_unmap_len_set(PTR, LEN_NAME, VAL)		\
 	(((PTR)->LEN_NAME) = (VAL))
 
-#else
-/* No IOMMU */
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)
-#define pci_unmap_addr(PTR, ADDR_NAME)		(0)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)	do { } while (0)
-#define pci_unmap_len(PTR, LEN_NAME)		(0)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL)	do { } while (0)
-
-#endif
-
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_X86_PCI_64_H */
-- 
cgit v1.2.3


From 7b1dedca42ac0d0d0be01e39d8461bb53a2389b3 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Sat, 29 Nov 2008 13:46:27 +0100
Subject: x86: fix dma_mapping_error for 32bit x86

Devices like b44 ethernet can't dma from addresses above 1GB. The driver
handles this cases by falling back to GFP_DMA allocation. But for detecting
the problem it needs to get an indication from dma_mapping_error.
The bug is triggered by using a VMSPLIT option of 2G/2G.

Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Acked-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/dma-mapping.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 7f225a4b2a2..097794ff6b7 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -71,15 +71,13 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
 /* Make sure we keep the same behaviour */
 static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
 {
-#ifdef CONFIG_X86_32
-	return 0;
-#else
+#ifdef CONFIG_X86_64
 	struct dma_mapping_ops *ops = get_dma_ops(dev);
 	if (ops->mapping_error)
 		return ops->mapping_error(dev, dma_addr);
 
-	return (dma_addr == bad_dma_address);
 #endif
+	return (dma_addr == bad_dma_address);
 }
 
 #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f)
-- 
cgit v1.2.3


From 2236d252e001ea57d53cec1954f680e503f3b8bc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 22 Nov 2008 17:37:34 +0000
Subject: enable_IR_x2apic() needs to be __init

calls __init, called only from __init

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/apic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 04a7f960bbc..16f94879b52 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1315,7 +1315,7 @@ void enable_x2apic(void)
 	}
 }
 
-void enable_IR_x2apic(void)
+void __init enable_IR_x2apic(void)
 {
 #ifdef CONFIG_INTR_REMAP
 	int ret;
-- 
cgit v1.2.3


From 23a14b9e9db49ed5f7683857557c26c874d4abb6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 22 Nov 2008 17:37:44 +0000
Subject: kvm_setup_secondary_clock() is cpuinit

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/kernel/kvmclock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 1c9cc431ea4..e169ae9b6a6 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -128,7 +128,7 @@ static int kvm_register_clock(char *txt)
 }
 
 #ifdef CONFIG_X86_LOCAL_APIC
-static void __devinit kvm_setup_secondary_clock(void)
+static void __cpuinit kvm_setup_secondary_clock(void)
 {
 	/*
 	 * Now that the first cpu already had this clocksource initialized,
-- 
cgit v1.2.3


From 37af46efa5413c6f4c25d9a24b4c43f2cc718eed Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 22 Nov 2008 17:37:54 +0000
Subject: xen_setup_vcpu_info_placement() is not init on x86

... so get xen-ops.h in agreement with xen/smp.c

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/xen/xen-ops.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index d7422dc2a55..9e1afae8461 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -49,7 +49,7 @@ bool xen_vcpu_stolen(int vcpu);
 
 void xen_mark_init_mm_pinned(void);
 
-void __init xen_setup_vcpu_info_placement(void);
+void xen_setup_vcpu_info_placement(void);
 
 #ifdef CONFIG_SMP
 void xen_smp_init(void);
-- 
cgit v1.2.3


From df6b07949b6cab9d119363d02ef63379160f6c82 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Sat, 22 Nov 2008 17:38:04 +0000
Subject: xen_play_dead() is __cpuinit

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/xen/smp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index d77da613b1d..acd9b6705e0 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -362,7 +362,7 @@ static void xen_cpu_die(unsigned int cpu)
 		alternatives_smp_switch(0);
 }
 
-static void xen_play_dead(void)
+static void __cpuinit xen_play_dead(void) /* used only with CPU_HOTPLUG */
 {
 	play_dead_common();
 	HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
-- 
cgit v1.2.3


From 96b8936a9ed08746e47081458a5eb9e43a751e24 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 25 Nov 2008 08:10:03 +0100
Subject: remove __ARCH_WANT_COMPAT_SYS_PTRACE

All architectures now use the generic compat_sys_ptrace, as should every
new architecture that needs 32bit compat (if we'll ever get another).

Remove the now superflous __ARCH_WANT_COMPAT_SYS_PTRACE define, and also
kill a comment about __ARCH_SYS_PTRACE that was added after
__ARCH_SYS_PTRACE was already gone.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/ptrace.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index d1531c8480b..eefb0594b05 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -271,8 +271,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
-#define __ARCH_WANT_COMPAT_SYS_PTRACE
-
 #endif /* __KERNEL__ */
 
 #endif /* !__ASSEMBLY__ */
-- 
cgit v1.2.3


From 43714539eab42b2fa3653ea7bd667b36c2291b11 Mon Sep 17 00:00:00 2001
From: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Date: Sat, 29 Nov 2008 16:50:12 +0530
Subject: sched: don't export sched_mc_power_savings in laptops

Impact: do not expose a control that has no effect

Fix to prevent sched_mc_power_saving from being exported through sysfs
on single-socket systems. (Say multicore single socket (Laptop))

CPU core map of the boot cpu should be equal to possible number
of cpus for single socket system.

This fix has been developed at FOSS.in kernel workout.

Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/topology.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4850e4b02b6..ff386ff50ed 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -239,7 +239,7 @@ struct pci_bus;
 void set_pci_bus_resources_arch_default(struct pci_bus *b);
 
 #ifdef CONFIG_SMP
-#define mc_capable()			(boot_cpu_data.x86_max_cores > 1)
+#define mc_capable()	(cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids)
 #define smt_capable()			(smp_num_siblings > 1)
 #endif
 
-- 
cgit v1.2.3


From 70d7d357578245f1993fd2d3ccd26088bcd38941 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 2 Dec 2008 20:16:03 +0100
Subject: x86: fix broken flushing in GART nofullflush path

Impact: remove stale IOTLB entries

In the non-default nofullflush case the GART is only flushed when
next_bit wraps around. But it can happen that an unmap operation unmaps
memory which is behind the current next_bit location. If these addresses
are reused it may result in stale GART IO/TLB entries. Fix this by
setting the GART next_bit always behind an unmapped location.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/pci-gart_64.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index a42b02b4df6..ba7ad83e20a 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -123,6 +123,8 @@ static void free_iommu(unsigned long offset, int size)
 
 	spin_lock_irqsave(&iommu_bitmap_lock, flags);
 	iommu_area_free(iommu_gart_bitmap, offset, size);
+	if (offset >= next_bit)
+		next_bit = offset + size;
 	spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
 }
 
-- 
cgit v1.2.3


From eac9fbc6a90ab3440f4d98a8c52b15a724fc6f4f Mon Sep 17 00:00:00 2001
From: Richard Kennedy <richard@rsk.demon.co.uk>
Date: Mon, 24 Nov 2008 13:53:24 +0000
Subject: AMD IOMMU: struct amd_iommu remove padding on 64 bit

Remove 16 bytes of padding from struct amd_iommu on 64bit builds
reducing its size to 120 bytes, allowing it to span one fewer
cachelines.

Signed-off-by: Richard Kennedy <richard@rsk.demon.co.uk>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/include/asm/amd_iommu_types.h | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h
index 1a30c0440c6..ac302a2fa33 100644
--- a/arch/x86/include/asm/amd_iommu_types.h
+++ b/arch/x86/include/asm/amd_iommu_types.h
@@ -251,13 +251,6 @@ struct amd_iommu {
 	/* Pointer to PCI device of this IOMMU */
 	struct pci_dev *dev;
 
-	/*
-	 * Capability pointer. There could be more than one IOMMU per PCI
-	 * device function if there are more than one AMD IOMMU capability
-	 * pointers.
-	 */
-	u16 cap_ptr;
-
 	/* physical address of MMIO space */
 	u64 mmio_phys;
 	/* virtual address of MMIO space */
@@ -266,6 +259,13 @@ struct amd_iommu {
 	/* capabilities of that IOMMU read from ACPI */
 	u32 cap;
 
+	/*
+	 * Capability pointer. There could be more than one IOMMU per PCI
+	 * device function if there are more than one AMD IOMMU capability
+	 * pointers.
+	 */
+	u16 cap_ptr;
+
 	/* pci domain of this IOMMU */
 	u16 pci_seg;
 
@@ -284,19 +284,19 @@ struct amd_iommu {
 	/* size of command buffer */
 	u32 cmd_buf_size;
 
-	/* event buffer virtual address */
-	u8 *evt_buf;
 	/* size of event buffer */
 	u32 evt_buf_size;
+	/* event buffer virtual address */
+	u8 *evt_buf;
 	/* MSI number for event interrupt */
 	u16 evt_msi_num;
 
-	/* if one, we need to send a completion wait command */
-	int need_sync;
-
 	/* true if interrupts for this IOMMU are already enabled */
 	bool int_enabled;
 
+	/* if one, we need to send a completion wait command */
+	int need_sync;
+
 	/* default dma_ops domain for that IOMMU */
 	struct dma_ops_domain *default_dom;
 };
-- 
cgit v1.2.3


From f91ba190648be4ff127d6aaf3993ac19d66dc2c2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Tue, 25 Nov 2008 12:56:12 +0100
Subject: AMD IOMMU: set device table entry for aliased devices

In some rare cases a request can arrive an IOMMU with its originial
requestor id even it is aliased. Handle this by setting the device table
entry to the same protection domain for the original and the aliased
requestor id.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index e4899e0e878..a232e5a85d4 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -858,6 +858,9 @@ static int get_device_resources(struct device *dev,
 		print_devid(_bdf, 1);
 	}
 
+	if (domain_for_device(_bdf) == NULL)
+		set_device_domain(*iommu, *domain, _bdf);
+
 	return 1;
 }
 
-- 
cgit v1.2.3


From 09ee17eb8ea89514c13980c4010bdbbaea8630c2 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 3 Dec 2008 12:19:27 +0100
Subject: AMD IOMMU: fix possible race while accessing iommu->need_sync

The access to the iommu->need_sync member needs to be protected by the
iommu->lock. Otherwise this is a possible race condition. Fix it with
this patch.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index a232e5a85d4..5662e226b0c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -187,6 +187,8 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
 
 	spin_lock_irqsave(&iommu->lock, flags);
 	ret = __iommu_queue_command(iommu, cmd);
+	if (!ret)
+		iommu->need_sync = 1;
 	spin_unlock_irqrestore(&iommu->lock, flags);
 
 	return ret;
@@ -210,10 +212,13 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
 	cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
 	CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
 
-	iommu->need_sync = 0;
-
 	spin_lock_irqsave(&iommu->lock, flags);
 
+	if (!iommu->need_sync)
+		goto out;
+
+	iommu->need_sync = 0;
+
 	ret = __iommu_queue_command(iommu, &cmd);
 
 	if (ret)
@@ -254,8 +259,6 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
 
 	ret = iommu_queue_command(iommu, &cmd);
 
-	iommu->need_sync = 1;
-
 	return ret;
 }
 
@@ -281,8 +284,6 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
 
 	ret = iommu_queue_command(iommu, &cmd);
 
-	iommu->need_sync = 1;
-
 	return ret;
 }
 
@@ -762,8 +763,6 @@ static void set_device_domain(struct amd_iommu *iommu,
 	write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
 
 	iommu_queue_inv_dev_entry(iommu, devid);
-
-	iommu->need_sync = 1;
 }
 
 /*****************************************************************************
@@ -1034,8 +1033,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
 	if (addr == bad_dma_address)
 		goto out;
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -1063,8 +1061,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, dir);
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -1130,8 +1127,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 			goto unmap;
 	}
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -1176,8 +1172,7 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		s->dma_address = s->dma_length = 0;
 	}
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 }
@@ -1228,8 +1223,7 @@ static void *alloc_coherent(struct device *dev, size_t size,
 		goto out;
 	}
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 out:
 	spin_unlock_irqrestore(&domain->lock, flags);
@@ -1260,8 +1254,7 @@ static void free_coherent(struct device *dev, size_t size,
 
 	__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-	if (unlikely(iommu->need_sync))
-		iommu_completion_wait(iommu);
+	iommu_completion_wait(iommu);
 
 	spin_unlock_irqrestore(&domain->lock, flags);
 
-- 
cgit v1.2.3


From 9ea84ad77d635bdb76c9a08f44f21a9af98359ee Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Tue, 2 Dec 2008 07:21:21 +0100
Subject: oprofile: fix CPU unplug panic in ppro_stop()

If oprofile statically compiled in kernel, a cpu unplug triggers
a panic in ppro_stop(), because a NULL pointer is dereferenced.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/op_model_ppro.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c
index 716d26f0e5d..e9f80c744cf 100644
--- a/arch/x86/oprofile/op_model_ppro.c
+++ b/arch/x86/oprofile/op_model_ppro.c
@@ -156,6 +156,8 @@ static void ppro_start(struct op_msrs const * const msrs)
 	unsigned int low, high;
 	int i;
 
+	if (!reset_value)
+		return;
 	for (i = 0; i < num_counters; ++i) {
 		if (reset_value[i]) {
 			CTRL_READ(low, high, msrs, i);
@@ -171,6 +173,8 @@ static void ppro_stop(struct op_msrs const * const msrs)
 	unsigned int low, high;
 	int i;
 
+	if (!reset_value)
+		return;
 	for (i = 0; i < num_counters; ++i) {
 		if (!reset_value[i])
 			continue;
-- 
cgit v1.2.3


From 3d337c653c94be50f11a45fb14a2afa8a8a1a618 Mon Sep 17 00:00:00 2001
From: William Cohen <wcohen@redhat.com>
Date: Sun, 30 Nov 2008 15:39:10 -0500
Subject: x86/oprofile: fix Intel cpu family 6 detection

Alan Jenkins wrote:
> This is on an EeePC 701, /proc/cpuinfo as attached.
>
> Is this expected?  Will the next release work?
>
> Thanks, Alan
>
> # opcontrol --setup --no-vmlinux
> cpu_type 'unset' is not valid
> you should upgrade oprofile or force the use of timer mode
>
> # opcontrol -v
> opcontrol: oprofile 0.9.4 compiled on Nov 29 2008 22:44:10
>
> # cat /dev/oprofile/cpu_type
> i386/p6
> # uname -r
> 2.6.28-rc6eeepc

Hi Alan,

Looking at the kernel driver code for oprofile it can return the "i386/p6" for
the cpu_type. However, looking at the user-space oprofile code there isn't the
matching entry in libop/op_cpu_type.c or the events/unit_mask files in
events/i386 directory.

The Intel AP-485 says this is a "Intel Pentium M processor model D". Seems like
the oprofile kernel driver should be identifying the processor as "i386/p6_mobile"

The driver identification code doesn't look quite right in nmi_init.c

http://git.kernel.org/?p=linux/kernel/git/sfr/linux-next.git;a=blob;f=arch/x86/oprofile/nmi_int.c;h=022cd41ea9b4106e5884277096e80e9088a7c7a9;hb=HEAD

has:

409         case 10 ... 13:
410                 *cpu_type = "i386/p6";
411                 break;

Referring to the Intel AP-485:
case 10 and 11 should produce "i386/piii"
case 13 should produce "i386/p6_mobile"

I didn't see anything for case 12.

Something like the attached patch. I don't have a celeron machine to verify that
changes in this area of the kernel fix thing.

-Will

Signed-off-by: William Cohen <wcohen@redhat.com>
Tested-by: Alan Jenkins <alan-jenkins@tuffmail.co.uk>
Acked-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Robert Richter <robert.richter@amd.com>
---
 arch/x86/oprofile/nmi_int.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 022cd41ea9b..202864ad49a 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -401,14 +401,13 @@ static int __init ppro_init(char **cpu_type)
 		*cpu_type = "i386/pii";
 		break;
 	case 6 ... 8:
+	case 10 ... 11:
 		*cpu_type = "i386/piii";
 		break;
 	case 9:
+	case 13:
 		*cpu_type = "i386/p6_mobile";
 		break;
-	case 10 ... 13:
-		*cpu_type = "i386/p6";
-		break;
 	case 14:
 		*cpu_type = "i386/core";
 		break;
-- 
cgit v1.2.3


From 9adc13867ec5fe0cd35434f92954d90e42381f0b Mon Sep 17 00:00:00 2001
From: Andi Kleen <andi@firstfloor.org>
Date: Thu, 4 Dec 2008 13:33:35 +0100
Subject: x86: fix early panic with boot option "nosmp"

Impact: fix boot crash with numcpus=0 on certain systems

Fix early exception in __get_smp_config with nosmp.

Bail out early when there is no MP table.

Reported-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/mpparse.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index f98f4e1dba0..0f4c1fd5a1f 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -604,6 +604,9 @@ static void __init __get_smp_config(unsigned int early)
 		printk(KERN_INFO "Using ACPI for processor (LAPIC) "
 		       "configuration information\n");
 
+	if (!mpf)
+		return;
+
 	printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
 	       mpf->mpf_specification);
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
-- 
cgit v1.2.3


From bb9d4ff80bc032d7961815c2ff5eaf458ae3adff Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 4 Dec 2008 15:59:48 +0100
Subject: AMD IOMMU: fix iommu_map_page function

Impact: bugfix in iommu_map_page function

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 5662e226b0c..67970a8c2ee 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -344,7 +344,7 @@ static int iommu_map(struct protection_domain *dom,
 	u64 __pte, *pte, *page;
 
 	bus_addr  = PAGE_ALIGN(bus_addr);
-	phys_addr = PAGE_ALIGN(bus_addr);
+	phys_addr = PAGE_ALIGN(phys_addr);
 
 	/* only support 512GB address spaces for now */
 	if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK))
-- 
cgit v1.2.3


From 3cc3d84bffbd93bdb671ac7961b12cd98fbb9266 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 4 Dec 2008 16:44:31 +0100
Subject: AMD IOMMU: fix loop counter in free_pagetable function

Impact: bugfix

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 67970a8c2ee..0637e65ca0c 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -600,7 +600,7 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
 			continue;
 
 		p2 = IOMMU_PTE_PAGE(p1[i]);
-		for (j = 0; j < 512; ++i) {
+		for (j = 0; j < 512; ++j) {
 			if (!IOMMU_PTE_PRESENT(p2[j]))
 				continue;
 			p3 = IOMMU_PTE_PAGE(p2[j]);
-- 
cgit v1.2.3


From 24f811603e8ef4b9173f47fa263230168e237128 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 14:25:39 +0100
Subject: AMD IOMMU: fix typo in comment

Impact: cleanup

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0637e65ca0c..2fde073b6be 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -922,8 +922,8 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 
 /*
  * This function contains common code for mapping of a physically
- * contiguous memory region into DMA address space. It is uses by all
- * mapping functions provided by this IOMMU driver.
+ * contiguous memory region into DMA address space. It is used by all
+ * mapping functions provided with this IOMMU driver.
  * Must be called with the domain lock held.
  */
 static dma_addr_t __map_single(struct device *dev,
-- 
cgit v1.2.3


From 8ad909c4c1b91bd35ba6a2af5e7ab3fc8d9fe283 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 14:37:20 +0100
Subject: AMD IOMMU: fix WARN_ON in dma_ops unmap path

Impact: minor fix

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 2fde073b6be..3133a0ea09f 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -910,7 +910,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu,
 	if (address >= dom->aperture_size)
 		return;
 
-	WARN_ON(address & 0xfffULL || address > dom->aperture_size);
+	WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size);
 
 	pte  = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)];
 	pte += IOMMU_PTE_L0_INDEX(address);
-- 
cgit v1.2.3


From b8d9905d025d80a2357e8ce4704fde2923f6a1bd Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Mon, 8 Dec 2008 14:40:26 +0100
Subject: AMD IOMMU: __unmap_single: check for bad_dma_address instead of 0

Impact: minor fix

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 3133a0ea09f..a7b6dec6fc3 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -983,7 +983,8 @@ static void __unmap_single(struct amd_iommu *iommu,
 	dma_addr_t i, start;
 	unsigned int pages;
 
-	if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
+	if ((dma_addr == bad_dma_address) ||
+	    (dma_addr + size > dma_dom->aperture_size))
 		return;
 
 	pages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
-- 
cgit v1.2.3


From 087052b02f42b50316c6e4d7f2d8dfba3de6fc2e Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 17 Oct 2008 16:09:57 +0200
Subject: x86: fix default_spin_lock_flags() prototype
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

these warnings:

  arch/x86/kernel/paravirt-spinlocks.c: In function ‘default_spin_lock_flags’:
  arch/x86/kernel/paravirt-spinlocks.c:12: warning: passing argument 1 of ‘__raw_spin_lock’ from incompatible pointer type
  arch/x86/kernel/paravirt-spinlocks.c: At top level:
  arch/x86/kernel/paravirt-spinlocks.c:11: warning: ‘default_spin_lock_flags’ defined but not used

showed that the prototype of default_spin_lock_flags() was confused about
what type spinlocks have.

the proper type on UP is raw_spinlock_t.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/paravirt-spinlocks.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 0e9f1982b1d..95777b0faa7 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -7,7 +7,8 @@
 
 #include <asm/paravirt.h>
 
-static void default_spin_lock_flags(struct raw_spinlock *lock, unsigned long flags)
+static inline void
+default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags)
 {
 	__raw_spin_lock(lock);
 }
-- 
cgit v1.2.3


From ae8d04e2ecbb233926860e9ce145eac19c7835dc Mon Sep 17 00:00:00 2001
From: Zachary Amsden <zach@vmware.com>
Date: Sat, 13 Dec 2008 12:36:58 -0800
Subject: x86 Fix VMI crash on boot in 2.6.28-rc8

VMI initialiation can relocate the fixmap, causing early_ioremap to
malfunction if it is initialized before the relocation.  To fix this,
VMI activation is split into two phases; the detection, which must
happen before setting up ioremap, and the activation, which must happen
after parsing early boot parameters.

This fixes a crash on boot when VMI is enabled under VMware.

Signed-off-by: Zachary Amsden <zach@vmware.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/vmi.h |  8 +++++++-
 arch/x86/kernel/setup.c    | 12 +++++-------
 arch/x86/kernel/smpboot.c  |  2 --
 arch/x86/kernel/vmi_32.c   | 16 +++++++++++-----
 4 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/vmi.h b/arch/x86/include/asm/vmi.h
index b7c0dea119f..61e08c0a290 100644
--- a/arch/x86/include/asm/vmi.h
+++ b/arch/x86/include/asm/vmi.h
@@ -223,9 +223,15 @@ struct pci_header {
 } __attribute__((packed));
 
 /* Function prototypes for bootstrapping */
+#ifdef CONFIG_VMI
 extern void vmi_init(void);
+extern void vmi_activate(void);
 extern void vmi_bringup(void);
-extern void vmi_apply_boot_page_allocations(void);
+#else
+static inline void vmi_init(void) {}
+static inline void vmi_activate(void) {}
+static inline void vmi_bringup(void) {}
+#endif
 
 /* State needed to start an application processor in an SMP system. */
 struct vmi_ap_state {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 9d5674f7b6c..bdec76e5559 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -794,6 +794,9 @@ void __init setup_arch(char **cmdline_p)
 	printk(KERN_INFO "Command line: %s\n", boot_command_line);
 #endif
 
+	/* VMI may relocate the fixmap; do this before touching ioremap area */
+	vmi_init();
+
 	early_cpu_init();
 	early_ioremap_init();
 
@@ -880,13 +883,8 @@ void __init setup_arch(char **cmdline_p)
 	check_efer();
 #endif
 
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
-	/*
-	 * Must be before kernel pagetables are setup
-	 * or fixmap area is touched.
-	 */
-	vmi_init();
-#endif
+	/* Must be before kernel pagetables are setup */
+	vmi_activate();
 
 	/* after early param, so could get panic from serial */
 	reserve_early_setup_data();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7b109339731..f71f96fc9e6 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -294,9 +294,7 @@ static void __cpuinit start_secondary(void *unused)
 	 * fragile that we want to limit the things done here to the
 	 * most necessary things.
 	 */
-#ifdef CONFIG_VMI
 	vmi_bringup();
-#endif
 	cpu_init();
 	preempt_disable();
 	smp_callin();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8b6c393ab9f..22fd6577156 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -960,8 +960,6 @@ static inline int __init activate_vmi(void)
 
 void __init vmi_init(void)
 {
-	unsigned long flags;
-
 	if (!vmi_rom)
 		probe_vmi_rom();
 	else
@@ -973,13 +971,21 @@ void __init vmi_init(void)
 
 	reserve_top_address(-vmi_rom->virtual_top);
 
-	local_irq_save(flags);
-	activate_vmi();
-
 #ifdef CONFIG_X86_IO_APIC
 	/* This is virtual hardware; timer routing is wired correctly */
 	no_timer_check = 1;
 #endif
+}
+
+void vmi_activate(void)
+{
+	unsigned long flags;
+
+	if (!vmi_rom)
+		return;
+
+	local_irq_save(flags);
+	activate_vmi();
 	local_irq_restore(flags & X86_EFLAGS_IF);
 }
 
-- 
cgit v1.2.3