aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/mach-davinci/time.c2
-rw-r--r--arch/arm/mach-imx/time.c1
-rw-r--r--arch/arm/mach-ixp4xx/common.c2
-rw-r--r--arch/arm/mach-omap1/time.c1
-rw-r--r--arch/arm/plat-omap/timer32k.c2
-rw-r--r--arch/i386/Kconfig21
-rw-r--r--arch/i386/defconfig264
-rw-r--r--arch/i386/kernel/Makefile1
-rw-r--r--arch/i386/kernel/acpi/boot.c36
-rw-r--r--arch/i386/kernel/alternative.c14
-rw-r--r--arch/i386/kernel/apic.c10
-rw-r--r--arch/i386/kernel/cpu/Makefile1
-rw-r--r--arch/i386/kernel/cpu/amd.c8
-rw-r--r--arch/i386/kernel/cpu/common.c2
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c79
-rw-r--r--arch/i386/kernel/cpu/mcheck/non-fatal.c4
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c2
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c2
-rw-r--r--arch/i386/kernel/cpu/perfctr-watchdog.c4
-rw-r--r--arch/i386/kernel/cpu/rise.c52
-rw-r--r--arch/i386/kernel/e820.c32
-rw-r--r--arch/i386/kernel/geode.c155
-rw-r--r--arch/i386/kernel/hpet.c98
-rw-r--r--arch/i386/kernel/i8253.c32
-rw-r--r--arch/i386/kernel/io_apic.c26
-rw-r--r--arch/i386/kernel/irq.c8
-rw-r--r--arch/i386/kernel/process.c12
-rw-r--r--arch/i386/kernel/reboot.c9
-rw-r--r--arch/i386/kernel/setup.c11
-rw-r--r--arch/i386/kernel/sysenter.c4
-rw-r--r--arch/i386/kernel/time.c50
-rw-r--r--arch/i386/kernel/traps.c3
-rw-r--r--arch/i386/kernel/vmiclock.c2
-rw-r--r--arch/i386/lib/Makefile2
-rw-r--r--arch/i386/lib/string.c257
-rw-r--r--arch/i386/mm/init.c7
-rw-r--r--arch/i386/mm/ioremap.c2
-rw-r--r--arch/i386/mm/pageattr.c20
-rw-r--r--arch/i386/mm/pgtable.c6
-rw-r--r--arch/i386/pci/acpi.c32
-rw-r--r--arch/i386/pci/common.c13
-rw-r--r--arch/i386/pci/mmconfig-shared.c48
-rw-r--r--arch/i386/xen/time.c3
-rw-r--r--arch/ia64/ia32/binfmt_elf32.c2
-rw-r--r--arch/powerpc/boot/ps3-head.S2
-rw-r--r--arch/powerpc/boot/ps3-hvcall.S2
-rw-r--r--arch/powerpc/mm/tlb_32.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c18
-rw-r--r--arch/powerpc/platforms/ps3/Kconfig36
-rw-r--r--arch/sh/kernel/timers/timer-tmu.c1
-rw-r--r--arch/sparc64/Kconfig4
-rw-r--r--arch/sparc64/kernel/time.c54
-rw-r--r--arch/x86_64/Kconfig12
-rw-r--r--arch/x86_64/Makefile3
-rw-r--r--arch/x86_64/defconfig288
-rw-r--r--arch/x86_64/ia32/ia32_binfmt.c1
-rw-r--r--arch/x86_64/ia32/ia32entry.S5
-rw-r--r--arch/x86_64/kernel/aperture.c4
-rw-r--r--arch/x86_64/kernel/apic.c77
-rw-r--r--arch/x86_64/kernel/e820.c138
-rw-r--r--arch/x86_64/kernel/early-quirks.c1
-rw-r--r--arch/x86_64/kernel/entry.S6
-rw-r--r--arch/x86_64/kernel/hpet.c6
-rw-r--r--arch/x86_64/kernel/i8259.c18
-rw-r--r--arch/x86_64/kernel/io_apic.c58
-rw-r--r--arch/x86_64/kernel/mce.c241
-rw-r--r--arch/x86_64/kernel/mce_amd.c6
-rw-r--r--arch/x86_64/kernel/mpparse.c21
-rw-r--r--arch/x86_64/kernel/pci-calgary.c570
-rw-r--r--arch/x86_64/kernel/pci-dma.c7
-rw-r--r--arch/x86_64/kernel/pci-gart.c27
-rw-r--r--arch/x86_64/kernel/pci-nommu.c8
-rw-r--r--arch/x86_64/kernel/pci-swiotlb.c2
-rw-r--r--arch/x86_64/kernel/process.c13
-rw-r--r--arch/x86_64/kernel/reboot.c4
-rw-r--r--arch/x86_64/kernel/setup.c9
-rw-r--r--arch/x86_64/kernel/signal.c7
-rw-r--r--arch/x86_64/kernel/smp.c6
-rw-r--r--arch/x86_64/kernel/tce.c12
-rw-r--r--arch/x86_64/kernel/time.c158
-rw-r--r--arch/x86_64/kernel/tsc.c39
-rw-r--r--arch/x86_64/kernel/vmlinux.lds.S25
-rw-r--r--arch/x86_64/kernel/vsyscall.c22
-rw-r--r--arch/x86_64/mm/fault.c4
-rw-r--r--arch/x86_64/mm/init.c11
-rw-r--r--arch/x86_64/mm/k8topology.c13
-rw-r--r--arch/x86_64/mm/numa.c15
-rw-r--r--arch/x86_64/mm/pageattr.c23
-rw-r--r--arch/x86_64/mm/srat.c97
-rw-r--r--arch/x86_64/pci/k8-bus.c6
-rw-r--r--arch/x86_64/vdso/Makefile49
-rw-r--r--arch/x86_64/vdso/vclock_gettime.c120
-rw-r--r--arch/x86_64/vdso/vdso-note.S12
-rw-r--r--arch/x86_64/vdso/vdso-start.S2
-rw-r--r--arch/x86_64/vdso/vdso.S2
-rw-r--r--arch/x86_64/vdso/vdso.lds.S77
-rw-r--r--arch/x86_64/vdso/vextern.h16
-rw-r--r--arch/x86_64/vdso/vgetcpu.c50
-rw-r--r--arch/x86_64/vdso/vma.c139
-rw-r--r--arch/x86_64/vdso/voffset.h1
-rw-r--r--arch/x86_64/vdso/vvar.c12
101 files changed, 2629 insertions, 1275 deletions
diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c
index 4d8425de692..e96a3dcdc1a 100644
--- a/arch/arm/mach-davinci/time.c
+++ b/arch/arm/mach-davinci/time.c
@@ -285,6 +285,8 @@ static void davinci_set_mode(enum clock_event_mode mode,
case CLOCK_EVT_MODE_SHUTDOWN:
t->opts = TIMER_OPTS_DISABLED;
break;
+ case CLOCK_EVT_MODE_RESUME:
+ break;
}
}
diff --git a/arch/arm/mach-imx/time.c b/arch/arm/mach-imx/time.c
index 010f6fa984a..d86d124aea2 100644
--- a/arch/arm/mach-imx/time.c
+++ b/arch/arm/mach-imx/time.c
@@ -159,6 +159,7 @@ static void imx_set_mode(enum clock_event_mode mode, struct clock_event_device *
break;
case CLOCK_EVT_MODE_SHUTDOWN:
case CLOCK_EVT_MODE_UNUSED:
+ case CLOCK_EVT_MODE_RESUME:
/* Left event sources disabled, no more interrupts appears */
break;
}
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 8112f726ffa..23e7fba6d3e 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -459,6 +459,8 @@ static void ixp4xx_set_mode(enum clock_event_mode mode,
default:
osrt = opts = 0;
break;
+ case CLOCK_EVT_MODE_RESUME:
+ break;
}
*IXP4XX_OSRT1 = osrt | opts;
diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c
index 3705d20c4e5..237651ebae5 100644
--- a/arch/arm/mach-omap1/time.c
+++ b/arch/arm/mach-omap1/time.c
@@ -156,6 +156,7 @@ static void omap_mpu_set_mode(enum clock_event_mode mode,
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
+ case CLOCK_EVT_MODE_RESUME:
break;
}
}
diff --git a/arch/arm/plat-omap/timer32k.c b/arch/arm/plat-omap/timer32k.c
index 2feceec8ecc..b0af014b0e2 100644
--- a/arch/arm/plat-omap/timer32k.c
+++ b/arch/arm/plat-omap/timer32k.c
@@ -156,6 +156,8 @@ static void omap_32k_timer_set_mode(enum clock_event_mode mode,
case CLOCK_EVT_MODE_SHUTDOWN:
omap_32k_timer_stop();
break;
+ case CLOCK_EVT_MODE_RESUME:
+ break;
}
}
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig
index 7a11b905ef4..abb582bc218 100644
--- a/arch/i386/Kconfig
+++ b/arch/i386/Kconfig
@@ -18,6 +18,10 @@ config GENERIC_TIME
bool
default y
+config GENERIC_CMOS_UPDATE
+ bool
+ default y
+
config CLOCKSOURCE_WATCHDOG
bool
default y
@@ -544,6 +548,7 @@ config HIGHMEM4G
config HIGHMEM64G
bool "64GB"
depends on !M386 && !M486
+ select X86_PAE
help
Select this if you have a 32-bit processor and more than 4
gigabytes of physical RAM.
@@ -573,12 +578,12 @@ choice
config VMSPLIT_3G
bool "3G/1G user/kernel split"
config VMSPLIT_3G_OPT
- depends on !HIGHMEM
+ depends on !X86_PAE
bool "3G/1G user/kernel split (for full 1G low memory)"
config VMSPLIT_2G
bool "2G/2G user/kernel split"
config VMSPLIT_2G_OPT
- depends on !HIGHMEM
+ depends on !X86_PAE
bool "2G/2G user/kernel split (for full 2G low memory)"
config VMSPLIT_1G
bool "1G/3G user/kernel split"
@@ -598,10 +603,15 @@ config HIGHMEM
default y
config X86_PAE
- bool
- depends on HIGHMEM64G
- default y
+ bool "PAE (Physical Address Extension) Support"
+ default n
+ depends on !HIGHMEM4G
select RESOURCES_64BIT
+ help
+ PAE is required for NX support, and furthermore enables
+ larger swapspace support for non-overcommit purposes. It
+ has the cost of more pagetable lookup overhead, and also
+ consumes more pagetable space per process.
# Common NUMA Features
config NUMA
@@ -817,6 +827,7 @@ config CRASH_DUMP
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP)
+ default "0x1000000" if X86_NUMAQ
default "0x100000"
help
This gives the physical address where the kernel is loaded.
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 0ac62cdcd3b..54ee1764fda 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -1,7 +1,7 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.22-rc2
-# Mon May 21 13:23:44 2007
+# Linux kernel version: 2.6.22-git14
+# Fri Jul 20 09:53:15 2007
#
CONFIG_X86_32=y
CONFIG_GENERIC_TIME=y
@@ -37,19 +37,18 @@ CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
-# CONFIG_IPC_NS is not set
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set
-# CONFIG_UTS_NS is not set
+# CONFIG_USER_NS is not set
# CONFIG_AUDIT is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=18
# CONFIG_CPUSETS is not set
CONFIG_SYSFS_DEPRECATED=y
-# CONFIG_RELAY is not set
+CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -73,16 +72,13 @@ CONFIG_TIMERFD=y
CONFIG_EVENTFD=y
CONFIG_SHMEM=y
CONFIG_VM_EVENT_COUNTERS=y
-CONFIG_SLAB=y
-# CONFIG_SLUB is not set
+CONFIG_SLUB_DEBUG=y
+# CONFIG_SLAB is not set
+CONFIG_SLUB=y
# CONFIG_SLOB is not set
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
-
-#
-# Loadable module support
-#
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
@@ -90,14 +86,11 @@ CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
# CONFIG_KMOD is not set
CONFIG_STOP_MACHINE=y
-
-#
-# Block layer
-#
CONFIG_BLOCK=y
CONFIG_LBD=y
# CONFIG_BLK_DEV_IO_TRACE is not set
# CONFIG_LSF is not set
+# CONFIG_BLK_DEV_BSG is not set
#
# IO Schedulers
@@ -201,6 +194,7 @@ CONFIG_X86_CPUID=y
# CONFIG_EDD is not set
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
+CONFIG_DMIID=y
# CONFIG_NOHIGHMEM is not set
CONFIG_HIGHMEM4G=y
# CONFIG_HIGHMEM64G is not set
@@ -217,7 +211,9 @@ CONFIG_FLAT_NODE_MEM_MAP=y
CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
CONFIG_NR_QUICK=1
+CONFIG_VIRT_TO_BUS=y
# CONFIG_HIGHPTE is not set
# CONFIG_MATH_EMULATION is not set
CONFIG_MTRR=y
@@ -244,7 +240,6 @@ CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
CONFIG_PM=y
CONFIG_PM_LEGACY=y
# CONFIG_PM_DEBUG is not set
-# CONFIG_PM_SYSFS_DEPRECATED is not set
#
# ACPI (Advanced Configuration and Power Interface) Support
@@ -284,7 +279,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
-# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
#
# CPUFreq processor drivers
@@ -325,7 +320,7 @@ CONFIG_PCI_MMCONFIG=y
CONFIG_ARCH_SUPPORTS_MSI=y
CONFIG_PCI_MSI=y
# CONFIG_PCI_DEBUG is not set
-CONFIG_HT_IRQ=y
+# CONFIG_HT_IRQ is not set
CONFIG_ISA_DMA_API=y
# CONFIG_ISA is not set
# CONFIG_MCA is not set
@@ -381,7 +376,7 @@ CONFIG_IP_PNP_DHCP=y
CONFIG_INET_TUNNEL=y
CONFIG_INET_XFRM_MODE_TRANSPORT=y
CONFIG_INET_XFRM_MODE_TUNNEL=y
-CONFIG_INET_XFRM_MODE_BEET=y
+# CONFIG_INET_XFRM_MODE_BEET is not set
CONFIG_INET_DIAG=y
CONFIG_INET_TCP_DIAG=y
# CONFIG_TCP_CONG_ADVANCED is not set
@@ -400,27 +395,15 @@ CONFIG_IPV6=y
# CONFIG_INET6_TUNNEL is not set
CONFIG_INET6_XFRM_MODE_TRANSPORT=y
CONFIG_INET6_XFRM_MODE_TUNNEL=y
-CONFIG_INET6_XFRM_MODE_BEET=y
+# CONFIG_INET6_XFRM_MODE_BEET is not set
# CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION is not set
CONFIG_IPV6_SIT=y
# CONFIG_IPV6_TUNNEL is not set
# CONFIG_IPV6_MULTIPLE_TABLES is not set
# CONFIG_NETWORK_SECMARK is not set
# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
@@ -457,6 +440,7 @@ CONFIG_IPV6_SIT=y
# CONFIG_MAC80211 is not set
# CONFIG_IEEE80211 is not set
# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
#
# Device Drivers
@@ -471,21 +455,9 @@ CONFIG_FW_LOADER=y
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
# CONFIG_CONNECTOR is not set
# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
CONFIG_PNP=y
# CONFIG_PNP_DEBUG is not set
@@ -493,10 +465,7 @@ CONFIG_PNP=y
# Protocols
#
CONFIG_PNPACPI=y
-
-#
-# Block devices
-#
+CONFIG_BLK_DEV=y
CONFIG_BLK_DEV_FD=y
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
@@ -514,17 +483,14 @@ CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
-
-#
-# Misc devices
-#
+CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
+# CONFIG_EEPROM_93CX6 is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_SONY_LAPTOP is not set
# CONFIG_THINKPAD_ACPI is not set
-# CONFIG_BLINK is not set
CONFIG_IDE=y
CONFIG_BLK_DEV_IDE=y
@@ -596,6 +562,7 @@ CONFIG_BLK_DEV_IDEDMA=y
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
# CONFIG_SCSI_TGT is not set
CONFIG_SCSI_NETLINK=y
# CONFIG_SCSI_PROC_FS is not set
@@ -606,8 +573,9 @@ CONFIG_SCSI_NETLINK=y
CONFIG_BLK_DEV_SD=y
# CONFIG_CHR_DEV_ST is not set
# CONFIG_CHR_DEV_OSST is not set
-# CONFIG_BLK_DEV_SR is not set
-# CONFIG_CHR_DEV_SG is not set
+CONFIG_BLK_DEV_SR=y
+# CONFIG_BLK_DEV_SR_VENDOR is not set
+CONFIG_CHR_DEV_SG=y
# CONFIG_CHR_DEV_SCH is not set
#
@@ -667,6 +635,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_STEX is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
+# CONFIG_SCSI_IPR is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
# CONFIG_SCSI_QLA_FC is not set
# CONFIG_SCSI_QLA_ISCSI is not set
@@ -675,14 +644,73 @@ CONFIG_AIC79XX_DEBUG_MASK=0
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_NSP32 is not set
# CONFIG_SCSI_DEBUG is not set
-# CONFIG_SCSI_ESP_CORE is not set
# CONFIG_SCSI_SRP is not set
-# CONFIG_ATA is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
-# CONFIG_MD is not set
+CONFIG_ATA=y
+# CONFIG_ATA_NONSTANDARD is not set
+CONFIG_ATA_ACPI=y
+CONFIG_SATA_AHCI=y
+CONFIG_SATA_SVW=y
+CONFIG_ATA_PIIX=y
+# CONFIG_SATA_MV is not set
+CONFIG_SATA_NV=y
+# CONFIG_PDC_ADMA is not set
+# CONFIG_SATA_QSTOR is not set
+# CONFIG_SATA_PROMISE is not set
+# CONFIG_SATA_SX4 is not set
+CONFIG_SATA_SIL=y
+# CONFIG_SATA_SIL24 is not set
+# CONFIG_SATA_SIS is not set
+# CONFIG_SATA_ULI is not set
+CONFIG_SATA_VIA=y
+# CONFIG_SATA_VITESSE is not set
+# CONFIG_SATA_INIC162X is not set
+# CONFIG_PATA_ALI is not set
+# CONFIG_PATA_AMD is not set
+# CONFIG_PATA_ARTOP is not set
+# CONFIG_PATA_ATIIXP is not set
+# CONFIG_PATA_CMD640_PCI is not set
+# CONFIG_PATA_CMD64X is not set
+# CONFIG_PATA_CS5520 is not set
+# CONFIG_PATA_CS5530 is not set
+# CONFIG_PATA_CS5535 is not set
+# CONFIG_PATA_CYPRESS is not set
+# CONFIG_PATA_EFAR is not set
+# CONFIG_ATA_GENERIC is not set
+# CONFIG_PATA_HPT366 is not set
+# CONFIG_PATA_HPT37X is not set
+# CONFIG_PATA_HPT3X2N is not set
+# CONFIG_PATA_HPT3X3 is not set
+# CONFIG_PATA_IT821X is not set
+# CONFIG_PATA_IT8213 is not set
+# CONFIG_PATA_JMICRON is not set
+# CONFIG_PATA_TRIFLEX is not set
+# CONFIG_PATA_MARVELL is not set
+# CONFIG_PATA_MPIIX is not set
+# CONFIG_PATA_OLDPIIX is not set
+# CONFIG_PATA_NETCELL is not set
+# CONFIG_PATA_NS87410 is not set
+# CONFIG_PATA_OPTI is not set
+# CONFIG_PATA_OPTIDMA is not set
+# CONFIG_PATA_PDC_OLD is not set
+# CONFIG_PATA_RADISYS is not set
+# CONFIG_PATA_RZ1000 is not set
+# CONFIG_PATA_SC1200 is not set
+# CONFIG_PATA_SERVERWORKS is not set
+# CONFIG_PATA_PDC2027X is not set
+# CONFIG_PATA_SIL680 is not set
+# CONFIG_PATA_SIS is not set
+# CONFIG_PATA_VIA is not set
+# CONFIG_PATA_WINBOND is not set
+CONFIG_MD=y
+# CONFIG_BLK_DEV_MD is not set
+CONFIG_BLK_DEV_DM=y
+# CONFIG_DM_DEBUG is not set
+# CONFIG_DM_CRYPT is not set
+# CONFIG_DM_SNAPSHOT is not set
+# CONFIG_DM_MIRROR is not set
+# CONFIG_DM_ZERO is not set
+# CONFIG_DM_MULTIPATH is not set
+# CONFIG_DM_DELAY is not set
#
# Fusion MPT device support
@@ -723,42 +751,27 @@ CONFIG_IEEE1394_OHCI1394=y
# CONFIG_IEEE1394_ETH1394 is not set
# CONFIG_IEEE1394_DV1394 is not set
CONFIG_IEEE1394_RAWIO=y
-
-#
-# I2O device support
-#
# CONFIG_I2O is not set
-# CONFIG_MACINTOSH_DRIVERS is not set
-
-#
-# Network device support
-#
+CONFIG_MACINTOSH_DRIVERS=y
+# CONFIG_MAC_EMUMOUSEBTN is not set
CONFIG_NETDEVICES=y
+CONFIG_NETDEVICES_MULTIQUEUE=y
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
# CONFIG_EQUALIZER is not set
# CONFIG_TUN is not set
# CONFIG_NET_SB1000 is not set
-
-#
-# ARCnet devices
-#
# CONFIG_ARCNET is not set
# CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
# CONFIG_SUNGEM is not set
# CONFIG_CASSINI is not set
-# CONFIG_NET_VENDOR_3COM is not set
-
-#
-# Tulip family network device support
-#
+CONFIG_NET_VENDOR_3COM=y
+CONFIG_VORTEX=y
+# CONFIG_TYPHOON is not set
CONFIG_NET_TULIP=y
# CONFIG_DE2104X is not set
CONFIG_TULIP=y
@@ -809,7 +822,6 @@ CONFIG_R8169=y
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
CONFIG_SKY2=y
-# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
CONFIG_BNX2=y
@@ -823,10 +835,6 @@ CONFIG_NETDEV_10000=y
# CONFIG_MYRI10GE is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_MLX4_CORE is not set
-
-#
-# Token Ring devices
-#
# CONFIG_TR is not set
#
@@ -855,15 +863,7 @@ CONFIG_NETCONSOLE=y
CONFIG_NETPOLL=y
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
-
-#
-# ISDN subsystem
-#
# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
# CONFIG_PHONE is not set
#
@@ -871,6 +871,7 @@ CONFIG_NET_POLL_CONTROLLER=y
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
#
# Userland interfaces
@@ -936,6 +937,7 @@ CONFIG_HW_CONSOLE=y
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_FIX_EARLYCON_MEM=y
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_PNP=y
CONFIG_SERIAL_8250_NR_UARTS=4
@@ -951,10 +953,6 @@ CONFIG_SERIAL_CORE_CONSOLE=y
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
# CONFIG_IPMI_HANDLER is not set
# CONFIG_WATCHDOG is not set
CONFIG_HW_RANDOM=y
@@ -988,11 +986,7 @@ CONFIG_MAX_RAW_DEVS=256
CONFIG_HPET=y
# CONFIG_HPET_RTC_IRQ is not set
CONFIG_HPET_MMAP=y
-CONFIG_HANGCHECK_TIMER=y
-
-#
-# TPM devices
-#
+# CONFIG_HANGCHECK_TIMER is not set
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
@@ -1003,11 +997,8 @@ CONFIG_DEVPORT=y
#
# CONFIG_SPI is not set
# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
# CONFIG_W1 is not set
+# CONFIG_POWER_SUPPLY is not set
# CONFIG_HWMON is not set
#
@@ -1041,7 +1032,7 @@ CONFIG_DAB=y
CONFIG_VGA_CONSOLE=y
CONFIG_VGACON_SOFT_SCROLLBACK=y
CONFIG_VGACON_SOFT_SCROLLBACK_SIZE=128
-# CONFIG_VIDEO_SELECT is not set
+CONFIG_VIDEO_SELECT=y
CONFIG_DUMMY_CONSOLE=y
#
@@ -1058,15 +1049,11 @@ CONFIG_SOUND=y
# Open Sound System
#
CONFIG_SOUND_PRIME=y
-# CONFIG_OSS_OBSOLETE is not set
# CONFIG_SOUND_TRIDENT is not set
# CONFIG_SOUND_MSNDCLAS is not set
# CONFIG_SOUND_MSNDPIN is not set
# CONFIG_SOUND_OSS is not set
-
-#
-# HID Devices
-#
+CONFIG_HID_SUPPORT=y
CONFIG_HID=y
# CONFIG_HID_DEBUG is not set
@@ -1077,10 +1064,7 @@ CONFIG_USB_HID=y
# CONFIG_USB_HIDINPUT_POWERBOOK is not set
# CONFIG_HID_FF is not set
# CONFIG_USB_HIDDEV is not set
-
-#
-# USB support
-#
+CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB_ARCH_HAS_EHCI=y
@@ -1094,6 +1078,7 @@ CONFIG_USB_DEVICEFS=y
# CONFIG_USB_DEVICE_CLASS is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_PERSIST is not set
# CONFIG_USB_OTG is not set
#
@@ -1103,7 +1088,6 @@ CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_SPLIT_ISO is not set
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
-# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set
# CONFIG_USB_ISP116X_HCD is not set
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
@@ -1111,6 +1095,7 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
+# CONFIG_USB_R8A66597_HCD is not set
#
# USB Device Class drivers
@@ -1201,15 +1186,7 @@ CONFIG_USB_MON=y
#
# LED Triggers
#
-
-#
-# InfiniBand support
-#
# CONFIG_INFINIBAND is not set
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
# CONFIG_EDAC is not set
#
@@ -1229,11 +1206,13 @@ CONFIG_USB_MON=y
#
# DMA Devices
#
+CONFIG_VIRTUALIZATION=y
+# CONFIG_KVM is not set
#
-# Virtualization
+# Userspace I/O
#
-# CONFIG_KVM is not set
+# CONFIG_UIO is not set
#
# File systems
@@ -1271,6 +1250,7 @@ CONFIG_DNOTIFY=y
# CONFIG_AUTOFS_FS is not set
CONFIG_AUTOFS4_FS=y
# CONFIG_FUSE_FS is not set
+CONFIG_GENERIC_ACL=y
#
# CD-ROM/DVD Filesystems
@@ -1298,7 +1278,7 @@ CONFIG_PROC_KCORE=y
CONFIG_PROC_SYSCTL=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
-# CONFIG_TMPFS_POSIX_ACL is not set
+CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_HUGETLB_PAGE=y
CONFIG_RAMFS=y
@@ -1348,7 +1328,6 @@ CONFIG_SUNRPC=y
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
#
# Partition Types
@@ -1404,10 +1383,7 @@ CONFIG_NLS_UTF8=y
# Distributed Lock Manager
#
# CONFIG_DLM is not set
-
-#
-# Instrumentation Support
-#
+CONFIG_INSTRUMENTATION=y
CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_KPROBES=y
@@ -1417,7 +1393,7 @@ CONFIG_KPROBES=y
#
CONFIG_TRACE_IRQFLAGS_SUPPORT=y
# CONFIG_PRINTK_TIME is not set
-CONFIG_ENABLE_MUST_CHECK=y
+# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_MAGIC_SYSRQ=y
CONFIG_UNUSED_SYMBOLS=y
# CONFIG_DEBUG_FS is not set
@@ -1425,15 +1401,17 @@ CONFIG_UNUSED_SYMBOLS=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_SHIRQ is not set
CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHED_DEBUG is not set
# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
-# CONFIG_DEBUG_SLAB is not set
+CONFIG_TIMER_STATS=y
+# CONFIG_SLUB_DEBUG_ON is not set
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_RT_MUTEX_TESTER is not set
# CONFIG_DEBUG_SPINLOCK is not set
# CONFIG_DEBUG_MUTEXES is not set
# CONFIG_DEBUG_LOCK_ALLOC is not set
# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
# CONFIG_DEBUG_KOBJECT is not set
@@ -1443,7 +1421,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
-# CONFIG_UNWIND_INFO is not set
# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
@@ -1462,10 +1439,6 @@ CONFIG_DOUBLEFAULT=y
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
# CONFIG_CRYPTO is not set
#
@@ -1476,6 +1449,7 @@ CONFIG_BITREVERSE=y
# CONFIG_CRC16 is not set
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
CONFIG_ZLIB_INFLATE=y
CONFIG_PLIST=y
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 06da59f6f83..dbe5e87e0d6 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_VM86) += vm86.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_K8_NB) += k8.o
+obj-$(CONFIG_MGEODE_LX) += geode.o
obj-$(CONFIG_VMI) += vmi.o vmiclock.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index a574cd2c8b6..b87cedeaf59 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -618,6 +618,8 @@ static int __init acpi_parse_sbf(struct acpi_table_header *table)
#ifdef CONFIG_HPET_TIMER
#include <asm/hpet.h>
+static struct __initdata resource *hpet_res;
+
static int __init acpi_parse_hpet(struct acpi_table_header *table)
{
struct acpi_table_hpet *hpet_tbl;
@@ -638,8 +640,42 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table)
printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
hpet_tbl->id, hpet_address);
+ /*
+ * Allocate and initialize the HPET firmware resource for adding into
+ * the resource tree during the lateinit timeframe.
+ */
+#define HPET_RESOURCE_NAME_SIZE 9
+ hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
+
+ if (!hpet_res)
+ return 0;
+
+ memset(hpet_res, 0, sizeof(*hpet_res));
+ hpet_res->name = (void *)&hpet_res[1];
+ hpet_res->flags = IORESOURCE_MEM;
+ snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u",
+ hpet_tbl->sequence);
+
+ hpet_res->start = hpet_address;
+ hpet_res->end = hpet_address + (1 * 1024) - 1;
+
return 0;
}
+
+/*
+ * hpet_insert_resource inserts the HPET resources used into the resource
+ * tree.
+ */
+static __init int hpet_insert_resource(void)
+{
+ if (!hpet_res)
+ return 1;
+
+ return insert_resource(&iomem_resource, hpet_res);
+}
+
+late_initcall(hpet_insert_resource);
+
#else
#define acpi_parse_hpet NULL
#endif
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index d8cda14fff8..0695be538de 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -5,9 +5,8 @@
#include <asm/alternative.h>
#include <asm/sections.h>
-static int noreplace_smp = 0;
-static int smp_alt_once = 0;
-static int debug_alternative = 0;
+#ifdef CONFIG_HOTPLUG_CPU
+static int smp_alt_once;
static int __init bootonly(char *str)
{
@@ -15,6 +14,11 @@ static int __init bootonly(char *str)
return 1;
}
__setup("smp-alt-boot", bootonly);
+#else
+#define smp_alt_once 1
+#endif
+
+static int debug_alternative;
static int __init debug_alt(char *str)
{
@@ -23,6 +27,8 @@ static int __init debug_alt(char *str)
}
__setup("debug-alternative", debug_alt);
+static int noreplace_smp;
+
static int __init setup_noreplace_smp(char *str)
{
noreplace_smp = 1;
@@ -376,8 +382,6 @@ void __init alternative_instructions(void)
#ifdef CONFIG_HOTPLUG_CPU
if (num_possible_cpus() < 2)
smp_alt_once = 1;
-#else
- smp_alt_once = 1;
#endif
#ifdef CONFIG_SMP
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 67824f3bb97..bfc6cb7df7e 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -263,6 +263,9 @@ static void lapic_timer_setup(enum clock_event_mode mode,
v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
apic_write_around(APIC_LVTT, v);
break;
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here */
+ break;
}
local_irq_restore(flags);
@@ -315,7 +318,7 @@ static void __devinit setup_APIC_timer(void)
#define LAPIC_CAL_LOOPS (HZ/10)
-static __initdata volatile int lapic_cal_loops = -1;
+static __initdata int lapic_cal_loops = -1;
static __initdata long lapic_cal_t1, lapic_cal_t2;
static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2;
static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2;
@@ -485,7 +488,7 @@ void __init setup_boot_APIC_clock(void)
/* Let the interrupts run */
local_irq_enable();
- while(lapic_cal_loops <= LAPIC_CAL_LOOPS)
+ while (lapic_cal_loops <= LAPIC_CAL_LOOPS)
cpu_relax();
local_irq_disable();
@@ -521,6 +524,9 @@ void __init setup_boot_APIC_clock(void)
*/
if (nmi_watchdog != NMI_IO_APIC)
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
+ else
+ printk(KERN_WARNING "APIC timer registered as dummy,"
+ " due to nmi_watchdog=1!\n");
}
/* Setup the lapic or request the broadcast */
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile
index 0b6a8551e9e..778396c78d6 100644
--- a/arch/i386/kernel/cpu/Makefile
+++ b/arch/i386/kernel/cpu/Makefile
@@ -9,7 +9,6 @@ obj-y += cyrix.o
obj-y += centaur.o
obj-y += transmeta.o
obj-y += intel.o intel_cacheinfo.o addon_cpuid_features.o
-obj-y += rise.o
obj-y += nexgen.o
obj-y += umc.o
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index 6f47eeeb93e..815a5f0aa47 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -272,8 +272,12 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
}
#endif
- if (cpuid_eax(0x80000000) >= 0x80000006)
- num_cache_leaves = 3;
+ if (cpuid_eax(0x80000000) >= 0x80000006) {
+ if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000))
+ num_cache_leaves = 4;
+ else
+ num_cache_leaves = 3;
+ }
if (amd_apic_timer_broken())
set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability);
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index e5419a9dec8..d506201d397 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -606,7 +606,6 @@ extern int nsc_init_cpu(void);
extern int amd_init_cpu(void);
extern int centaur_init_cpu(void);
extern int transmeta_init_cpu(void);
-extern int rise_init_cpu(void);
extern int nexgen_init_cpu(void);
extern int umc_init_cpu(void);
@@ -618,7 +617,6 @@ void __init early_cpu_init(void)
amd_init_cpu();
centaur_init_cpu();
transmeta_init_cpu();
- rise_init_cpu();
nexgen_init_cpu();
umc_init_cpu();
early_cpu_detect();
diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c
index e5be819492e..d5a456d27d8 100644
--- a/arch/i386/kernel/cpu/intel_cacheinfo.c
+++ b/arch/i386/kernel/cpu/intel_cacheinfo.c
@@ -4,7 +4,7 @@
* Changes:
* Venkatesh Pallipadi : Adding cache identification through cpuid(4)
* Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
- * Andi Kleen : CPUID4 emulation on AMD.
+ * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
*/
#include <linux/init.h>
@@ -135,7 +135,7 @@ unsigned short num_cache_leaves;
/* AMD doesn't have CPUID4. Emulate it here to report the same
information to the user. This makes some assumptions about the machine:
- No L3, L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+ L2 not shared, no SMT etc. that is currently true on AMD CPUs.
In theory the TLBs could be reported as fake type (they are in "dummy").
Maybe later */
@@ -159,13 +159,26 @@ union l2_cache {
unsigned val;
};
+union l3_cache {
+ struct {
+ unsigned line_size : 8;
+ unsigned lines_per_tag : 4;
+ unsigned assoc : 4;
+ unsigned res : 2;
+ unsigned size_encoded : 14;
+ };
+ unsigned val;
+};
+
static const unsigned short assocs[] = {
[1] = 1, [2] = 2, [4] = 4, [6] = 8,
- [8] = 16,
+ [8] = 16, [0xa] = 32, [0xb] = 48,
+ [0xc] = 64,
[0xf] = 0xffff // ??
- };
-static const unsigned char levels[] = { 1, 1, 2 };
-static const unsigned char types[] = { 1, 2, 3 };
+};
+
+static const unsigned char levels[] = { 1, 1, 2, 3 };
+static const unsigned char types[] = { 1, 2, 3, 3 };
static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
union _cpuid4_leaf_ebx *ebx,
@@ -175,37 +188,58 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
unsigned line_size, lines_per_tag, assoc, size_in_kb;
union l1_cache l1i, l1d;
union l2_cache l2;
+ union l3_cache l3;
+ union l1_cache *l1 = &l1d;
eax->full = 0;
ebx->full = 0;
ecx->full = 0;
cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
- cpuid(0x80000006, &dummy, &dummy, &l2.val, &dummy);
-
- if (leaf > 2 || !l1d.val || !l1i.val || !l2.val)
- return;
-
- eax->split.is_self_initializing = 1;
- eax->split.type = types[leaf];
- eax->split.level = levels[leaf];
- eax->split.num_threads_sharing = 0;
- eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
-
- if (leaf <= 1) {
- union l1_cache *l1 = leaf == 0 ? &l1d : &l1i;
+ cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
+
+ switch (leaf) {
+ case 1:
+ l1 = &l1i;
+ case 0:
+ if (!l1->val)
+ return;
assoc = l1->assoc;
line_size = l1->line_size;
lines_per_tag = l1->lines_per_tag;
size_in_kb = l1->size_in_kb;
- } else {
+ break;
+ case 2:
+ if (!l2.val)
+ return;
assoc = l2.assoc;
line_size = l2.line_size;
lines_per_tag = l2.lines_per_tag;
/* cpu_data has errata corrections for K7 applied */
size_in_kb = current_cpu_data.x86_cache_size;
+ break;
+ case 3:
+ if (!l3.val)
+ return;
+ assoc = l3.assoc;
+ line_size = l3.line_size;
+ lines_per_tag = l3.lines_per_tag;
+ size_in_kb = l3.size_encoded * 512;
+ break;
+ default:
+ return;
}
+ eax->split.is_self_initializing = 1;
+ eax->split.type = types[leaf];
+ eax->split.level = levels[leaf];
+ if (leaf == 3)
+ eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1;
+ else
+ eax->split.num_threads_sharing = 0;
+ eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
+
+
if (assoc == 0xf)
eax->split.is_fully_associative = 1;
ebx->split.coherency_line_size = line_size - 1;
@@ -239,8 +273,7 @@ static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_le
return 0;
}
-/* will only be called once; __init is safe here */
-static int __init find_num_cache_leaves(void)
+static int __cpuinit find_num_cache_leaves(void)
{
unsigned int eax, ebx, ecx, edx;
union _cpuid4_leaf_eax cache_eax;
@@ -710,7 +743,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
return retval;
}
-static void __cpuexit cache_remove_dev(struct sys_device * sys_dev)
+static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
unsigned int cpu = sys_dev->id;
unsigned long i;
diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c
index 6b5d3518a1c..bf39409b383 100644
--- a/arch/i386/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c
@@ -57,7 +57,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
static void mce_work_fn(struct work_struct *work)
{
on_each_cpu(mce_checkregs, NULL, 1, 1);
- schedule_delayed_work(&mce_work, MCE_RATE);
+ schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
}
static int __init init_nonfatal_mce_checker(void)
@@ -82,7 +82,7 @@ static int __init init_nonfatal_mce_checker(void)
/*
* Check for non-fatal errors every MCE_RATE s
*/
- schedule_delayed_work(&mce_work, MCE_RATE);
+ schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
printk(KERN_INFO "Machine check exception polling timer started.\n");
return 0;
}
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c
index f6e46943e6e..56f64e34829 100644
--- a/arch/i386/kernel/cpu/mtrr/generic.c
+++ b/arch/i386/kernel/cpu/mtrr/generic.c
@@ -79,7 +79,7 @@ static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
}
/* Grab all of the MTRR state for this CPU into *state */
-void get_mtrr_state(void)
+void __init get_mtrr_state(void)
{
unsigned int i;
struct mtrr_var_range *vrs;
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c
index 75dc6d5214b..c48b6fea5ab 100644
--- a/arch/i386/kernel/cpu/mtrr/main.c
+++ b/arch/i386/kernel/cpu/mtrr/main.c
@@ -643,7 +643,7 @@ static struct sysdev_driver mtrr_sysdev_driver = {
* initialized (i.e. before smp_init()).
*
*/
-__init void mtrr_bp_init(void)
+void __init mtrr_bp_init(void)
{
init_ifs();
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c
index 4d26d514c56..30b5e48aa76 100644
--- a/arch/i386/kernel/cpu/perfctr-watchdog.c
+++ b/arch/i386/kernel/cpu/perfctr-watchdog.c
@@ -599,8 +599,8 @@ static struct wd_ops intel_arch_wd_ops = {
.setup = setup_intel_arch_watchdog,
.rearm = p6_rearm,
.stop = single_msr_stop_watchdog,
- .perfctr = MSR_ARCH_PERFMON_PERFCTR0,
- .evntsel = MSR_ARCH_PERFMON_EVENTSEL0,
+ .perfctr = MSR_ARCH_PERFMON_PERFCTR1,
+ .evntsel = MSR_ARCH_PERFMON_EVENTSEL1,
};
static void probe_nmi_watchdog(void)
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
deleted file mode 100644
index 50076f22e90..00000000000
--- a/arch/i386/kernel/cpu/rise.c
+++ /dev/null
@@ -1,52 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/bitops.h>
-#include <asm/processor.h>
-
-#include "cpu.h"
-
-static void __cpuinit init_rise(struct cpuinfo_x86 *c)
-{
- printk("CPU: Rise iDragon");
- if (c->x86_model > 2)
- printk(" II");
- printk("\n");
-
- /* Unhide possibly hidden capability flags
- The mp6 iDragon family don't have MSRs.
- We switch on extra features with this cpuid weirdness: */
- __asm__ (
- "movl $0x6363452a, %%eax\n\t"
- "movl $0x3231206c, %%ecx\n\t"
- "movl $0x2a32313a, %%edx\n\t"
- "cpuid\n\t"
- "movl $0x63634523, %%eax\n\t"
- "movl $0x32315f6c, %%ecx\n\t"
- "movl $0x2333313a, %%edx\n\t"
- "cpuid\n\t" : : : "eax", "ebx", "ecx", "edx"
- );
- set_bit(X86_FEATURE_CX8, c->x86_capability);
-}
-
-static struct cpu_dev rise_cpu_dev __cpuinitdata = {
- .c_vendor = "Rise",
- .c_ident = { "RiseRiseRise" },
- .c_models = {
- { .vendor = X86_VENDOR_RISE, .family = 5, .model_names =
- {
- [0] = "iDragon",
- [2] = "iDragon",
- [8] = "iDragon II",
- [9] = "iDragon II"
- }
- },
- },
- .c_init = init_rise,
-};
-
-int __init rise_init_cpu(void)
-{
- cpu_devs[X86_VENDOR_RISE] = &rise_cpu_dev;
- return 0;
-}
-
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c
index fc822a46897..e60cddbc4cf 100644
--- a/arch/i386/kernel/e820.c
+++ b/arch/i386/kernel/e820.c
@@ -10,6 +10,7 @@
#include <linux/efi.h>
#include <linux/pfn.h>
#include <linux/uaccess.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/page.h>
@@ -320,6 +321,37 @@ static int __init request_standard_resources(void)
subsys_initcall(request_standard_resources);
+#if defined(CONFIG_PM) && defined(CONFIG_SOFTWARE_SUSPEND)
+/**
+ * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
+ * correspond to e820 RAM areas and mark the corresponding pages as nosave for
+ * hibernation.
+ *
+ * This function requires the e820 map to be sorted and without any
+ * overlapping entries and assumes the first e820 area to be RAM.
+ */
+void __init e820_mark_nosave_regions(void)
+{
+ int i;
+ unsigned long pfn; /* page frame where the previously visited entry ends (rounded down) */
+
+ pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size); /* entry 0 is only used for its end; it is never registered */
+ for (i = 1; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ if (pfn < PFN_UP(ei->addr)) /* hole between the previous entry and this one */
+ register_nosave_region(pfn, PFN_UP(ei->addr));
+
+ pfn = PFN_DOWN(ei->addr + ei->size);
+ if (ei->type != E820_RAM) /* non-RAM entry: register the frames it covers */
+ register_nosave_region(PFN_UP(ei->addr), pfn);
+
+ if (pfn >= max_low_pfn) /* nothing at or beyond max_low_pfn is examined */
+ break;
+ }
+}
+#endif
+
void __init add_memory_region(unsigned long long start,
unsigned long long size, int type)
{
diff --git a/arch/i386/kernel/geode.c b/arch/i386/kernel/geode.c
new file mode 100644
index 00000000000..41e8aec4c61
--- /dev/null
+++ b/arch/i386/kernel/geode.c
@@ -0,0 +1,155 @@
+/*
+ * AMD Geode southbridge support code
+ * Copyright (C) 2006, Advanced Micro Devices, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public License
+ * as published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/io.h>
+#include <asm/msr.h>
+#include <asm/geode.h>
+
+static struct { /* one entry per LBAR; geode_get_dev_base() indexes this table by its dev argument */
+ char *name; /* label printed by init_lbars() when no base could be obtained */
+ u32 msr; /* MSR that init_lbars() reads for this entry */
+ int size;
+ u32 base; /* filled in by init_lbars(); stays 0 when the MSR gave no usable value */
+} lbars[] = {
+ { "geode-pms", MSR_LBAR_PMS, LBAR_PMS_SIZE, 0 },
+ { "geode-acpi", MSR_LBAR_ACPI, LBAR_ACPI_SIZE, 0 },
+ { "geode-gpio", MSR_LBAR_GPIO, LBAR_GPIO_SIZE, 0 },
+ { "geode-mfgpt", MSR_LBAR_MFGPT, LBAR_MFGPT_SIZE, 0 }
+};
+
+static void __init init_lbars(void) /* read every LBAR MSR once and cache the base in lbars[] */
+{
+ u32 lo, hi;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(lbars); i++) {
+ rdmsr(lbars[i].msr, lo, hi);
+ if (hi & 0x01) /* presumably the LBAR enable bit - TODO confirm against the data book */
+ lbars[i].base = lo & 0x0000ffff; /* only the low 16 bits are kept as the base */
+
+ if (lbars[i].base == 0) /* base still 0: report it, the entry stays unusable */
+ printk(KERN_ERR "geode: Couldn't initialize '%s'\n",
+ lbars[i].name);
+ }
+}
+
+int geode_get_dev_base(unsigned int dev) /* return the cached base of lbars[dev]; 0 means init_lbars() stored none */
+{
+ BUG_ON(dev >= ARRAY_SIZE(lbars)); /* an out-of-range index is treated as a caller bug */
+ return lbars[dev].base;
+}
+EXPORT_SYMBOL_GPL(geode_get_dev_base);
+
+/* === GPIO API === */
+
+void geode_gpio_set(unsigned int gpio, unsigned int reg) /* write the bit for gpio to register offset reg of the GPIO block */
+{
+ u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+
+ if (!base) /* no GPIO base known: silently do nothing */
+ return;
+
+ if (gpio < 16)
+ outl(1 << gpio, base + reg); /* GPIOs 0-15: bit number equals the GPIO number */
+ else
+ outl(1 << (gpio - 16), base + 0x80 + reg); /* 16 and up: same bit layout, register 0x80 higher */
+}
+EXPORT_SYMBOL_GPL(geode_gpio_set);
+
+/*
+ * geode_gpio_clear - write the "clear" bit of a GPIO to register @reg
+ * @gpio: GPIO number; 0-15 use the register at base + @reg, 16 and up the
+ *        one 0x80 higher
+ * @reg:  register offset inside the GPIO block
+ *
+ * The bit written here sits 16 positions above the bit geode_gpio_set()
+ * writes for the same GPIO.  Does nothing when the GPIO block has no base
+ * address.
+ */
+void geode_gpio_clear(unsigned int gpio, unsigned int reg)
+{
+	u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+
+	if (!base)
+		return;
+
+	/*
+	 * Shift an unsigned 1: gpio 15 and gpio 31 both select bit 31, which
+	 * is not representable when shifting a signed int.
+	 */
+	if (gpio < 16)
+		outl(1U << (gpio + 16), base + reg);
+	else
+		outl(1U << gpio, base + 0x80 + reg);
+}
+EXPORT_SYMBOL_GPL(geode_gpio_clear);
+
+int geode_gpio_isset(unsigned int gpio, unsigned int reg) /* return 1 if the bit for gpio reads back set in register reg, else 0 */
+{
+ u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+
+ if (!base) /* no GPIO base known: report "not set" */
+ return 0;
+
+ if (gpio < 16)
+ return (inl(base + reg) & (1 << gpio)) ? 1 : 0; /* GPIOs 0-15: bit number equals the GPIO number */
+ else
+ return (inl(base + 0x80 + reg) & (1 << (gpio - 16))) ? 1 : 0; /* 16 and up: register 0x80 higher, bit gpio - 16 */
+}
+EXPORT_SYMBOL_GPL(geode_gpio_isset);
+
+/*
+ * geode_gpio_set_irq - store @irq in the nibble that belongs to @group
+ * @group: group number, 0-7; group N owns bits 4N..4N+3 of the low word
+ * @irq:   IRQ number, 0-15
+ *
+ * Read-modify-writes the low 32 bits of MSR_PIC_ZSEL_HIGH; the high 32 bits
+ * are written back as read.  Out-of-range arguments are silently ignored.
+ */
+void geode_gpio_set_irq(unsigned int group, unsigned int irq)
+{
+	u32 lo, hi;
+
+	if (group > 7 || irq > 15)
+		return;
+
+	rdmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
+
+	/*
+	 * Unsigned mask: for group 7 the nibble reaches bit 31, which a
+	 * signed 0xF cannot be shifted into without overflowing.
+	 */
+	lo &= ~(0xFU << (group * 4));
+	lo |= (irq & 0xF) << (group * 4);
+
+	wrmsr(MSR_PIC_ZSEL_HIGH, lo, hi);
+}
+EXPORT_SYMBOL_GPL(geode_gpio_set_irq);
+
+/*
+ * geode_gpio_setup_event - program the event-mapper nibble of a GPIO
+ * @gpio: GPIO number; selects GPIO_MAP_X (0-7), GPIO_MAP_Y (8-15),
+ *        GPIO_MAP_Z (16-23) or GPIO_MAP_W (24 and up), and the nibble
+ *        (gpio % 8) inside that register
+ * @pair: value for the low three bits of the nibble
+ * @pme:  non-zero sets bit 3 of the nibble (the PME bit)
+ *
+ * Read-modify-writes one mapper register.  Does nothing when the GPIO block
+ * has no base address, matching the other GPIO helpers; without that guard
+ * the access would hit the absolute port numbered GPIO_MAP_*.
+ */
+void geode_gpio_setup_event(unsigned int gpio, int pair, int pme)
+{
+	u32 base = geode_get_dev_base(GEODE_DEV_GPIO);
+	u32 offset, shift, val;
+
+	if (!base)
+		return;
+
+	if (gpio >= 24)
+		offset = GPIO_MAP_W;
+	else if (gpio >= 16)
+		offset = GPIO_MAP_Z;
+	else if (gpio >= 8)
+		offset = GPIO_MAP_Y;
+	else
+		offset = GPIO_MAP_X;
+
+	shift = (gpio % 8) * 4;
+
+	val = inl(base + offset);
+
+	/* Clear whatever was there before (unsigned: shift can be 28) */
+	val &= ~(0xFU << shift);
+
+	/* And set the new value */
+	val |= ((u32)(pair & 7) << shift);
+
+	/* Set the PME bit if this is a PME event (bit 31 when shift is 28) */
+	if (pme)
+		val |= (1U << (shift + 3));
+
+	outl(val, base + offset);
+}
+EXPORT_SYMBOL_GPL(geode_gpio_setup_event);
+
+static int __init geode_southbridge_init(void) /* initcall: cache the LBAR bases when is_geode() says so */
+{
+ if (!is_geode()) /* not a Geode: leave lbars[] untouched */
+ return -ENODEV;
+
+ init_lbars();
+ return 0;
+}
+
+postcore_initcall(geode_southbridge_init);
diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c
index 17d73459fc5..533d4932bc7 100644
--- a/arch/i386/kernel/hpet.c
+++ b/arch/i386/kernel/hpet.c
@@ -5,6 +5,7 @@
#include <linux/init.h>
#include <linux/sysdev.h>
#include <linux/pm.h>
+#include <linux/delay.h>
#include <asm/hpet.h>
#include <asm/io.h>
@@ -187,6 +188,10 @@ static void hpet_set_mode(enum clock_event_mode mode,
cfg &= ~HPET_TN_ENABLE;
hpet_writel(cfg, HPET_T0_CFG);
break;
+
+ case CLOCK_EVT_MODE_RESUME:
+ hpet_enable_int();
+ break;
}
}
@@ -217,6 +222,7 @@ static struct clocksource clocksource_hpet = {
.mask = HPET_MASK,
.shift = HPET_SHIFT,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
+ .resume = hpet_start_counter,
};
/*
@@ -226,7 +232,8 @@ int __init hpet_enable(void)
{
unsigned long id;
uint64_t hpet_freq;
- u64 tmp;
+ u64 tmp, start, now;
+ cycle_t t1;
if (!is_hpet_capable())
return 0;
@@ -273,6 +280,27 @@ int __init hpet_enable(void)
/* Start the counter */
hpet_start_counter();
+ /* Verify whether hpet counter works */
+ t1 = read_hpet();
+ rdtscll(start);
+
+ /*
+ * We don't know the TSC frequency yet, but waiting for
+ * 200000 TSC cycles is safe:
+ * 4 GHz == 50us
+ * 1 GHz == 200us
+ */
+ do {
+ rep_nop();
+ rdtscll(now);
+ } while ((now - start) < 200000UL);
+
+ if (t1 == read_hpet()) {
+ printk(KERN_WARNING
+ "HPET counter not counting. HPET disabled\n");
+ goto out_nohpet;
+ }
+
/* Initialize and register HPET clocksource
*
* hpet period is in femto seconds per cycle
@@ -291,7 +319,6 @@ int __init hpet_enable(void)
clocksource_register(&clocksource_hpet);
-
if (id & HPET_ID_LEGSUP) {
hpet_enable_int();
hpet_reserve_platform_timers(id);
@@ -299,7 +326,7 @@ int __init hpet_enable(void)
* Start hpet with the boot cpu mask and make it
* global after the IO_APIC has been initialized.
*/
- hpet_clockevent.cpumask =cpumask_of_cpu(0);
+ hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id());
clockevents_register_device(&hpet_clockevent);
global_clock_event = &hpet_clockevent;
return 1;
@@ -524,68 +551,3 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
#endif
-
-
-/*
- * Suspend/resume part
- */
-
-#ifdef CONFIG_PM
-
-static int hpet_suspend(struct sys_device *sys_device, pm_message_t state)
-{
- unsigned long cfg = hpet_readl(HPET_CFG);
-
- cfg &= ~(HPET_CFG_ENABLE|HPET_CFG_LEGACY);
- hpet_writel(cfg, HPET_CFG);
-
- return 0;
-}
-
-static int hpet_resume(struct sys_device *sys_device)
-{
- unsigned int id;
-
- hpet_start_counter();
-
- id = hpet_readl(HPET_ID);
-
- if (id & HPET_ID_LEGSUP)
- hpet_enable_int();
-
- return 0;
-}
-
-static struct sysdev_class hpet_class = {
- set_kset_name("hpet"),
- .suspend = hpet_suspend,
- .resume = hpet_resume,
-};
-
-static struct sys_device hpet_device = {
- .id = 0,
- .cls = &hpet_class,
-};
-
-
-static __init int hpet_register_sysfs(void)
-{
- int err;
-
- if (!is_hpet_capable())
- return 0;
-
- err = sysdev_class_register(&hpet_class);
-
- if (!err) {
- err = sysdev_register(&hpet_device);
- if (err)
- sysdev_class_unregister(&hpet_class);
- }
-
- return err;
-}
-
-device_initcall(hpet_register_sysfs);
-
-#endif
diff --git a/arch/i386/kernel/i8253.c b/arch/i386/kernel/i8253.c
index f8a3c4054c7..6d839f2f1b1 100644
--- a/arch/i386/kernel/i8253.c
+++ b/arch/i386/kernel/i8253.c
@@ -3,18 +3,17 @@
*
*/
#include <linux/clockchips.h>
-#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/jiffies.h>
-#include <linux/sysdev.h>
#include <linux/module.h>
-#include <linux/init.h>
+#include <linux/spinlock.h>
#include <asm/smp.h>
#include <asm/delay.h>
#include <asm/i8253.h>
#include <asm/io.h>
-
-#include "io_ports.h"
+#include <asm/timer.h>
DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
@@ -41,26 +40,27 @@ static void init_pit_timer(enum clock_event_mode mode,
case CLOCK_EVT_MODE_PERIODIC:
/* binary, mode 2, LSB/MSB, ch 0 */
outb_p(0x34, PIT_MODE);
- udelay(10);
outb_p(LATCH & 0xff , PIT_CH0); /* LSB */
- udelay(10);
outb(LATCH >> 8 , PIT_CH0); /* MSB */
break;
- /*
- * Avoid unnecessary state transitions, as it confuses
- * Geode / Cyrix based boxen.
- */
case CLOCK_EVT_MODE_SHUTDOWN:
- if (evt->mode == CLOCK_EVT_MODE_UNUSED)
- break;
case CLOCK_EVT_MODE_UNUSED:
- if (evt->mode == CLOCK_EVT_MODE_SHUTDOWN)
- break;
+ if (evt->mode == CLOCK_EVT_MODE_PERIODIC ||
+ evt->mode == CLOCK_EVT_MODE_ONESHOT) {
+ outb_p(0x30, PIT_MODE);
+ outb_p(0, PIT_CH0);
+ outb_p(0, PIT_CH0);
+ }
+ break;
+
case CLOCK_EVT_MODE_ONESHOT:
/* One shot setup */
outb_p(0x38, PIT_MODE);
- udelay(10);
+ break;
+
+ case CLOCK_EVT_MODE_RESUME:
+ /* Nothing to do here */
break;
}
spin_unlock_irqrestore(&i8253_lock, flags);
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 21db8f56c9a..893df828075 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -353,14 +353,6 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
# include <linux/slab.h> /* kmalloc() */
# include <linux/timer.h> /* time_after() */
-#ifdef CONFIG_BALANCED_IRQ_DEBUG
-# define TDprintk(x...) do { printk("<%ld:%s:%d>: ", jiffies, __FILE__, __LINE__); printk(x); } while (0)
-# define Dprintk(x...) do { TDprintk(x); } while (0)
-# else
-# define TDprintk(x...)
-# define Dprintk(x...)
-# endif
-
#define IRQBALANCE_CHECK_ARCH -999
#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
@@ -443,7 +435,7 @@ static inline void balance_irq(int cpu, int irq)
static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
{
int i, j;
- Dprintk("Rotating IRQs among CPUs.\n");
+
for_each_online_cpu(i) {
for (j = 0; j < NR_IRQS; j++) {
if (!irq_desc[j].action)
@@ -560,19 +552,11 @@ tryanothercpu:
max_loaded = tmp_loaded; /* processor */
imbalance = (max_cpu_irq - min_cpu_irq) / 2;
- Dprintk("max_loaded cpu = %d\n", max_loaded);
- Dprintk("min_loaded cpu = %d\n", min_loaded);
- Dprintk("max_cpu_irq load = %ld\n", max_cpu_irq);
- Dprintk("min_cpu_irq load = %ld\n", min_cpu_irq);
- Dprintk("load imbalance = %lu\n", imbalance);
-
/* if imbalance is less than approx 10% of max load, then
* observe diminishing returns action. - quit
*/
- if (imbalance < (max_cpu_irq >> 3)) {
- Dprintk("Imbalance too trivial\n");
+ if (imbalance < (max_cpu_irq >> 3))
goto not_worth_the_effort;
- }
tryanotherirq:
/* if we select an IRQ to move that can't go where we want, then
@@ -629,9 +613,6 @@ tryanotherirq:
cpus_and(tmp, target_cpu_mask, allowed_mask);
if (!cpus_empty(tmp)) {
-
- Dprintk("irq = %d moved to cpu = %d\n",
- selected_irq, min_loaded);
/* mark for change destination */
set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
@@ -651,7 +632,6 @@ not_worth_the_effort:
*/
balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
- Dprintk("IRQ worth rotating not found\n");
return;
}
@@ -1902,7 +1882,7 @@ __setup("no_timer_check", notimercheck);
* - if this function detects that timer IRQs are defunct, then we fall
* back to ISA timer IRQs
*/
-int __init timer_irq_works(void)
+static int __init timer_irq_works(void)
{
unsigned long t1 = jiffies;
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index ba44d40b066..dd2b97fc00b 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -149,15 +149,11 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs)
#ifdef CONFIG_4KSTACKS
-/*
- * These should really be __section__(".bss.page_aligned") as well, but
- * gcc's 3.0 and earlier don't handle that correctly.
- */
static char softirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__aligned__(THREAD_SIZE)));
+ __attribute__((__section__(".bss.page_aligned")));
static char hardirq_stack[NR_CPUS * THREAD_SIZE]
- __attribute__((__aligned__(THREAD_SIZE)));
+ __attribute__((__section__(".bss.page_aligned")));
/*
* allocate per-cpu stacks for hardirq and for softirq processing
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 6c49acb9698..84664710b78 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -300,6 +300,7 @@ early_param("idle", idle_setup);
void show_regs(struct pt_regs * regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+ unsigned long d0, d1, d2, d3, d6, d7;
printk("\n");
printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
@@ -324,6 +325,17 @@ void show_regs(struct pt_regs * regs)
cr3 = read_cr3();
cr4 = read_cr4_safe();
printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+
+ get_debugreg(d0, 0);
+ get_debugreg(d1, 1);
+ get_debugreg(d2, 2);
+ get_debugreg(d3, 3);
+ printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+ d0, d1, d2, d3);
+ get_debugreg(d6, 6);
+ get_debugreg(d7, 7);
+ printk("DR6: %08lx DR7: %08lx\n", d6, d7);
+
show_trace(NULL, regs, &regs->esp);
}
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c
index 5513f8d5b5b..0d796248866 100644
--- a/arch/i386/kernel/reboot.c
+++ b/arch/i386/kernel/reboot.c
@@ -113,6 +113,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 300/"),
},
},
+ { /* Handle problems with rebooting on Dell Optiplex 745's SFF*/
+ .callback = set_bios_reboot,
+ .ident = "Dell OptiPlex 745",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+ DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
+ },
+ },
{ /* Handle problems with rebooting on Dell 2400's */
.callback = set_bios_reboot,
.ident = "Dell PowerEdge 2400",
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 74871d066c2..d474cd639bc 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -273,18 +273,18 @@ unsigned long __init find_max_low_pfn(void)
printk(KERN_WARNING "Warning only %ldMB will be used.\n",
MAXMEM>>20);
if (max_pfn > MAX_NONPAE_PFN)
- printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
else
printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
max_pfn = MAXMEM_PFN;
#else /* !CONFIG_HIGHMEM */
-#ifndef CONFIG_X86_PAE
+#ifndef CONFIG_HIGHMEM64G
if (max_pfn > MAX_NONPAE_PFN) {
max_pfn = MAX_NONPAE_PFN;
printk(KERN_WARNING "Warning only 4GB will be used.\n");
- printk(KERN_WARNING "Use a PAE enabled kernel.\n");
+ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
}
-#endif /* !CONFIG_X86_PAE */
+#endif /* !CONFIG_HIGHMEM64G */
#endif /* !CONFIG_HIGHMEM */
} else {
if (highmem_pages == -1)
@@ -466,7 +466,7 @@ void __init setup_bootmem_allocator(void)
*
* This should all compile down to nothing when NUMA is off.
*/
-void __init remapped_pgdat_init(void)
+static void __init remapped_pgdat_init(void)
{
int nid;
@@ -640,6 +640,7 @@ void __init setup_arch(char **cmdline_p)
#endif
e820_register_memory();
+ e820_mark_nosave_regions();
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index ff4ee6f3326..6deb159d08e 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -336,7 +336,9 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
int in_gate_area(struct task_struct *task, unsigned long addr)
{
- return 0;
+ const struct vm_area_struct *vma = get_gate_vma(task);
+
+ return vma && addr >= vma->vm_start && addr < vma->vm_end;
}
int in_gate_area_no_task(unsigned long addr)
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index a665df61f08..19a6c678d02 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -207,55 +207,9 @@ unsigned long read_persistent_clock(void)
return retval;
}
-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-int no_sync_cmos_clock;
-
-static void sync_cmos_clock(unsigned long dummy)
-{
- struct timeval now, next;
- int fail = 1;
-
- /*
- * If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
- * called as close as possible to 500 ms before the new second starts.
- * This code is run on a timer. If the clock is set, that timer
- * may not expire at the correct time. Thus, we adjust...
- */
- if (!ntp_synced())
- /*
- * Not synced, exit, do not restart a timer (if one is
- * running, let it run out).
- */
- return;
-
- do_gettimeofday(&now);
- if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
- fail = set_rtc_mmss(now.tv_sec);
-
- next.tv_usec = USEC_AFTER - now.tv_usec;
- if (next.tv_usec <= 0)
- next.tv_usec += USEC_PER_SEC;
-
- if (!fail)
- next.tv_sec = 659;
- else
- next.tv_sec = 0;
-
- if (next.tv_usec >= USEC_PER_SEC) {
- next.tv_sec++;
- next.tv_usec -= USEC_PER_SEC;
- }
- mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
-}
-
-void notify_arch_cmos_timer(void)
+int update_persistent_clock(struct timespec now)
{
- if (!no_sync_cmos_clock)
- mod_timer(&sync_cmos_timer, jiffies + 1);
+ return set_rtc_mmss(now.tv_sec);
}
extern void (*late_time_init)(void);
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 3e7753c78b9..57772a18c39 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -152,7 +152,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
if (!stack) {
unsigned long dummy;
stack = &dummy;
- if (task && task != current)
+ if (task != current)
stack = (unsigned long *)task->thread.esp;
}
@@ -211,6 +211,7 @@ static void print_trace_address(void *data, unsigned long addr)
{
printk("%s [<%08lx>] ", (char *)data, addr);
print_symbol("%s\n", addr);
+ touch_nmi_watchdog();
}
static struct stacktrace_ops print_trace_ops = {
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c
index f9b845f4e69..b1b5ab08b26 100644
--- a/arch/i386/kernel/vmiclock.c
+++ b/arch/i386/kernel/vmiclock.c
@@ -32,6 +32,7 @@
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/timer.h>
+#include <asm/i8253.h>
#include <irq_vectors.h>
#include "io_ports.h"
@@ -142,6 +143,7 @@ static void vmi_timer_set_mode(enum clock_event_mode mode,
switch (mode) {
case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
break;
case CLOCK_EVT_MODE_PERIODIC:
cycles_per_hz = vmi_timer_ops.get_cycle_frequency();
diff --git a/arch/i386/lib/Makefile b/arch/i386/lib/Makefile
index 22d8ac5815f..4d105fdfe81 100644
--- a/arch/i386/lib/Makefile
+++ b/arch/i386/lib/Makefile
@@ -4,7 +4,7 @@
lib-y = checksum.o delay.o usercopy.o getuser.o putuser.o memcpy.o strstr.o \
- bitops.o semaphore.o
+ bitops.o semaphore.o string.o
lib-$(CONFIG_X86_USE_3DNOW) += mmx.o
diff --git a/arch/i386/lib/string.c b/arch/i386/lib/string.c
new file mode 100644
index 00000000000..2c773fefa3d
--- /dev/null
+++ b/arch/i386/lib/string.c
@@ -0,0 +1,257 @@
+/*
+ * Most of the string-functions are rather heavily hand-optimized,
+ * see especially strsep,strstr,str[c]spn. They should work, but are not
+ * very easy to understand. Everything is done entirely within the register
+ * set, making the functions fast and clean. String instructions have been
+ * used throughout, making for "slightly" unclear code :-)
+ *
+ * AK: On P4 and K7 using non string instruction implementations might be faster
+ * for large memory blocks. But most of them are unlikely to be used on large
+ * strings.
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+
+#ifdef __HAVE_ARCH_STRCPY
+char *strcpy(char * dest,const char *src) /* copy src, terminator included, to dest; returns dest */
+{
+ int d0, d1, d2; /* dummy outputs: tell the compiler ESI, EDI and EAX are modified */
+ asm volatile( "1:\tlodsb\n\t" /* al = next byte of src (ESI) */
+ "stosb\n\t" /* store al at the next byte of dest (EDI) */
+ "testb %%al,%%al\n\t" /* was that the NUL? */
+ "jne 1b" /* no: keep copying */
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2)
+ :"0" (src),"1" (dest) : "memory");
+ return dest;
+}
+EXPORT_SYMBOL(strcpy);
+#endif
+
+#ifdef __HAVE_ARCH_STRNCPY
+char *strncpy(char * dest,const char *src,size_t count) /* write count bytes to dest: src up to its NUL, then zero padding; no NUL is added if src does not end within count bytes */
+{
+ int d0, d1, d2, d3; /* dummy outputs for ESI, EDI, ECX and EAX */
+ asm volatile( "1:\tdecl %2\n\t" /* --count (ECX) */
+ "js 2f\n\t" /* went negative: count bytes written, stop without terminating */
+ "lodsb\n\t" /* al = next byte of src */
+ "stosb\n\t" /* store it in dest */
+ "testb %%al,%%al\n\t"
+ "jne 1b\n\t" /* loop until the NUL has been copied */
+ "rep\n\t" /* al is 0 here and ECX holds the bytes still owed ... */
+ "stosb\n" /* ... so rep stosb zero-fills the rest of dest */
+ "2:"
+ : "=&S" (d0), "=&D" (d1), "=&c" (d2), "=&a" (d3)
+ :"0" (src),"1" (dest),"2" (count) : "memory");
+ return dest;
+}
+EXPORT_SYMBOL(strncpy);
+#endif
+
+#ifdef __HAVE_ARCH_STRCAT
+char *strcat(char * dest,const char * src) /* append src, terminator included, at the NUL of dest; returns dest */
+{
+ int d0, d1, d2, d3; /* dummy outputs for ESI, EDI, EAX and ECX */
+ asm volatile( "repne\n\t" /* EAX = 0, ECX = 0xffffffff: ... */
+ "scasb\n\t" /* ... scan dest (EDI) for its NUL */
+ "decl %1\n" /* scasb stopped one past the NUL; step EDI back onto it */
+ "1:\tlodsb\n\t" /* al = next byte of src */
+ "stosb\n\t" /* store it at the end of dest */
+ "testb %%al,%%al\n\t"
+ "jne 1b" /* loop until the NUL has been copied */
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+ : "0" (src), "1" (dest), "2" (0), "3" (0xffffffffu): "memory");
+ return dest;
+}
+EXPORT_SYMBOL(strcat);
+#endif
+
+#ifdef __HAVE_ARCH_STRNCAT
+char *strncat(char * dest,const char * src,size_t count) /* append at most count bytes of src to dest, then store a NUL; returns dest */
+{
+ int d0, d1, d2, d3; /* dummy outputs for ESI, EDI, EAX and ECX */
+ asm volatile( "repne\n\t" /* EAX = 0, ECX = 0xffffffff: ... */
+ "scasb\n\t" /* ... scan dest (EDI) for its NUL */
+ "decl %1\n\t" /* step EDI back onto that NUL */
+ "movl %8,%3\n" /* ECX = count (operand 8) */
+ "1:\tdecl %3\n\t" /* --count */
+ "js 2f\n\t" /* count used up: go terminate */
+ "lodsb\n\t" /* al = next byte of src */
+ "stosb\n\t" /* append it */
+ "testb %%al,%%al\n\t"
+ "jne 1b\n" /* loop until src's NUL has been copied */
+ "2:\txorl %2,%2\n\t" /* EAX = 0 */
+ "stosb" /* always store a NUL here, also after src's own NUL was copied */
+ : "=&S" (d0), "=&D" (d1), "=&a" (d2), "=&c" (d3)
+ : "0" (src),"1" (dest),"2" (0),"3" (0xffffffffu), "g" (count)
+ : "memory");
+ return dest;
+}
+EXPORT_SYMBOL(strncat);
+#endif
+
+#ifdef __HAVE_ARCH_STRCMP
+int strcmp(const char * cs,const char * ct) /* returns 0 if equal, -1 if the first differing byte of cs is lower (unsigned), 1 otherwise */
+{
+ int d0, d1; /* dummy outputs for ESI and EDI */
+ int res;
+ asm volatile( "1:\tlodsb\n\t" /* al = next byte of cs (ESI) */
+ "scasb\n\t" /* compare al with the next byte of ct (EDI) */
+ "jne 2f\n\t" /* bytes differ */
+ "testb %%al,%%al\n\t"
+ "jne 1b\n\t" /* equal and not NUL: keep going */
+ "xorl %%eax,%%eax\n\t" /* both strings ended together: result 0 */
+ "jmp 3f\n"
+ "2:\tsbbl %%eax,%%eax\n\t" /* eax = -1 if the compare borrowed (cs byte below ct byte), else 0 */
+ "orb $1,%%al\n" /* turns 0 into 1, leaves -1 as -1 */
+ "3:"
+ :"=a" (res), "=&S" (d0), "=&D" (d1)
+ :"1" (cs),"2" (ct)
+ :"memory");
+ return res;
+}
+EXPORT_SYMBOL(strcmp);
+#endif
+
+#ifdef __HAVE_ARCH_STRNCMP
+int strncmp(const char * cs,const char * ct,size_t count) /* like strcmp above, but stops with 0 once count bytes compared equal */
+{
+ int res;
+ int d0, d1, d2; /* dummy outputs for ESI, EDI and ECX */
+ asm volatile( "1:\tdecl %3\n\t" /* --count (ECX) */
+ "js 2f\n\t" /* count used up without a difference: equal */
+ "lodsb\n\t" /* al = next byte of cs (ESI) */
+ "scasb\n\t" /* compare al with the next byte of ct (EDI) */
+ "jne 3f\n\t" /* bytes differ */
+ "testb %%al,%%al\n\t"
+ "jne 1b\n" /* equal and not NUL: keep going */
+ "2:\txorl %%eax,%%eax\n\t" /* result 0 */
+ "jmp 4f\n"
+ "3:\tsbbl %%eax,%%eax\n\t" /* eax = -1 if the compare borrowed (cs byte below ct byte), else 0 */
+ "orb $1,%%al\n" /* turns 0 into 1, leaves -1 as -1 */
+ "4:"
+ :"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
+ :"1" (cs),"2" (ct),"3" (count)
+ :"memory");
+ return res;
+}
+EXPORT_SYMBOL(strncmp);
+#endif
+
+#ifdef __HAVE_ARCH_STRCHR
+char *strchr(const char * s, int c) /* address of the first byte of s equal to the low byte of c (the NUL counts), or 0 if none */
+{
+ int d0; /* dummy output for ESI */
+ char * res;
+ asm volatile( "movb %%al,%%ah\n" /* keep the wanted byte in ah; al is reused by lodsb */
+ "1:\tlodsb\n\t" /* al = next byte of s (ESI) */
+ "cmpb %%ah,%%al\n\t"
+ "je 2f\n\t" /* found */
+ "testb %%al,%%al\n\t"
+ "jne 1b\n\t" /* not the NUL yet: keep scanning */
+ "movl $1,%1\n" /* not found: ESI = 1 so the decrement below yields 0 */
+ "2:\tmovl %1,%0\n\t" /* res = ESI, which is one past the byte just read */
+ "decl %0" /* step back onto it */
+ :"=a" (res), "=&S" (d0)
+ :"1" (s),"0" (c)
+ :"memory");
+ return res;
+}
+EXPORT_SYMBOL(strchr);
+#endif
+
+#ifdef __HAVE_ARCH_STRRCHR
+char *strrchr(const char * s, int c) /* address of the last byte of s equal to the low byte of c (the NUL counts), or 0 if none */
+{
+ int d0, d1; /* dummy outputs for ESI and EAX */
+ char * res;
+ asm volatile( "movb %%al,%%ah\n" /* keep the wanted byte in ah; al is reused by lodsb */
+ "1:\tlodsb\n\t" /* al = next byte of s (ESI) */
+ "cmpb %%ah,%%al\n\t"
+ "jne 2f\n\t" /* no match: leave res alone */
+ "leal -1(%%esi),%0\n" /* match: remember its address (ESI is already one past it) */
+ "2:\ttestb %%al,%%al\n\t"
+ "jne 1b" /* scan the whole string so the last match wins */
+ :"=g" (res), "=&S" (d0), "=&a" (d1)
+ :"0" (0),"1" (s),"2" (c)
+ :"memory");
+ return res;
+}
+EXPORT_SYMBOL(strrchr);
+#endif
+
+#ifdef __HAVE_ARCH_STRLEN
+size_t strlen(const char * s) /* number of bytes in s before its NUL */
+{
+ int d0; /* dummy output for EDI */
+ int res;
+ asm volatile( "repne\n\t" /* EAX = 0, ECX = 0xffffffff: ... */
+ "scasb\n\t" /* ... scan s (EDI) for the NUL, ECX drops by one per byte examined */
+ "notl %0\n\t" /* ~ECX = bytes examined, NUL included */
+ "decl %0" /* minus the NUL itself */
+ :"=c" (res), "=&D" (d0)
+ :"1" (s),"a" (0), "0" (0xffffffffu)
+ :"memory");
+ return res;
+}
+EXPORT_SYMBOL(strlen);
+#endif
+
+#ifdef __HAVE_ARCH_MEMCHR
+void *memchr(const void *cs,int c,size_t count) /* address of the first of count bytes at cs equal to the low byte of c, or NULL */
+{
+ int d0; /* dummy output for ECX */
+ void *res;
+ if (!count) /* handled here: the scan below is never entered with ECX == 0 */
+ return NULL;
+ asm volatile( "repne\n\t" /* scan at most count (ECX) bytes ... */
+ "scasb\n\t" /* ... at cs (EDI) for al */
+ "je 1f\n\t" /* last byte compared equal: found */
+ "movl $1,%0\n" /* not found: EDI = 1 so the decrement below yields 0 */
+ "1:\tdecl %0" /* EDI is one past the byte last examined; step back */
+ :"=D" (res), "=&c" (d0)
+ :"a" (c),"0" (cs),"1" (count)
+ :"memory");
+ return res;
+}
+EXPORT_SYMBOL(memchr);
+#endif
+
+#ifdef __HAVE_ARCH_MEMSCAN
+void *memscan(void * addr, int c, size_t size) /* address of the first of size bytes at addr equal to the low byte of c, or addr + size if none */
+{
+ if (!size) /* empty area: the end address is addr itself */
+ return addr;
+ asm volatile("repnz; scasb\n\t" /* scan at most size (ECX) bytes at addr (EDI) for al */
+ "jnz 1f\n\t" /* no match: EDI already equals addr + size */
+ "dec %%edi\n" /* match: EDI is one past it, step back */
+ "1:"
+ : "=D" (addr), "=c" (size)
+ : "0" (addr), "1" (size), "a" (c)
+ : "memory");
+ return addr;
+}
+EXPORT_SYMBOL(memscan);
+#endif
+
+#ifdef __HAVE_ARCH_STRNLEN
+size_t strnlen(const char *s, size_t count) /* bytes in s before its NUL, but never more than count */
+{
+ int d0; /* dummy output for EDX, the remaining count */
+ int res;
+ asm volatile( "movl %2,%0\n\t" /* EAX = s: the moving cursor */
+ "jmp 2f\n" /* test the count before reading any byte */
+ "1:\tcmpb $0,(%0)\n\t" /* NUL under the cursor? */
+ "je 3f\n\t"
+ "incl %0\n" /* no: advance the cursor */
+ "2:\tdecl %1\n\t" /* --count */
+ "cmpl $-1,%1\n\t" /* wrapped to -1: count bytes have been examined */
+ "jne 1b\n"
+ "3:\tsubl %2,%0" /* length = cursor - s */
+ :"=a" (res), "=&d" (d0)
+ :"c" (s),"1" (count)
+ :"memory");
+ return res;
+}
+EXPORT_SYMBOL(strnlen);
+#endif
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 6e72f22e6bb..e1a9a805c44 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -471,6 +471,10 @@ void zap_low_mappings (void)
flush_tlb_all();
}
+int nx_enabled = 0;
+
+#ifdef CONFIG_X86_PAE
+
static int disable_nx __initdata = 0;
u64 __supported_pte_mask __read_mostly = ~_PAGE_NX;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
@@ -500,9 +504,6 @@ static int __init noexec_setup(char *str)
}
early_param("noexec", noexec_setup);
-int nx_enabled = 0;
-#ifdef CONFIG_X86_PAE
-
static void __init set_nx(void)
{
unsigned int v[4], l, h;
diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c
index fff08ae7b5e..0b278315d73 100644
--- a/arch/i386/mm/ioremap.c
+++ b/arch/i386/mm/ioremap.c
@@ -196,7 +196,7 @@ void iounmap(volatile void __iomem *addr)
/* Reset the direct mapping. Can block */
if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
change_page_attr(virt_to_page(__va(p->phys_addr)),
- p->size >> PAGE_SHIFT,
+ get_vm_area_size(p) >> PAGE_SHIFT,
PAGE_KERNEL);
global_flush_tlb();
}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index 37992ffb163..8927222b3ab 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -82,7 +82,7 @@ static void flush_kernel_map(void *arg)
struct page *p;
/* High level code is not ready for clflush yet */
- if (0 && cpu_has_clflush) {
+ if (cpu_has_clflush) {
list_for_each_entry (p, lh, lru)
cache_flush_page(p);
} else if (boot_cpu_data.x86_model >= 4)
@@ -136,6 +136,12 @@ static inline void revert_page(struct page *kpte_page, unsigned long address)
ref_prot));
}
+static inline void save_page(struct page *kpte_page) /* put kpte_page on df_list unless it is already marked as queued */
+{
+ if (!test_and_set_bit(PG_arch_1, &kpte_page->flags)) /* PG_arch_1 was clear: first request for this page */
+ list_add(&kpte_page->lru, &df_list);
+}
+
static int
__change_page_attr(struct page *page, pgprot_t prot)
{
@@ -150,6 +156,9 @@ __change_page_attr(struct page *page, pgprot_t prot)
if (!kpte)
return -EINVAL;
kpte_page = virt_to_page(kpte);
+ BUG_ON(PageLRU(kpte_page));
+ BUG_ON(PageCompound(kpte_page));
+
if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) {
if (!pte_huge(*kpte)) {
set_pte_atomic(kpte, mk_pte(page, prot));
@@ -179,11 +188,11 @@ __change_page_attr(struct page *page, pgprot_t prot)
* time (not via split_large_page) and in turn we must not
* replace it with a largepage.
*/
+
+ save_page(kpte_page);
if (!PageReserved(kpte_page)) {
if (cpu_has_pse && (page_private(kpte_page) == 0)) {
- ClearPagePrivate(kpte_page);
paravirt_release_pt(page_to_pfn(kpte_page));
- list_add(&kpte_page->lru, &df_list);
revert_page(kpte_page, address);
}
}
@@ -236,6 +245,11 @@ void global_flush_tlb(void)
spin_unlock_irq(&cpa_lock);
flush_map(&l);
list_for_each_entry_safe(pg, next, &l, lru) {
+ list_del(&pg->lru);
+ clear_bit(PG_arch_1, &pg->flags);
+ if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0)
+ continue;
+ ClearPagePrivate(pg);
__free_page(pg);
}
}
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c
index 8d7c0864cc0..01437c46baa 100644
--- a/arch/i386/mm/pgtable.c
+++ b/arch/i386/mm/pgtable.c
@@ -235,7 +235,7 @@ static inline void pgd_list_del(pgd_t *pgd)
#if (PTRS_PER_PMD == 1)
/* Non-PAE pgd constructor */
-void pgd_ctor(void *pgd)
+static void pgd_ctor(void *pgd)
{
unsigned long flags;
@@ -257,7 +257,7 @@ void pgd_ctor(void *pgd)
}
#else /* PTRS_PER_PMD > 1 */
/* PAE pgd constructor */
-void pgd_ctor(void *pgd)
+static void pgd_ctor(void *pgd)
{
/* PAE, kernel PMD may be shared */
@@ -276,7 +276,7 @@ void pgd_ctor(void *pgd)
}
#endif /* PTRS_PER_PMD */
-void pgd_dtor(void *pgd)
+static void pgd_dtor(void *pgd)
{
unsigned long flags; /* can be called from interrupt context */
diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c
index b33aea845f5..bc8a44bddaa 100644
--- a/arch/i386/pci/acpi.c
+++ b/arch/i386/pci/acpi.c
@@ -8,20 +8,42 @@
struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int domain, int busnum)
{
struct pci_bus *bus;
+ struct pci_sysdata *sd;
+ int pxm;
+
+ /* Allocate per-root-bus (not per bus) arch-specific data.
+ * TODO: leak; this memory is never freed.
+ * It's arguable whether it's worth the trouble to care.
+ */
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+ if (!sd) {
+ printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
+ return NULL;
+ }
if (domain != 0) {
printk(KERN_WARNING "PCI: Multiple domains not supported\n");
+ kfree(sd);
return NULL;
}
- bus = pcibios_scan_root(busnum);
+ sd->node = -1;
+
+ pxm = acpi_get_pxm(device->handle);
+#ifdef CONFIG_ACPI_NUMA
+ if (pxm >= 0)
+ sd->node = pxm_to_node(pxm);
+#endif
+
+ bus = pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
+ if (!bus)
+ kfree(sd);
+
#ifdef CONFIG_ACPI_NUMA
if (bus != NULL) {
- int pxm = acpi_get_pxm(device->handle);
if (pxm >= 0) {
- bus->sysdata = (void *)(unsigned long)pxm_to_node(pxm);
- printk("bus %d -> pxm %d -> node %ld\n",
- busnum, pxm, (long)(bus->sysdata));
+ printk("bus %d -> pxm %d -> node %d\n",
+ busnum, pxm, sd->node);
}
}
#endif
diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c
index 3f78d4d8ecf..85503deeda4 100644
--- a/arch/i386/pci/common.c
+++ b/arch/i386/pci/common.c
@@ -293,6 +293,7 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
struct pci_bus * __devinit pcibios_scan_root(int busnum)
{
struct pci_bus *bus = NULL;
+ struct pci_sysdata *sd;
dmi_check_system(pciprobe_dmi_table);
@@ -303,9 +304,19 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum)
}
}
+ /* Allocate per-root-bus (not per bus) arch-specific data.
+ * TODO: leak; this memory is never freed.
+ * It's arguable whether it's worth the trouble to care.
+ */
+ sd = kzalloc(sizeof(*sd), GFP_KERNEL);
+ if (!sd) {
+ printk(KERN_ERR "PCI: OOM, not probing PCI bus %02x\n", busnum);
+ return NULL;
+ }
+
printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum);
- return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL);
+ return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, sd);
}
extern u8 pci_cache_line_size;
diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c
index c7cabeed4d7..4df637e34f8 100644
--- a/arch/i386/pci/mmconfig-shared.c
+++ b/arch/i386/pci/mmconfig-shared.c
@@ -24,6 +24,9 @@
DECLARE_BITMAP(pci_mmcfg_fallback_slots, 32*PCI_MMCFG_MAX_CHECK_BUS);
+/* Indicate if the mmcfg resources have been placed into the resource table. */
+static int __initdata pci_mmcfg_resources_inserted;
+
/* K8 systems have some devices (typically in the builtin northbridge)
that are only accessible using type1
Normally this can be expressed in the MCFG by not listing them
@@ -170,7 +173,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
return name != NULL;
}
-static void __init pci_mmcfg_insert_resources(void)
+static void __init pci_mmcfg_insert_resources(unsigned long resource_flags)
{
#define PCI_MMCFG_RESOURCE_NAME_LEN 19
int i;
@@ -194,10 +197,13 @@ static void __init pci_mmcfg_insert_resources(void)
cfg->pci_segment);
res->start = cfg->address;
res->end = res->start + (num_buses << 20) - 1;
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ res->flags = IORESOURCE_MEM | resource_flags;
insert_resource(&iomem_resource, res);
names += PCI_MMCFG_RESOURCE_NAME_LEN;
}
+
+ /* Mark that the resources have been inserted. */
+ pci_mmcfg_resources_inserted = 1;
}
static void __init pci_mmcfg_reject_broken(int type)
@@ -267,7 +273,43 @@ void __init pci_mmcfg_init(int type)
if (type == 1)
unreachable_devices();
if (known_bridge)
- pci_mmcfg_insert_resources();
+ pci_mmcfg_insert_resources(IORESOURCE_BUSY);
pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
+ } else {
+ /*
+ * Signal not to attempt to insert mmcfg resources because
+ * the architecture mmcfg setup could not initialize.
+ */
+ pci_mmcfg_resources_inserted = 1;
}
}
+
+static int __init pci_mmcfg_late_insert_resources(void)
+{
+ /*
+ * If resources are already inserted or we are not using MMCONFIG,
+ * don't insert the resources.
+ */
+ if ((pci_mmcfg_resources_inserted == 1) ||
+ (pci_probe & PCI_PROBE_MMCONF) == 0 ||
+ (pci_mmcfg_config_num == 0) ||
+ (pci_mmcfg_config == NULL) ||
+ (pci_mmcfg_config[0].address == 0))
+ return 1;
+
+ /*
+ * Attempt to insert the mmcfg resources but not with the busy flag
+ * marked so it won't cause request errors when __request_region is
+ * called.
+ */
+ pci_mmcfg_insert_resources(0);
+
+ return 0;
+}
+
+/*
+ * Perform MMCONFIG resource insertion after PCI initialization to allow for
+ * misprogrammed MCFG tables that state larger sizes but actually conflict
+ * with other system resources.
+ */
+late_initcall(pci_mmcfg_late_insert_resources);
diff --git a/arch/i386/xen/time.c b/arch/i386/xen/time.c
index 51fdabf1fd4..dfd6db69ead 100644
--- a/arch/i386/xen/time.c
+++ b/arch/i386/xen/time.c
@@ -412,6 +412,7 @@ static void xen_timerop_set_mode(enum clock_event_mode mode,
break;
case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
break;
case CLOCK_EVT_MODE_UNUSED:
@@ -474,6 +475,8 @@ static void xen_vcpuop_set_mode(enum clock_event_mode mode,
HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
BUG();
break;
+ case CLOCK_EVT_MODE_RESUME:
+ break;
}
}
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
index e1189ba1ca5..1cfab326fb7 100644
--- a/arch/ia64/ia32/binfmt_elf32.c
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -226,7 +226,7 @@ elf32_set_personality (void)
}
static unsigned long
-elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type, unsigned long unused)
+elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
{
unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
diff --git a/arch/powerpc/boot/ps3-head.S b/arch/powerpc/boot/ps3-head.S
index 1a6d64a68df..a55c2735f75 100644
--- a/arch/powerpc/boot/ps3-head.S
+++ b/arch/powerpc/boot/ps3-head.S
@@ -20,6 +20,8 @@
#include "ppc_asm.h"
+ .machine "ppc64"
+
.text
/*
diff --git a/arch/powerpc/boot/ps3-hvcall.S b/arch/powerpc/boot/ps3-hvcall.S
index c8b7df3210d..585965f7e6a 100644
--- a/arch/powerpc/boot/ps3-hvcall.S
+++ b/arch/powerpc/boot/ps3-hvcall.S
@@ -20,6 +20,8 @@
#include "ppc_asm.h"
+ .machine "ppc64"
+
/*
* The PS3 hypervisor uses a 64 bit "C" language calling convention.
* The routines here marshal arguments between the 32 bit wrapper
diff --git a/arch/powerpc/mm/tlb_32.c b/arch/powerpc/mm/tlb_32.c
index 06c7e77e097..eb4b512d65f 100644
--- a/arch/powerpc/mm/tlb_32.c
+++ b/arch/powerpc/mm/tlb_32.c
@@ -26,6 +26,8 @@
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/highmem.h>
+#include <linux/pagemap.h>
+
#include <asm/tlbflush.h>
#include <asm/tlb.h>
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 7de4e919687..c2aaec5289d 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -941,6 +941,13 @@ static const struct file_operations spufs_signal1_nosched_fops = {
.mmap = spufs_signal1_mmap,
};
+static const struct file_operations spufs_signal1_nosched_fops = {
+ .open = spufs_signal1_open,
+ .release = spufs_signal1_release,
+ .write = spufs_signal1_write,
+ .mmap = spufs_signal1_mmap,
+};
+
static int spufs_signal2_open(struct inode *inode, struct file *file)
{
struct spufs_inode_info *i = SPUFS_I(inode);
@@ -1076,6 +1083,13 @@ static const struct file_operations spufs_signal2_nosched_fops = {
.mmap = spufs_signal2_mmap,
};
+static const struct file_operations spufs_signal2_nosched_fops = {
+ .open = spufs_signal2_open,
+ .release = spufs_signal2_release,
+ .write = spufs_signal2_write,
+ .mmap = spufs_signal2_mmap,
+};
+
static void spufs_signal1_type_set(void *data, u64 val)
{
struct spu_context *ctx = data;
@@ -2177,8 +2191,8 @@ struct tree_descr spufs_dir_contents[] = {
{ "mbox_stat", &spufs_mbox_stat_fops, 0444, },
{ "ibox_stat", &spufs_ibox_stat_fops, 0444, },
{ "wbox_stat", &spufs_wbox_stat_fops, 0444, },
- { "signal1", &spufs_signal1_fops, 0666, },
- { "signal2", &spufs_signal2_fops, 0666, },
+ { "signal1", &spufs_signal1_nosched_fops, 0222, },
+ { "signal2", &spufs_signal2_nosched_fops, 0222, },
{ "signal1_type", &spufs_signal1_type, 0666, },
{ "signal2_type", &spufs_signal2_type, 0666, },
{ "cntl", &spufs_cntl_fops, 0666, },
diff --git a/arch/powerpc/platforms/ps3/Kconfig b/arch/powerpc/platforms/ps3/Kconfig
index a05079b0769..d4fc74f7bb1 100644
--- a/arch/powerpc/platforms/ps3/Kconfig
+++ b/arch/powerpc/platforms/ps3/Kconfig
@@ -102,4 +102,40 @@ config PS3_STORAGE
depends on PPC_PS3
tristate
+config PS3_DISK
+ tristate "PS3 Disk Storage Driver"
+ depends on PPC_PS3 && BLOCK
+ select PS3_STORAGE
+ help
+ Include support for the PS3 Disk Storage.
+
+ This support is required to access the PS3 hard disk.
+ In general, all users will say Y or M.
+
+config PS3_ROM
+ tristate "PS3 BD/DVD/CD-ROM Storage Driver"
+ depends on PPC_PS3 && SCSI
+ select PS3_STORAGE
+ help
+ Include support for the PS3 ROM Storage.
+
+ This support is required to access the PS3 BD/DVD/CD-ROM drive.
+ In general, all users will say Y or M.
+ Also make sure to say Y or M to "SCSI CDROM support" later.
+
+config PS3_FLASH
+ tristate "PS3 FLASH ROM Storage Driver"
+ depends on PPC_PS3
+ select PS3_STORAGE
+ help
+ Include support for the PS3 FLASH ROM Storage.
+
+ This support is required to access the PS3 FLASH ROM, which
+ contains the boot loader and some boot options.
+ In general, all users will say Y or M.
+
+ As this driver needs a fixed buffer of 256 KiB of memory, it can
+ be disabled on the kernel command line using "ps3flash=off", to
+ not allocate this fixed buffer.
+
endmenu
diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c
index 097ebd49f1b..7aca37d7976 100644
--- a/arch/sh/kernel/timers/timer-tmu.c
+++ b/arch/sh/kernel/timers/timer-tmu.c
@@ -80,6 +80,7 @@ static void tmu_set_mode(enum clock_event_mode mode,
break;
case CLOCK_EVT_MODE_UNUSED:
case CLOCK_EVT_MODE_SHUTDOWN:
+ case CLOCK_EVT_MODE_RESUME:
break;
}
}
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index f1cc55677ff..33dabf588bd 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -23,6 +23,10 @@ config GENERIC_TIME
bool
default y
+config GENERIC_CMOS_UPDATE
+ bool
+ default y
+
config GENERIC_CLOCKEVENTS
bool
default y
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index e340eb401fb..49063ca2efc 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -403,58 +403,9 @@ static struct sparc64_tick_ops hbtick_operations __read_mostly = {
static unsigned long timer_ticks_per_nsec_quotient __read_mostly;
-#define TICK_SIZE (tick_nsec / 1000)
-
-#define USEC_AFTER 500000
-#define USEC_BEFORE 500000
-
-static void sync_cmos_clock(unsigned long dummy);
-
-static DEFINE_TIMER(sync_cmos_timer, sync_cmos_clock, 0, 0);
-
-static void sync_cmos_clock(unsigned long dummy)
-{
- struct timeval now, next;
- int fail = 1;
-
- /*
- * If we have an externally synchronized Linux clock, then update
- * CMOS clock accordingly every ~11 minutes. Set_rtc_mmss() has to be
- * called as close as possible to 500 ms before the new second starts.
- * This code is run on a timer. If the clock is set, that timer
- * may not expire at the correct time. Thus, we adjust...
- */
- if (!ntp_synced())
- /*
- * Not synced, exit, do not restart a timer (if one is
- * running, let it run out).
- */
- return;
-
- do_gettimeofday(&now);
- if (now.tv_usec >= USEC_AFTER - ((unsigned) TICK_SIZE) / 2 &&
- now.tv_usec <= USEC_BEFORE + ((unsigned) TICK_SIZE) / 2)
- fail = set_rtc_mmss(now.tv_sec);
-
- next.tv_usec = USEC_AFTER - now.tv_usec;
- if (next.tv_usec <= 0)
- next.tv_usec += USEC_PER_SEC;
-
- if (!fail)
- next.tv_sec = 659;
- else
- next.tv_sec = 0;
-
- if (next.tv_usec >= USEC_PER_SEC) {
- next.tv_sec++;
- next.tv_usec -= USEC_PER_SEC;
- }
- mod_timer(&sync_cmos_timer, jiffies + timeval_to_jiffies(&next));
-}
-
-void notify_arch_cmos_timer(void)
+int update_persistent_clock(struct timespec now)
{
- mod_timer(&sync_cmos_timer, jiffies + 1);
+ return set_rtc_mmss(now.tv_sec);
}
/* Kick start a stopped clock (procedure from the Sun NVRAM/hostid FAQ). */
@@ -931,6 +882,7 @@ static void sparc64_timer_setup(enum clock_event_mode mode,
{
switch (mode) {
case CLOCK_EVT_MODE_ONESHOT:
+ case CLOCK_EVT_MODE_RESUME:
break;
case CLOCK_EVT_MODE_SHUTDOWN:
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 14bf8ce3ea2..45f82ae6d38 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -32,6 +32,10 @@ config GENERIC_TIME_VSYSCALL
bool
default y
+config GENERIC_CMOS_UPDATE
+ bool
+ default y
+
config ZONE_DMA32
bool
default y
@@ -56,6 +60,14 @@ config ZONE_DMA
bool
default y
+config QUICKLIST
+ bool
+ default y
+
+config NR_QUICK
+ int
+ default 2
+
config ISA
bool
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile
index 29617ae3926..128561d3e87 100644
--- a/arch/x86_64/Makefile
+++ b/arch/x86_64/Makefile
@@ -76,7 +76,8 @@ head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kern
libs-y += arch/x86_64/lib/
core-y += arch/x86_64/kernel/ \
arch/x86_64/mm/ \
- arch/x86_64/crypto/
+ arch/x86_64/crypto/ \
+ arch/x86_64/vdso/
core-$(CONFIG_IA32_EMULATION) += arch/x86_64/ia32/
drivers-$(CONFIG_PCI) += arch/x86_64/pci/
drivers-$(CONFIG_OPROFILE) += arch/x86_64/oprofile/
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index 40178e5c310..b7c4cd04bfc 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,19 +1,22 @@
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.22-rc2
-# Mon May 21 13:23:40 2007
+# Linux kernel version: 2.6.22-git14
+# Fri Jul 20 09:53:15 2007
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_X86=y
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_TIME_VSYSCALL=y
+CONFIG_GENERIC_CMOS_UPDATE=y
CONFIG_ZONE_DMA32=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
+CONFIG_QUICKLIST=y
+CONFIG_NR_QUICK=2
CONFIG_RWSEM_GENERIC_SPINLOCK=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
@@ -44,19 +47,18 @@ CONFIG_LOCALVERSION=""
CONFIG_LOCALVERSION_AUTO=y
CONFIG_SWAP=y
CONFIG_SYSVIPC=y
-# CONFIG_IPC_NS is not set
CONFIG_SYSVIPC_SYSCTL=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_BSD_PROCESS_ACCT is not set
# CONFIG_TASKSTATS is not set
-# CONFIG_UTS_NS is not set
+# CONFIG_USER_NS is not set
# CONFIG_AUDIT is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=18
# CONFIG_CPUSETS is not set
CONFIG_SYSFS_DEPRECATED=y
-# CONFIG_RELAY is not set
+CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_INITRAMFS_SOURCE=""
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
@@ -86,10 +88,6 @@ CONFIG_SLAB=y
CONFIG_RT_MUTEXES=y
# CONFIG_TINY_SHMEM is not set
CONFIG_BASE_SMALL=0
-
-#
-# Loadable module support
-#
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
@@ -97,12 +95,9 @@ CONFIG_MODULE_FORCE_UNLOAD=y
# CONFIG_MODULE_SRCVERSION_ALL is not set
# CONFIG_KMOD is not set
CONFIG_STOP_MACHINE=y
-
-#
-# Block layer
-#
CONFIG_BLOCK=y
# CONFIG_BLK_DEV_IO_TRACE is not set
+# CONFIG_BLK_DEV_BSG is not set
#
# IO Schedulers
@@ -165,9 +160,12 @@ CONFIG_SPLIT_PTLOCK_CPUS=4
CONFIG_MIGRATION=y
CONFIG_RESOURCES_64BIT=y
CONFIG_ZONE_DMA_FLAG=1
+CONFIG_BOUNCE=y
+CONFIG_VIRT_TO_BUS=y
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
CONFIG_OUT_OF_LINE_PFN_TO_PAGE=y
CONFIG_NR_CPUS=32
+CONFIG_PHYSICAL_ALIGN=0x200000
CONFIG_HOTPLUG_CPU=y
CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
CONFIG_HPET_TIMER=y
@@ -180,7 +178,7 @@ CONFIG_X86_MCE_INTEL=y
CONFIG_X86_MCE_AMD=y
# CONFIG_KEXEC is not set
# CONFIG_CRASH_DUMP is not set
-CONFIG_RELOCATABLE=y
+# CONFIG_RELOCATABLE is not set
CONFIG_PHYSICAL_START=0x200000
CONFIG_SECCOMP=y
# CONFIG_CC_STACKPROTECTOR is not set
@@ -201,7 +199,6 @@ CONFIG_GENERIC_PENDING_IRQ=y
CONFIG_PM=y
# CONFIG_PM_LEGACY is not set
# CONFIG_PM_DEBUG is not set
-# CONFIG_PM_SYSFS_DEPRECATED is not set
CONFIG_SOFTWARE_SUSPEND=y
CONFIG_PM_STD_PARTITION=""
CONFIG_SUSPEND_SMP=y
@@ -248,7 +245,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y
# CONFIG_CPU_FREQ_GOV_POWERSAVE is not set
CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
-# CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
#
# CPUFreq processor drivers
@@ -351,20 +348,8 @@ CONFIG_IPV6_SIT=y
# CONFIG_IPV6_MULTIPLE_TABLES is not set
# CONFIG_NETWORK_SECMARK is not set
# CONFIG_NETFILTER is not set
-
-#
-# DCCP Configuration (EXPERIMENTAL)
-#
# CONFIG_IP_DCCP is not set
-
-#
-# SCTP Configuration (EXPERIMENTAL)
-#
# CONFIG_IP_SCTP is not set
-
-#
-# TIPC Configuration (EXPERIMENTAL)
-#
# CONFIG_TIPC is not set
# CONFIG_ATM is not set
# CONFIG_BRIDGE is not set
@@ -401,6 +386,7 @@ CONFIG_IPV6_SIT=y
# CONFIG_MAC80211 is not set
# CONFIG_IEEE80211 is not set
# CONFIG_RFKILL is not set
+# CONFIG_NET_9P is not set
#
# Device Drivers
@@ -415,21 +401,9 @@ CONFIG_FW_LOADER=y
# CONFIG_DEBUG_DRIVER is not set
# CONFIG_DEBUG_DEVRES is not set
# CONFIG_SYS_HYPERVISOR is not set
-
-#
-# Connector - unified userspace <-> kernelspace linker
-#
# CONFIG_CONNECTOR is not set
# CONFIG_MTD is not set
-
-#
-# Parallel port support
-#
# CONFIG_PARPORT is not set
-
-#
-# Plug and Play support
-#
CONFIG_PNP=y
# CONFIG_PNP_DEBUG is not set
@@ -437,10 +411,7 @@ CONFIG_PNP=y
# Protocols
#
CONFIG_PNPACPI=y
-
-#
-# Block devices
-#
+CONFIG_BLK_DEV=y
CONFIG_BLK_DEV_FD=y
# CONFIG_BLK_CPQ_DA is not set
# CONFIG_BLK_CPQ_CISS_DA is not set
@@ -458,17 +429,14 @@ CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024
# CONFIG_CDROM_PKTCDVD is not set
# CONFIG_ATA_OVER_ETH is not set
-
-#
-# Misc devices
-#
+CONFIG_MISC_DEVICES=y
# CONFIG_IBM_ASM is not set
# CONFIG_PHANTOM is not set
+# CONFIG_EEPROM_93CX6 is not set
# CONFIG_SGI_IOC4 is not set
# CONFIG_TIFM_CORE is not set
# CONFIG_SONY_LAPTOP is not set
# CONFIG_THINKPAD_ACPI is not set
-# CONFIG_BLINK is not set
CONFIG_IDE=y
CONFIG_BLK_DEV_IDE=y
@@ -539,6 +507,7 @@ CONFIG_BLK_DEV_IDEDMA=y
#
# CONFIG_RAID_ATTRS is not set
CONFIG_SCSI=y
+CONFIG_SCSI_DMA=y
# CONFIG_SCSI_TGT is not set
CONFIG_SCSI_NETLINK=y
# CONFIG_SCSI_PROC_FS is not set
@@ -590,11 +559,9 @@ CONFIG_AIC79XX_DEBUG_MASK=0
# CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
# CONFIG_SCSI_AIC94XX is not set
# CONFIG_SCSI_ARCMSR is not set
-CONFIG_MEGARAID_NEWGEN=y
-CONFIG_MEGARAID_MM=y
-CONFIG_MEGARAID_MAILBOX=y
+# CONFIG_MEGARAID_NEWGEN is not set
# CONFIG_MEGARAID_LEGACY is not set
-CONFIG_MEGARAID_SAS=y
+# CONFIG_MEGARAID_SAS is not set
# CONFIG_SCSI_HPTIOP is not set
# CONFIG_SCSI_BUSLOGIC is not set
# CONFIG_SCSI_DMX3191D is not set
@@ -614,7 +581,6 @@ CONFIG_MEGARAID_SAS=y
# CONFIG_SCSI_DC395x is not set
# CONFIG_SCSI_DC390T is not set
# CONFIG_SCSI_DEBUG is not set
-# CONFIG_SCSI_ESP_CORE is not set
# CONFIG_SCSI_SRP is not set
CONFIG_ATA=y
# CONFIG_ATA_NONSTANDARD is not set
@@ -671,10 +637,6 @@ CONFIG_SATA_VIA=y
# CONFIG_PATA_SIS is not set
# CONFIG_PATA_VIA is not set
# CONFIG_PATA_WINBOND is not set
-
-#
-# Multi-device support (RAID and LVM)
-#
CONFIG_MD=y
# CONFIG_BLK_DEV_MD is not set
CONFIG_BLK_DEV_DM=y
@@ -692,7 +654,7 @@ CONFIG_BLK_DEV_DM=y
CONFIG_FUSION=y
CONFIG_FUSION_SPI=y
# CONFIG_FUSION_FC is not set
-CONFIG_FUSION_SAS=y
+# CONFIG_FUSION_SAS is not set
CONFIG_FUSION_MAX_SGE=128
# CONFIG_FUSION_CTL is not set
@@ -710,7 +672,10 @@ CONFIG_IEEE1394=y
#
# Controllers
#
-# CONFIG_IEEE1394_PCILYNX is not set
+
+#
+# Texas Instruments PCILynx requires I2C
+#
CONFIG_IEEE1394_OHCI1394=y
#
@@ -722,32 +687,19 @@ CONFIG_IEEE1394_OHCI1394=y
# CONFIG_IEEE1394_ETH1394 is not set
# CONFIG_IEEE1394_DV1394 is not set
CONFIG_IEEE1394_RAWIO=y
-
-#
-# I2O device support
-#
# CONFIG_I2O is not set
-# CONFIG_MACINTOSH_DRIVERS is not set
-
-#
-# Network device support
-#
+CONFIG_MACINTOSH_DRIVERS=y
+# CONFIG_MAC_EMUMOUSEBTN is not set
CONFIG_NETDEVICES=y
+CONFIG_NETDEVICES_MULTIQUEUE=y
# CONFIG_DUMMY is not set
# CONFIG_BONDING is not set
+# CONFIG_MACVLAN is not set
# CONFIG_EQUALIZER is not set
CONFIG_TUN=y
# CONFIG_NET_SB1000 is not set
-
-#
-# ARCnet devices
-#
# CONFIG_ARCNET is not set
# CONFIG_PHYLIB is not set
-
-#
-# Ethernet (10 or 100Mbit)
-#
CONFIG_NET_ETHERNET=y
CONFIG_MII=y
# CONFIG_HAPPYMEAL is not set
@@ -756,10 +708,6 @@ CONFIG_MII=y
CONFIG_NET_VENDOR_3COM=y
CONFIG_VORTEX=y
# CONFIG_TYPHOON is not set
-
-#
-# Tulip family network device support
-#
CONFIG_NET_TULIP=y
# CONFIG_DE2104X is not set
CONFIG_TULIP=y
@@ -773,7 +721,8 @@ CONFIG_TULIP=y
# CONFIG_HP100 is not set
CONFIG_NET_PCI=y
# CONFIG_PCNET32 is not set
-# CONFIG_AMD8111_ETH is not set
+CONFIG_AMD8111_ETH=y
+# CONFIG_AMD8111E_NAPI is not set
# CONFIG_ADAPTEC_STARFIRE is not set
CONFIG_B44=y
CONFIG_FORCEDETH=y
@@ -808,7 +757,6 @@ CONFIG_E1000=y
# CONFIG_SIS190 is not set
# CONFIG_SKGE is not set
# CONFIG_SKY2 is not set
-# CONFIG_SK98LIN is not set
# CONFIG_VIA_VELOCITY is not set
CONFIG_TIGON3=y
CONFIG_BNX2=y
@@ -823,10 +771,6 @@ CONFIG_S2IO=m
# CONFIG_MYRI10GE is not set
# CONFIG_NETXEN_NIC is not set
# CONFIG_MLX4_CORE is not set
-
-#
-# Token Ring devices
-#
# CONFIG_TR is not set
#
@@ -855,15 +799,7 @@ CONFIG_NETCONSOLE=y
CONFIG_NETPOLL=y
# CONFIG_NETPOLL_TRAP is not set
CONFIG_NET_POLL_CONTROLLER=y
-
-#
-# ISDN subsystem
-#
# CONFIG_ISDN is not set
-
-#
-# Telephony Support
-#
# CONFIG_PHONE is not set
#
@@ -871,6 +807,7 @@ CONFIG_NET_POLL_CONTROLLER=y
#
CONFIG_INPUT=y
# CONFIG_INPUT_FF_MEMLESS is not set
+# CONFIG_INPUT_POLLDEV is not set
#
# Userland interfaces
@@ -936,6 +873,7 @@ CONFIG_HW_CONSOLE=y
#
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_FIX_EARLYCON_MEM=y
CONFIG_SERIAL_8250_PCI=y
CONFIG_SERIAL_8250_PNP=y
CONFIG_SERIAL_8250_NR_UARTS=4
@@ -951,16 +889,11 @@ CONFIG_SERIAL_CORE_CONSOLE=y
CONFIG_UNIX98_PTYS=y
CONFIG_LEGACY_PTYS=y
CONFIG_LEGACY_PTY_COUNT=256
-
-#
-# IPMI
-#
# CONFIG_IPMI_HANDLER is not set
# CONFIG_WATCHDOG is not set
CONFIG_HW_RANDOM=y
CONFIG_HW_RANDOM_INTEL=y
CONFIG_HW_RANDOM_AMD=y
-# CONFIG_HW_RANDOM_GEODE is not set
# CONFIG_NVRAM is not set
CONFIG_RTC=y
# CONFIG_R3964 is not set
@@ -979,127 +912,19 @@ CONFIG_HPET=y
# CONFIG_HPET_RTC_IRQ is not set
CONFIG_HPET_MMAP=y
# CONFIG_HANGCHECK_TIMER is not set
-
-#
-# TPM devices
-#
# CONFIG_TCG_TPM is not set
# CONFIG_TELCLOCK is not set
CONFIG_DEVPORT=y
-CONFIG_I2C=m
-CONFIG_I2C_BOARDINFO=y
-CONFIG_I2C_CHARDEV=m
-
-#
-# I2C Algorithms
-#
-# CONFIG_I2C_ALGOBIT is not set
-# CONFIG_I2C_ALGOPCF is not set
-# CONFIG_I2C_ALGOPCA is not set
-
-#
-# I2C Hardware Bus support
-#
-# CONFIG_I2C_ALI1535 is not set
-# CONFIG_I2C_ALI1563 is not set
-# CONFIG_I2C_ALI15X3 is not set
-# CONFIG_I2C_AMD756 is not set
-# CONFIG_I2C_AMD8111 is not set
-# CONFIG_I2C_I801 is not set
-# CONFIG_I2C_I810 is not set
-# CONFIG_I2C_PIIX4 is not set
-# CONFIG_I2C_NFORCE2 is not set
-# CONFIG_I2C_OCORES is not set
-# CONFIG_I2C_PARPORT_LIGHT is not set
-# CONFIG_I2C_PROSAVAGE is not set
-# CONFIG_I2C_SAVAGE4 is not set
-# CONFIG_I2C_SIMTEC is not set
-# CONFIG_I2C_SIS5595 is not set
-# CONFIG_I2C_SIS630 is not set
-# CONFIG_I2C_SIS96X is not set
-# CONFIG_I2C_STUB is not set
-# CONFIG_I2C_TINY_USB is not set
-# CONFIG_I2C_VIA is not set
-# CONFIG_I2C_VIAPRO is not set
-# CONFIG_I2C_VOODOO3 is not set
-
-#
-# Miscellaneous I2C Chip support
-#
-# CONFIG_SENSORS_DS1337 is not set
-# CONFIG_SENSORS_DS1374 is not set
-# CONFIG_SENSORS_EEPROM is not set
-# CONFIG_SENSORS_PCF8574 is not set
-# CONFIG_SENSORS_PCA9539 is not set
-# CONFIG_SENSORS_PCF8591 is not set
-# CONFIG_SENSORS_MAX6875 is not set
-# CONFIG_I2C_DEBUG_CORE is not set
-# CONFIG_I2C_DEBUG_ALGO is not set
-# CONFIG_I2C_DEBUG_BUS is not set
-# CONFIG_I2C_DEBUG_CHIP is not set
+# CONFIG_I2C is not set
#
# SPI support
#
# CONFIG_SPI is not set
# CONFIG_SPI_MASTER is not set
-
-#
-# Dallas's 1-wire bus
-#
# CONFIG_W1 is not set
-CONFIG_HWMON=y
-# CONFIG_HWMON_VID is not set
-# CONFIG_SENSORS_ABITUGURU is not set
-# CONFIG_SENSORS_AD7418 is not set
-# CONFIG_SENSORS_ADM1021 is not set
-# CONFIG_SENSORS_ADM1025 is not set
-# CONFIG_SENSORS_ADM1026 is not set
-# CONFIG_SENSORS_ADM1029 is not set
-# CONFIG_SENSORS_ADM1031 is not set
-# CONFIG_SENSORS_ADM9240 is not set
-# CONFIG_SENSORS_K8TEMP is not set
-# CONFIG_SENSORS_ASB100 is not set
-# CONFIG_SENSORS_ATXP1 is not set
-# CONFIG_SENSORS_DS1621 is not set
-# CONFIG_SENSORS_F71805F is not set
-# CONFIG_SENSORS_FSCHER is not set
-# CONFIG_SENSORS_FSCPOS is not set
-# CONFIG_SENSORS_GL518SM is not set
-# CONFIG_SENSORS_GL520SM is not set
-CONFIG_SENSORS_CORETEMP=y
-# CONFIG_SENSORS_IT87 is not set
-# CONFIG_SENSORS_LM63 is not set
-# CONFIG_SENSORS_LM75 is not set
-# CONFIG_SENSORS_LM77 is not set
-# CONFIG_SENSORS_LM78 is not set
-# CONFIG_SENSORS_LM80 is not set
-# CONFIG_SENSORS_LM83 is not set
-# CONFIG_SENSORS_LM85 is not set
-# CONFIG_SENSORS_LM87 is not set
-# CONFIG_SENSORS_LM90 is not set
-# CONFIG_SENSORS_LM92 is not set
-# CONFIG_SENSORS_MAX1619 is not set
-# CONFIG_SENSORS_MAX6650 is not set
-# CONFIG_SENSORS_PC87360 is not set
-# CONFIG_SENSORS_PC87427 is not set
-# CONFIG_SENSORS_SIS5595 is not set
-# CONFIG_SENSORS_SMSC47M1 is not set
-# CONFIG_SENSORS_SMSC47M192 is not set
-CONFIG_SENSORS_SMSC47B397=m
-# CONFIG_SENSORS_VIA686A is not set
-# CONFIG_SENSORS_VT1211 is not set
-# CONFIG_SENSORS_VT8231 is not set
-# CONFIG_SENSORS_W83781D is not set
-# CONFIG_SENSORS_W83791D is not set
-# CONFIG_SENSORS_W83792D is not set
-# CONFIG_SENSORS_W83793 is not set
-# CONFIG_SENSORS_W83L785TS is not set
-# CONFIG_SENSORS_W83627HF is not set
-# CONFIG_SENSORS_W83627EHF is not set
-# CONFIG_SENSORS_HDAPS is not set
-# CONFIG_SENSORS_APPLESMC is not set
-# CONFIG_HWMON_DEBUG_CHIP is not set
+# CONFIG_POWER_SUPPLY is not set
+# CONFIG_HWMON is not set
#
# Multifunction device drivers
@@ -1149,15 +974,11 @@ CONFIG_SOUND=y
# Open Sound System
#
CONFIG_SOUND_PRIME=y
-# CONFIG_OSS_OBSOLETE is not set
# CONFIG_SOUND_TRIDENT is not set
# CONFIG_SOUND_MSNDCLAS is not set
# CONFIG_SOUND_MSNDPIN is not set
# CONFIG_SOUND_OSS is not set
-
-#
-# HID Devices
-#
+CONFIG_HID_SUPPORT=y
CONFIG_HID=y
# CONFIG_HID_DEBUG is not set
@@ -1168,10 +989,7 @@ CONFIG_USB_HID=y
# CONFIG_USB_HIDINPUT_POWERBOOK is not set
# CONFIG_HID_FF is not set
# CONFIG_USB_HIDDEV is not set
-
-#
-# USB support
-#
+CONFIG_USB_SUPPORT=y
CONFIG_USB_ARCH_HAS_HCD=y
CONFIG_USB_ARCH_HAS_OHCI=y
CONFIG_USB_ARCH_HAS_EHCI=y
@@ -1185,6 +1003,7 @@ CONFIG_USB_DEVICEFS=y
# CONFIG_USB_DEVICE_CLASS is not set
# CONFIG_USB_DYNAMIC_MINORS is not set
# CONFIG_USB_SUSPEND is not set
+# CONFIG_USB_PERSIST is not set
# CONFIG_USB_OTG is not set
#
@@ -1194,7 +1013,6 @@ CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_SPLIT_ISO is not set
# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
# CONFIG_USB_EHCI_TT_NEWSCHED is not set
-# CONFIG_USB_EHCI_BIG_ENDIAN_MMIO is not set
# CONFIG_USB_ISP116X_HCD is not set
CONFIG_USB_OHCI_HCD=y
# CONFIG_USB_OHCI_BIG_ENDIAN_DESC is not set
@@ -1202,6 +1020,7 @@ CONFIG_USB_OHCI_HCD=y
CONFIG_USB_OHCI_LITTLE_ENDIAN=y
CONFIG_USB_UHCI_HCD=y
# CONFIG_USB_SL811_HCD is not set
+# CONFIG_USB_R8A66597_HCD is not set
#
# USB Device Class drivers
@@ -1292,15 +1111,7 @@ CONFIG_USB_MON=y
#
# LED Triggers
#
-
-#
-# InfiniBand support
-#
# CONFIG_INFINIBAND is not set
-
-#
-# EDAC - error detection and reporting (RAS) (EXPERIMENTAL)
-#
# CONFIG_EDAC is not set
#
@@ -1320,11 +1131,13 @@ CONFIG_USB_MON=y
#
# DMA Devices
#
+CONFIG_VIRTUALIZATION=y
+# CONFIG_KVM is not set
#
-# Virtualization
+# Userspace I/O
#
-# CONFIG_KVM is not set
+# CONFIG_UIO is not set
#
# Firmware Drivers
@@ -1332,6 +1145,7 @@ CONFIG_USB_MON=y
# CONFIG_EDD is not set
# CONFIG_DELL_RBU is not set
# CONFIG_DCDBAS is not set
+CONFIG_DMIID=y
#
# File systems
@@ -1447,7 +1261,6 @@ CONFIG_SUNRPC=y
# CONFIG_NCP_FS is not set
# CONFIG_CODA_FS is not set
# CONFIG_AFS_FS is not set
-# CONFIG_9P_FS is not set
#
# Partition Types
@@ -1524,8 +1337,9 @@ CONFIG_DEBUG_FS=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_DEBUG_SHIRQ is not set
CONFIG_DETECT_SOFTLOCKUP=y
+# CONFIG_SCHED_DEBUG is not set
# CONFIG_SCHEDSTATS is not set
-# CONFIG_TIMER_STATS is not set
+CONFIG_TIMER_STATS=y
# CONFIG_DEBUG_SLAB is not set
# CONFIG_DEBUG_RT_MUTEXES is not set
# CONFIG_RT_MUTEX_TESTER is not set
@@ -1533,6 +1347,7 @@ CONFIG_DETECT_SOFTLOCKUP=y
# CONFIG_DEBUG_MUTEXES is not set
# CONFIG_DEBUG_LOCK_ALLOC is not set
# CONFIG_PROVE_LOCKING is not set
+# CONFIG_LOCK_STAT is not set
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set
# CONFIG_DEBUG_KOBJECT is not set
@@ -1541,8 +1356,6 @@ CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_VM is not set
# CONFIG_DEBUG_LIST is not set
# CONFIG_FRAME_POINTER is not set
-CONFIG_UNWIND_INFO=y
-CONFIG_STACK_UNWIND=y
# CONFIG_FORCED_INLINING is not set
# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_LKDTM is not set
@@ -1557,10 +1370,6 @@ CONFIG_DEBUG_STACKOVERFLOW=y
#
# CONFIG_KEYS is not set
# CONFIG_SECURITY is not set
-
-#
-# Cryptographic options
-#
# CONFIG_CRYPTO is not set
#
@@ -1571,6 +1380,7 @@ CONFIG_BITREVERSE=y
# CONFIG_CRC16 is not set
# CONFIG_CRC_ITU_T is not set
CONFIG_CRC32=y
+# CONFIG_CRC7 is not set
# CONFIG_LIBCRC32C is not set
CONFIG_ZLIB_INFLATE=y
CONFIG_PLIST=y
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index ed56a8806ea..b70f3e7cf06 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -38,6 +38,7 @@
int sysctl_vsyscall32 = 1;
+#undef ARCH_DLINFO
#define ARCH_DLINFO do { \
if (sysctl_vsyscall32) { \
NEW_AUX_ENT(AT_SYSINFO, (u32)(u64)VSYSCALL32_VSYSCALL); \
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index 3f66e970d86..938278697e2 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -104,7 +104,7 @@ ENTRY(ia32_sysenter_target)
pushq %rax
CFI_ADJUST_CFA_OFFSET 8
cld
- SAVE_ARGS 0,0,0
+ SAVE_ARGS 0,0,1
/* no need to do an access_ok check here because rbp has been
32bit zero extended */
1: movl (%rbp),%r9d
@@ -294,7 +294,7 @@ ia32_badarg:
*/
ENTRY(ia32_syscall)
- CFI_STARTPROC simple
+ CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,SS+8-RIP
/*CFI_REL_OFFSET ss,SS-RIP*/
@@ -330,6 +330,7 @@ ia32_sysret:
ia32_tracesys:
SAVE_REST
+ CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* really needed? */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index a3d450d6c15..8f681cae7bf 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -20,7 +20,7 @@
#include <linux/ioport.h>
#include <asm/e820.h>
#include <asm/io.h>
-#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/pci-direct.h>
#include <asm/dma.h>
#include <asm/k8.h>
@@ -214,7 +214,7 @@ void __init iommu_hole_init(void)
if (iommu_aperture_disabled || !fix_aperture || !early_pci_allowed())
return;
- printk("Checking aperture...\n");
+ printk(KERN_INFO "Checking aperture...\n");
fix = 0;
for (num = 24; num < 32; num++) {
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index 1b0e07bb872..900ff38d68d 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -92,8 +92,9 @@ unsigned int safe_apic_wait_icr_idle(void)
void enable_NMI_through_LVT0 (void * dummy)
{
unsigned int v;
-
- v = APIC_DM_NMI; /* unmask and set to NMI */
+
+ /* unmask and set to NMI */
+ v = APIC_DM_NMI;
apic_write(APIC_LVT0, v);
}
@@ -120,7 +121,7 @@ void ack_bad_irq(unsigned int irq)
* holds up an irq slot - in excessive cases (when multiple
* unexpected vectors occur) that might lock up the APIC
* completely.
- * But don't ack when the APIC is disabled. -AK
+ * But don't ack when the APIC is disabled. -AK
*/
if (!disable_apic)
ack_APIC_irq();
@@ -616,7 +617,7 @@ early_param("apic", apic_set_verbosity);
* Detect and enable local APICs on non-SMP boards.
* Original code written by Keir Fraser.
* On AMD64 we trust the BIOS - if it says no APIC it is likely
- * not correctly set up (usually the APIC timer won't work etc.)
+ * not correctly set up (usually the APIC timer won't work etc.)
*/
static int __init detect_init_APIC (void)
@@ -789,13 +790,13 @@ static void setup_APIC_timer(unsigned int clocks)
local_irq_save(flags);
/* wait for irq slice */
- if (hpet_address && hpet_use_timer) {
- int trigger = hpet_readl(HPET_T0_CMP);
- while (hpet_readl(HPET_COUNTER) >= trigger)
- /* do nothing */ ;
- while (hpet_readl(HPET_COUNTER) < trigger)
- /* do nothing */ ;
- } else {
+ if (hpet_address && hpet_use_timer) {
+ int trigger = hpet_readl(HPET_T0_CMP);
+ while (hpet_readl(HPET_COUNTER) >= trigger)
+ /* do nothing */ ;
+ while (hpet_readl(HPET_COUNTER) < trigger)
+ /* do nothing */ ;
+ } else {
int c1, c2;
outb_p(0x00, 0x43);
c2 = inb_p(0x40);
@@ -881,10 +882,10 @@ static unsigned int calibration_result;
void __init setup_boot_APIC_clock (void)
{
- if (disable_apic_timer) {
- printk(KERN_INFO "Disabling APIC timer\n");
- return;
- }
+ if (disable_apic_timer) {
+ printk(KERN_INFO "Disabling APIC timer\n");
+ return;
+ }
printk(KERN_INFO "Using local APIC timer interrupts.\n");
using_apic_timer = 1;
@@ -990,8 +991,8 @@ int setup_profiling_timer(unsigned int multiplier)
return -EINVAL;
}
-void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
- unsigned char msg_type, unsigned char mask)
+void setup_APIC_extended_lvt(unsigned char lvt_off, unsigned char vector,
+ unsigned char msg_type, unsigned char mask)
{
unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
unsigned int v = (mask << 16) | (msg_type << 8) | vector;
@@ -1128,20 +1129,6 @@ asmlinkage void smp_spurious_interrupt(void)
if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
ack_APIC_irq();
-#if 0
- static unsigned long last_warning;
- static unsigned long skipped;
-
- /* see sw-dev-man vol 3, chapter 7.4.13.5 */
- if (time_before(last_warning+30*HZ,jiffies)) {
- printk(KERN_INFO "spurious APIC interrupt on CPU#%d, %ld skipped.\n",
- smp_processor_id(), skipped);
- last_warning = jiffies;
- skipped = 0;
- } else {
- skipped++;
- }
-#endif
irq_exit();
}
@@ -1173,11 +1160,11 @@ asmlinkage void smp_error_interrupt(void)
7: Illegal register address
*/
printk (KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n",
- smp_processor_id(), v , v1);
+ smp_processor_id(), v , v1);
irq_exit();
}
-int disable_apic;
+int disable_apic;
/*
* This initializes the IO-APIC and APIC hardware if this is
@@ -1185,11 +1172,11 @@ int disable_apic;
*/
int __init APIC_init_uniprocessor (void)
{
- if (disable_apic) {
+ if (disable_apic) {
printk(KERN_INFO "Apic disabled\n");
- return -1;
+ return -1;
}
- if (!cpu_has_apic) {
+ if (!cpu_has_apic) {
disable_apic = 1;
printk(KERN_INFO "Apic disabled by BIOS\n");
return -1;
@@ -1211,8 +1198,8 @@ int __init APIC_init_uniprocessor (void)
return 0;
}
-static __init int setup_disableapic(char *str)
-{
+static __init int setup_disableapic(char *str)
+{
disable_apic = 1;
clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
return 0;
@@ -1220,10 +1207,10 @@ static __init int setup_disableapic(char *str)
early_param("disableapic", setup_disableapic);
/* same as disableapic, for compatibility */
-static __init int setup_nolapic(char *str)
-{
+static __init int setup_nolapic(char *str)
+{
return setup_disableapic(str);
-}
+}
early_param("nolapic", setup_nolapic);
static int __init parse_lapic_timer_c2_ok(char *arg)
@@ -1233,13 +1220,13 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
}
early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
-static __init int setup_noapictimer(char *str)
-{
+static __init int setup_noapictimer(char *str)
+{
if (str[0] != ' ' && str[0] != 0)
return 0;
disable_apic_timer = 1;
return 1;
-}
+}
static __init int setup_apicmaintimer(char *str)
{
@@ -1264,5 +1251,5 @@ static __init int setup_apicpmtimer(char *s)
}
__setup("apicpmtimer", setup_apicpmtimer);
-__setup("noapictimer", setup_noapictimer);
+__setup("noapictimer", setup_noapictimer);
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index 13c6c37610e..0f4d5e209e9 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -194,37 +194,6 @@ unsigned long __init e820_end_of_ram(void)
}
/*
- * Find the hole size in the range.
- */
-unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
-{
- unsigned long ram = 0;
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- unsigned long last, addr;
-
- if (ei->type != E820_RAM ||
- ei->addr+ei->size <= start ||
- ei->addr >= end)
- continue;
-
- addr = round_up(ei->addr, PAGE_SIZE);
- if (addr < start)
- addr = start;
-
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
- if (last >= end)
- last = end;
-
- if (last > addr)
- ram += last - addr;
- }
- return ((end - start) - ram);
-}
-
-/*
* Mark e820 reserved areas as busy for the resource manager.
*/
void __init e820_reserve_resources(void)
@@ -289,47 +258,61 @@ void __init e820_mark_nosave_regions(void)
}
}
-/* Walk the e820 map and register active regions within a node */
-void __init
-e820_register_active_regions(int nid, unsigned long start_pfn,
- unsigned long end_pfn)
+/*
+ * Finds an active region in the address range from start_pfn to end_pfn and
+ * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
+ */
+static int __init e820_find_active_region(const struct e820entry *ei,
+ unsigned long start_pfn,
+ unsigned long end_pfn,
+ unsigned long *ei_startpfn,
+ unsigned long *ei_endpfn)
{
- int i;
- unsigned long ei_startpfn, ei_endpfn;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
- ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
- >> PAGE_SHIFT;
+ *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
+ *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
- /* Skip map entries smaller than a page */
- if (ei_startpfn >= ei_endpfn)
- continue;
+ /* Skip map entries smaller than a page */
+ if (*ei_startpfn >= *ei_endpfn)
+ return 0;
- /* Check if end_pfn_map should be updated */
- if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
- end_pfn_map = ei_endpfn;
+ /* Check if end_pfn_map should be updated */
+ if (ei->type != E820_RAM && *ei_endpfn > end_pfn_map)
+ end_pfn_map = *ei_endpfn;
- /* Skip if map is outside the node */
- if (ei->type != E820_RAM ||
- ei_endpfn <= start_pfn ||
- ei_startpfn >= end_pfn)
- continue;
+ /* Skip if map is outside the node */
+ if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
+ *ei_startpfn >= end_pfn)
+ return 0;
- /* Check for overlaps */
- if (ei_startpfn < start_pfn)
- ei_startpfn = start_pfn;
- if (ei_endpfn > end_pfn)
- ei_endpfn = end_pfn;
+ /* Check for overlaps */
+ if (*ei_startpfn < start_pfn)
+ *ei_startpfn = start_pfn;
+ if (*ei_endpfn > end_pfn)
+ *ei_endpfn = end_pfn;
- /* Obey end_user_pfn to save on memmap */
- if (ei_startpfn >= end_user_pfn)
- continue;
- if (ei_endpfn > end_user_pfn)
- ei_endpfn = end_user_pfn;
+ /* Obey end_user_pfn to save on memmap */
+ if (*ei_startpfn >= end_user_pfn)
+ return 0;
+ if (*ei_endpfn > end_user_pfn)
+ *ei_endpfn = end_user_pfn;
- add_active_range(nid, ei_startpfn, ei_endpfn);
- }
+ return 1;
+}
+
+/* Walk the e820 map and register active regions within a node */
+void __init
+e820_register_active_regions(int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long ei_startpfn;
+ unsigned long ei_endpfn;
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++)
+ if (e820_find_active_region(&e820.map[i],
+ start_pfn, end_pfn,
+ &ei_startpfn, &ei_endpfn))
+ add_active_range(nid, ei_startpfn, ei_endpfn);
}
/*
@@ -350,12 +333,35 @@ void __init add_memory_region(unsigned long start, unsigned long size, int type)
e820.nr_map++;
}
+/*
+ * Find the hole size (in bytes) in the memory range.
+ * @start: starting address of the memory range to scan
+ * @end: ending address of the memory range to scan
+ */
+unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
+{
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long end_pfn = end >> PAGE_SHIFT;
+ unsigned long ei_startpfn;
+ unsigned long ei_endpfn;
+ unsigned long ram = 0;
+ int i;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ if (e820_find_active_region(&e820.map[i],
+ start_pfn, end_pfn,
+ &ei_startpfn, &ei_endpfn))
+ ram += ei_endpfn - ei_startpfn;
+ }
+ return end - start - (ram << PAGE_SHIFT);
+}
+
void __init e820_print_map(char *who)
{
int i;
for (i = 0; i < e820.nr_map; i++) {
- printk(" %s: %016Lx - %016Lx ", who,
+ printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
(unsigned long long) e820.map[i].addr,
(unsigned long long) (e820.map[i].addr + e820.map[i].size));
switch (e820.map[i].type) {
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c
index 990d9c218a5..13aa4fd728f 100644
--- a/arch/x86_64/kernel/early-quirks.c
+++ b/arch/x86_64/kernel/early-quirks.c
@@ -14,6 +14,7 @@
#include <linux/pci_ids.h>
#include <asm/pci-direct.h>
#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/dma.h>
static void __init via_bugs(void)
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index a67f87bf401..830cfc6ee8c 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -282,7 +282,7 @@ sysret_careful:
sysret_signal:
TRACE_IRQS_ON
sti
- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+ testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
jz 1f
/* Really a signal */
@@ -375,7 +375,7 @@ int_very_careful:
jmp int_restore_rest
int_signal:
- testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP),%edx
+ testl $(_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
jz 1f
movq %rsp,%rdi # &ptregs -> arg1
xorl %esi,%esi # oldset -> arg2
@@ -599,7 +599,7 @@ retint_careful:
jmp retint_check
retint_signal:
- testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP),%edx
+ testl $(_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SINGLESTEP|_TIF_MCE_NOTIFY),%edx
jz retint_swapgs
TRACE_IRQS_ON
sti
diff --git a/arch/x86_64/kernel/hpet.c b/arch/x86_64/kernel/hpet.c
index b8286968662..636f4f9fc6b 100644
--- a/arch/x86_64/kernel/hpet.c
+++ b/arch/x86_64/kernel/hpet.c
@@ -190,7 +190,7 @@ int hpet_reenable(void)
*/
#define TICK_COUNT 100000000
-#define TICK_MIN 5000
+#define SMI_THRESHOLD 50000
#define MAX_TRIES 5
/*
@@ -205,7 +205,7 @@ static void __init read_hpet_tsc(int *hpet, int *tsc)
tsc1 = get_cycles_sync();
hpet1 = hpet_readl(HPET_COUNTER);
tsc2 = get_cycles_sync();
- if (tsc2 - tsc1 > TICK_MIN)
+ if ((tsc2 - tsc1) < SMI_THRESHOLD)
break;
}
*hpet = hpet1;
@@ -439,7 +439,7 @@ int hpet_rtc_dropped_irq(void)
return 1;
}
-irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id)
{
struct rtc_time curr_time;
unsigned long rtc_int_flag = 0;
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index 4b326655b20..948cae64609 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -444,24 +444,6 @@ void __init init_ISA_irqs (void)
}
}
-void apic_timer_interrupt(void);
-void spurious_interrupt(void);
-void error_interrupt(void);
-void reschedule_interrupt(void);
-void call_function_interrupt(void);
-void irq_move_cleanup_interrupt(void);
-void invalidate_interrupt0(void);
-void invalidate_interrupt1(void);
-void invalidate_interrupt2(void);
-void invalidate_interrupt3(void);
-void invalidate_interrupt4(void);
-void invalidate_interrupt5(void);
-void invalidate_interrupt6(void);
-void invalidate_interrupt7(void);
-void thermal_interrupt(void);
-void threshold_interrupt(void);
-void i8254_timer_resume(void);
-
static void setup_timer_hardware(void)
{
outb_p(0x34,0x43); /* binary, mode 2, LSB/MSB, ch 0 */
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 1c6c6f72457..050141c0602 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -152,6 +152,32 @@ static inline void io_apic_modify(unsigned int apic, unsigned int value)
writel(value, &io_apic->data);
}
+static int io_apic_level_ack_pending(unsigned int irq)
+{
+ struct irq_pin_list *entry;
+ unsigned long flags;
+ int pending = 0;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ entry = irq_2_pin + irq;
+ for (;;) {
+ unsigned int reg;
+ int pin;
+
+ pin = entry->pin;
+ if (pin == -1)
+ break;
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
+ /* Is the remote IRR bit set? */
+ pending |= (reg >> 14) & 1;
+ if (!entry->next)
+ break;
+ entry = irq_2_pin + entry->next;
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+ return pending;
+}
+
/*
* Synchronize the IO-APIC and the CPU by doing
* a dummy read from the IO-APIC
@@ -1418,9 +1444,37 @@ static void ack_apic_level(unsigned int irq)
ack_APIC_irq();
/* Now we can move and renable the irq */
- move_masked_irq(irq);
- if (unlikely(do_unmask_irq))
+ if (unlikely(do_unmask_irq)) {
+ /* Only migrate the irq if the ack has been received.
+ *
+ * On rare occasions the broadcast level triggered ack gets
+ * delayed going to ioapics, and if we reprogram the
+ * vector while Remote IRR is still set the irq will never
+ * fire again.
+ *
+ * To prevent this scenario we read the Remote IRR bit
+ * of the ioapic. This has two effects.
+ * - On any sane system the read of the ioapic will
+ * flush writes (and acks) going to the ioapic from
+ * this cpu.
+ * - We get to see if the ACK has actually been delivered.
+ *
+ * Based on failed experiments of reprogramming the
+ * ioapic entry from outside of irq context starting
+ * with masking the ioapic entry and then polling until
+ * Remote IRR was clear before reprogramming the
+ * ioapic I don't trust the Remote IRR bit to be
+ * completely accurate.
+ *
+ * However there appears to be no other way to plug
+ * this race, so if the Remote IRR bit is not
+ * accurate and is causing problems then it is a hardware bug
+ * and you can go talk to the chipset vendor about it.
+ */
+ if (!io_apic_level_ack_pending(irq))
+ move_masked_irq(irq);
unmask_IO_APIC_irq(irq);
+ }
}
static struct irq_chip ioapic_chip __read_mostly = {
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index f3fb8174559..4d8450ee363 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -18,6 +18,8 @@
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
+#include <linux/poll.h>
+#include <linux/thread_info.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/kdebug.h>
@@ -26,6 +28,7 @@
#include <asm/mce.h>
#include <asm/uaccess.h>
#include <asm/smp.h>
+#include <asm/idle.h>
#define MISC_MCELOG_MINOR 227
#define NR_BANKS 6
@@ -34,13 +37,17 @@ atomic_t mce_entry;
static int mce_dont_init;
-/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
- 3: never panic or exit (for testing only) */
+/*
+ * Tolerant levels:
+ * 0: always panic on uncorrected errors, log corrected errors
+ * 1: panic or SIGBUS on uncorrected errors, log corrected errors
+ * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors
+ * 3: never panic or SIGBUS, log all errors (for testing only)
+ */
static int tolerant = 1;
static int banks;
static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
-static unsigned long console_logged;
-static int notify_user;
+static unsigned long notify_user;
static int rip_msr;
static int mce_bootlog = 1;
static atomic_t mce_events;
@@ -48,6 +55,8 @@ static atomic_t mce_events;
static char trigger[128];
static char *trigger_argv[2] = { trigger, NULL };
+static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
+
/*
* Lockless MCE logging infrastructure.
* This avoids deadlocks on printk locks without having to break locks. Also
@@ -94,8 +103,7 @@ void mce_log(struct mce *mce)
mcelog.entry[entry].finished = 1;
wmb();
- if (!test_and_set_bit(0, &console_logged))
- notify_user = 1;
+ set_bit(0, &notify_user);
}
static void print_mce(struct mce *m)
@@ -128,6 +136,7 @@ static void print_mce(struct mce *m)
static void mce_panic(char *msg, struct mce *backup, unsigned long start)
{
int i;
+
oops_begin();
for (i = 0; i < MCE_LOG_LEN; i++) {
unsigned long tsc = mcelog.entry[i].tsc;
@@ -139,10 +148,7 @@ static void mce_panic(char *msg, struct mce *backup, unsigned long start)
}
if (backup)
print_mce(backup);
- if (tolerant >= 3)
- printk("Fake panic: %s\n", msg);
- else
- panic(msg);
+ panic(msg);
}
static int mce_available(struct cpuinfo_x86 *c)
@@ -167,17 +173,6 @@ static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
}
}
-static void do_mce_trigger(void)
-{
- static atomic_t mce_logged;
- int events = atomic_read(&mce_events);
- if (events != atomic_read(&mce_logged) && trigger[0]) {
- /* Small race window, but should be harmless. */
- atomic_set(&mce_logged, events);
- call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT);
- }
-}
-
/*
* The actual machine check handler
*/
@@ -185,11 +180,19 @@ static void do_mce_trigger(void)
void do_machine_check(struct pt_regs * regs, long error_code)
{
struct mce m, panicm;
- int nowayout = (tolerant < 1);
- int kill_it = 0;
u64 mcestart = 0;
int i;
int panicm_found = 0;
+ /*
+ * If no_way_out gets set, there is no safe way to recover from this
+ * MCE. If tolerant is cranked up, we'll try anyway.
+ */
+ int no_way_out = 0;
+ /*
+ * If kill_it gets set, there might be a way to recover from this
+ * error.
+ */
+ int kill_it = 0;
atomic_inc(&mce_entry);
@@ -201,8 +204,9 @@ void do_machine_check(struct pt_regs * regs, long error_code)
memset(&m, 0, sizeof(struct mce));
m.cpu = smp_processor_id();
rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
+ /* if the restart IP is not valid, we're done for */
if (!(m.mcgstatus & MCG_STATUS_RIPV))
- kill_it = 1;
+ no_way_out = 1;
rdtscll(mcestart);
barrier();
@@ -221,10 +225,18 @@ void do_machine_check(struct pt_regs * regs, long error_code)
continue;
if (m.status & MCI_STATUS_EN) {
- /* In theory _OVER could be a nowayout too, but
- assume any overflowed errors were no fatal. */
- nowayout |= !!(m.status & MCI_STATUS_PCC);
- kill_it |= !!(m.status & MCI_STATUS_UC);
+ /* if PCC was set, there's no way out */
+ no_way_out |= !!(m.status & MCI_STATUS_PCC);
+ /*
+ * If this error was uncorrectable and there was
+ * an overflow, we're in trouble. If no overflow,
+ * we might get away with just killing a task.
+ */
+ if (m.status & MCI_STATUS_UC) {
+ if (tolerant < 1 || m.status & MCI_STATUS_OVER)
+ no_way_out = 1;
+ kill_it = 1;
+ }
}
if (m.status & MCI_STATUS_MISCV)
@@ -235,7 +247,6 @@ void do_machine_check(struct pt_regs * regs, long error_code)
mce_get_rip(&m, regs);
if (error_code >= 0)
rdtscll(m.tsc);
- wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
if (error_code != -2)
mce_log(&m);
@@ -251,45 +262,59 @@ void do_machine_check(struct pt_regs * regs, long error_code)
}
/* Never do anything final in the polling timer */
- if (!regs) {
- /* Normal interrupt context here. Call trigger for any new
- events. */
- do_mce_trigger();
+ if (!regs)
goto out;
- }
/* If we didn't find an uncorrectable error, pick
the last one (shouldn't happen, just being safe). */
if (!panicm_found)
panicm = m;
- if (nowayout)
+
+ /*
+ * If we have decided that we just CAN'T continue, and the user
+ * has not set tolerant to an insane level, give up and die.
+ */
+ if (no_way_out && tolerant < 3)
mce_panic("Machine check", &panicm, mcestart);
- if (kill_it) {
+
+ /*
+ * If the error seems to be unrecoverable, something should be
+ * done. Try to kill as little as possible. If we can kill just
+ * one task, do that. If the user has set the tolerance very
+ * high, don't try to do anything at all.
+ */
+ if (kill_it && tolerant < 3) {
int user_space = 0;
- if (m.mcgstatus & MCG_STATUS_RIPV)
+ /*
+ * If the EIPV bit is set, it means the saved IP is the
+ * instruction which caused the MCE.
+ */
+ if (m.mcgstatus & MCG_STATUS_EIPV)
user_space = panicm.rip && (panicm.cs & 3);
-
- /* When the machine was in user space and the CPU didn't get
- confused it's normally not necessary to panic, unless you
- are paranoid (tolerant == 0)
-
- RED-PEN could be more tolerant for MCEs in idle,
- but most likely they occur at boot anyways, where
- it is best to just halt the machine. */
- if ((!user_space && (panic_on_oops || tolerant < 2)) ||
- (unsigned)current->pid <= 1)
- mce_panic("Uncorrected machine check", &panicm, mcestart);
-
- /* do_exit takes an awful lot of locks and has as
- slight risk of deadlocking. If you don't want that
- don't set tolerant >= 2 */
- if (tolerant < 3)
+
+ /*
+ * If we know that the error was in user space, send a
+ * SIGBUS. Otherwise, panic if tolerance is low.
+ *
+ * do_exit() takes an awful lot of locks and has a slight
+ * risk of deadlocking.
+ */
+ if (user_space) {
do_exit(SIGBUS);
+ } else if (panic_on_oops || tolerant < 2) {
+ mce_panic("Uncorrected machine check",
+ &panicm, mcestart);
+ }
}
+ /* notify userspace ASAP */
+ set_thread_flag(TIF_MCE_NOTIFY);
+
out:
- /* Last thing done in the machine check exception to clear state. */
+ /* the last thing we do is clear state */
+ for (i = 0; i < banks; i++)
+ wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
wrmsrl(MSR_IA32_MCG_STATUS, 0);
out2:
atomic_dec(&mce_entry);
@@ -344,37 +369,69 @@ static void mcheck_timer(struct work_struct *work)
on_each_cpu(mcheck_check_cpu, NULL, 1, 1);
/*
- * It's ok to read stale data here for notify_user and
- * console_logged as we'll simply get the updated versions
- * on the next mcheck_timer execution and atomic operations
- * on console_logged act as synchronization for notify_user
- * writes.
+ * Alert userspace if needed. If we logged an MCE, reduce the
+ * polling interval, otherwise increase the polling interval.
*/
- if (notify_user && console_logged) {
+ if (mce_notify_user()) {
+ next_interval = max(next_interval/2, HZ/100);
+ } else {
+ next_interval = min(next_interval*2,
+ (int)round_jiffies_relative(check_interval*HZ));
+ }
+
+ schedule_delayed_work(&mcheck_work, next_interval);
+}
+
+/*
+ * This is only called from process context. This is where we do
+ * anything we need to alert userspace about new MCEs. This is called
+ * directly from the poller and also from entry.S and idle, thanks to
+ * TIF_MCE_NOTIFY.
+ */
+int mce_notify_user(void)
+{
+ clear_thread_flag(TIF_MCE_NOTIFY);
+ if (test_and_clear_bit(0, &notify_user)) {
static unsigned long last_print;
unsigned long now = jiffies;
- /* if we logged an MCE, reduce the polling interval */
- next_interval = max(next_interval/2, HZ/100);
- notify_user = 0;
- clear_bit(0, &console_logged);
+ wake_up_interruptible(&mce_wait);
+ if (trigger[0])
+ call_usermodehelper(trigger, trigger_argv, NULL,
+ UMH_NO_WAIT);
+
if (time_after_eq(now, last_print + (check_interval*HZ))) {
last_print = now;
printk(KERN_INFO "Machine check events logged\n");
}
- } else {
- next_interval = min(next_interval*2, check_interval*HZ);
+
+ return 1;
}
+ return 0;
+}
- schedule_delayed_work(&mcheck_work, next_interval);
+/* see if the idle task needs to notify userspace */
+static int
+mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk)
+{
+ /* IDLE_END should be safe - interrupts are back on */
+ if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY))
+ mce_notify_user();
+
+ return NOTIFY_OK;
}
+static struct notifier_block mce_idle_notifier = {
+ .notifier_call = mce_idle_callback,
+};
static __init int periodic_mcheck_init(void)
{
next_interval = check_interval * HZ;
if (next_interval)
- schedule_delayed_work(&mcheck_work, next_interval);
+ schedule_delayed_work(&mcheck_work,
+ round_jiffies_relative(next_interval));
+ idle_notifier_register(&mce_idle_notifier);
return 0;
}
__initcall(periodic_mcheck_init);
@@ -465,6 +522,40 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
* Character device to read and clear the MCE log.
*/
+static DEFINE_SPINLOCK(mce_state_lock);
+static int open_count; /* #times opened */
+static int open_exclu; /* already open exclusive? */
+
+static int mce_open(struct inode *inode, struct file *file)
+{
+ spin_lock(&mce_state_lock);
+
+ if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
+ spin_unlock(&mce_state_lock);
+ return -EBUSY;
+ }
+
+ if (file->f_flags & O_EXCL)
+ open_exclu = 1;
+ open_count++;
+
+ spin_unlock(&mce_state_lock);
+
+ return nonseekable_open(inode, file);
+}
+
+static int mce_release(struct inode *inode, struct file *file)
+{
+ spin_lock(&mce_state_lock);
+
+ open_count--;
+ open_exclu = 0;
+
+ spin_unlock(&mce_state_lock);
+
+ return 0;
+}
+
static void collect_tscs(void *data)
{
unsigned long *cpu_tsc = (unsigned long *)data;
@@ -532,6 +623,14 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff
return err ? -EFAULT : buf - ubuf;
}
+static unsigned int mce_poll(struct file *file, poll_table *wait)
+{
+ poll_wait(file, &mce_wait, wait);
+ if (rcu_dereference(mcelog.next))
+ return POLLIN | POLLRDNORM;
+ return 0;
+}
+
static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned long arg)
{
int __user *p = (int __user *)arg;
@@ -555,7 +654,10 @@ static int mce_ioctl(struct inode *i, struct file *f,unsigned int cmd, unsigned
}
static const struct file_operations mce_chrdev_ops = {
+ .open = mce_open,
+ .release = mce_release,
.read = mce_read,
+ .poll = mce_poll,
.ioctl = mce_ioctl,
};
@@ -620,7 +722,8 @@ static void mce_restart(void)
on_each_cpu(mce_init, NULL, 1, 1);
next_interval = check_interval * HZ;
if (next_interval)
- schedule_delayed_work(&mcheck_work, next_interval);
+ schedule_delayed_work(&mcheck_work,
+ round_jiffies_relative(next_interval));
}
static struct sysdev_class mce_sysclass = {
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c
index 03356e64f9c..2f8a7f18b0f 100644
--- a/arch/x86_64/kernel/mce_amd.c
+++ b/arch/x86_64/kernel/mce_amd.c
@@ -157,9 +157,9 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c)
high |= K8_APIC_EXT_LVT_ENTRY_THRESHOLD << 20;
wrmsr(address, low, high);
- setup_APIC_extened_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
- THRESHOLD_APIC_VECTOR,
- K8_APIC_EXT_INT_MSG_FIX, 0);
+ setup_APIC_extended_lvt(K8_APIC_EXT_LVT_ENTRY_THRESHOLD,
+ THRESHOLD_APIC_VECTOR,
+ K8_APIC_EXT_INT_MSG_FIX, 0);
threshold_defaults.address = address;
threshold_restart_bank(&threshold_defaults, 0, 0);
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 61ae57eb9e4..8bf0ca03ac8 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -32,7 +32,6 @@
/* Have we found an MP table */
int smp_found_config;
-unsigned int __initdata maxcpus = NR_CPUS;
/*
* Various Linux-internal data structures created from the
@@ -649,6 +648,20 @@ static int mp_find_ioapic(int gsi)
return -1;
}
+static u8 uniq_ioapic_id(u8 id)
+{
+ int i;
+ DECLARE_BITMAP(used, 256);
+ bitmap_zero(used, 256);
+ for (i = 0; i < nr_ioapics; i++) {
+ struct mpc_config_ioapic *ia = &mp_ioapics[i];
+ __set_bit(ia->mpc_apicid, used);
+ }
+ if (!test_bit(id, used))
+ return id;
+ return find_first_zero_bit(used, 256);
+}
+
void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
{
int idx = 0;
@@ -656,14 +669,14 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
if (bad_ioapic(address))
return;
- idx = nr_ioapics++;
+ idx = nr_ioapics;
mp_ioapics[idx].mpc_type = MP_IOAPIC;
mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
mp_ioapics[idx].mpc_apicaddr = address;
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
- mp_ioapics[idx].mpc_apicid = id;
+ mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
mp_ioapics[idx].mpc_apicver = 0;
/*
@@ -680,6 +693,8 @@ void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
mp_ioapics[idx].mpc_apicaddr,
mp_ioapic_routing[idx].gsi_start,
mp_ioapic_routing[idx].gsi_end);
+
+ nr_ioapics++;
}
void __init
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c
index 5bd20b542c1..ba16c968ca3 100644
--- a/arch/x86_64/kernel/pci-calgary.c
+++ b/arch/x86_64/kernel/pci-calgary.c
@@ -1,7 +1,7 @@
/*
* Derived from arch/powerpc/kernel/iommu.c
*
- * Copyright (C) IBM Corporation, 2006
+ * Copyright IBM Corporation, 2006-2007
* Copyright (C) 2006 Jon Mason <jdmason@kudzu.us>
*
* Author: Jon Mason <jdmason@kudzu.us>
@@ -35,7 +35,7 @@
#include <linux/pci_ids.h>
#include <linux/pci.h>
#include <linux/delay.h>
-#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/calgary.h>
#include <asm/tce.h>
#include <asm/pci-direct.h>
@@ -50,13 +50,7 @@ int use_calgary __read_mostly = 0;
#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
-#define PCI_VENDOR_DEVICE_ID_CALGARY \
- (PCI_VENDOR_ID_IBM | PCI_DEVICE_ID_IBM_CALGARY << 16)
-
-/* we need these for register space address calculation */
-#define START_ADDRESS 0xfe000000
-#define CHASSIS_BASE 0
-#define ONE_BASED_CHASSIS_NUM 1
+#define PCI_DEVICE_ID_IBM_CALIOC2 0x0308
/* register offsets inside the host bridge space */
#define CALGARY_CONFIG_REG 0x0108
@@ -80,6 +74,12 @@ int use_calgary __read_mostly = 0;
#define PHB_MEM_2_SIZE_LOW 0x02E0
#define PHB_DOSHOLE_OFFSET 0x08E0
+/* CalIOC2 specific */
+#define PHB_SAVIOR_L2 0x0DB0
+#define PHB_PAGE_MIG_CTRL 0x0DA8
+#define PHB_PAGE_MIG_DEBUG 0x0DA0
+#define PHB_ROOT_COMPLEX_STATUS 0x0CB0
+
/* PHB_CONFIG_RW */
#define PHB_TCE_ENABLE 0x20000000
#define PHB_SLOT_DISABLE 0x1C000000
@@ -92,7 +92,11 @@ int use_calgary __read_mostly = 0;
/* CSR (Channel/DMA Status Register) */
#define CSR_AGENT_MASK 0xffe0ffff
/* CCR (Calgary Configuration Register) */
-#define CCR_2SEC_TIMEOUT 0x000000000000000EUL
+#define CCR_2SEC_TIMEOUT 0x000000000000000EUL
+/* PMCR/PMDR (Page Migration Control/Debug Registers) */
+#define PMR_SOFTSTOP 0x80000000
+#define PMR_SOFTSTOPFAULT 0x40000000
+#define PMR_HARDSTOP 0x20000000
#define MAX_NUM_OF_PHBS 8 /* how many PHBs in total? */
#define MAX_NUM_CHASSIS 8 /* max number of chassis */
@@ -155,9 +159,26 @@ struct calgary_bus_info {
void __iomem *bbar;
};
-static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
+static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
+static void calgary_tce_cache_blast(struct iommu_table *tbl);
+static void calgary_dump_error_regs(struct iommu_table *tbl);
+static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
+static void calioc2_tce_cache_blast(struct iommu_table *tbl);
+static void calioc2_dump_error_regs(struct iommu_table *tbl);
+
+static struct cal_chipset_ops calgary_chip_ops = {
+ .handle_quirks = calgary_handle_quirks,
+ .tce_cache_blast = calgary_tce_cache_blast,
+ .dump_error_regs = calgary_dump_error_regs
+};
-static void tce_cache_blast(struct iommu_table *tbl);
+static struct cal_chipset_ops calioc2_chip_ops = {
+ .handle_quirks = calioc2_handle_quirks,
+ .tce_cache_blast = calioc2_tce_cache_blast,
+ .dump_error_regs = calioc2_dump_error_regs
+};
+
+static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
/* enable this to stress test the chip's TCE cache */
#ifdef CONFIG_IOMMU_DEBUG
@@ -187,6 +208,7 @@ static inline unsigned long verify_bit_range(unsigned long* bitmap,
{
return ~0UL;
}
+
#endif /* CONFIG_IOMMU_DEBUG */
static inline unsigned int num_dma_pages(unsigned long dma, unsigned int dmalen)
@@ -206,11 +228,12 @@ static inline int translate_phb(struct pci_dev* dev)
}
static void iommu_range_reserve(struct iommu_table *tbl,
- unsigned long start_addr, unsigned int npages)
+ unsigned long start_addr, unsigned int npages)
{
unsigned long index;
unsigned long end;
unsigned long badbit;
+ unsigned long flags;
index = start_addr >> PAGE_SHIFT;
@@ -222,6 +245,8 @@ static void iommu_range_reserve(struct iommu_table *tbl,
if (end > tbl->it_size) /* don't go off the table */
end = tbl->it_size;
+ spin_lock_irqsave(&tbl->it_lock, flags);
+
badbit = verify_bit_range(tbl->it_map, 0, index, end);
if (badbit != ~0UL) {
if (printk_ratelimit())
@@ -231,23 +256,29 @@ static void iommu_range_reserve(struct iommu_table *tbl,
}
set_bit_string(tbl->it_map, index, npages);
+
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
}
static unsigned long iommu_range_alloc(struct iommu_table *tbl,
unsigned int npages)
{
+ unsigned long flags;
unsigned long offset;
BUG_ON(npages == 0);
+ spin_lock_irqsave(&tbl->it_lock, flags);
+
offset = find_next_zero_string(tbl->it_map, tbl->it_hint,
tbl->it_size, npages);
if (offset == ~0UL) {
- tce_cache_blast(tbl);
+ tbl->chip_ops->tce_cache_blast(tbl);
offset = find_next_zero_string(tbl->it_map, 0,
tbl->it_size, npages);
if (offset == ~0UL) {
printk(KERN_WARNING "Calgary: IOMMU full.\n");
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
if (panic_on_overflow)
panic("Calgary: fix the allocator.\n");
else
@@ -259,17 +290,17 @@ static unsigned long iommu_range_alloc(struct iommu_table *tbl,
tbl->it_hint = offset + npages;
BUG_ON(tbl->it_hint > tbl->it_size);
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
+
return offset;
}
static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
unsigned int npages, int direction)
{
- unsigned long entry, flags;
+ unsigned long entry;
dma_addr_t ret = bad_dma_address;
- spin_lock_irqsave(&tbl->it_lock, flags);
-
entry = iommu_range_alloc(tbl, npages);
if (unlikely(entry == bad_dma_address))
@@ -282,23 +313,21 @@ static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *vaddr,
tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
direction);
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-
return ret;
error:
- spin_unlock_irqrestore(&tbl->it_lock, flags);
printk(KERN_WARNING "Calgary: failed to allocate %u pages in "
"iommu %p\n", npages, tbl);
return bad_dma_address;
}
-static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
+static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
unsigned int npages)
{
unsigned long entry;
unsigned long badbit;
unsigned long badend;
+ unsigned long flags;
/* were we called with bad_dma_address? */
badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE);
@@ -315,6 +344,8 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
tce_free(tbl, entry, npages);
+ spin_lock_irqsave(&tbl->it_lock, flags);
+
badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages);
if (badbit != ~0UL) {
if (printk_ratelimit())
@@ -324,23 +355,40 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
}
__clear_bit_string(tbl->it_map, entry, npages);
+
+ spin_unlock_irqrestore(&tbl->it_lock, flags);
}
-static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned int npages)
+static inline struct iommu_table *find_iommu_table(struct device *dev)
{
- unsigned long flags;
+ struct pci_dev *pdev;
+ struct pci_bus *pbus;
+ struct iommu_table *tbl;
- spin_lock_irqsave(&tbl->it_lock, flags);
+ pdev = to_pci_dev(dev);
- __iommu_free(tbl, dma_addr, npages);
+ /* is the device behind a bridge? */
+ if (unlikely(pdev->bus->parent))
+ pbus = pdev->bus->parent;
+ else
+ pbus = pdev->bus;
- spin_unlock_irqrestore(&tbl->it_lock, flags);
+ tbl = pci_iommu(pbus);
+
+ BUG_ON(pdev->bus->parent &&
+ (tbl->it_busno != pdev->bus->parent->number));
+
+ return tbl;
}
-static void __calgary_unmap_sg(struct iommu_table *tbl,
+static void calgary_unmap_sg(struct device *dev,
struct scatterlist *sglist, int nelems, int direction)
{
+ struct iommu_table *tbl = find_iommu_table(dev);
+
+ if (!translate_phb(to_pci_dev(dev)))
+ return;
+
while (nelems--) {
unsigned int npages;
dma_addr_t dma = sglist->dma_address;
@@ -350,33 +398,17 @@ static void __calgary_unmap_sg(struct iommu_table *tbl,
break;
npages = num_dma_pages(dma, dmalen);
- __iommu_free(tbl, dma, npages);
+ iommu_free(tbl, dma, npages);
sglist++;
}
}
-void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, int direction)
-{
- unsigned long flags;
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
-
- if (!translate_phb(to_pci_dev(dev)))
- return;
-
- spin_lock_irqsave(&tbl->it_lock, flags);
-
- __calgary_unmap_sg(tbl, sglist, nelems, direction);
-
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-}
-
static int calgary_nontranslate_map_sg(struct device* dev,
struct scatterlist *sg, int nelems, int direction)
{
int i;
- for (i = 0; i < nelems; i++ ) {
+ for (i = 0; i < nelems; i++ ) {
struct scatterlist *s = &sg[i];
BUG_ON(!s->page);
s->dma_address = virt_to_bus(page_address(s->page) +s->offset);
@@ -385,11 +417,10 @@ static int calgary_nontranslate_map_sg(struct device* dev,
return nelems;
}
-int calgary_map_sg(struct device *dev, struct scatterlist *sg,
+static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
{
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
- unsigned long flags;
+ struct iommu_table *tbl = find_iommu_table(dev);
unsigned long vaddr;
unsigned int npages;
unsigned long entry;
@@ -398,8 +429,6 @@ int calgary_map_sg(struct device *dev, struct scatterlist *sg,
if (!translate_phb(to_pci_dev(dev)))
return calgary_nontranslate_map_sg(dev, sg, nelems, direction);
- spin_lock_irqsave(&tbl->it_lock, flags);
-
for (i = 0; i < nelems; i++ ) {
struct scatterlist *s = &sg[i];
BUG_ON(!s->page);
@@ -423,26 +452,23 @@ int calgary_map_sg(struct device *dev, struct scatterlist *sg,
s->dma_length = s->length;
}
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-
return nelems;
error:
- __calgary_unmap_sg(tbl, sg, nelems, direction);
+ calgary_unmap_sg(dev, sg, nelems, direction);
for (i = 0; i < nelems; i++) {
sg[i].dma_address = bad_dma_address;
sg[i].dma_length = 0;
}
- spin_unlock_irqrestore(&tbl->it_lock, flags);
return 0;
}
-dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
+static dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
size_t size, int direction)
{
dma_addr_t dma_handle = bad_dma_address;
unsigned long uaddr;
unsigned int npages;
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = find_iommu_table(dev);
uaddr = (unsigned long)vaddr;
npages = num_dma_pages(uaddr, size);
@@ -455,10 +481,10 @@ dma_addr_t calgary_map_single(struct device *dev, void *vaddr,
return dma_handle;
}
-void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
+static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
size_t size, int direction)
{
- struct iommu_table *tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = find_iommu_table(dev);
unsigned int npages;
if (!translate_phb(to_pci_dev(dev)))
@@ -468,15 +494,13 @@ void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle,
iommu_free(tbl, dma_handle, npages);
}
-void* calgary_alloc_coherent(struct device *dev, size_t size,
+static void* calgary_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t flag)
{
void *ret = NULL;
dma_addr_t mapping;
unsigned int npages, order;
- struct iommu_table *tbl;
-
- tbl = to_pci_dev(dev)->bus->self->sysdata;
+ struct iommu_table *tbl = find_iommu_table(dev);
size = PAGE_ALIGN(size); /* size rounded up to full pages */
npages = size >> PAGE_SHIFT;
@@ -552,7 +576,22 @@ static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
return (void __iomem*)target;
}
-static void tce_cache_blast(struct iommu_table *tbl)
+/* Non-zero when @device equals PCI_DEVICE_ID_IBM_CALIOC2. */
+static inline int is_calioc2(unsigned short device)
+{
+	return device == PCI_DEVICE_ID_IBM_CALIOC2;
+}
+
+/* Non-zero when @device equals PCI_DEVICE_ID_IBM_CALGARY. */
+static inline int is_calgary(unsigned short device)
+{
+	return device == PCI_DEVICE_ID_IBM_CALGARY;
+}
+
+/* Non-zero when @device is either of the two supported device IDs. */
+static inline int is_cal_pci_dev(unsigned short device)
+{
+	if (is_calgary(device))
+		return 1;
+
+	return is_calioc2(device);
+}
+
+static void calgary_tce_cache_blast(struct iommu_table *tbl)
{
u64 val;
u32 aer;
@@ -589,6 +628,85 @@ static void tce_cache_blast(struct iommu_table *tbl)
(void)readl(target); /* flush */
}
+/*
+ * Flush the CalIOC2 TCE cache for the PHB that owns @tbl.
+ *
+ * The steps are numbered to match the KERN_DEBUG output:
+ *  1. set SoftStop in the Page Migration Control register;
+ *  2. poll the split queue register until its low byte reads 0xff, for at
+ *     most 100 reads per pass;
+ *  3/4. read the Page Migration Debug register and start over from step 1
+ *     if SoftStopFault is set, giving up once the pass count reaches 100;
+ *  5. read the control and debug registers again;
+ *  6. rewrite the saved TAR value (tbl->tar_val);
+ *  7/8. re-read the control register, write 0 to it and read it back.
+ */
+static void calioc2_tce_cache_blast(struct iommu_table *tbl)
+{
+	void __iomem *bbar = tbl->bbar;
+	void __iomem *target;
+	u64 val64;
+	u32 val;
+	int i;
+	int count = 1;
+	unsigned char bus = tbl->it_busno;
+
+begin:
+	printk(KERN_DEBUG "Calgary: CalIOC2 bus 0x%x entering tce cache blast "
+	       "sequence - count %d\n", bus, count);
+
+	/* 1. using the Page Migration Control reg set SoftStop */
+	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
+	val = be32_to_cpu(readl(target));
+	printk(KERN_DEBUG "1a. read 0x%x [LE] from %p\n", val, target);
+	val |= PMR_SOFTSTOP;
+	printk(KERN_DEBUG "1b. writing 0x%x [LE] to %p\n", val, target);
+	writel(cpu_to_be32(val), target);
+
+	/* 2. poll split queues until all DMA activity is done */
+	printk(KERN_DEBUG "2a. starting to poll split queues\n");
+	target = calgary_reg(bbar, split_queue_offset(bus));
+	/*
+	 * Restart the poll budget on every pass: a retry via 'begin' must not
+	 * inherit the count of the previous pass, otherwise it polls only once
+	 * and the warning below can never fire again.
+	 */
+	i = 0;
+	do {
+		val64 = readq(target);
+		i++;
+	} while ((val64 & 0xff) != 0xff && i < 100);
+	if (i == 100)
+		printk(KERN_WARNING "CalIOC2: PCI bus not quiesced, "
+		       "continuing anyway\n");
+
+	/* 3. poll Page Migration DEBUG for SoftStopFault */
+	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
+	val = be32_to_cpu(readl(target));
+	printk(KERN_DEBUG "3. read 0x%x [LE] from %p\n", val, target);
+
+	/* 4. if SoftStopFault - goto (1) */
+	if (val & PMR_SOFTSTOPFAULT) {
+		if (++count < 100)
+			goto begin;
+		else {
+			printk(KERN_WARNING "CalIOC2: too many SoftStopFaults, "
+			       "aborting TCE cache flush sequence!\n");
+			return; /* pray for the best */
+		}
+	}
+
+	/* 5. Slam into HardStop by reading PHB_PAGE_MIG_CTRL */
+	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
+	printk(KERN_DEBUG "5a. slamming into HardStop by reading %p\n", target);
+	val = be32_to_cpu(readl(target));
+	printk(KERN_DEBUG "5b. read 0x%x [LE] from %p\n", val, target);
+	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
+	val = be32_to_cpu(readl(target));
+	printk(KERN_DEBUG "5c. read 0x%x [LE] from %p (debug)\n", val, target);
+
+	/* 6. invalidate TCE cache */
+	printk(KERN_DEBUG "6. invalidating TCE cache\n");
+	target = calgary_reg(bbar, tar_offset(bus));
+	writeq(tbl->tar_val, target);
+
+	/* 7. Re-read PMCR */
+	printk(KERN_DEBUG "7a. Re-reading PMCR\n");
+	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
+	val = be32_to_cpu(readl(target));
+	printk(KERN_DEBUG "7b. read 0x%x [LE] from %p\n", val, target);
+
+	/* 8. Remove HardStop */
+	printk(KERN_DEBUG "8a. removing HardStop from PMCR\n");
+	target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
+	val = 0;
+	printk(KERN_DEBUG "8b. writing 0x%x [LE] to %p\n", val, target);
+	writel(cpu_to_be32(val), target);
+	val = be32_to_cpu(readl(target));
+	printk(KERN_DEBUG "8c. read 0x%x [LE] from %p\n", val, target);
+}
+
static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
u64 limit)
{
@@ -598,7 +716,7 @@ static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
limit++;
numpages = ((limit - start) >> PAGE_SHIFT);
- iommu_range_reserve(dev->sysdata, start, numpages);
+ iommu_range_reserve(pci_iommu(dev->bus), start, numpages);
}
static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
@@ -606,7 +724,7 @@ static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
void __iomem *target;
u64 low, high, sizelow;
u64 start, limit;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
unsigned char busnum = dev->bus->number;
void __iomem *bbar = tbl->bbar;
@@ -630,7 +748,7 @@ static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
u32 val32;
u64 low, high, sizelow, sizehigh;
u64 start, limit;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
unsigned char busnum = dev->bus->number;
void __iomem *bbar = tbl->bbar;
@@ -666,14 +784,20 @@ static void __init calgary_reserve_regions(struct pci_dev *dev)
{
unsigned int npages;
u64 start;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
/* reserve EMERGENCY_PAGES from bad_dma_address and up */
iommu_range_reserve(tbl, bad_dma_address, EMERGENCY_PAGES);
/* avoid the BIOS/VGA first 640KB-1MB region */
- start = (640 * 1024);
- npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
+ /* for CalIOC2 - avoid the entire first MB */
+ if (is_calgary(dev->device)) {
+ start = (640 * 1024);
+ npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
+ } else { /* calioc2 */
+ start = 0;
+ npages = (1 * 1024 * 1024) >> PAGE_SHIFT;
+ }
iommu_range_reserve(tbl, start, npages);
/* reserve the two PCI peripheral memory regions in IO space */
@@ -694,10 +818,17 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
if (ret)
return ret;
- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
tce_free(tbl, 0, tbl->it_size);
+ if (is_calgary(dev->device))
+ tbl->chip_ops = &calgary_chip_ops;
+ else if (is_calioc2(dev->device))
+ tbl->chip_ops = &calioc2_chip_ops;
+ else
+ BUG();
+
calgary_reserve_regions(dev);
/* set TARs for each PHB */
@@ -706,15 +837,15 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
/* zero out all TAR bits under sw control */
val64 &= ~TAR_SW_BITS;
-
- tbl = dev->sysdata;
table_phys = (u64)__pa(tbl->it_base);
+
val64 |= table_phys;
BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
val64 |= (u64) specified_table_size;
tbl->tar_val = cpu_to_be64(val64);
+
writeq(tbl->tar_val, target);
readq(target); /* flush */
@@ -724,7 +855,7 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
static void __init calgary_free_bus(struct pci_dev *dev)
{
u64 val64;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
void __iomem *target;
unsigned int bitmapsz;
@@ -739,16 +870,81 @@ static void __init calgary_free_bus(struct pci_dev *dev)
tbl->it_map = NULL;
kfree(tbl);
- dev->sysdata = NULL;
+
+ set_pci_iommu(dev->bus, NULL);
/* Can't free bootmem allocated memory after system is up :-( */
bus_info[dev->bus->number].tce_space = NULL;
}
+/*
+ * Calgary flavour of the dump_error_regs hook: reads the CSR and PLSSR of
+ * the PHB that owns @tbl and logs both values at KERN_EMERG.
+ */
+static void calgary_dump_error_regs(struct iommu_table *tbl)
+{
+	unsigned long phboff = phb_offset(tbl->it_busno);
+	void __iomem *csr_reg = calgary_reg(tbl->bbar, phboff | PHB_CSR_OFFSET);
+	void __iomem *plssr_reg = calgary_reg(tbl->bbar,
+					      phboff | PHB_PLSSR_OFFSET);
+	u32 csr_val, plssr_val;
+
+	/* registers are read in the same order they are printed */
+	csr_val = be32_to_cpu(readl(csr_reg));
+	plssr_val = be32_to_cpu(readl(plssr_reg));
+
+	printk(KERN_EMERG "Calgary: DMA error on Calgary PHB 0x%x, "
+	       "0x%08x@CSR 0x%08x@PLSSR\n", tbl->it_busno, csr_val, plssr_val);
+}
+
+/*
+ * CalIOC2 flavour of the dump_error_regs hook.
+ *
+ * Reads CSR, PLSSR, the registers at PHB offsets 0x290 and 0x800, the seven
+ * registers at 0x810..0x870 (stride 0x10) and PHB_ROOT_COMPLEX_STATUS for
+ * the PHB that owns @tbl, and logs every value at KERN_EMERG.
+ */
+static void calioc2_dump_error_regs(struct iommu_table *tbl)
+{
+ void __iomem *bbar = tbl->bbar;
+ u32 csr, csmr, plssr, mck, rcstat;
+ void __iomem *target;
+ unsigned long phboff = phb_offset(tbl->it_busno);
+ unsigned long erroff;
+ u32 errregs[7];
+ int i;
+
+ /* dump CSR */
+ target = calgary_reg(bbar, phboff | PHB_CSR_OFFSET);
+ csr = be32_to_cpu(readl(target));
+ /* dump PLSSR */
+ target = calgary_reg(bbar, phboff | PHB_PLSSR_OFFSET);
+ plssr = be32_to_cpu(readl(target));
+ /* dump CSMR */
+ target = calgary_reg(bbar, phboff | 0x290);
+ csmr = be32_to_cpu(readl(target));
+ /* dump mck */
+ target = calgary_reg(bbar, phboff | 0x800);
+ mck = be32_to_cpu(readl(target));
+
+ printk(KERN_EMERG "Calgary: DMA error on CalIOC2 PHB 0x%x\n",
+ tbl->it_busno);
+
+ printk(KERN_EMERG "Calgary: 0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
+ csr, plssr, csmr, mck);
+
+ /* dump rest of error regs */
+ /* the prefix, the loop output and the final newline form one log line */
+ printk(KERN_EMERG "Calgary: ");
+ for (i = 0; i < ARRAY_SIZE(errregs); i++) {
+ /* err regs are at 0x810 - 0x870 */
+ erroff = (0x810 + (i * 0x10));
+ target = calgary_reg(bbar, phboff | erroff);
+ errregs[i] = be32_to_cpu(readl(target));
+ printk("0x%08x@0x%lx ", errregs[i], erroff);
+ }
+ printk("\n");
+
+ /* root complex status */
+ target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS);
+ rcstat = be32_to_cpu(readl(target));
+ printk(KERN_EMERG "Calgary: 0x%08x@0x%x\n", rcstat,
+ PHB_ROOT_COMPLEX_STATUS);
+}
+
static void calgary_watchdog(unsigned long data)
{
struct pci_dev *dev = (struct pci_dev *)data;
- struct iommu_table *tbl = dev->sysdata;
+ struct iommu_table *tbl = pci_iommu(dev->bus);
void __iomem *bbar = tbl->bbar;
u32 val32;
void __iomem *target;
@@ -758,13 +954,14 @@ static void calgary_watchdog(unsigned long data)
/* If no error, the agent ID in the CSR is not valid */
if (val32 & CSR_AGENT_MASK) {
- printk(KERN_EMERG "calgary_watchdog: DMA error on PHB %#x, "
- "CSR = %#x\n", dev->bus->number, val32);
+ tbl->chip_ops->dump_error_regs(tbl);
+
+ /* reset error */
writel(0, target);
/* Disable bus that caused the error */
target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
- PHB_CONFIG_RW_OFFSET);
+ PHB_CONFIG_RW_OFFSET);
val32 = be32_to_cpu(readl(target));
val32 |= PHB_SLOT_DISABLE;
writel(cpu_to_be32(val32), target);
@@ -775,8 +972,8 @@ static void calgary_watchdog(unsigned long data)
}
}
-static void __init calgary_increase_split_completion_timeout(void __iomem *bbar,
- unsigned char busnum)
+static void __init calgary_set_split_completion_timeout(void __iomem *bbar,
+ unsigned char busnum, unsigned long timeout)
{
u64 val64;
void __iomem *target;
@@ -802,11 +999,40 @@ static void __init calgary_increase_split_completion_timeout(void __iomem *bbar,
/* zero out this PHB's timer bits */
mask = ~(0xFUL << phb_shift);
val64 &= mask;
- val64 |= (CCR_2SEC_TIMEOUT << phb_shift);
+ val64 |= (timeout << phb_shift);
writeq(cpu_to_be64(val64), target);
readq(target); /* flush */
}
+/*
+ * CalIOC2 setup hook, invoked through tbl->chip_ops->handle_quirks.
+ *
+ * Does a read-modify-write of the PHB_SAVIOR_L2 register of the PHB for
+ * @dev's bus, ORing 0x00800000 into the CPU-order value.
+ */
+static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev)
+{
+	unsigned char busnum = dev->bus->number;
+	void __iomem *bbar = tbl->bbar;
+	void __iomem *target;
+	u32 val;
+
+	/*
+	 * CalIOC2 designers recommend setting bit 8 in 0xnDB0 to 1
+	 */
+	target = calgary_reg(bbar, phb_offset(busnum) | PHB_SAVIOR_L2);
+	/* register value -> CPU order here, CPU order -> register below */
+	val = be32_to_cpu(readl(target));
+	val |= 0x00800000;
+	writel(cpu_to_be32(val), target);
+}
+
+/*
+ * Calgary setup hook, invoked through tbl->chip_ops->handle_quirks.
+ * Only acts for a Calgary device whose bus number is 1.
+ */
+static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev)
+{
+	unsigned char busnum = dev->bus->number;
+
+	if (!is_calgary(dev->device) || busnum != 1)
+		return;
+
+	/*
+	 * Give split completion a longer timeout on bus 1 for aic94xx
+	 * http://bugzilla.kernel.org/show_bug.cgi?id=7180
+	 */
+	calgary_set_split_completion_timeout(tbl->bbar, busnum,
+					     CCR_2SEC_TIMEOUT);
+}
+
static void __init calgary_enable_translation(struct pci_dev *dev)
{
u32 val32;
@@ -816,7 +1042,7 @@ static void __init calgary_enable_translation(struct pci_dev *dev)
struct iommu_table *tbl;
busnum = dev->bus->number;
- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
bbar = tbl->bbar;
/* enable TCE in PHB Config Register */
@@ -824,20 +1050,15 @@ static void __init calgary_enable_translation(struct pci_dev *dev)
val32 = be32_to_cpu(readl(target));
val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
- printk(KERN_INFO "Calgary: enabling translation on PHB %#x\n", busnum);
+ printk(KERN_INFO "Calgary: enabling translation on %s PHB %#x\n",
+ (dev->device == PCI_DEVICE_ID_IBM_CALGARY) ?
+ "Calgary" : "CalIOC2", busnum);
printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
"bus.\n");
writel(cpu_to_be32(val32), target);
readl(target); /* flush */
- /*
- * Give split completion a longer timeout on bus 1 for aic94xx
- * http://bugzilla.kernel.org/show_bug.cgi?id=7180
- */
- if (busnum == 1)
- calgary_increase_split_completion_timeout(bbar, busnum);
-
init_timer(&tbl->watchdog_timer);
tbl->watchdog_timer.function = &calgary_watchdog;
tbl->watchdog_timer.data = (unsigned long)dev;
@@ -853,7 +1074,7 @@ static void __init calgary_disable_translation(struct pci_dev *dev)
struct iommu_table *tbl;
busnum = dev->bus->number;
- tbl = dev->sysdata;
+ tbl = pci_iommu(dev->bus);
bbar = tbl->bbar;
/* disable TCE in PHB Config Register */
@@ -871,13 +1092,19 @@ static void __init calgary_disable_translation(struct pci_dev *dev)
static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
{
pci_dev_get(dev);
- dev->sysdata = NULL;
- dev->bus->self = dev;
+ set_pci_iommu(dev->bus, NULL);
+
+ /* is the device behind a bridge? */
+ if (dev->bus->parent)
+ dev->bus->parent->self = dev;
+ else
+ dev->bus->self = dev;
}
static int __init calgary_init_one(struct pci_dev *dev)
{
void __iomem *bbar;
+ struct iommu_table *tbl;
int ret;
BUG_ON(dev->bus->number >= MAX_PHB_BUS_NUM);
@@ -888,7 +1115,18 @@ static int __init calgary_init_one(struct pci_dev *dev)
goto done;
pci_dev_get(dev);
- dev->bus->self = dev;
+
+ if (dev->bus->parent) {
+ if (dev->bus->parent->self)
+ printk(KERN_WARNING "Calgary: IEEEE, dev %p has "
+ "bus->parent->self!\n", dev);
+ dev->bus->parent->self = dev;
+ } else
+ dev->bus->self = dev;
+
+ tbl = pci_iommu(dev->bus);
+ tbl->chip_ops->handle_quirks(tbl, dev);
+
calgary_enable_translation(dev);
return 0;
@@ -924,11 +1162,18 @@ static int __init calgary_locate_bbars(void)
target = calgary_reg(bbar, offset);
val = be32_to_cpu(readl(target));
+
start_bus = (u8)((val & 0x00FF0000) >> 16);
end_bus = (u8)((val & 0x0000FF00) >> 8);
- for (bus = start_bus; bus <= end_bus; bus++) {
- bus_info[bus].bbar = bbar;
- bus_info[bus].phbid = phb;
+
+ if (end_bus) {
+ for (bus = start_bus; bus <= end_bus; bus++) {
+ bus_info[bus].bbar = bbar;
+ bus_info[bus].phbid = phb;
+ }
+ } else {
+ bus_info[start_bus].bbar = bbar;
+ bus_info[start_bus].phbid = phb;
}
}
}
@@ -948,22 +1193,24 @@ static int __init calgary_init(void)
{
int ret;
struct pci_dev *dev = NULL;
+ void *tce_space;
ret = calgary_locate_bbars();
if (ret)
return ret;
do {
- dev = pci_get_device(PCI_VENDOR_ID_IBM,
- PCI_DEVICE_ID_IBM_CALGARY,
- dev);
+ dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
if (!dev)
break;
+ if (!is_cal_pci_dev(dev->device))
+ continue;
if (!translate_phb(dev)) {
calgary_init_one_nontraslated(dev);
continue;
}
- if (!bus_info[dev->bus->number].tce_space && !translate_empty_slots)
+ tce_space = bus_info[dev->bus->number].tce_space;
+ if (!tce_space && !translate_empty_slots)
continue;
ret = calgary_init_one(dev);
@@ -976,10 +1223,11 @@ static int __init calgary_init(void)
error:
do {
dev = pci_get_device_reverse(PCI_VENDOR_ID_IBM,
- PCI_DEVICE_ID_IBM_CALGARY,
- dev);
+ PCI_ANY_ID, dev);
if (!dev)
break;
+ if (!is_cal_pci_dev(dev->device))
+ continue;
if (!translate_phb(dev)) {
pci_dev_put(dev);
continue;
@@ -1057,9 +1305,29 @@ static int __init build_detail_arrays(void)
return 0;
}
-void __init detect_calgary(void)
+static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
{
+ int dev;
u32 val;
+
+ if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) {
+ /*
+ * FIXME: properly scan for devices across the
+ * PCI-to-PCI bridge on every CalIOC2 port.
+ */
+ return 1;
+ }
+
+ for (dev = 1; dev < 8; dev++) {
+ val = read_pci_config(bus, dev, 0, 0);
+ if (val != 0xffffffff)
+ break;
+ }
+ return (val != 0xffffffff);
+}
+
+void __init detect_calgary(void)
+{
int bus;
void *tbl;
int calgary_found = 0;
@@ -1116,29 +1384,26 @@ void __init detect_calgary(void)
specified_table_size = determine_tce_table_size(end_pfn * PAGE_SIZE);
for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
- int dev;
struct calgary_bus_info *info = &bus_info[bus];
+ unsigned short pci_device;
+ u32 val;
+
+ val = read_pci_config(bus, 0, 0, 0);
+ pci_device = (val & 0xFFFF0000) >> 16;
- if (read_pci_config(bus, 0, 0, 0) != PCI_VENDOR_DEVICE_ID_CALGARY)
+ if (!is_cal_pci_dev(pci_device))
continue;
if (info->translation_disabled)
continue;
- /*
- * Scan the slots of the PCI bus to see if there is a device present.
- * The parent bus will be the zero-ith device, so start at 1.
- */
- for (dev = 1; dev < 8; dev++) {
- val = read_pci_config(bus, dev, 0, 0);
- if (val != 0xffffffff || translate_empty_slots) {
- tbl = alloc_tce_table();
- if (!tbl)
- goto cleanup;
- info->tce_space = tbl;
- calgary_found = 1;
- break;
- }
+ if (calgary_bus_has_devices(bus, pci_device) ||
+ translate_empty_slots) {
+ tbl = alloc_tce_table();
+ if (!tbl)
+ goto cleanup;
+ info->tce_space = tbl;
+ calgary_found = 1;
}
}
@@ -1249,3 +1514,66 @@ static int __init calgary_parse_options(char *p)
return 1;
}
__setup("calgary=", calgary_parse_options);
+
+/*
+ * Reserve, in the IOMMU table of @dev's bus, the ranges covered by the
+ * memory resources at dev->resource[PCI_BRIDGE_RESOURCES .. +3], so that
+ * TCEs mapping those ranges are never handed out.
+ */
+static void __init calgary_fixup_one_tce_space(struct pci_dev *dev)
+{
+	struct iommu_table *tbl = pci_iommu(dev->bus);
+	int idx;
+
+	for (idx = 0; idx < 4; idx++) {
+		struct resource *res = &dev->resource[PCI_BRIDGE_RESOURCES + idx];
+		unsigned int npages;
+
+		/*
+		 * Only MEM resources matter, and a 0-based one is skipped
+		 * because the whole 1st MB is reserved anyway.
+		 */
+		if ((res->flags & IORESOURCE_MEM) && res->start) {
+			/* page count covering the whole region */
+			npages = (res->end - res->start) >> PAGE_SHIFT;
+			iommu_range_reserve(tbl, res->start, npages + 1);
+		}
+	}
+}
+
+/*
+ * Walk every IBM PCI device and, for each supported bridge that translates
+ * and has a TCE space recorded in bus_info[], reserve its memory resources
+ * in the IOMMU table.
+ *
+ * Returns -ENODEV when no_iommu or swiotlb is set or no Calgary was
+ * detected, 0 otherwise.
+ */
+static int __init calgary_fixup_tce_spaces(void)
+{
+	struct pci_dev *bridge = NULL;
+
+	if (no_iommu || swiotlb || !calgary_detected)
+		return -ENODEV;
+
+	printk(KERN_DEBUG "Calgary: fixing up tce spaces\n");
+
+	while ((bridge = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID,
+					bridge)) != NULL) {
+		if (is_cal_pci_dev(bridge->device) &&
+		    translate_phb(bridge) &&
+		    bus_info[bridge->bus->number].tce_space)
+			calgary_fixup_one_tce_space(bridge);
+	}
+
+	return 0;
+}
+
+/*
+ * We need to be called after pcibios_assign_resources (fs_initcall level)
+ * and before device_initcall.
+ */
+rootfs_initcall(calgary_fixup_tce_spaces);
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c
index 90f6315d02d..05d745ede56 100644
--- a/arch/x86_64/kernel/pci-dma.c
+++ b/arch/x86_64/kernel/pci-dma.c
@@ -8,7 +8,7 @@
#include <linux/pci.h>
#include <linux/module.h>
#include <asm/io.h>
-#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/calgary.h>
int iommu_merge __read_mostly = 0;
@@ -321,6 +321,11 @@ static int __init pci_iommu_init(void)
return 0;
}
+/* Shutdown entry point for the PCI IOMMU code; forwards to the GART shutdown. */
+void pci_iommu_shutdown(void)
+{
+ gart_iommu_shutdown();
+}
+
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c
index ae091cdc1a4..4918c575d58 100644
--- a/arch/x86_64/kernel/pci-gart.c
+++ b/arch/x86_64/kernel/pci-gart.c
@@ -28,6 +28,7 @@
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/cacheflush.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
@@ -235,7 +236,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf,
}
/* Map a single area into the IOMMU */
-dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
+static dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
{
unsigned long phys_mem, bus;
@@ -253,7 +254,7 @@ dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir)
/*
* Free a DMA mapping.
*/
-void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
+static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
size_t size, int direction)
{
unsigned long iommu_page;
@@ -275,7 +276,7 @@ void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
/*
* Wrapper for pci_unmap_single working with scatterlists.
*/
-void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
+static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
{
int i;
@@ -571,6 +572,26 @@ static const struct dma_mapping_ops gart_dma_ops = {
.unmap_sg = gart_unmap_sg,
};
+/*
+ * Clear bit 0 of PCI config register 0x90 on every K8 northbridge.
+ * NOTE(review): presumably bit 0 of 0x90 is the GART enable bit - confirm
+ * against the processor documentation.
+ *
+ * Returns without touching the hardware when no_agp is set and dma_ops is
+ * not the GART ops table.
+ */
+void gart_iommu_shutdown(void)
+{
+	int nb;
+
+	if (no_agp && (dma_ops != &gart_dma_ops))
+		return;
+
+	for (nb = 0; nb < num_k8_northbridges; nb++) {
+		struct pci_dev *northbridge = k8_northbridges[nb];
+		u32 ctl_val;
+
+		/* read-modify-write: only bit 0 changes */
+		pci_read_config_dword(northbridge, 0x90, &ctl_val);
+		ctl_val &= ~1;
+		pci_write_config_dword(northbridge, 0x90, ctl_val);
+	}
+}
+
void __init gart_iommu_init(void)
{
struct agp_kern_info info;
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c
index 6dade0c867c..2a34c6c025a 100644
--- a/arch/x86_64/kernel/pci-nommu.c
+++ b/arch/x86_64/kernel/pci-nommu.c
@@ -6,7 +6,7 @@
#include <linux/string.h>
#include <linux/dma-mapping.h>
-#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/processor.h>
#include <asm/dma.h>
@@ -34,7 +34,7 @@ nommu_map_single(struct device *hwdev, void *ptr, size_t size,
return bus;
}
-void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
+static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
int direction)
{
}
@@ -54,7 +54,7 @@ void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size,
* Device ownership issues as mentioned above for pci_map_single are
* the same here.
*/
-int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
+static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
int nents, int direction)
{
int i;
@@ -74,7 +74,7 @@ int nommu_map_sg(struct device *hwdev, struct scatterlist *sg,
* Again, cpu read rules concerning calls here are the same as for
* pci_unmap_single() above.
*/
-void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
+static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg,
int nents, int dir)
{
}
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c
index 4b4569abc60..b2f405ea7c8 100644
--- a/arch/x86_64/kernel/pci-swiotlb.c
+++ b/arch/x86_64/kernel/pci-swiotlb.c
@@ -5,7 +5,7 @@
#include <linux/module.h>
#include <linux/dma-mapping.h>
-#include <asm/proto.h>
+#include <asm/iommu.h>
#include <asm/swiotlb.h>
#include <asm/dma.h>
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c
index 5909039f37a..92fade4a62c 100644
--- a/arch/x86_64/kernel/process.c
+++ b/arch/x86_64/kernel/process.c
@@ -207,6 +207,7 @@ void cpu_idle (void)
if (__get_cpu_var(cpu_idle_state))
__get_cpu_var(cpu_idle_state) = 0;
+ check_pgt_cache();
rmb();
idle = pm_idle;
if (!idle)
@@ -278,7 +279,7 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
*/
if (!pm_idle) {
if (!printed) {
- printk("using mwait in idle threads.\n");
+ printk(KERN_INFO "using mwait in idle threads.\n");
printed = 1;
}
pm_idle = mwait_idle;
@@ -305,6 +306,7 @@ early_param("idle", idle_setup);
void __show_regs(struct pt_regs * regs)
{
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
+ unsigned long d0, d1, d2, d3, d6, d7;
unsigned int fsindex,gsindex;
unsigned int ds,cs,es;
@@ -349,6 +351,15 @@ void __show_regs(struct pt_regs * regs)
fs,fsindex,gs,gsindex,shadowgs);
printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
+
+ get_debugreg(d0, 0);
+ get_debugreg(d1, 1);
+ get_debugreg(d2, 2);
+ printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+ get_debugreg(d3, 3);
+ get_debugreg(d6, 6);
+ get_debugreg(d7, 7);
+ printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
void show_regs(struct pt_regs *regs)
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c
index 7503068e788..368db2b9c5a 100644
--- a/arch/x86_64/kernel/reboot.c
+++ b/arch/x86_64/kernel/reboot.c
@@ -16,6 +16,7 @@
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/apic.h>
+#include <asm/iommu.h>
/*
* Power off function, if any
@@ -81,6 +82,7 @@ static inline void kb_wait(void)
void machine_shutdown(void)
{
unsigned long flags;
+
/* Stop the cpus and apics */
#ifdef CONFIG_SMP
int reboot_cpu_id;
@@ -111,6 +113,8 @@ void machine_shutdown(void)
disable_IO_APIC();
local_irq_restore(flags);
+
+ pci_iommu_shutdown();
}
void machine_emergency_restart(void)
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 33ef718f8cb..6fa0a302e2a 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -575,6 +575,8 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
level = cpuid_eax(1);
if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58))
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
+ if (c->x86 == 0x10)
+ set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
/* Enable workaround for FXSAVE leak */
if (c->x86 >= 6)
@@ -600,8 +602,11 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000008)
amd_detect_cmp(c);
- /* Fix cpuid4 emulation for more */
- num_cache_leaves = 3;
+ if (c->extended_cpuid_level >= 0x80000006 &&
+ (cpuid_edx(0x80000006) & 0xf000))
+ num_cache_leaves = 4;
+ else
+ num_cache_leaves = 3;
/* RDTSC can be speculated around */
clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c
index 290f5d8037c..4886afcd628 100644
--- a/arch/x86_64/kernel/signal.c
+++ b/arch/x86_64/kernel/signal.c
@@ -26,6 +26,7 @@
#include <asm/i387.h>
#include <asm/proto.h>
#include <asm/ia32_unistd.h>
+#include <asm/mce.h>
/* #define DEBUG_SIG 1 */
@@ -472,6 +473,12 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
clear_thread_flag(TIF_SINGLESTEP);
}
+#ifdef CONFIG_X86_MCE
+ /* notify userspace of pending MCEs */
+ if (thread_info_flags & _TIF_MCE_NOTIFY)
+ mce_notify_user();
+#endif /* CONFIG_X86_MCE */
+
/* deal with pending signal delivery */
if (thread_info_flags & (_TIF_SIGPENDING|_TIF_RESTORE_SIGMASK))
do_signal(regs);
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 0694940b2e7..673a300b594 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -241,7 +241,7 @@ void flush_tlb_mm (struct mm_struct * mm)
}
if (!cpus_empty(cpu_mask))
flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
-
+ check_pgt_cache();
preempt_enable();
}
EXPORT_SYMBOL(flush_tlb_mm);
@@ -386,9 +386,9 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
return 0;
}
- spin_lock_bh(&call_lock);
+ spin_lock(&call_lock);
__smp_call_function_single(cpu, func, info, nonatomic, wait);
- spin_unlock_bh(&call_lock);
+ spin_unlock(&call_lock);
put_cpu();
return 0;
}
diff --git a/arch/x86_64/kernel/tce.c b/arch/x86_64/kernel/tce.c
index f61fb8e4f12..3aeae2fa2e2 100644
--- a/arch/x86_64/kernel/tce.c
+++ b/arch/x86_64/kernel/tce.c
@@ -136,9 +136,9 @@ int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
struct iommu_table *tbl;
int ret;
- if (dev->sysdata) {
- printk(KERN_ERR "Calgary: dev %p has sysdata %p\n",
- dev, dev->sysdata);
+ if (pci_iommu(dev->bus)) {
+ printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n",
+ dev, pci_iommu(dev->bus));
BUG();
}
@@ -155,11 +155,7 @@ int build_tce_table(struct pci_dev *dev, void __iomem *bbar)
tbl->bbar = bbar;
- /*
- * NUMA is already using the bus's sysdata pointer, so we use
- * the bus's pci_dev's sysdata instead.
- */
- dev->sysdata = tbl;
+ set_pci_iommu(dev->bus, tbl);
return 0;
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 4a0895bacf5..6d48a4e826d 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -33,6 +33,7 @@
#include <acpi/acpi_bus.h>
#endif
#include <asm/8253pit.h>
+#include <asm/i8253.h>
#include <asm/pgtable.h>
#include <asm/vsyscall.h>
#include <asm/timex.h>
@@ -44,12 +45,14 @@
#include <asm/hpet.h>
#include <asm/mpspec.h>
#include <asm/nmi.h>
+#include <asm/vgtod.h>
static char *timename = NULL;
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL(rtc_lock);
DEFINE_SPINLOCK(i8253_lock);
+EXPORT_SYMBOL(i8253_lock);
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
@@ -79,8 +82,9 @@ EXPORT_SYMBOL(profile_pc);
* sheet for details.
*/
-static void set_rtc_mmss(unsigned long nowtime)
+static int set_rtc_mmss(unsigned long nowtime)
{
+ int retval = 0;
int real_seconds, real_minutes, cmos_minutes;
unsigned char control, freq_select;
@@ -120,6 +124,7 @@ static void set_rtc_mmss(unsigned long nowtime)
if (abs(real_minutes - cmos_minutes) >= 30) {
printk(KERN_WARNING "time.c: can't update CMOS clock "
"from %d to %d\n", cmos_minutes, real_minutes);
+ retval = -1;
} else {
BIN_TO_BCD(real_seconds);
BIN_TO_BCD(real_minutes);
@@ -139,12 +144,17 @@ static void set_rtc_mmss(unsigned long nowtime)
CMOS_WRITE(freq_select, RTC_FREQ_SELECT);
spin_unlock(&rtc_lock);
+
+ return retval;
}
+/*
+ * Write the seconds part of @now to the CMOS clock via set_rtc_mmss() and
+ * pass its result straight back (0, or -1 when set_rtc_mmss() refused the
+ * update).
+ */
+int update_persistent_clock(struct timespec now)
+{
+	unsigned long secs = now.tv_sec;
+
+	return set_rtc_mmss(secs);
+}
void main_timer_handler(void)
{
- static unsigned long rtc_update = 0;
/*
* Here we are in the timer irq handler. We have irqs locally disabled (so we
* don't need spin_lock_irqsave()) but we don't know if the timer_bh is running
@@ -172,20 +182,6 @@ void main_timer_handler(void)
if (!using_apic_timer)
smp_local_timer_interrupt();
-/*
- * If we have an externally synchronized Linux clock, then update CMOS clock
- * accordingly every ~11 minutes. set_rtc_mmss() will be called in the jiffy
- * closest to exactly 500 ms before the next second. If the update fails, we
- * don't care, as it'll be updated on the next turn, and the problem (time way
- * off) isn't likely to go away much sooner anyway.
- */
-
- if (ntp_synced() && xtime.tv_sec > rtc_update &&
- abs(xtime.tv_nsec - 500000000) <= tick_nsec / 2) {
- set_rtc_mmss(xtime.tv_sec);
- rtc_update = xtime.tv_sec + 660;
- }
-
write_sequnlock(&xtime_lock);
}
@@ -199,7 +195,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
-static unsigned long get_cmos_time(void)
+unsigned long read_persistent_clock(void)
{
unsigned int year, mon, day, hour, min, sec;
unsigned long flags;
@@ -226,7 +222,7 @@ static unsigned long get_cmos_time(void)
/*
* We know that x86-64 always uses BCD format, no need to check the
* config register.
- */
+ */
BCD_TO_BIN(sec);
BCD_TO_BIN(min);
@@ -239,11 +235,11 @@ static unsigned long get_cmos_time(void)
BCD_TO_BIN(century);
year += century * 100;
printk(KERN_INFO "Extended CMOS year: %d\n", century * 100);
- } else {
+ } else {
/*
* x86-64 systems only exists since 2002.
* This will work up to Dec 31, 2100
- */
+ */
year += 2000;
}
@@ -255,45 +251,45 @@ static unsigned long get_cmos_time(void)
#define TICK_COUNT 100000000
static unsigned int __init tsc_calibrate_cpu_khz(void)
{
- int tsc_start, tsc_now;
- int i, no_ctr_free;
- unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
- unsigned long flags;
-
- for (i = 0; i < 4; i++)
- if (avail_to_resrv_perfctr_nmi_bit(i))
- break;
- no_ctr_free = (i == 4);
- if (no_ctr_free) {
- i = 3;
- rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
- wrmsrl(MSR_K7_EVNTSEL3, 0);
- rdmsrl(MSR_K7_PERFCTR3, pmc3);
- } else {
- reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
- reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
- }
- local_irq_save(flags);
- /* start meauring cycles, incrementing from 0 */
- wrmsrl(MSR_K7_PERFCTR0 + i, 0);
- wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
- rdtscl(tsc_start);
- do {
- rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
- tsc_now = get_cycles_sync();
- } while ((tsc_now - tsc_start) < TICK_COUNT);
-
- local_irq_restore(flags);
- if (no_ctr_free) {
- wrmsrl(MSR_K7_EVNTSEL3, 0);
- wrmsrl(MSR_K7_PERFCTR3, pmc3);
- wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
- } else {
- release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
- release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
- }
-
- return pmc_now * tsc_khz / (tsc_now - tsc_start);
+ int tsc_start, tsc_now;
+ int i, no_ctr_free;
+ unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0;
+ unsigned long flags;
+
+ for (i = 0; i < 4; i++)
+ if (avail_to_resrv_perfctr_nmi_bit(i))
+ break;
+ no_ctr_free = (i == 4);
+ if (no_ctr_free) {
+ i = 3;
+ rdmsrl(MSR_K7_EVNTSEL3, evntsel3);
+ wrmsrl(MSR_K7_EVNTSEL3, 0);
+ rdmsrl(MSR_K7_PERFCTR3, pmc3);
+ } else {
+ reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+ reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+ }
+ local_irq_save(flags);
+ /* start meauring cycles, incrementing from 0 */
+ wrmsrl(MSR_K7_PERFCTR0 + i, 0);
+ wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76);
+ rdtscl(tsc_start);
+ do {
+ rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now);
+ tsc_now = get_cycles_sync();
+ } while ((tsc_now - tsc_start) < TICK_COUNT);
+
+ local_irq_restore(flags);
+ if (no_ctr_free) {
+ wrmsrl(MSR_K7_EVNTSEL3, 0);
+ wrmsrl(MSR_K7_PERFCTR3, pmc3);
+ wrmsrl(MSR_K7_EVNTSEL3, evntsel3);
+ } else {
+ release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
+ release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
+ }
+
+ return pmc_now * tsc_khz / (tsc_now - tsc_start);
}
/*
@@ -321,7 +317,7 @@ static unsigned int __init pit_calibrate_tsc(void)
end = get_cycles_sync();
spin_unlock_irqrestore(&i8253_lock, flags);
-
+
return (end - start) / 50;
}
@@ -366,25 +362,20 @@ static struct irqaction irq0 = {
.handler = timer_interrupt,
.flags = IRQF_DISABLED | IRQF_IRQPOLL,
.mask = CPU_MASK_NONE,
- .name = "timer"
+ .name = "timer"
};
void __init time_init(void)
{
if (nohpet)
hpet_address = 0;
- xtime.tv_sec = get_cmos_time();
- xtime.tv_nsec = 0;
-
- set_normalized_timespec(&wall_to_monotonic,
- -xtime.tv_sec, -xtime.tv_nsec);
if (hpet_arch_init())
hpet_address = 0;
if (hpet_use_timer) {
/* set tick_nsec to use the proper rate for HPET */
- tick_nsec = TICK_NSEC_HPET;
+ tick_nsec = TICK_NSEC_HPET;
tsc_khz = hpet_calibrate_tsc();
timename = "HPET";
} else {
@@ -415,54 +406,21 @@ void __init time_init(void)
setup_irq(0, &irq0);
}
-
-static long clock_cmos_diff;
-static unsigned long sleep_start;
-
/*
* sysfs support for the timer.
*/
static int timer_suspend(struct sys_device *dev, pm_message_t state)
{
- /*
- * Estimate time zone so that set_time can update the clock
- */
- long cmos_time = get_cmos_time();
-
- clock_cmos_diff = -cmos_time;
- clock_cmos_diff += get_seconds();
- sleep_start = cmos_time;
return 0;
}
static int timer_resume(struct sys_device *dev)
{
- unsigned long flags;
- unsigned long sec;
- unsigned long ctime = get_cmos_time();
- long sleep_length = (ctime - sleep_start) * HZ;
-
- if (sleep_length < 0) {
- printk(KERN_WARNING "Time skew detected in timer resume!\n");
- /* The time after the resume must not be earlier than the time
- * before the suspend or some nasty things will happen
- */
- sleep_length = 0;
- ctime = sleep_start;
- }
if (hpet_address)
hpet_reenable();
else
i8254_timer_resume();
-
- sec = ctime + clock_cmos_diff;
- write_seqlock_irqsave(&xtime_lock,flags);
- xtime.tv_sec = sec;
- xtime.tv_nsec = 0;
- jiffies += sleep_length;
- write_sequnlock_irqrestore(&xtime_lock,flags);
- touch_softlockup_watchdog();
return 0;
}
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c
index e850aa01e1b..9b76b03d060 100644
--- a/arch/x86_64/kernel/tsc.c
+++ b/arch/x86_64/kernel/tsc.c
@@ -61,25 +61,9 @@ inline int check_tsc_unstable(void)
* first tick after the change will be slightly wrong.
*/
-#include <linux/workqueue.h>
-
-static unsigned int cpufreq_delayed_issched = 0;
-static unsigned int cpufreq_init = 0;
-static struct work_struct cpufreq_delayed_get_work;
-
-static void handle_cpufreq_delayed_get(struct work_struct *v)
-{
- unsigned int cpu;
- for_each_online_cpu(cpu) {
- cpufreq_get(cpu);
- }
- cpufreq_delayed_issched = 0;
-}
-
-static unsigned int ref_freq = 0;
-static unsigned long loops_per_jiffy_ref = 0;
-
-static unsigned long tsc_khz_ref = 0;
+static unsigned int ref_freq;
+static unsigned long loops_per_jiffy_ref;
+static unsigned long tsc_khz_ref;
static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
@@ -125,10 +109,8 @@ static struct notifier_block time_cpufreq_notifier_block = {
static int __init cpufreq_tsc(void)
{
- INIT_WORK(&cpufreq_delayed_get_work, handle_cpufreq_delayed_get);
- if (!cpufreq_register_notifier(&time_cpufreq_notifier_block,
- CPUFREQ_TRANSITION_NOTIFIER))
- cpufreq_init = 1;
+ cpufreq_register_notifier(&time_cpufreq_notifier_block,
+ CPUFREQ_TRANSITION_NOTIFIER);
return 0;
}
@@ -153,17 +135,18 @@ __cpuinit int unsynchronized_tsc(void)
#endif
/* Most intel systems have synchronized TSCs except for
multi node systems */
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
#ifdef CONFIG_ACPI
/* But TSC doesn't tick in C3 so don't use it there */
- if (acpi_gbl_FADT.header.length > 0 && acpi_gbl_FADT.C3latency < 1000)
+ if (acpi_gbl_FADT.header.length > 0 &&
+ acpi_gbl_FADT.C3latency < 1000)
return 1;
#endif
- return 0;
+ return 0;
}
- /* Assume multi socket systems are not synchronized */
- return num_present_cpus() > 1;
+ /* Assume multi socket systems are not synchronized */
+ return num_present_cpus() > 1;
}
int __init notsc_setup(char *s)
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S
index 5c57ea4591c..e7a5eb6cd78 100644
--- a/arch/x86_64/kernel/vmlinux.lds.S
+++ b/arch/x86_64/kernel/vmlinux.lds.S
@@ -54,6 +54,13 @@ SECTIONS
RODATA
+ . = ALIGN(4);
+ .tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {
+ __tracedata_start = .;
+ *(.tracedata)
+ __tracedata_end = .;
+ }
+
. = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */
/* Data */
.data : AT(ADDR(.data) - LOAD_OFFSET) {
@@ -93,6 +100,9 @@ SECTIONS
.vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data))
{ *(.vsyscall_gtod_data) }
vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data);
+ .vsyscall_clock : AT(VLOAD(.vsyscall_clock))
+ { *(.vsyscall_clock) }
+ vsyscall_clock = VVIRT(.vsyscall_clock);
.vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1))
@@ -133,20 +143,11 @@ SECTIONS
/* might get freed after init */
. = ALIGN(4096);
__smp_alt_begin = .;
- __smp_alt_instructions = .;
- .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) {
- *(.smp_altinstructions)
- }
- __smp_alt_instructions_end = .;
- . = ALIGN(8);
__smp_locks = .;
.smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {
*(.smp_locks)
}
__smp_locks_end = .;
- .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) {
- *(.smp_altinstr_replacement)
- }
. = ALIGN(4096);
__smp_alt_end = .;
@@ -189,6 +190,12 @@ SECTIONS
.exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { *(.exit.text) }
.exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { *(.exit.data) }
+/* vdso blob that is mapped into user space */
+ vdso_start = . ;
+ .vdso : AT(ADDR(.vdso) - LOAD_OFFSET) { *(.vdso) }
+ . = ALIGN(4096);
+ vdso_end = .;
+
#ifdef CONFIG_BLK_DEV_INITRD
. = ALIGN(4096);
__initramfs_start = .;
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c
index 57660d58d50..06c34949bfd 100644
--- a/arch/x86_64/kernel/vsyscall.c
+++ b/arch/x86_64/kernel/vsyscall.c
@@ -42,6 +42,7 @@
#include <asm/segment.h>
#include <asm/desc.h>
#include <asm/topology.h>
+#include <asm/vgtod.h>
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
#define __syscall_clobber "r11","rcx","memory"
@@ -57,26 +58,9 @@
* - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64)
* Try to keep this structure as small as possible to avoid cache line ping pongs
*/
-struct vsyscall_gtod_data_t {
- seqlock_t lock;
-
- /* open coded 'struct timespec' */
- time_t wall_time_sec;
- u32 wall_time_nsec;
-
- int sysctl_enabled;
- struct timezone sys_tz;
- struct { /* extract of a clocksource struct */
- cycle_t (*vread)(void);
- cycle_t cycle_last;
- cycle_t mask;
- u32 mult;
- u32 shift;
- } clock;
-};
int __vgetcpu_mode __section_vgetcpu_mode;
-struct vsyscall_gtod_data_t __vsyscall_gtod_data __section_vsyscall_gtod_data =
+struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data =
{
.lock = SEQLOCK_UNLOCKED,
.sysctl_enabled = 1,
@@ -96,6 +80,8 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vsyscall_gtod_data.sys_tz = sys_tz;
+ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
+ vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 84f11728fc7..2074bddd4f0 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -301,7 +301,7 @@ static int vmalloc_fault(unsigned long address)
return 0;
}
-int page_fault_trace = 0;
+static int page_fault_trace;
int exception_trace = 1;
/*
@@ -568,7 +568,7 @@ out_of_memory:
}
printk("VM: killing process %s\n", tsk->comm);
if (error_code & 4)
- do_exit(SIGKILL);
+ do_group_exit(SIGKILL);
goto no_context;
do_sigbus:
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 9a0e98accf0..381c2ecd407 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -700,8 +700,6 @@ int kern_addr_valid(unsigned long addr)
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
-extern int exception_trace, page_fault_trace;
-
static ctl_table debug_table2[] = {
{
.ctl_name = 99,
@@ -774,3 +772,12 @@ void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
return __alloc_bootmem_core(pgdat->bdata, size,
SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
}
+
+/*
+ * Return the arch-specific name of @vma: "[vdso]" when the vma has an mm and
+ * starts at that mm's context.vdso address, "[vsyscall]" for the gate vma,
+ * and NULL for every other mapping.
+ */
+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+	struct mm_struct *mm = vma->vm_mm;
+
+	if (mm && vma->vm_start == (long)mm->context.vdso)
+		return "[vdso]";
+
+	return vma == &gate_vma ? "[vsyscall]" : NULL;
+}
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c
index f983c75825d..a96006f7ae0 100644
--- a/arch/x86_64/mm/k8topology.c
+++ b/arch/x86_64/mm/k8topology.c
@@ -44,12 +44,12 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
{
unsigned long prevbase;
struct bootnode nodes[8];
- int nodeid, i, nb;
+ int nodeid, i, j, nb;
unsigned char nodeids[8];
int found = 0;
u32 reg;
unsigned numnodes;
- unsigned dualcore = 0;
+ unsigned num_cores;
if (!early_pci_allowed())
return -1;
@@ -60,6 +60,9 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
printk(KERN_INFO "Scanning NUMA topology in Northbridge %d\n", nb);
+ num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+ printk(KERN_INFO "CPU has %d num_cores\n", num_cores);
+
reg = read_pci_config(0, nb, 0, 0x60);
numnodes = ((reg >> 4) & 0xF) + 1;
if (numnodes <= 1)
@@ -73,8 +76,6 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
unsigned long base,limit;
u32 nodeid;
- /* Undefined before E stepping, but hopefully 0 */
- dualcore |= ((read_pci_config(0, nb, 3, 0xe8) >> 12) & 3) == 1;
base = read_pci_config(0, nb, 1, 0x40 + i*8);
limit = read_pci_config(0, nb, 1, 0x44 + i*8);
@@ -170,8 +171,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end)
for (i = 0; i < 8; i++) {
if (nodes[i].start != nodes[i].end) {
nodeid = nodeids[i];
- apicid_to_node[nodeid << dualcore] = i;
- apicid_to_node[(nodeid << dualcore) + dualcore] = i;
+ for (j = 0; j < num_cores; j++)
+ apicid_to_node[(nodeid * num_cores) + j] = i;
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
}
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c
index 51548947ad3..6da23552226 100644
--- a/arch/x86_64/mm/numa.c
+++ b/arch/x86_64/mm/numa.c
@@ -273,9 +273,6 @@ void __init numa_init_array(void)
#ifdef CONFIG_NUMA_EMU
/* Numa emulation */
-#define E820_ADDR_HOLE_SIZE(start, end) \
- (e820_hole_size((start) >> PAGE_SHIFT, (end) >> PAGE_SHIFT) << \
- PAGE_SHIFT)
char *cmdline __initdata;
/*
@@ -319,7 +316,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
return -1;
if (num_nodes > MAX_NUMNODES)
num_nodes = MAX_NUMNODES;
- size = (max_addr - *addr - E820_ADDR_HOLE_SIZE(*addr, max_addr)) /
+ size = (max_addr - *addr - e820_hole_size(*addr, max_addr)) /
num_nodes;
/*
* Calculate the number of big nodes that can be allocated as a result
@@ -347,7 +344,7 @@ static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr,
if (i == num_nodes + node_start - 1)
end = max_addr;
else
- while (end - *addr - E820_ADDR_HOLE_SIZE(*addr, end) <
+ while (end - *addr - e820_hole_size(*addr, end) <
size) {
end += FAKE_NODE_MIN_SIZE;
if (end > max_addr) {
@@ -476,18 +473,22 @@ out:
/*
* We need to vacate all active ranges that may have been registered by
- * SRAT.
+ * SRAT and set acpi_numa to -1 so that srat_disabled() always returns
+ * true. NUMA emulation has succeeded so we will not scan ACPI nodes.
*/
remove_all_active_ranges();
+#ifdef CONFIG_ACPI_NUMA
+ acpi_numa = -1;
+#endif
for_each_node_mask(i, node_possible_map) {
e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT,
nodes[i].end >> PAGE_SHIFT);
setup_node_bootmem(i, nodes[i].start, nodes[i].end);
}
+ acpi_fake_nodes(nodes, num_nodes);
numa_init_array();
return 0;
}
-#undef E820_ADDR_HOLE_SIZE
#endif /* CONFIG_NUMA_EMU */
void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 9148f4a4cec..36377b6b8ef 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -74,14 +74,12 @@ static void flush_kernel_map(void *arg)
struct page *pg;
/* When clflush is available always use it because it is
- much cheaper than WBINVD. Disable clflush for now because
- the high level code is not ready yet */
- if (1 || !cpu_has_clflush)
+ much cheaper than WBINVD. */
+ if (!cpu_has_clflush)
asm volatile("wbinvd" ::: "memory");
else list_for_each_entry(pg, l, lru) {
void *adr = page_address(pg);
- if (cpu_has_clflush)
- cache_flush_page(adr);
+ cache_flush_page(adr);
}
__flush_tlb_all();
}
@@ -95,7 +93,8 @@ static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
static inline void save_page(struct page *fpage)
{
- list_add(&fpage->lru, &deferred_pages);
+ if (!test_and_set_bit(PG_arch_1, &fpage->flags))
+ list_add(&fpage->lru, &deferred_pages);
}
/*
@@ -129,9 +128,12 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
pte_t *kpte;
struct page *kpte_page;
pgprot_t ref_prot2;
+
kpte = lookup_address(address);
if (!kpte) return 0;
kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
+ BUG_ON(PageLRU(kpte_page));
+ BUG_ON(PageCompound(kpte_page));
if (pgprot_val(prot) != pgprot_val(ref_prot)) {
if (!pte_huge(*kpte)) {
set_pte(kpte, pfn_pte(pfn, prot));
@@ -159,10 +161,9 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
/* on x86-64 the direct mapping set at boot is not using 4k pages */
BUG_ON(PageReserved(kpte_page));
- if (page_private(kpte_page) == 0) {
- save_page(kpte_page);
+ save_page(kpte_page);
+ if (page_private(kpte_page) == 0)
revert_page(address, ref_prot);
- }
return 0;
}
@@ -234,6 +235,10 @@ void global_flush_tlb(void)
flush_map(&l);
list_for_each_entry_safe(pg, next, &l, lru) {
+ list_del(&pg->lru);
+ clear_bit(PG_arch_1, &pg->flags);
+ if (page_private(pg) != 0)
+ continue;
ClearPagePrivate(pg);
__free_page(pg);
}
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c
index 1e76bb0a727..acdf03e1914 100644
--- a/arch/x86_64/mm/srat.c
+++ b/arch/x86_64/mm/srat.c
@@ -106,9 +106,9 @@ static __init int slit_valid(struct acpi_table_slit *slit)
for (j = 0; j < d; j++) {
u8 val = slit->entry[d*i + j];
if (i == j) {
- if (val != 10)
+ if (val != LOCAL_DISTANCE)
return 0;
- } else if (val <= 10)
+ } else if (val <= LOCAL_DISTANCE)
return 0;
}
}
@@ -350,7 +350,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma)
/* Sanity check to catch more bad SRATs (they are amazingly common).
Make sure the PXMs cover all memory. */
-static int nodes_cover_memory(void)
+static int __init nodes_cover_memory(const struct bootnode *nodes)
{
int i;
unsigned long pxmram, e820ram;
@@ -394,6 +394,9 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
{
int i;
+ if (acpi_numa <= 0)
+ return -1;
+
/* First clean up the node list */
for (i = 0; i < MAX_NUMNODES; i++) {
cutoff_node(i, start, end);
@@ -403,10 +406,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
}
}
- if (acpi_numa <= 0)
- return -1;
-
- if (!nodes_cover_memory()) {
+ if (!nodes_cover_memory(nodes)) {
bad_srat();
return -1;
}
@@ -440,6 +440,86 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end)
return 0;
}
+#ifdef CONFIG_NUMA_EMU
+/*
+ * Return the ID of the parsed (real) node whose [start, end) range contains
+ * @addr, or NUMA_NO_NODE when no parsed node covers it.
+ */
+static int __init find_node_by_addr(unsigned long addr)
+{
+	int ret = NUMA_NO_NODE;
+	int i;
+
+	for_each_node_mask(i, nodes_parsed) {
+		/*
+		 * Find the real node that this emulated node appears on. For
+		 * the sake of simplicity, we only use a real node's starting
+		 * address to determine which emulated node it appears on.
+		 */
+		if (addr >= nodes[i].start && addr < nodes[i].end) {
+			ret = i;
+			break;
+		}
+	}
+	/*
+	 * Return ret, not the iterator: when nothing matches, i holds
+	 * whatever value ended the walk, which the caller's NUMA_NO_NODE
+	 * check would not recognise.
+	 */
+	return ret;
+}
+
+/*
+ * In NUMA emulation, we need to setup proximity domain (_PXM) to node ID
+ * mappings that respect the real ACPI topology but reflect our emulated
+ * environment. For each emulated node, we find which real node it appears on
+ * and create PXM to NID mappings for those fake nodes which mirror that
+ * locality. SLIT will now represent the correct distances between emulated
+ * nodes as a result of the real topology.
+ *
+ * @fake_nodes: address ranges of the emulated nodes, indexed by fake node ID
+ * @num_nodes: number of entries in @fake_nodes that are processed
+ *
+ * A fake node whose start address lies in no parsed node, or whose real node
+ * has no PXM, keeps PXM_INVAL in the local map; the second loop still passes
+ * that value to __acpi_map_pxm_to_node().
+ * NOTE(review): confirm __acpi_map_pxm_to_node() tolerates PXM_INVAL.
+ *
+ * On return apicid_to_node has been overwritten with the fake mapping and
+ * nodes_parsed holds exactly the fake nodes with a non-empty range. A
+ * warning is raised if nodes_cover_memory() rejects the fake layout.
+ */
+void __init acpi_fake_nodes(const struct bootnode *fake_nodes, int num_nodes)
+{
+ int i, j;
+ int fake_node_to_pxm_map[MAX_NUMNODES] = {
+ [0 ... MAX_NUMNODES-1] = PXM_INVAL
+ };
+ unsigned char fake_apicid_to_node[MAX_LOCAL_APIC] = {
+ [0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
+ };
+
+ printk(KERN_INFO "Faking PXM affinity for fake nodes on real "
+ "topology.\n");
+ for (i = 0; i < num_nodes; i++) {
+ int nid, pxm;
+
+ nid = find_node_by_addr(fake_nodes[i].start);
+ if (nid == NUMA_NO_NODE)
+ continue;
+ pxm = node_to_pxm(nid);
+ if (pxm == PXM_INVAL)
+ continue;
+ fake_node_to_pxm_map[i] = pxm;
+ /*
+ * For each apicid_to_node mapping that exists for this real
+ * node, it must now point to the fake node ID.
+ *
+ * Fake nodes are visited in ascending order and the store is
+ * unconditional, so an APIC ID ends up on the highest-numbered
+ * fake node that reaches this point for its real node.
+ */
+ for (j = 0; j < MAX_LOCAL_APIC; j++)
+ if (apicid_to_node[j] == nid)
+ fake_apicid_to_node[j] = i;
+ }
+ for (i = 0; i < num_nodes; i++)
+ __acpi_map_pxm_to_node(fake_node_to_pxm_map[i], i);
+ memcpy(apicid_to_node, fake_apicid_to_node, sizeof(apicid_to_node));
+
+ nodes_clear(nodes_parsed);
+ for (i = 0; i < num_nodes; i++)
+ if (fake_nodes[i].start != fake_nodes[i].end)
+ node_set(i, nodes_parsed);
+ WARN_ON(!nodes_cover_memory(fake_nodes));
+}
+
+/*
+ * Used by __node_distance() when there is no SLIT: with NUMA emulation two
+ * nodes count as the same locality when they map to the same proximity
+ * domain. Returns non-zero for "same", zero otherwise.
+ */
+static int null_slit_node_compare(int a, int b)
+{
+	int pxm_a = node_to_pxm(a);
+	int pxm_b = node_to_pxm(b);
+
+	return pxm_a == pxm_b;
+}
+#else
+/*
+ * Used by __node_distance() when there is no SLIT: without NUMA emulation a
+ * node is only local to itself. Returns 1 when @a and @b are the same node
+ * ID, 0 otherwise.
+ */
+static int null_slit_node_compare(int a, int b)
+{
+	int same_node = (a == b);
+
+	return same_node;
+}
+#endif /* CONFIG_NUMA_EMU */
+
void __init srat_reserve_add_area(int nodeid)
{
if (found_add_area && nodes_add[nodeid].end) {
@@ -464,7 +544,8 @@ int __node_distance(int a, int b)
int index;
if (!acpi_slit)
- return a == b ? 10 : 20;
+ return null_slit_node_compare(a, b) ? LOCAL_DISTANCE :
+ REMOTE_DISTANCE;
index = acpi_slit->locality_count * node_to_pxm(a);
return acpi_slit->entry[index + node_to_pxm(b)];
}
diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c
index 3acf60ded2a..9cc813e2970 100644
--- a/arch/x86_64/pci/k8-bus.c
+++ b/arch/x86_64/pci/k8-bus.c
@@ -59,6 +59,8 @@ fill_mp_bus_to_cpumask(void)
j <= SUBORDINATE_LDT_BUS_NUMBER(ldtbus);
j++) {
struct pci_bus *bus;
+ struct pci_sysdata *sd;
+
long node = NODE_ID(nid);
/* Algorithm a bit dumb, but
it shouldn't matter here */
@@ -67,7 +69,9 @@ fill_mp_bus_to_cpumask(void)
continue;
if (!node_online(node))
node = 0;
- bus->sysdata = (void *)node;
+
+ sd = bus->sysdata;
+ sd->node = node;
}
}
}
diff --git a/arch/x86_64/vdso/Makefile b/arch/x86_64/vdso/Makefile
new file mode 100644
index 00000000000..faaa72fb250
--- /dev/null
+++ b/arch/x86_64/vdso/Makefile
@@ -0,0 +1,49 @@
+#
+# x86-64 vDSO.
+#
+
+# files to link into the vdso
+# vdso-start.o has to be first
+vobjs-y := vdso-start.o vdso-note.o vclock_gettime.o vgetcpu.o vvar.o
+
+# files to link into kernel
+obj-y := vma.o vdso.o vdso-syms.o
+
+# the vdso member objects again, each prefixed with the output directory
+vobjs := $(foreach F,$(vobjs-y),$(obj)/$F)
+
+# vdso.S pulls vdso.so in with .incbin, so vdso.o depends on the linked DSO
+$(obj)/vdso.o: $(obj)/vdso.so
+
+targets += vdso.so vdso.lds $(vobjs-y) vdso-syms.o
+
+# The DSO images are built using a special linker script.
+# All prerequisites except FORCE are passed after -Wl,-T, so the first one
+# (the linker script) becomes the -T argument and the rest are plain inputs.
+# Per-target flags come from SYSCFLAGS_<target file name>.
+quiet_cmd_syscall = SYSCALL $@
+ cmd_syscall = $(CC) -m elf_x86_64 -nostdlib $(SYSCFLAGS_$(@F)) \
+ -Wl,-T,$(filter-out FORCE,$^) -o $@
+
+export CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+vdso-flags = -fPIC -shared -Wl,-soname=linux-vdso.so.1 \
+ $(call ld-option, -Wl$(comma)--hash-style=sysv) \
+ -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
+SYSCFLAGS_vdso.so = $(vdso-flags)
+
+$(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
+
+$(obj)/vdso.so: $(src)/vdso.lds $(vobjs) FORCE
+ $(call if_changed,syscall)
+
+# The two C objects that go into the DSO get this flag set as their CFLAGS.
+CF := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64
+
+$(obj)/vclock_gettime.o: CFLAGS = $(CF)
+$(obj)/vgetcpu.o: CFLAGS = $(CF)
+
+# We also create a special relocatable object that should mirror the symbol
+# table and layout of the linked DSO. With ld -R we can then refer to
+# these symbols in the kernel code rather than hand-coded addresses.
+extra-y += vdso-syms.o
+$(obj)/built-in.o: $(obj)/vdso-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/vdso-syms.o
+
+SYSCFLAGS_vdso-syms.o = -r -d
+$(obj)/vdso-syms.o: $(src)/vdso.lds $(vobjs) FORCE
+ $(call if_changed,syscall)
diff --git a/arch/x86_64/vdso/vclock_gettime.c b/arch/x86_64/vdso/vclock_gettime.c
new file mode 100644
index 00000000000..17f6a00de71
--- /dev/null
+++ b/arch/x86_64/vdso/vclock_gettime.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Fast user context implementation of clock_gettime and gettimeofday.
+ *
+ * The code should have no internal unresolved relocations.
+ * Check with readelf after changing.
+ * Also alternative() doesn't work.
+ */
+
+#include <linux/kernel.h>
+#include <linux/posix-timers.h>
+#include <linux/time.h>
+#include <linux/string.h>
+#include <asm/vsyscall.h>
+#include <asm/vgtod.h>
+#include <asm/timex.h>
+#include <asm/hpet.h>
+#include <asm/unistd.h>
+#include <asm/io.h>
+#include <asm/vgtod.h>
+#include "vextern.h"
+
+#define gtod vdso_vsyscall_gtod_data
+
+/*
+ * Slow path: issue the real clock_gettime system call for @clock, with @ts
+ * as the user buffer. Returns the raw value the syscall left in %rax.
+ */
+static long vdso_fallback_gettime(long clock, struct timespec *ts)
+{
+	long ret;
+
+	/*
+	 * The SYSCALL instruction overwrites %rcx (return address) and %r11
+	 * (saved flags), so both are listed as clobbers; otherwise gcc may
+	 * keep live values in them across the call once this helper is
+	 * inlined. volatile because the statement does more than produce
+	 * ret.
+	 */
+	asm volatile("syscall" : "=a" (ret) :
+		     "0" (__NR_clock_gettime), "D" (clock), "S" (ts) :
+		     "rcx", "r11", "memory");
+	return ret;
+}
+
+/*
+ * Read the clock through gtod->clock.vread and scale the distance from
+ * gtod->clock.cycle_last with the clock's mult/shift pair.
+ */
+static inline long vgetns(void)
+{
+	cycles_t (*read_cycles)(void) = gtod->clock.vread;
+	cycles_t delta = read_cycles() - gtod->clock.cycle_last;
+
+	return (delta * gtod->clock.mult) >> gtod->clock.shift;
+}
+
+/*
+ * CLOCK_REALTIME fast path: copy the published wall time into @ts and add the
+ * nanosecond offset from vgetns(). The snapshot is retaken whenever the
+ * seqlock reports that it raced with an update. Always returns 0.
+ */
+static noinline int do_realtime(struct timespec *ts)
+{
+	unsigned long start, offset_ns;
+
+	for (;;) {
+		start = read_seqbegin(&gtod->lock);
+		ts->tv_sec = gtod->wall_time_sec;
+		ts->tv_nsec = gtod->wall_time_nsec;
+		offset_ns = vgetns();
+		if (likely(!read_seqretry(&gtod->lock, start)))
+			break;
+	}
+
+	timespec_add_ns(ts, offset_ns);
+	return 0;
+}
+
+/*
+ * Copy of the version in kernel/time.c which we cannot directly access.
+ * Stores @sec/@nsec in @ts after moving whole seconds between the two so
+ * that 0 <= tv_nsec < NSEC_PER_SEC.
+ */
+static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec)
+{
+	/* Too many nanoseconds: carry into the seconds. */
+	for (; nsec >= NSEC_PER_SEC; nsec -= NSEC_PER_SEC)
+		sec++;
+
+	/* Negative nanoseconds: borrow from the seconds. */
+	for (; nsec < 0; nsec += NSEC_PER_SEC)
+		sec--;
+
+	ts->tv_nsec = nsec;
+	ts->tv_sec = sec;
+}
+
+/*
+ * CLOCK_MONOTONIC fast path: take the published wall time plus the vgetns()
+ * offset, add the wall_to_monotonic pair, and store the normalized sum in
+ * @ts. The snapshot is retaken whenever the seqlock reports a concurrent
+ * update. Always returns 0.
+ */
+static noinline int do_monotonic(struct timespec *ts)
+{
+	unsigned long start, nsecs, seconds;
+
+	for (;;) {
+		start = read_seqbegin(&gtod->lock);
+		seconds = gtod->wall_time_sec;
+		nsecs = gtod->wall_time_nsec + vgetns();
+		seconds += gtod->wall_to_monotonic.tv_sec;
+		nsecs += gtod->wall_to_monotonic.tv_nsec;
+		if (likely(!read_seqretry(&gtod->lock, start)))
+			break;
+	}
+
+	vset_normalized_timespec(ts, seconds, nsecs);
+	return 0;
+}
+
+/*
+ * vDSO clock_gettime: CLOCK_REALTIME and CLOCK_MONOTONIC are answered from
+ * the shared gtod data when the fast path is enabled and the clock provides
+ * a vread method; every other case goes through the real system call.
+ */
+int __vdso_clock_gettime(clockid_t clock, struct timespec *ts)
+{
+	if (unlikely(!gtod->sysctl_enabled || !gtod->clock.vread))
+		return vdso_fallback_gettime(clock, ts);
+
+	if (clock == CLOCK_REALTIME)
+		return do_realtime(ts);
+	if (clock == CLOCK_MONOTONIC)
+		return do_monotonic(ts);
+
+	/* Any other clock id is left to the kernel. */
+	return vdso_fallback_gettime(clock, ts);
+}
+int clock_gettime(clockid_t, struct timespec *)
+ __attribute__((weak, alias("__vdso_clock_gettime")));
+
+/*
+ * vDSO gettimeofday: fill @tv and/or @tz from the shared gtod data when the
+ * fast path is enabled and the clock provides a vread method, otherwise issue
+ * the real system call. Either pointer may be NULL. Returns 0 on the fast
+ * path, else the raw syscall return value.
+ */
+int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+	long ret;
+
+	if (likely(gtod->sysctl_enabled && gtod->clock.vread)) {
+		/* tv may be NULL when the caller only wants the timezone. */
+		if (likely(tv != NULL)) {
+			BUILD_BUG_ON(offsetof(struct timeval, tv_usec) !=
+				     offsetof(struct timespec, tv_nsec) ||
+				     sizeof(*tv) != sizeof(struct timespec));
+			/*
+			 * Same layout (checked above), so do_realtime() can
+			 * fill tv as a timespec; the nanoseconds are then
+			 * scaled down to microseconds in place.
+			 */
+			do_realtime((struct timespec *)tv);
+			tv->tv_usec /= 1000;
+		}
+		if (unlikely(tz != NULL)) {
+			/* This relies on gcc inlining the memcpy. We'll notice
+			   if it ever fails to do so. */
+			memcpy(tz, &gtod->sys_tz, sizeof(struct timezone));
+		}
+		return 0;
+	}
+	/*
+	 * SYSCALL overwrites %rcx and %r11, so both are declared as clobbers
+	 * in addition to memory.
+	 */
+	asm volatile("syscall" : "=a" (ret) :
+		     "0" (__NR_gettimeofday), "D" (tv), "S" (tz) :
+		     "rcx", "r11", "memory");
+	return ret;
+}
+int gettimeofday(struct timeval *, struct timezone *)
+ __attribute__((weak, alias("__vdso_gettimeofday")));
diff --git a/arch/x86_64/vdso/vdso-note.S b/arch/x86_64/vdso/vdso-note.S
new file mode 100644
index 00000000000..79a071e4357
--- /dev/null
+++ b/arch/x86_64/vdso/vdso-note.S
@@ -0,0 +1,12 @@
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ *
+ * The single note below is opened with ELFNOTE_START(Linux, 0, "a") and
+ * carries LINUX_VERSION_CODE as its payload.
+ */
+
+#include <linux/uts.h>
+#include <linux/version.h>
+#include <linux/elfnote.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/x86_64/vdso/vdso-start.S b/arch/x86_64/vdso/vdso-start.S
new file mode 100644
index 00000000000..2dc2cdb84d6
--- /dev/null
+++ b/arch/x86_64/vdso/vdso-start.S
@@ -0,0 +1,2 @@
+ .globl vdso_kernel_start /* global so that vma.c can refer to the label */
+vdso_kernel_start: /* var_ref() in vma.c measures variable offsets from this label */
diff --git a/arch/x86_64/vdso/vdso.S b/arch/x86_64/vdso/vdso.S
new file mode 100644
index 00000000000..92e80c1972a
--- /dev/null
+++ b/arch/x86_64/vdso/vdso.S
@@ -0,0 +1,2 @@
+ .section ".vdso","a" /* section that receives the vDSO image */
+ .incbin "arch/x86_64/vdso/vdso.so" /* embed the contents of vdso.so verbatim */
diff --git a/arch/x86_64/vdso/vdso.lds.S b/arch/x86_64/vdso/vdso.lds.S
new file mode 100644
index 00000000000..b9a60e665d0
--- /dev/null
+++ b/arch/x86_64/vdso/vdso.lds.S
@@ -0,0 +1,77 @@
+/*
+ * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
+ * object prelinked to its virtual address, and with only one read-only
+ * segment (that fits in one page). This script controls its layout.
+ *
+ * Layout set up by SECTIONS: the hash, dynamic-symbol and version tables
+ * follow the headers, .text is placed at VDSO_PRELINK + VDSO_TEXT_OFFSET,
+ * .data at VDSO_PRELINK + 0x900, and the remaining sections come after .bss.
+ */
+#include <asm/asm-offsets.h>
+#include "voffset.h"
+
+#define VDSO_PRELINK 0xffffffffff700000 /* base address all locations below are computed from */
+
+SECTIONS
+{
+ . = VDSO_PRELINK + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ /* This linker script is used both with -r and with -shared.
+ For the layouts to match, we need to skip more than enough
+ space for the dynamic symbol table et al. If this amount
+ is insufficient, ld -shared will barf. Just increase it here.
+ The amount skipped is VDSO_TEXT_OFFSET, defined in voffset.h. */
+ . = VDSO_PRELINK + VDSO_TEXT_OFFSET;
+
+ .text : { *(.text) } :text
+ .text.ptr : { *(.text.ptr) } :text
+ . = VDSO_PRELINK + 0x900; /* .data begins at this fixed offset from the prelink base */
+ .data : { *(.data) } :text
+ .bss : { *(.bss) } :text
+
+ .altinstructions : { *(.altinstructions) } :text
+ .altinstr_replacement : { *(.altinstr_replacement) } :text
+
+ .note : { *(.note.*) } :text :note
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .useless : { /* gathers the GOT, linkonce and dynbss input sections */
+ *(.got.plt) *(.got)
+ *(.gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.gnu.linkonce.b.*)
+ } :text
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ *
+ * Four headers are declared: text (the PT_LOAD segment, which also covers
+ * FILEHDR and PHDRS), dynamic, note and eh_frame_hdr. The SECTIONS block
+ * assigns output sections to them with its ":name" suffixes.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ *
+ * Each entry point is listed under both its plain name and its __vdso_
+ * prefixed name in version node LINUX_2.6; every other symbol is local.
+ */
+VERSION
+{
+ LINUX_2.6 {
+ global:
+ clock_gettime;
+ __vdso_clock_gettime;
+ gettimeofday;
+ __vdso_gettimeofday;
+ getcpu;
+ __vdso_getcpu;
+ local: *;
+ };
+}
diff --git a/arch/x86_64/vdso/vextern.h b/arch/x86_64/vdso/vextern.h
new file mode 100644
index 00000000000..1683ba2ae3e
--- /dev/null
+++ b/arch/x86_64/vdso/vextern.h
@@ -0,0 +1,16 @@
+#ifndef VEXTERN /* includers may pre-define VEXTERN to expand the list below differently */
+#include <asm/vsyscall.h>
+#define VEXTERN(x) \
+ extern typeof(x) *vdso_ ## x __attribute__((visibility("hidden")));
+#endif
+
+#define VMAGIC 0xfeedbabeabcdefabUL /* placeholder held by each vdso_ pointer until the kernel fills it in */
+
+/* Any kernel variables used in the vDSO must be exported in the main
+ kernel's vmlinux.lds.S/vsyscall.h/proper __section and
+ put into vextern.h and be referenced as a pointer with vdso prefix.
+ The main kernel later fills in the values.
+ Each VEXTERN(name) line below produces one vdso_name pointer. */
+
+VEXTERN(jiffies)
+VEXTERN(vgetcpu_mode)
+VEXTERN(vsyscall_gtod_data)
diff --git a/arch/x86_64/vdso/vgetcpu.c b/arch/x86_64/vdso/vgetcpu.c
new file mode 100644
index 00000000000..91f6e85d0fc
--- /dev/null
+++ b/arch/x86_64/vdso/vgetcpu.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2006 Andi Kleen, SUSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Fast user context implementation of getcpu()
+ */
+
+#include <linux/kernel.h>
+#include <linux/getcpu.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <asm/vsyscall.h>
+#include <asm/vgtod.h>
+#include "vextern.h"
+
+/*
+ * vDSO getcpu(): report the current CPU number through *cpu and the node
+ * number through *node. Any of the three pointers may be NULL.
+ * Always returns 0.
+ */
+long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+{
+	unsigned int unused, packed;
+	unsigned long now = 0;
+	int cache_hit = 0;
+
+	/* Fast cache - the value is recomputed at most once per jiffy so
+	   that the relatively costly rdtscp/cpuid is usually avoided.
+	   That is acceptable because the scheduler usually keeps the
+	   process on the same CPU and this call does not guarantee its
+	   result anyway.
+	   It is done here because user space would otherwise do it on its
+	   own in a likely inferior way (no access to jiffies).
+	   Callers who dislike it pass NULL. */
+	if (tcache) {
+		now = *vdso_jiffies;
+		cache_hit = (tcache->blob[0] == now);
+	}
+
+	if (cache_hit) {
+		packed = tcache->blob[1];
+	} else if (*vdso_vgetcpu_mode == VGETCPU_RDTSCP) {
+		/* Per CPU data comes from RDTSCP */
+		rdtscp(unused, unused, packed);
+	} else {
+		/* Per CPU data comes from the GDT */
+		asm("lsl %1,%0" : "=r" (packed) : "r" (__PER_CPU_SEG));
+	}
+
+	if (tcache) {
+		tcache->blob[0] = now;
+		tcache->blob[1] = packed;
+	}
+	/* Low 12 bits hold the CPU number, the bits above them the node. */
+	if (cpu)
+		*cpu = packed & 0xfff;
+	if (node)
+		*node = packed >> 12;
+	return 0;
+}
+
+long getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
+	__attribute__((weak, alias("__vdso_getcpu")));
diff --git a/arch/x86_64/vdso/vma.c b/arch/x86_64/vdso/vma.c
new file mode 100644
index 00000000000..d4cb83a6c06
--- /dev/null
+++ b/arch/x86_64/vdso/vma.c
@@ -0,0 +1,139 @@
+/*
+ * Set up the VMAs to tell the VM about the vDSO.
+ * Copyright 2007 Andi Kleen, SUSE Labs.
+ * Subject to the GPL, v.2
+ */
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <asm/vsyscall.h>
+#include <asm/vgtod.h>
+#include <asm/proto.h>
+#include "voffset.h"
+
+int vdso_enabled = 1; /* set from the "vdso=" boot option, zeroed when setup detects a problem; no vDSO is mapped while it is 0 */
+
+#define VEXTERN(x) extern typeof(__ ## x) *vdso_ ## x; /* declare vdso_<name> as a pointer to the type of the kernel's __<name> */
+#include "vextern.h"
+#undef VEXTERN
+
+extern char vdso_kernel_start[], vdso_start[], vdso_end[]; /* vdso_start..vdso_end bound the image copied by init_vdso_vars() */
+extern unsigned short vdso_sync_cpuid;
+
+struct page **vdso_pages; /* pages holding the copy of the vDSO image; allocated in init_vdso_vars() */
+
+/*
+ * Locate, inside the image mapped at vbase, the slot that corresponds to
+ * the variable at address var. The slot's offset is var's distance from
+ * vdso_kernel_start plus VDSO_TEXT_OFFSET. A slot that does not still hold
+ * VMAGIC is reported under the given name and disables the vDSO; the slot
+ * address is returned either way.
+ */
+static inline void *var_ref(void *vbase, char *var, char *name)
+{
+	unsigned delta = var - &vdso_kernel_start[0] + VDSO_TEXT_OFFSET;
+	void **slot = vbase + delta;
+
+	if (*slot == (void *)VMAGIC)
+		return slot;
+
+	printk("VDSO: variable %s broken\n", name);
+	vdso_enabled = 0;
+	return slot;
+}
+
+/*
+ * Boot-time setup of the vDSO image: copy the image found between
+ * vdso_start and vdso_end into freshly allocated pages (vdso_pages), map
+ * those pages temporarily, check the ELF magic, and point every vdso_<name>
+ * pointer listed in vextern.h at the kernel's __<name> variable.
+ *
+ * Returns 0 on success. If an allocation or the mapping fails, the pages
+ * obtained so far are released, vdso_enabled is cleared and -ENOMEM is
+ * returned.
+ */
+static int __init init_vdso_vars(void)
+{
+	int npages = (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE;
+	int i;
+	char *vbase;
+
+	vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL);
+	if (!vdso_pages)
+		goto oom;
+	for (i = 0; i < npages; i++) {
+		struct page *p;
+		p = alloc_page(GFP_KERNEL);
+		if (!p)
+			goto oom_free;
+		vdso_pages[i] = p;
+		copy_page(page_address(p), vdso_start + i*PAGE_SIZE);
+	}
+
+	vbase = vmap(vdso_pages, npages, 0, PAGE_KERNEL);
+	if (!vbase)
+		goto oom_free;
+
+	if (memcmp(vbase, "\177ELF", 4)) {
+		printk("VDSO: I'm broken; not ELF\n");
+		vdso_enabled = 0;
+	}
+
+#define V(x) *(typeof(x) *) var_ref(vbase, (char *)RELOC_HIDE(&x, 0), #x)
+#define VEXTERN(x) \
+	V(vdso_ ## x) = &__ ## x;
+#include "vextern.h"
+#undef VEXTERN
+	/* The temporary mapping was only needed to patch the pointers;
+	   the pages themselves stay in vdso_pages. */
+	vunmap(vbase);
+	return 0;
+
+ oom_free:
+	/* i is the number of pages successfully allocated so far. */
+	while (i-- > 0)
+		__free_page(vdso_pages[i]);
+	kfree(vdso_pages);
+	vdso_pages = NULL;
+ oom:
+	printk("Cannot allocate vdso\n");
+	vdso_enabled = 0;
+	return -ENOMEM;
+}
+__initcall(init_vdso_vars);
+
+struct linux_binprm;
+
+/* Choose an address hint for the vdso: above the (randomized) stack, at a
+   further random page offset, so that no hole is left in the middle of the
+   address space.
+   To save memory the hint is kept within the same PTE as the stack top,
+   which limits how many random bits this provides. */
+static unsigned long vdso_addr(unsigned long start, unsigned len)
+{
+	unsigned long limit = (start + PMD_SIZE - 1) & PMD_MASK;
+	unsigned long candidate;
+	unsigned slot;
+
+	if (limit >= TASK_SIZE64)
+		limit = TASK_SIZE64;
+	limit -= len;
+
+	/* Masking loses some more bits than a modulo, but is cheaper */
+	slot = get_random_int() & (PTRS_PER_PTE - 1);
+	candidate = start + (slot << PAGE_SHIFT);
+
+	return candidate >= limit ? limit : candidate;
+}
+
+/* Install the vsyscall page mapping in the current process at program
+   startup. Not called for compat tasks.
+   Returns 0 when the vDSO is disabled or the mapping was installed,
+   otherwise the error from get_unmapped_area() or
+   install_special_mapping(). */
+int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned size = round_up(vdso_end - vdso_start, PAGE_SIZE);
+	unsigned long where;
+	int err;
+
+	if (!vdso_enabled)
+		return 0;
+
+	down_write(&mm->mmap_sem);
+
+	where = vdso_addr(mm->start_stack, size);
+	where = get_unmapped_area(NULL, where, size, 0, 0);
+	if (IS_ERR_VALUE(where)) {
+		err = where;
+	} else {
+		err = install_special_mapping(mm, where, size,
+					      VM_READ|VM_EXEC|
+					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+					      VM_ALWAYSDUMP,
+					      vdso_pages);
+		/* Record the address only once the mapping exists. */
+		if (!err)
+			current->mm->context.vdso = (void *)where;
+	}
+
+	up_write(&mm->mmap_sem);
+	return err;
+}
+
+/*
+ * Handler for the "vdso=" boot parameter: the numeric argument becomes the
+ * new value of vdso_enabled. Returns 1 to tell the __setup() machinery the
+ * option was consumed, so it is not passed on to init as an unhandled
+ * argument.
+ */
+static __init int vdso_setup(char *s)
+{
+	vdso_enabled = simple_strtoul(s, NULL, 0);
+	return 1;
+}
+__setup("vdso=", vdso_setup);
diff --git a/arch/x86_64/vdso/voffset.h b/arch/x86_64/vdso/voffset.h
new file mode 100644
index 00000000000..5304204911f
--- /dev/null
+++ b/arch/x86_64/vdso/voffset.h
@@ -0,0 +1 @@
+#define VDSO_TEXT_OFFSET 0x500 /* offset of .text from the start of the vDSO image; used by vdso.lds.S and by var_ref() in vma.c */
diff --git a/arch/x86_64/vdso/vvar.c b/arch/x86_64/vdso/vvar.c
new file mode 100644
index 00000000000..6fc22219a47
--- /dev/null
+++ b/arch/x86_64/vdso/vvar.c
@@ -0,0 +1,12 @@
+/* Define pointer to external vDSO variables.
+ These are part of the vDSO. The kernel fills in the real addresses
+ at boot time. This is done because when the vdso is linked the
+ kernel isn't yet and we don't know the final addresses.
+ Until then every pointer holds VMAGIC, which vma.c checks before
+ overwriting it. */
+#include <linux/kernel.h>
+#include <linux/time.h>
+#include <asm/vsyscall.h>
+#include <asm/timex.h>
+#include <asm/vgtod.h>
+
+#define VEXTERN(x) typeof (__ ## x) *vdso_ ## x = (void *)VMAGIC;
+#include "vextern.h"