aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/cpu/cpufreq/p4-clockmod.c1
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c4
-rw-r--r--arch/x86/kernel/machine_kexec_32.c17
-rw-r--r--arch/x86/kernel/machine_kexec_64.c99
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S24
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S189
-rw-r--r--arch/x86/kernel/visws_quirks.c2
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S7
8 files changed, 278 insertions, 65 deletions
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
index b585e04cbc9..3178c3acd97 100644
--- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
+++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c
@@ -277,7 +277,6 @@ static struct cpufreq_driver p4clockmod_driver = {
.name = "p4-clockmod",
.owner = THIS_MODULE,
.attr = p4clockmod_attr,
- .hide_interface = 1,
};
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index bfbd5323a63..ca14604611e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -639,7 +639,7 @@ static void mce_init_timer(void)
if (!next_interval)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
- t->expires = round_jiffies_relative(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies + next_interval);
add_timer(t);
}
@@ -1110,7 +1110,7 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb,
break;
case CPU_DOWN_FAILED:
case CPU_DOWN_FAILED_FROZEN:
- t->expires = round_jiffies_relative(jiffies + next_interval);
+ t->expires = round_jiffies(jiffies + next_interval);
add_timer_on(t, cpu);
smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
break;
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f5fc8c781a6..e7368c1da01 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -14,12 +14,12 @@
#include <linux/ftrace.h>
#include <linux/suspend.h>
#include <linux/gfp.h>
+#include <linux/io.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#include <asm/io.h>
#include <asm/apic.h>
#include <asm/cpufeature.h>
#include <asm/desc.h>
@@ -63,7 +63,7 @@ static void load_segments(void)
"\tmovl %%eax,%%fs\n"
"\tmovl %%eax,%%gs\n"
"\tmovl %%eax,%%ss\n"
- ::: "eax", "memory");
+ : : : "eax", "memory");
#undef STR
#undef __STR
}
@@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image)
if (image->preserve_context) {
#ifdef CONFIG_X86_IO_APIC
- /* We need to put APICs in legacy mode so that we can
+ /*
+ * We need to put APICs in legacy mode so that we can
* get timer interrupts in second kernel. kexec/kdump
* paths already have calls to disable_IO_APIC() in
* one form or other. kexec jump path also need
@@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image)
page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
<< PAGE_SHIFT);
- /* The segment registers are funny things, they have both a
+ /*
+ * The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
- /* The gdt & idt are now invalid.
+ /*
+ * The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
+ set_gdt(phys_to_virt(0), 0);
+ set_idt(phys_to_virt(0), 0);
/* now call it */
image->start = relocate_kernel_ptr((unsigned long)image->head,
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 6993d51b7fd..89cea4d4467 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,11 +12,47 @@
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#include <asm/io.h>
+
+static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
+ unsigned long addr)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ struct page *page;
+ int result = -ENOMEM;
+
+ addr &= PMD_MASK;
+ pgd += pgd_index(addr);
+ if (!pgd_present(*pgd)) {
+ page = kimage_alloc_control_pages(image, 0);
+ if (!page)
+ goto out;
+ pud = (pud_t *)page_address(page);
+ memset(pud, 0, PAGE_SIZE);
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+ pud = pud_offset(pgd, addr);
+ if (!pud_present(*pud)) {
+ page = kimage_alloc_control_pages(image, 0);
+ if (!page)
+ goto out;
+ pmd = (pmd_t *)page_address(page);
+ memset(pmd, 0, PAGE_SIZE);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+ result = 0;
+out:
+ return result;
+}
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
@@ -83,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
}
level3p = (pud_t *)page_address(page);
result = init_level3_page(image, level3p, addr, last_addr);
- if (result) {
+ if (result)
goto out;
- }
set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
addr += PGDIR_SIZE;
}
@@ -156,6 +191,13 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
if (result)
return result;
+ /*
+ * image->start may be outside 0 ~ max_pfn, for example when
+ * jump back to original kernel from kexeced kernel
+ */
+ result = init_one_level2_page(image, level4p, image->start);
+ if (result)
+ return result;
return init_transition_pgtable(image, level4p);
}
@@ -229,20 +271,45 @@ void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
+ int save_ftrace_enabled;
- tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ save_processor_state();
+#endif
+
+ save_ftrace_enabled = __ftrace_enabled_save();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * We need to put APICs in legacy mode so that we can
+ * get timer interrupts in second kernel. kexec/kdump
+ * paths already have calls to disable_IO_APIC() in
+ * one form or other. kexec jump path also need
+ * one.
+ */
+ disable_IO_APIC();
+#endif
+ }
+
control_page = page_address(image->control_code_page) + PAGE_SIZE;
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
+ memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
+ page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_TABLE_PAGE] =
(unsigned long)__pa(page_address(image->control_code_page));
- /* The segment registers are funny things, they have both a
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+ << PAGE_SHIFT);
+
+ /*
+ * The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@@ -252,15 +319,25 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
- /* The gdt & idt are now invalid.
+ /*
+ * The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
+ set_gdt(phys_to_virt(0), 0);
+ set_idt(phys_to_virt(0), 0);
/* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start);
+ image->start = relocate_kernel((unsigned long)image->head,
+ (unsigned long)page_list,
+ image->start,
+ image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ restore_processor_state();
+#endif
+
+ __ftrace_enabled_restore(save_ftrace_enabled);
}
void arch_crash_save_vmcoreinfo(void)
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index 2064d0aa8d2..41235531b11 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -17,7 +17,8 @@
#define PTR(x) (x << 2)
-/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
* ~ control_page + PAGE_SIZE are used as data storage and stack for
* jumping back
*/
@@ -76,8 +77,10 @@ relocate_kernel:
movl %eax, CP_PA_SWAP_PAGE(%edi)
movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi)
- /* get physical address of control page now */
- /* this is impossible after page table switch */
+ /*
+ * get physical address of control page now
+ * this is impossible after page table switch
+ */
movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
/* switch to new set of page tables */
@@ -97,7 +100,8 @@ identity_mapped:
/* store the start address on the stack */
pushl %edx
- /* Set cr0 to a known state:
+ /*
+ * Set cr0 to a known state:
* - Paging disabled
* - Alignment check disabled
* - Write protect disabled
@@ -113,7 +117,8 @@ identity_mapped:
/* clear cr4 if applicable */
testl %ecx, %ecx
jz 1f
- /* Set cr4 to a known state:
+ /*
+ * Set cr4 to a known state:
* Setting everything to zero seems safe.
*/
xorl %eax, %eax
@@ -132,15 +137,18 @@ identity_mapped:
call swap_pages
addl $8, %esp
- /* To be certain of avoiding problems with self-modifying code
+ /*
+ * To be certain of avoiding problems with self-modifying code
* I need to execute a serializing instruction here.
* So I flush the TLB, it's handy, and not processor dependent.
*/
xorl %eax, %eax
movl %eax, %cr3
- /* set all of the registers to known values */
- /* leave %esp alone */
+ /*
+ * set all of the registers to known values
+ * leave %esp alone
+ */
testl %esi, %esi
jnz 1f
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index d32cfb27a47..4de8f5b3d47 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -19,29 +19,77 @@
#define PTR(x) (x << 3)
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
+/*
+ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE
+ * ~ control_page + PAGE_SIZE are used as data storage and stack for
+ * jumping back
+ */
+#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset))
+
+/* Minimal CPU state */
+#define RSP DATA(0x0)
+#define CR0 DATA(0x8)
+#define CR3 DATA(0x10)
+#define CR4 DATA(0x18)
+
+/* other data */
+#define CP_PA_TABLE_PAGE DATA(0x20)
+#define CP_PA_SWAP_PAGE DATA(0x28)
+#define CP_PA_BACKUP_PAGES_MAP DATA(0x30)
+
.text
.align PAGE_SIZE
.code64
.globl relocate_kernel
relocate_kernel:
- /* %rdi indirection_page
+ /*
+ * %rdi indirection_page
* %rsi page_list
* %rdx start address
+ * %rcx preserve_context
*/
+ /* Save the CPU context, used for jumping back */
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushf
+
+ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11
+ movq %rsp, RSP(%r11)
+ movq %cr0, %rax
+ movq %rax, CR0(%r11)
+ movq %cr3, %rax
+ movq %rax, CR3(%r11)
+ movq %cr4, %rax
+ movq %rax, CR4(%r11)
+
/* zero out flags, and disable interrupts */
pushq $0
popfq
- /* get physical address of control page now */
- /* this is impossible after page table switch */
+ /*
+ * get physical address of control page now
+ * this is impossible after page table switch
+ */
movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
/* get physical address of page table now too */
- movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
+ movq PTR(PA_TABLE_PAGE)(%rsi), %r9
+
+ /* get physical address of swap page now */
+ movq PTR(PA_SWAP_PAGE)(%rsi), %r10
+
+ /* save some information for jumping back */
+ movq %r9, CP_PA_TABLE_PAGE(%r11)
+ movq %r10, CP_PA_SWAP_PAGE(%r11)
+ movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11)
/* Switch to the identity mapped page tables */
- movq %rcx, %cr3
+ movq %r9, %cr3
/* setup a new stack at the end of the physical control page */
lea PAGE_SIZE(%r8), %rsp
@@ -55,7 +103,8 @@ identity_mapped:
/* store the start address on the stack */
pushq %rdx
- /* Set cr0 to a known state:
+ /*
+ * Set cr0 to a known state:
* - Paging enabled
* - Alignment check disabled
* - Write protect disabled
@@ -68,7 +117,8 @@ identity_mapped:
orl $(X86_CR0_PG | X86_CR0_PE), %eax
movq %rax, %cr0
- /* Set cr4 to a known state:
+ /*
+ * Set cr4 to a known state:
* - physical address extension enabled
*/
movq $X86_CR4_PAE, %rax
@@ -78,9 +128,87 @@ identity_mapped:
1:
/* Flush the TLB (needed?) */
- movq %rcx, %cr3
+ movq %r9, %cr3
+
+ movq %rcx, %r11
+ call swap_pages
+
+ /*
+ * To be certain of avoiding problems with self-modifying code
+ * I need to execute a serializing instruction here.
+ * So I flush the TLB by reloading %cr3 here, it's handy,
+ * and not processor dependent.
+ */
+ movq %cr3, %rax
+ movq %rax, %cr3
+
+ /*
+ * set all of the registers to known values
+ * leave %rsp alone
+ */
+
+ testq %r11, %r11
+ jnz 1f
+ xorq %rax, %rax
+ xorq %rbx, %rbx
+ xorq %rcx, %rcx
+ xorq %rdx, %rdx
+ xorq %rsi, %rsi
+ xorq %rdi, %rdi
+ xorq %rbp, %rbp
+ xorq %r8, %r8
+ xorq %r9, %r9
+ xorq %r10, %r9
+ xorq %r11, %r11
+ xorq %r12, %r12
+ xorq %r13, %r13
+ xorq %r14, %r14
+ xorq %r15, %r15
+
+ ret
+
+1:
+ popq %rdx
+ leaq PAGE_SIZE(%r10), %rsp
+ call *%rdx
+
+ /* get the re-entry point of the peer system */
+ movq 0(%rsp), %rbp
+ call 1f
+1:
+ popq %r8
+ subq $(1b - relocate_kernel), %r8
+ movq CP_PA_SWAP_PAGE(%r8), %r10
+ movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi
+ movq CP_PA_TABLE_PAGE(%r8), %rax
+ movq %rax, %cr3
+ lea PAGE_SIZE(%r8), %rsp
+ call swap_pages
+ movq $virtual_mapped, %rax
+ pushq %rax
+ ret
+
+virtual_mapped:
+ movq RSP(%r8), %rsp
+ movq CR4(%r8), %rax
+ movq %rax, %cr4
+ movq CR3(%r8), %rax
+ movq CR0(%r8), %r8
+ movq %rax, %cr3
+ movq %r8, %cr0
+ movq %rbp, %rax
+
+ popf
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+ ret
/* Do the copies */
+swap_pages:
movq %rdi, %rcx /* Put the page_list in %rcx */
xorq %rdi, %rdi
xorq %rsi, %rsi
@@ -112,36 +240,27 @@ identity_mapped:
movq %rcx, %rsi /* For ever source page do a copy */
andq $0xfffffffffffff000, %rsi
+ movq %rdi, %rdx
+ movq %rsi, %rax
+
+ movq %r10, %rdi
movq $512, %rcx
rep ; movsq
- jmp 0b
-3:
-
- /* To be certain of avoiding problems with self-modifying code
- * I need to execute a serializing instruction here.
- * So I flush the TLB by reloading %cr3 here, it's handy,
- * and not processor dependent.
- */
- movq %cr3, %rax
- movq %rax, %cr3
- /* set all of the registers to known values */
- /* leave %rsp alone */
+ movq %rax, %rdi
+ movq %rdx, %rsi
+ movq $512, %rcx
+ rep ; movsq
- xorq %rax, %rax
- xorq %rbx, %rbx
- xorq %rcx, %rcx
- xorq %rdx, %rdx
- xorq %rsi, %rsi
- xorq %rdi, %rdi
- xorq %rbp, %rbp
- xorq %r8, %r8
- xorq %r9, %r9
- xorq %r10, %r9
- xorq %r11, %r11
- xorq %r12, %r12
- xorq %r13, %r13
- xorq %r14, %r14
- xorq %r15, %r15
+ movq %rdx, %rdi
+ movq %r10, %rsi
+ movq $512, %rcx
+ rep ; movsq
+ lea PAGE_SIZE(%rax), %rsi
+ jmp 0b
+3:
ret
+
+ .globl kexec_control_code_size
+.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index 191a876e9e8..31ffc24eec4 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -578,7 +578,7 @@ static struct irq_chip piix4_virtual_irq_type = {
static irqreturn_t piix4_master_intr(int irq, void *dev_id)
{
int realirq;
- irq_desc_t *desc;
+ struct irq_desc *desc;
unsigned long flags;
spin_lock_irqsave(&i8259A_lock, flags);
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fbfced6f680..5bf54e40c6e 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -275,3 +275,10 @@ ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
ASSERT((per_cpu__irq_stack_union == 0),
"irq_stack_union is not at start of per-cpu area");
#endif
+
+#ifdef CONFIG_KEXEC
+#include <asm/kexec.h>
+
+ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
+ "kexec control code size is too big")
+#endif