aboutsummaryrefslogtreecommitdiff
path: root/arch/x86/kernel/machine_kexec_64.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel/machine_kexec_64.c')
-rw-r--r--arch/x86/kernel/machine_kexec_64.c179
1 files changed, 142 insertions, 37 deletions
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c43caa3a91f..89cea4d4467 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -12,20 +12,47 @@
#include <linux/reboot.h>
#include <linux/numa.h>
#include <linux/ftrace.h>
+#include <linux/io.h>
+#include <linux/suspend.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
-#include <asm/io.h>
-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
-static u64 kexec_pgd[512] PAGE_ALIGNED;
-static u64 kexec_pud0[512] PAGE_ALIGNED;
-static u64 kexec_pmd0[512] PAGE_ALIGNED;
-static u64 kexec_pte0[512] PAGE_ALIGNED;
-static u64 kexec_pud1[512] PAGE_ALIGNED;
-static u64 kexec_pmd1[512] PAGE_ALIGNED;
-static u64 kexec_pte1[512] PAGE_ALIGNED;
+static int init_one_level2_page(struct kimage *image, pgd_t *pgd,
+ unsigned long addr)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ struct page *page;
+ int result = -ENOMEM;
+
+ addr &= PMD_MASK;
+ pgd += pgd_index(addr);
+ if (!pgd_present(*pgd)) {
+ page = kimage_alloc_control_pages(image, 0);
+ if (!page)
+ goto out;
+ pud = (pud_t *)page_address(page);
+ memset(pud, 0, PAGE_SIZE);
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+ pud = pud_offset(pgd, addr);
+ if (!pud_present(*pud)) {
+ page = kimage_alloc_control_pages(image, 0);
+ if (!page)
+ goto out;
+ pmd = (pmd_t *)page_address(page);
+ memset(pmd, 0, PAGE_SIZE);
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+ pmd = pmd_offset(pud, addr);
+ if (!pmd_present(*pmd))
+ set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+ result = 0;
+out:
+ return result;
+}
static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
@@ -92,9 +119,8 @@ static int init_level4_page(struct kimage *image, pgd_t *level4p,
}
level3p = (pud_t *)page_address(page);
result = init_level3_page(image, level3p, addr, last_addr);
- if (result) {
+ if (result)
goto out;
- }
set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
addr += PGDIR_SIZE;
}
@@ -107,12 +133,72 @@ out:
return result;
}
+static void free_transition_pgtable(struct kimage *image)
+{
+ free_page((unsigned long)image->arch.pud);
+ free_page((unsigned long)image->arch.pmd);
+ free_page((unsigned long)image->arch.pte);
+}
+
+static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
+{
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long vaddr, paddr;
+ int result = -ENOMEM;
+
+ vaddr = (unsigned long)relocate_kernel;
+ paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
+ pgd += pgd_index(vaddr);
+ if (!pgd_present(*pgd)) {
+ pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
+ if (!pud)
+ goto err;
+ image->arch.pud = pud;
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
+ pud = pud_offset(pgd, vaddr);
+ if (!pud_present(*pud)) {
+ pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
+ if (!pmd)
+ goto err;
+ image->arch.pmd = pmd;
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+ }
+ pmd = pmd_offset(pud, vaddr);
+ if (!pmd_present(*pmd)) {
+ pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
+ if (!pte)
+ goto err;
+ image->arch.pte = pte;
+ set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+ }
+ pte = pte_offset_kernel(pmd, vaddr);
+ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
+ return 0;
+err:
+ free_transition_pgtable(image);
+ return result;
+}
+
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
pgd_t *level4p;
+ int result;
level4p = (pgd_t *)__va(start_pgtable);
- return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
+ result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
+ if (result)
+ return result;
+ /*
+ * image->start may be outside 0 ~ max_pfn, for example when
+ * jump back to original kernel from kexeced kernel
+ */
+ result = init_one_level2_page(image, level4p, image->start);
+ if (result)
+ return result;
+ return init_transition_pgtable(image, level4p);
}
static void set_idt(void *newidt, u16 limit)
@@ -174,7 +260,7 @@ int machine_kexec_prepare(struct kimage *image)
void machine_kexec_cleanup(struct kimage *image)
{
- return;
+ free_transition_pgtable(image);
}
/*
@@ -185,36 +271,45 @@ void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
+ int save_ftrace_enabled;
- tracer_disable();
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ save_processor_state();
+#endif
+
+ save_ftrace_enabled = __ftrace_enabled_save();
/* Interrupts aren't acceptable while we reboot */
local_irq_disable();
+ if (image->preserve_context) {
+#ifdef CONFIG_X86_IO_APIC
+ /*
+ * We need to put APICs in legacy mode so that we can
+ * get timer interrupts in second kernel. kexec/kdump
+ * paths already have calls to disable_IO_APIC() in
+ * one form or other. kexec jump path also need
+ * one.
+ */
+ disable_IO_APIC();
+#endif
+ }
+
control_page = page_address(image->control_code_page) + PAGE_SIZE;
- memcpy(control_page, relocate_kernel, PAGE_SIZE);
+ memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
- page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
- page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
- page_list[VA_PGD] = (unsigned long)kexec_pgd;
- page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
- page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
- page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
- page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
- page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
- page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
- page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
- page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
- page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
- page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
- page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
- page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
-
+ page_list[VA_CONTROL_PAGE] = (unsigned long)control_page;
page_list[PA_TABLE_PAGE] =
(unsigned long)__pa(page_address(image->control_code_page));
- /* The segment registers are funny things, they have both a
+ if (image->type == KEXEC_TYPE_DEFAULT)
+ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page)
+ << PAGE_SHIFT);
+
+ /*
+ * The segment registers are funny things, they have both a
* visible and an invisible part. Whenever the visible part is
* set to a specific selector, the invisible part is loaded
* with from a table in memory. At no other time is the
@@ -224,15 +319,25 @@ void machine_kexec(struct kimage *image)
* segments, before I zap the gdt with an invalid value.
*/
load_segments();
- /* The gdt & idt are now invalid.
+ /*
+ * The gdt & idt are now invalid.
* If you want to load them you must set up your own idt & gdt.
*/
- set_gdt(phys_to_virt(0),0);
- set_idt(phys_to_virt(0),0);
+ set_gdt(phys_to_virt(0), 0);
+ set_idt(phys_to_virt(0), 0);
/* now call it */
- relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
- image->start);
+ image->start = relocate_kernel((unsigned long)image->head,
+ (unsigned long)page_list,
+ image->start,
+ image->preserve_context);
+
+#ifdef CONFIG_KEXEC_JUMP
+ if (kexec_image->preserve_context)
+ restore_processor_state();
+#endif
+
+ __ftrace_enabled_restore(save_ftrace_enabled);
}
void arch_crash_save_vmcoreinfo(void)