From 19f9a34f87c48bbd270d617d1c986d0c23866a1a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Wed, 27 Sep 2006 18:33:49 +0900 Subject: sh: Initial vsyscall page support. This implements initial support for the vsyscall page on SH. At the moment we leave it configurable due to having nommu to support from the same code base. We hook it up for the signal trampoline return at present, with more to be added later, once uClibc catches up. Signed-off-by: Paul Mundt --- arch/sh/kernel/Makefile | 1 + arch/sh/kernel/process.c | 2 +- arch/sh/kernel/signal.c | 17 ++- arch/sh/kernel/vsyscall/Makefile | 36 +++++++ arch/sh/kernel/vsyscall/vsyscall-note.S | 25 +++++ arch/sh/kernel/vsyscall/vsyscall-sigreturn.S | 39 +++++++ arch/sh/kernel/vsyscall/vsyscall-syscall.S | 10 ++ arch/sh/kernel/vsyscall/vsyscall-trapa.S | 42 ++++++++ arch/sh/kernel/vsyscall/vsyscall.c | 150 +++++++++++++++++++++++++++ arch/sh/kernel/vsyscall/vsyscall.lds.S | 74 +++++++++++++ arch/sh/mm/Kconfig | 13 +++ arch/sh/mm/init.c | 3 + arch/sh/mm/tlb-flush.c | 18 ++-- 13 files changed, 418 insertions(+), 12 deletions(-) create mode 100644 arch/sh/kernel/vsyscall/Makefile create mode 100644 arch/sh/kernel/vsyscall/vsyscall-note.S create mode 100644 arch/sh/kernel/vsyscall/vsyscall-sigreturn.S create mode 100644 arch/sh/kernel/vsyscall/vsyscall-syscall.S create mode 100644 arch/sh/kernel/vsyscall/vsyscall-trapa.S create mode 100644 arch/sh/kernel/vsyscall/vsyscall.c create mode 100644 arch/sh/kernel/vsyscall/vsyscall.lds.S (limited to 'arch') diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 0e2148f63e7..5da88a43d35 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -9,6 +9,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ io.o io_generic.o sh_ksyms.o syscalls.o obj-y += cpu/ timers/ +obj-$(CONFIG_VSYSCALL) += vsyscall/ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_CF_ENABLER) += cf-enabler.o diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 630ec1af248..0b1d5dd7a93 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -355,7 +355,7 @@ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *ne else if (next->thread.ubc_pc && next->mm) { int asid = 0; #ifdef CONFIG_MMU - asid |= next->mm->context & MMU_CONTEXT_ASID_MASK; + asid |= next->mm->context.id & MMU_CONTEXT_ASID_MASK; #endif ubc_set_tracing(asid, next->thread.ubc_pc); } else { diff --git a/arch/sh/kernel/signal.c b/arch/sh/kernel/signal.c index 2f1c9545b49..5213f5bc6ce 100644 --- a/arch/sh/kernel/signal.c +++ b/arch/sh/kernel/signal.c @@ -8,7 +8,6 @@ * SuperH version: Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima * */ - #include #include #include @@ -21,6 +20,7 @@ #include #include #include +#include #include #include @@ -29,8 +29,6 @@ #include #include -#undef DEBUG - #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) /* @@ -312,6 +310,11 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size) return (void __user *)((sp - frame_size) & -8ul); } +/* These symbols are defined with the addresses in the vsyscall page. + See vsyscall-trapa.S. */ +extern void __user __kernel_sigreturn; +extern void __user __kernel_rt_sigreturn; + static int setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs) { @@ -340,6 +343,10 @@ static int setup_frame(int sig, struct k_sigaction *ka, already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { regs->pr = (unsigned long) ka->sa.sa_restorer; +#ifdef CONFIG_VSYSCALL + } else if (likely(current->mm->context.vdso)) { + regs->pr = VDSO_SYM(&__kernel_sigreturn); +#endif } else { /* Generate return code (system call to sigreturn) */ err |= __put_user(MOVW(7), &frame->retcode[0]); @@ -416,6 +423,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, already in userspace. */ if (ka->sa.sa_flags & SA_RESTORER) { regs->pr = (unsigned long) ka->sa.sa_restorer; +#ifdef CONFIG_VSYSCALL + } else if (likely(current->mm->context.vdso)) { + regs->pr = VDSO_SYM(&__kernel_rt_sigreturn); +#endif } else { /* Generate return code (system call to rt_sigreturn) */ err |= __put_user(MOVW(7), &frame->retcode[0]); diff --git a/arch/sh/kernel/vsyscall/Makefile b/arch/sh/kernel/vsyscall/Makefile new file mode 100644 index 00000000000..4bbce1cfa35 --- /dev/null +++ b/arch/sh/kernel/vsyscall/Makefile @@ -0,0 +1,36 @@ +obj-y += vsyscall.o vsyscall-syscall.o + +$(obj)/vsyscall-syscall.o: \ + $(foreach F,trapa,$(obj)/vsyscall-$F.so) + +# Teach kbuild about targets +targets += $(foreach F,trapa,vsyscall-$F.o vsyscall-$F.so) +targets += vsyscall-note.o vsyscall.lds + +# The DSO images are built using a special linker script +quiet_cmd_syscall = SYSCALL $@ + cmd_syscall = $(CC) -nostdlib $(SYSCFLAGS_$(@F)) \ + -Wl,-T,$(filter-out FORCE,$^) -o $@ + +export CPPFLAGS_vsyscall.lds += -P -C -Ush + +vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) + +SYSCFLAGS_vsyscall-trapa.so = $(vsyscall-flags) + +$(obj)/vsyscall-trapa.so: \ +$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE + $(call if_changed,syscall) + +# We also create a special relocatable object that should mirror the symbol +# table and layout of the linked DSO. With ld -R we can then refer to +# these symbols in the kernel code rather than hand-coded addresses. +extra-y += vsyscall-syms.o +$(obj)/built-in.o: $(obj)/vsyscall-syms.o +$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o + +SYSCFLAGS_vsyscall-syms.o = -r +$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ + $(obj)/vsyscall-trapa.o $(obj)/vsyscall-note.o FORCE + $(call if_changed,syscall) diff --git a/arch/sh/kernel/vsyscall/vsyscall-note.S b/arch/sh/kernel/vsyscall/vsyscall-note.S new file mode 100644 index 00000000000..d4b5be4f3d5 --- /dev/null +++ b/arch/sh/kernel/vsyscall/vsyscall-note.S @@ -0,0 +1,25 @@ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include +#include + +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ + .section name, flags; \ + .balign 4; \ + .long 1f - 0f; /* name length */ \ + .long 3f - 2f; /* data length */ \ + .long type; /* note type */ \ +0: .asciz vendor; /* vendor name */ \ +1: .balign 4; \ +2: + +#define ASM_ELF_NOTE_END \ +3: .balign 4; /* pad out section */ \ + .previous + + ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) + .long LINUX_VERSION_CODE + ASM_ELF_NOTE_END diff --git a/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S b/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S new file mode 100644 index 00000000000..555a64f124c --- /dev/null +++ b/arch/sh/kernel/vsyscall/vsyscall-sigreturn.S @@ -0,0 +1,39 @@ +#include + + .text + .balign 32 + .globl __kernel_sigreturn + .type __kernel_sigreturn,@function +__kernel_sigreturn: +.LSTART_sigreturn: + mov.w 1f, r3 + trapa #0x10 + or r0, r0 + or r0, r0 + or r0, r0 + or r0, r0 + or r0, r0 + +1: .short __NR_sigreturn +.LEND_sigreturn: + .size __kernel_sigreturn,.-.LSTART_sigreturn + + .balign 32 + .globl __kernel_rt_sigreturn + .type __kernel_rt_sigreturn,@function +__kernel_rt_sigreturn: +.LSTART_rt_sigreturn: + mov.w 1f, r3 + trapa #0x10 + or r0, r0 + or r0, r0 + or r0, r0 + or r0, r0 + or r0, r0 + +1: .short __NR_rt_sigreturn +.LEND_rt_sigreturn: + .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn + + .section .eh_frame,"a",@progbits + .previous diff --git a/arch/sh/kernel/vsyscall/vsyscall-syscall.S b/arch/sh/kernel/vsyscall/vsyscall-syscall.S new file mode 100644 index 00000000000..c2ac7f0282b --- /dev/null +++ b/arch/sh/kernel/vsyscall/vsyscall-syscall.S @@ -0,0 +1,10 @@ +#include + +__INITDATA + + .globl vsyscall_trapa_start, vsyscall_trapa_end +vsyscall_trapa_start: + .incbin "arch/sh/kernel/vsyscall/vsyscall-trapa.so" +vsyscall_trapa_end: + +__FINIT diff --git a/arch/sh/kernel/vsyscall/vsyscall-trapa.S b/arch/sh/kernel/vsyscall/vsyscall-trapa.S new file mode 100644 index 00000000000..3b6eb34c43f --- /dev/null +++ b/arch/sh/kernel/vsyscall/vsyscall-trapa.S @@ -0,0 +1,42 @@ + .text + .globl __kernel_vsyscall + .type __kernel_vsyscall,@function +__kernel_vsyscall: +.LSTART_vsyscall: + /* XXX: We'll have to do something here once we opt to use the vDSO + * page for something other than the signal trampoline.. as well as + * fill out .eh_frame -- PFM. */ +.LEND_vsyscall: + .size __kernel_vsyscall,.-.LSTART_vsyscall + .previous + + .section .eh_frame,"a",@progbits +.LCIE: + .ualong .LCIE_end - .LCIE_start +.LCIE_start: + .ualong 0 /* CIE ID */ + .byte 0x1 /* Version number */ + .string "zRS" /* NUL-terminated augmentation string */ + .uleb128 0x1 /* Code alignment factor */ + .sleb128 -4 /* Data alignment factor */ + .byte 0x11 /* Return address register column */ + /* Augmentation length and data (none) */ + .byte 0xc /* DW_CFA_def_cfa */ + .uleb128 0xf /* r15 */ + .uleb128 0x0 /* offset 0 */ + + .align 2 +.LCIE_end: + + .ualong .LFDE_end-.LFDE_start /* Length FDE */ +.LFDE_start: + .ualong .LCIE /* CIE pointer */ + .ualong .LSTART_vsyscall-. /* start address */ + .ualong .LEND_vsyscall-.LSTART_vsyscall + .uleb128 0 + .align 2 +.LFDE_end: + .previous + +/* Get the common code for the sigreturn entry points */ +#include "vsyscall-sigreturn.S" diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c new file mode 100644 index 00000000000..075d6cc1a2d --- /dev/null +++ b/arch/sh/kernel/vsyscall/vsyscall.c @@ -0,0 +1,150 @@ +/* + * arch/sh/kernel/vsyscall.c + * + * Copyright (C) 2006 Paul Mundt + * + * vDSO randomization + * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include +#include +#include +#include +#include +#include +#include + +/* + * Should the kernel map a VDSO page into processes and pass its + * address down to glibc upon exec()? + */ +unsigned int __read_mostly vdso_enabled = 1; +EXPORT_SYMBOL_GPL(vdso_enabled); + +static int __init vdso_setup(char *s) +{ + vdso_enabled = simple_strtoul(s, NULL, 0); + return 1; +} +__setup("vdso=", vdso_setup); + +/* + * These symbols are defined by vsyscall.o to mark the bounds + * of the ELF DSO images included therein. + */ +extern const char vsyscall_trapa_start, vsyscall_trapa_end; +static void *syscall_page; + +int __init vsyscall_init(void) +{ + syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); + + /* + * XXX: Map this page to a fixmap entry if we get around + * to adding the page to ELF core dumps + */ + + memcpy(syscall_page, + &vsyscall_trapa_start, + &vsyscall_trapa_end - &vsyscall_trapa_start); + + return 0; +} + +static struct page *syscall_vma_nopage(struct vm_area_struct *vma, + unsigned long address, int *type) +{ + unsigned long offset = address - vma->vm_start; + struct page *page; + + if (address < vma->vm_start || address > vma->vm_end) + return NOPAGE_SIGBUS; + + page = virt_to_page(syscall_page + offset); + + get_page(page); + + return page; +} + +/* Prevent VMA merging */ +static void syscall_vma_close(struct vm_area_struct *vma) +{ +} + +static struct vm_operations_struct syscall_vm_ops = { + .nopage = syscall_vma_nopage, + .close = syscall_vma_close, +}; + +/* Setup a VMA at program startup for the vsyscall page */ +int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack) +{ + struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret; + + down_write(&mm->mmap_sem); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + vma = kmem_cache_zalloc(vm_area_cachep, SLAB_KERNEL); + if (!vma) { + ret = -ENOMEM; + goto up_fail; + } + + vma->vm_start = addr; + vma->vm_end = addr + PAGE_SIZE; + /* MAYWRITE to allow gdb to COW and set breakpoints */ + vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE; + vma->vm_flags |= mm->def_flags; + vma->vm_page_prot = protection_map[vma->vm_flags & 7]; + vma->vm_ops = &syscall_vm_ops; + vma->vm_mm = mm; + + ret = insert_vm_struct(mm, vma); + if (unlikely(ret)) { + kmem_cache_free(vm_area_cachep, vma); + goto up_fail; + } + + current->mm->context.vdso = (void *)addr; + + mm->total_vm++; +up_fail: + up_write(&mm->mmap_sem); + return ret; +} + +const char *arch_vma_name(struct vm_area_struct *vma) +{ + if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) + return "[vdso]"; + + return NULL; +} + +struct vm_area_struct *get_gate_vma(struct task_struct *task) +{ + return NULL; +} + +int in_gate_area(struct task_struct *task, unsigned long address) +{ + return 0; +} + +int in_gate_area_no_task(unsigned long address) +{ + return 0; +} diff --git a/arch/sh/kernel/vsyscall/vsyscall.lds.S b/arch/sh/kernel/vsyscall/vsyscall.lds.S new file mode 100644 index 00000000000..b13c3d439fe --- /dev/null +++ b/arch/sh/kernel/vsyscall/vsyscall.lds.S @@ -0,0 +1,74 @@ +/* + * Linker script for vsyscall DSO. The vsyscall page is an ELF shared + * object prelinked to its virtual address, and with only one read-only + * segment (that fits in one page). This script controls its layout. + */ +#include + +#ifdef CONFIG_CPU_LITTLE_ENDIAN +OUTPUT_FORMAT("elf32-sh-linux", "elf32-sh-linux", "elf32-sh-linux") +#else +OUTPUT_FORMAT("elf32-shbig-linux", "elf32-shbig-linux", "elf32-shbig-linux") +#endif +OUTPUT_ARCH(sh) + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__kernel_vsyscall); + +SECTIONS +{ + . = SIZEOF_HEADERS; + + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + /* This linker script is used both with -r and with -shared. + For the layouts to match, we need to skip more than enough + space for the dynamic symbol table et al. If this amount + is insufficient, ld -shared will barf. Just increase it here. */ + . = 0x400; + + .text : { *(.text) } :text =0x90909090 + .note : { *(.note.*) } :text :note + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .dynamic : { *(.dynamic) } :text :dynamic + .useless : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + } :text +} + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.6 { + global: + __kernel_vsyscall; + __kernel_sigreturn; + __kernel_rt_sigreturn; + + local: *; + }; +} diff --git a/arch/sh/mm/Kconfig b/arch/sh/mm/Kconfig index b445d02075e..9dd606464d2 100644 --- a/arch/sh/mm/Kconfig +++ b/arch/sh/mm/Kconfig @@ -223,6 +223,19 @@ config 32BIT 32-bits through the SH-4A PMB. If this is not set, legacy 29-bit physical addressing will be used. +config VSYSCALL + bool "Support vsyscall page" + depends on MMU + default y + help + This will enable support for the kernel mapping a vDSO page + in process space, and subsequently handing down the entry point + to the libc through the ELF auxiliary vector. + + From the kernel side this is used for the signal trampoline. + For systems with an MMU that can afford to give up a page, + (the default value) say Y. + choice prompt "HugeTLB page size" depends on HUGETLB_PAGE && CPU_SH4 && MMU diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index ad182b31d84..7154d1ce978 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -286,6 +286,9 @@ void __init mem_init(void) initsize >> 10); p3_cache_init(); + + /* Initialize the vDSO */ + vsyscall_init(); } void free_initmem(void) diff --git a/arch/sh/mm/tlb-flush.c b/arch/sh/mm/tlb-flush.c index fd7e42bcaa4..73ec7f6084f 100644 --- a/arch/sh/mm/tlb-flush.c +++ b/arch/sh/mm/tlb-flush.c @@ -14,12 +14,12 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long page) { - if (vma->vm_mm && vma->vm_mm->context != NO_CONTEXT) { + if (vma->vm_mm && vma->vm_mm->context.id != NO_CONTEXT) { unsigned long flags; unsigned long asid; unsigned long saved_asid = MMU_NO_ASID; - asid = vma->vm_mm->context & MMU_CONTEXT_ASID_MASK; + asid = vma->vm_mm->context.id & MMU_CONTEXT_ASID_MASK; page &= PAGE_MASK; local_irq_save(flags); @@ -39,20 +39,21 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, { struct mm_struct *mm = vma->vm_mm; - if (mm->context != NO_CONTEXT) { + if (mm->context.id != NO_CONTEXT) { unsigned long flags; int size; local_irq_save(flags); size = (end - start + (PAGE_SIZE - 1)) >> PAGE_SHIFT; if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB to flush */ - mm->context = NO_CONTEXT; + mm->context.id = NO_CONTEXT; if (mm == current->mm) activate_context(mm); } else { - unsigned long asid = mm->context&MMU_CONTEXT_ASID_MASK; + unsigned long asid; unsigned long saved_asid = MMU_NO_ASID; + asid = mm->context.id & MMU_CONTEXT_ASID_MASK; start &= PAGE_MASK; end += (PAGE_SIZE - 1); end &= PAGE_MASK; @@ -81,9 +82,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) if (size > (MMU_NTLB_ENTRIES/4)) { /* Too many TLB to flush */ flush_tlb_all(); } else { - unsigned long asid = init_mm.context&MMU_CONTEXT_ASID_MASK; + unsigned long asid; unsigned long saved_asid = get_asid(); + asid = init_mm.context.id & MMU_CONTEXT_ASID_MASK; start &= PAGE_MASK; end += (PAGE_SIZE - 1); end &= PAGE_MASK; @@ -101,11 +103,11 @@ void flush_tlb_mm(struct mm_struct *mm) { /* Invalidate all TLB of this process. */ /* Instead of invalidating each TLB, we get new MMU context. */ - if (mm->context != NO_CONTEXT) { + if (mm->context.id != NO_CONTEXT) { unsigned long flags; local_irq_save(flags); - mm->context = NO_CONTEXT; + mm->context.id = NO_CONTEXT; if (mm == current->mm) activate_context(mm); local_irq_restore(flags); -- cgit v1.2.3