From ae531c26c5c2a28ca1b35a75b39b3b256850f2c8 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 24 Apr 2008 23:40:47 +0200 Subject: x86: introduce /dev/mem restrictions with a config option This patch introduces a restriction on /dev/mem: Only non-memory can be read or written unless the newly introduced config option is set. The X server needs access to /dev/mem for the PCI space, but it doesn't need access to memory; both the file permissions and SELinux permissions of /dev/mem just make X effectively super-super powerful. With the exception of the BIOS area, there's just no valid app that uses /dev/mem on actual memory. Other popular users of /dev/mem are rootkits and the like. (note: mmap access of memory via /dev/mem was already not allowed since a really long time) People who want to use /dev/mem for kernel debugging can enable the config option. The restrictions of this patch have been in the Fedora and RHEL kernels for at least 4 years without any problems. Signed-off-by: Arjan van de Ven Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig.debug | 12 ++++++++++++ arch/x86/mm/init_32.c | 19 +++++++++++++++++++ arch/x86/mm/init_64.c | 20 ++++++++++++++++++++ drivers/char/mem.c | 28 ++++++++++++++++++++++++++++ include/asm-x86/page.h | 1 + 5 files changed, 80 insertions(+) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 610aaecc19f..0c1890c4127 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -5,6 +5,18 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" +config NONPROMISC_DEVMEM + bool "Disable promiscuous /dev/mem" + default y + help + The /dev/mem file by default only allows userspace access to PCI + space and the BIOS code and data regions. This is sufficient for + dosemu and X and all common users of /dev/mem. With this config + option, you allow userspace access to all of memory, including + kernel and userspace memory. Accidental access to this is + obviously disasterous, but specific access can be used by people + debugging the kernel. + config EARLY_PRINTK bool "Early printk" if EMBEDDED default y diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9ec62da85fd..39852d53901 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -227,6 +227,25 @@ static inline int page_kills_ppro(unsigned long pagenr) return 0; } +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} + #ifdef CONFIG_HIGHMEM pte_t *kmap_pte; pgprot_t kmap_prot; diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1ff7906a9a4..49c274ee2fb 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -664,6 +664,26 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid); #endif /* CONFIG_MEMORY_HOTPLUG */ +/* + * devmem_is_allowed() checks to see if /dev/mem access to a certain address + * is valid. The argument is a physical page number. + * + * + * On x86, access has to be given to the first megabyte of ram because that area + * contains bios code and data regions used by X and dosemu and similar apps. + * Access has to be given to non-kernel-ram areas as well, these contain the PCI + * mmio resources as well as potential bios/acpi data regions. + */ +int devmem_is_allowed(unsigned long pagenr) +{ + if (pagenr <= 256) + return 1; + if (!page_is_ram(pagenr)) + return 1; + return 0; +} + + static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, kcore_vsyscall; diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 20070b7c573..dcf6e31970a 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -108,6 +108,30 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) } #endif +#ifdef CONFIG_NONPROMISC_DEVMEM +static inline int range_is_allowed(unsigned long from, unsigned long to) +{ + unsigned long cursor; + + cursor = from >> PAGE_SHIFT; + while ((cursor << PAGE_SHIFT) < to) { + if (!devmem_is_allowed(cursor)) { + printk(KERN_INFO "Program %s tried to read /dev/mem " + "between %lx->%lx.\n", + current->comm, from, to); + return 0; + } + cursor++; + } + return 1; +} +#else +static inline int range_is_allowed(unsigned long from, unsigned long to) +{ + return 1; +} +#endif + /* * This funcion reads the *physical* memory. The f_pos points directly to the * memory location. @@ -157,6 +181,8 @@ static ssize_t read_mem(struct file * file, char __user * buf, */ ptr = xlate_dev_mem_ptr(p); + if (!range_is_allowed(p, p+count)) + return -EPERM; if (copy_to_user(buf, ptr, sz)) return -EFAULT; buf += sz; @@ -214,6 +240,8 @@ static ssize_t write_mem(struct file * file, const char __user * buf, */ ptr = xlate_dev_mem_ptr(p); + if (!range_is_allowed(p, p+sz)) + return -EPERM; copied = copy_from_user(ptr, buf, sz); if (copied) { written += sz - copied; diff --git a/include/asm-x86/page.h b/include/asm-x86/page.h index 6724a4bc6b7..b381f4a5a0b 100644 --- a/include/asm-x86/page.h +++ b/include/asm-x86/page.h @@ -47,6 +47,7 @@ #ifndef __ASSEMBLY__ extern int page_is_ram(unsigned long pagenr); +extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_pfn_mapped; -- cgit v1.2.3 From e2beb3eae627211b67e456c53f946cede2ac10d7 Mon Sep 17 00:00:00 2001 From: Venki Pallipadi Date: Thu, 6 Mar 2008 23:01:47 -0800 Subject: devmem: add range_is_allowed() check to mmap of /dev/mem Earlier patch that introduced CONFIG_NONPROMISC_DEVMEM, did the range_is_allowed() check only for read and write. Add range_is_allowed() check to mmap of /dev/mem as well. Changes the paramaters of range_is_allowed() to pfn and size to handle more than 32 bits of physical address on 32 bit arch cleanly. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- drivers/char/mem.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/char/mem.c b/drivers/char/mem.c index dcf6e31970a..964ff3b1cff 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -109,24 +109,26 @@ static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) #endif #ifdef CONFIG_NONPROMISC_DEVMEM -static inline int range_is_allowed(unsigned long from, unsigned long to) +static inline int range_is_allowed(unsigned long pfn, unsigned long size) { - unsigned long cursor; + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; - cursor = from >> PAGE_SHIFT; - while ((cursor << PAGE_SHIFT) < to) { - if (!devmem_is_allowed(cursor)) { - printk(KERN_INFO "Program %s tried to read /dev/mem " - "between %lx->%lx.\n", + while (cursor < to) { + if (!devmem_is_allowed(pfn)) { + printk(KERN_INFO + "Program %s tried to access /dev/mem between %Lx->%Lx.\n", current->comm, from, to); return 0; } - cursor++; + cursor += PAGE_SIZE; + pfn++; } return 1; } #else -static inline int range_is_allowed(unsigned long from, unsigned long to) +static inline int range_is_allowed(unsigned long pfn, unsigned long size) { return 1; } @@ -181,7 +183,7 @@ static ssize_t read_mem(struct file * file, char __user * buf, */ ptr = xlate_dev_mem_ptr(p); - if (!range_is_allowed(p, p+count)) + if (!range_is_allowed(p >> PAGE_SHIFT, count)) return -EPERM; if (copy_to_user(buf, ptr, sz)) return -EFAULT; @@ -240,7 +242,7 @@ static ssize_t write_mem(struct file * file, const char __user * buf, */ ptr = xlate_dev_mem_ptr(p); - if (!range_is_allowed(p, p+sz)) + if (!range_is_allowed(p >> PAGE_SHIFT, sz)) return -EPERM; copied = copy_from_user(ptr, buf, sz); if (copied) { @@ -309,6 +311,9 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) if (!private_mapping_ok(vma)) return -ENOSYS; + if (!range_is_allowed(vma->vm_pgoff, size)) + return -EPERM; + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); -- cgit v1.2.3 From e045fb2a988a9a1964059b0d33dbaf18d12f925f Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:15 -0700 Subject: x86: PAT avoid aliasing in /dev/mem read/write Add xlate and unxlate around /dev/mem read/write. This sets up the mapping that can be used for /dev/mem read and write without aliasing worries. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/ioremap.c | 29 +++++++++++++++++++++++++++++ drivers/char/mem.c | 32 +++++++++++++++++++++++++++----- include/asm-x86/io.h | 8 ++++++++ include/asm-x86/io_32.h | 6 ------ include/asm-x86/io_64.h | 6 ------ 5 files changed, 64 insertions(+), 17 deletions(-) diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 3a4baf95e24..caac7d5699a 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -336,6 +336,35 @@ void iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(iounmap); +/* + * Convert a physical pointer to a virtual kernel pointer for /dev/mem + * access + */ +void *xlate_dev_mem_ptr(unsigned long phys) +{ + void *addr; + unsigned long start = phys & PAGE_MASK; + + /* If page is RAM, we can use __va. Otherwise ioremap and unmap. */ + if (page_is_ram(start >> PAGE_SHIFT)) + return __va(phys); + + addr = (void *)ioremap(start, PAGE_SIZE); + if (addr) + addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK)); + + return addr; +} + +void unxlate_dev_mem_ptr(unsigned long phys, void *addr) +{ + if (page_is_ram(phys >> PAGE_SHIFT)) + return; + + iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK)); + return; +} + #ifdef CONFIG_X86_32 int __initdata early_ioremap_debug; diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 964ff3b1cff..83495885ada 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -134,6 +134,10 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) } #endif +void __attribute__((weak)) unxlate_dev_mem_ptr(unsigned long phys, void *addr) +{ +} + /* * This funcion reads the *physical* memory. The f_pos points directly to the * memory location. @@ -176,17 +180,25 @@ static ssize_t read_mem(struct file * file, char __user * buf, sz = min_t(unsigned long, sz, count); + if (!range_is_allowed(p >> PAGE_SHIFT, count)) + return -EPERM; + /* * On ia64 if a page has been mapped somewhere as * uncached, then it must also be accessed uncached * by the kernel or data corruption may occur */ ptr = xlate_dev_mem_ptr(p); + if (!ptr) + return -EFAULT; - if (!range_is_allowed(p >> PAGE_SHIFT, count)) - return -EPERM; - if (copy_to_user(buf, ptr, sz)) + if (copy_to_user(buf, ptr, sz)) { + unxlate_dev_mem_ptr(p, ptr); return -EFAULT; + } + + unxlate_dev_mem_ptr(p, ptr); + buf += sz; p += sz; count -= sz; @@ -235,22 +247,32 @@ static ssize_t write_mem(struct file * file, const char __user * buf, sz = min_t(unsigned long, sz, count); + if (!range_is_allowed(p >> PAGE_SHIFT, sz)) + return -EPERM; + /* * On ia64 if a page has been mapped somewhere as * uncached, then it must also be accessed uncached * by the kernel or data corruption may occur */ ptr = xlate_dev_mem_ptr(p); + if (!ptr) { + if (written) + break; + return -EFAULT; + } - if (!range_is_allowed(p >> PAGE_SHIFT, sz)) - return -EPERM; copied = copy_from_user(ptr, buf, sz); if (copied) { written += sz - copied; + unxlate_dev_mem_ptr(p, ptr); if (written) break; return -EFAULT; } + + unxlate_dev_mem_ptr(p, ptr); + buf += sz; p += sz; count -= sz; diff --git a/include/asm-x86/io.h b/include/asm-x86/io.h index 7b292d38671..d5b11f60dbd 100644 --- a/include/asm-x86/io.h +++ b/include/asm-x86/io.h @@ -1,3 +1,6 @@ +#ifndef _ASM_X86_IO_H +#define _ASM_X86_IO_H + #define ARCH_HAS_IOREMAP_WC #ifdef CONFIG_X86_32 @@ -5,7 +8,12 @@ #else # include "io_64.h" #endif + +extern void *xlate_dev_mem_ptr(unsigned long phys); +extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr); + extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, unsigned long prot_val); extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); +#endif /* _ASM_X86_IO_H */ diff --git a/include/asm-x86/io_32.h b/include/asm-x86/io_32.h index 509045f5fda..6e73467a4fb 100644 --- a/include/asm-x86/io_32.h +++ b/include/asm-x86/io_32.h @@ -48,12 +48,6 @@ #include -/* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access - */ -#define xlate_dev_mem_ptr(p) __va(p) - /* * Convert a virtual cached pointer to an uncached pointer */ diff --git a/include/asm-x86/io_64.h b/include/asm-x86/io_64.h index c2f5eef47b8..0930bedf9e4 100644 --- a/include/asm-x86/io_64.h +++ b/include/asm-x86/io_64.h @@ -307,12 +307,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c); extern int iommu_bio_merge; #define BIO_VMERGE_BOUNDARY iommu_bio_merge -/* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access - */ -#define xlate_dev_mem_ptr(p) __va(p) - /* * Convert a virtual cached pointer to an uncached pointer */ -- cgit v1.2.3 From f0970c13b6a5b01189aeb196ebb573cf87d95839 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:20 -0700 Subject: x86: PAT phys_mem_access_prot_allowed for dev/mem mmap Introduce phys_mem_access_prot_allowed(), which checks whether the mapping is possible, without any conflicts and returns success or failure based on that. phys_mem_access_prot() by itself does not allow failure case. This ability to return error is needed for PAT where we may have aliasing conflicts. x86 setup __HAVE_PHYS_MEM_ACCESS_PROT and move x86 specific code out of /dev/mem into arch specific area. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 39 +++++++++++++++++++++++++++++++++++++++ drivers/char/mem.c | 41 +++++++++++------------------------------ include/asm-x86/pgtable.h | 9 +++++++++ 3 files changed, 59 insertions(+), 30 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 72c0f609740..64cc0c18233 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -419,3 +419,42 @@ int free_memtype(u64 start, u64 end) return err; } + +/* /dev/mem interface. Use the previous mapping */ +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot) +{ + return vma_prot; +} + +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t *vma_prot) +{ + + if (file->f_flags & O_SYNC) { + *vma_prot = pgprot_noncached(*vma_prot); + return 1; + } + +#ifdef CONFIG_X86_32 + /* + * On the PPro and successors, the MTRRs are used to set + * memory types for physical addresses outside main memory, + * so blindly setting UC or PWT on those pages is wrong. + * For Pentiums and earlier, the surround logic should disable + * caching for the high addresses through the KEN pin, but + * we maintain the tradition of paranoia in this code. + */ + if (!pat_wc_enabled && + ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || + test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && + (pfn << PAGE_SHIFT) >= __pa(high_memory)) { + *vma_prot = pgprot_noncached(*vma_prot); + return 1; + } +#endif + + return 1; +} diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 83495885ada..56b2fb4fbc9 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -41,36 +41,7 @@ */ static inline int uncached_access(struct file *file, unsigned long addr) { -#if defined(__i386__) && !defined(__arch_um__) - /* - * On the PPro and successors, the MTRRs are used to set - * memory types for physical addresses outside main memory, - * so blindly setting PCD or PWT on those pages is wrong. - * For Pentiums and earlier, the surround logic should disable - * caching for the high addresses through the KEN pin, but - * we maintain the tradition of paranoia in this code. - */ - if (file->f_flags & O_SYNC) - return 1; - return !( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || - test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability) ) - && addr >= __pa(high_memory); -#elif defined(__x86_64__) && !defined(__arch_um__) - /* - * This is broken because it can generate memory type aliases, - * which can cause cache corruptions - * But it is only available for root and we have to be bug-to-bug - * compatible with i386. - */ - if (file->f_flags & O_SYNC) - return 1; - /* same behaviour as i386. PAT always set to cached and MTRRs control the - caching behaviour. - Hopefully a full PAT implementation will fix that soon. */ - return 0; -#elif defined(CONFIG_IA64) +#if defined(CONFIG_IA64) /* * On ia64, we ignore O_SYNC because we cannot tolerate memory attribute aliases. */ @@ -283,6 +254,12 @@ static ssize_t write_mem(struct file * file, const char __user * buf, return written; } +int __attribute__((weak)) phys_mem_access_prot_allowed(struct file *file, + unsigned long pfn, unsigned long size, pgprot_t *vma_prot) +{ + return 1; +} + #ifndef __HAVE_PHYS_MEM_ACCESS_PROT static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) @@ -336,6 +313,10 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) if (!range_is_allowed(vma->vm_pgoff, size)) return -EPERM; + if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, + &vma->vm_page_prot)) + return -EINVAL; + vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); diff --git a/include/asm-x86/pgtable.h b/include/asm-x86/pgtable.h index f1d9f4a03f6..1902f0aed6c 100644 --- a/include/asm-x86/pgtable.h +++ b/include/asm-x86/pgtable.h @@ -289,6 +289,15 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#ifndef __ASSEMBLY__ +#define __HAVE_PHYS_MEM_ACCESS_PROT +struct file; +pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t *vma_prot); +#endif + #ifdef CONFIG_PARAVIRT #include #else /* !CONFIG_PARAVIRT */ -- cgit v1.2.3 From e7f260a276f2c9184fe753732d834b1f6fbe9f17 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:21 -0700 Subject: x86: PAT use reserve free memtype in mmap of /dev/mem Use reserve_memtype and free_memtype wrappers for /dev/mem mmaps. The memtype is slightly complicated here, given that we have to support existing X mappings. We fallback on UC_MINUS for that. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 128 ++++++++++++++++++++++++++++++++++++++++++++++++----- drivers/char/mem.c | 35 ++++++++++++++- 2 files changed, 152 insertions(+), 11 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 64cc0c18233..1489aafbfa7 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include int pat_wc_enabled = 1; @@ -190,6 +192,21 @@ static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot, return 0; } +/* + * req_type typically has one of the: + * - _PAGE_CACHE_WB + * - _PAGE_CACHE_WC + * - _PAGE_CACHE_UC_MINUS + * - _PAGE_CACHE_UC + * + * req_type will have a special case value '-1', when requester want to inherit + * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS. + * + * If ret_type is NULL, function will return an error if it cannot reserve the + * region with req_type. If ret_type is non-null, function will return + * available type in ret_type in case of no error. In case of any error + * it will return a negative return value. + */ int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long *ret_type) { @@ -200,9 +217,14 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, /* Only track when pat_wc_enabled */ if (!pat_wc_enabled) { - if (ret_type) - *ret_type = req_type; - + /* This is identical to page table setting without PAT */ + if (ret_type) { + if (req_type == -1) { + *ret_type = _PAGE_CACHE_WB; + } else { + *ret_type = req_type; + } + } return 0; } @@ -214,8 +236,29 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, return 0; } - req_type &= _PAGE_CACHE_MASK; - err = pat_x_mtrr_type(start, end, req_type, &actual_type); + if (req_type == -1) { + /* + * Special case where caller wants to inherit from mtrr or + * existing pat mapping, defaulting to UC_MINUS in case of + * no match. + */ + u8 mtrr_type = mtrr_type_lookup(start, end); + if (mtrr_type == 0xFE) { /* MTRR match error */ + err = -1; + } + + if (mtrr_type == MTRR_TYPE_WRBACK) { + req_type = _PAGE_CACHE_WB; + actual_type = _PAGE_CACHE_WB; + } else { + req_type = _PAGE_CACHE_UC_MINUS; + actual_type = _PAGE_CACHE_UC_MINUS; + } + } else { + req_type &= _PAGE_CACHE_MASK; + err = pat_x_mtrr_type(start, end, req_type, &actual_type); + } + if (err) { if (ret_type) *ret_type = actual_type; @@ -420,7 +463,14 @@ int free_memtype(u64 start, u64 end) } -/* /dev/mem interface. Use the previous mapping */ +/* + * /dev/mem mmap interface. The memtype used for mapping varies: + * - Use UC for mappings with O_SYNC flag + * - Without O_SYNC flag, if there is any conflict in reserve_memtype, + * inherit the memtype from existing mapping. + * - Else use UC_MINUS memtype (for backward compatibility with existing + * X drivers. + */ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { @@ -430,10 +480,13 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { + u64 offset = ((u64) pfn) << PAGE_SHIFT; + unsigned long flags = _PAGE_CACHE_UC_MINUS; + unsigned long ret_flags; + int retval; if (file->f_flags & O_SYNC) { - *vma_prot = pgprot_noncached(*vma_prot); - return 1; + flags = _PAGE_CACHE_UC; } #ifdef CONFIG_X86_32 @@ -451,10 +504,65 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) || test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) && (pfn << PAGE_SHIFT) >= __pa(high_memory)) { - *vma_prot = pgprot_noncached(*vma_prot); - return 1; + flags = _PAGE_CACHE_UC; } #endif + /* + * With O_SYNC, we can only take UC mapping. Fail if we cannot. + * Without O_SYNC, we want to get + * - WB for WB-able memory and no other conflicting mappings + * - UC_MINUS for non-WB-able memory with no other conflicting mappings + * - Inherit from confliting mappings otherwise + */ + if (flags != _PAGE_CACHE_UC_MINUS) { + retval = reserve_memtype(offset, offset + size, flags, NULL); + } else { + retval = reserve_memtype(offset, offset + size, -1, &ret_flags); + } + + if (retval < 0) + return 0; + + flags = ret_flags; + + if (pfn <= max_pfn_mapped && + ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { + free_memtype(offset, offset + size); + printk(KERN_DEBUG + "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", + current->comm, current->pid, + cattr_name(flags), + offset, offset + size); + return 0; + } + + *vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) | + flags); return 1; } + +void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) +{ + u64 addr = (u64)pfn << PAGE_SHIFT; + unsigned long flags; + unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); + + reserve_memtype(addr, addr + size, want_flags, &flags); + if (flags != want_flags) { + printk(KERN_DEBUG + "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", + current->comm, current->pid, + cattr_name(want_flags), + addr, addr + size, + cattr_name(flags)); + } +} + +void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) +{ + u64 addr = (u64)pfn << PAGE_SHIFT; + + free_memtype(addr, addr + size); +} + diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 56b2fb4fbc9..e83623ead44 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -300,6 +300,35 @@ static inline int private_mapping_ok(struct vm_area_struct *vma) } #endif +void __attribute__((weak)) +map_devmem(unsigned long pfn, unsigned long len, pgprot_t prot) +{ + /* nothing. architectures can override. */ +} + +void __attribute__((weak)) +unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t prot) +{ + /* nothing. architectures can override. */ +} + +static void mmap_mem_open(struct vm_area_struct *vma) +{ + map_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static void mmap_mem_close(struct vm_area_struct *vma) +{ + unmap_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +static struct vm_operations_struct mmap_mem_ops = { + .open = mmap_mem_open, + .close = mmap_mem_close +}; + static int mmap_mem(struct file * file, struct vm_area_struct * vma) { size_t size = vma->vm_end - vma->vm_start; @@ -321,13 +350,17 @@ static int mmap_mem(struct file * file, struct vm_area_struct * vma) size, vma->vm_page_prot); + vma->vm_ops = &mmap_mem_ops; + /* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, - vma->vm_page_prot)) + vma->vm_page_prot)) { + unmap_devmem(vma->vm_pgoff, size, vma->vm_page_prot); return -EAGAIN; + } return 0; } -- cgit v1.2.3 From 28eb559b5b0b9b51b9165a9b8faa75b0bb91ca8d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 3 Apr 2008 10:14:33 +0200 Subject: pat: cleanups Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 1489aafbfa7..ef8b64b89c7 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -284,7 +284,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, struct memtype *saved_ptr; if (parse->start >= end) { - printk("New Entry\n"); + pr_debug("New Entry\n"); list_add(&new_entry->nd, parse->nd.prev); new_entry = NULL; break; @@ -386,7 +386,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, break; } - printk("Overlap at 0x%Lx-0x%Lx\n", + printk(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n", saved_ptr->start, saved_ptr->end); /* No conflict. Go ahead and add this new entry */ list_add(&new_entry->nd, &saved_ptr->nd); @@ -396,7 +396,7 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, } if (err) { - printk( + printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n", start, end, cattr_name(new_entry->type), cattr_name(req_type)); @@ -408,16 +408,16 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, if (new_entry) { /* No conflict. Not yet added to the list. Add to the tail */ list_add_tail(&new_entry->nd, &memtype_list); - printk("New Entry\n"); - } + pr_debug("New Entry\n"); + } if (ret_type) { - printk( + pr_debug( "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", start, end, cattr_name(actual_type), cattr_name(req_type), cattr_name(*ret_type)); } else { - printk( + pr_debug( "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n", start, end, cattr_name(actual_type), cattr_name(req_type)); @@ -454,11 +454,11 @@ int free_memtype(u64 start, u64 end) spin_unlock(&memtype_lock); if (err) { - printk(KERN_DEBUG "%s:%d freeing invalid memtype %Lx-%Lx\n", + printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", current->comm, current->pid, start, end); } - printk( "free_memtype request 0x%Lx-0x%Lx\n", start, end); + pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end); return err; } @@ -529,7 +529,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, if (pfn <= max_pfn_mapped && ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { free_memtype(offset, offset + size); - printk(KERN_DEBUG + printk(KERN_INFO "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", current->comm, current->pid, cattr_name(flags), @@ -550,7 +550,7 @@ void map_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) reserve_memtype(addr, addr + size, want_flags, &flags); if (flags != want_flags) { - printk(KERN_DEBUG + printk(KERN_INFO "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n", current->comm, current->pid, cattr_name(want_flags), -- cgit v1.2.3 From 1f56cf1c58c81f7ecf16f5e99ac4a333d9dc9aea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 18 Apr 2008 21:42:36 +0200 Subject: /dev/mem: make promisc the default default to the old semantics. Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 0c1890c4127..239fd9fba0a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -7,7 +7,6 @@ source "lib/Kconfig.debug" config NONPROMISC_DEVMEM bool "Disable promiscuous /dev/mem" - default y help The /dev/mem file by default only allows userspace access to PCI space and the BIOS code and data regions. This is sufficient for -- cgit v1.2.3 From 1526a756fba5b1f2eb5001b8e8de2a0ea1bd2c66 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Tue, 18 Mar 2008 17:00:24 -0700 Subject: generic: add ioremap_wc() interface wrapper x86 has ioremap_wc for wc remap. Also introduce a generic ioremap_wc aliased to ioremap_uc so that drivers can use this interface transparently. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: Ingo Molnar --- include/asm-generic/iomap.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/asm-generic/iomap.h b/include/asm-generic/iomap.h index 67dc84cd134..76b0cc5637f 100644 --- a/include/asm-generic/iomap.h +++ b/include/asm-generic/iomap.h @@ -60,6 +60,10 @@ extern void iowrite32_rep(void __iomem *port, const void *buf, unsigned long cou extern void __iomem *ioport_map(unsigned long port, unsigned int nr); extern void ioport_unmap(void __iomem *); +#ifndef ARCH_HAS_IOREMAP_WC +#define ioremap_wc ioremap_nocache +#endif + /* Create a virtual mapping cookie for a PCI BAR (memory or IO) */ struct pci_dev; extern void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long max); -- cgit v1.2.3