diff options
Diffstat (limited to 'arch/i386/mm/hugetlbpage.c')
-rw-r--r-- | arch/i386/mm/hugetlbpage.c | 431 |
1 files changed, 431 insertions, 0 deletions
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c new file mode 100644 index 00000000000..a8c45143088 --- /dev/null +++ b/arch/i386/mm/hugetlbpage.c @@ -0,0 +1,431 @@ +/* + * IA-32 Huge TLB Page Support for Kernel. + * + * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com> + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mm.h> +#include <linux/hugetlb.h> +#include <linux/pagemap.h> +#include <linux/smp_lock.h> +#include <linux/slab.h> +#include <linux/err.h> +#include <linux/sysctl.h> +#include <asm/mman.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +static pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_alloc(mm, pgd, addr); + pmd = pmd_alloc(mm, pud, addr); + return (pte_t *) pmd; +} + +static pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd = NULL; + + pgd = pgd_offset(mm, addr); + pud = pud_offset(pgd, addr); + pmd = pmd_offset(pud, addr); + return (pte_t *) pmd; +} + +static void set_huge_pte(struct mm_struct *mm, struct vm_area_struct *vma, struct page *page, pte_t * page_table, int write_access) +{ + pte_t entry; + + add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE); + if (write_access) { + entry = + pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); + } else + entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot)); + entry = pte_mkyoung(entry); + mk_pte_huge(entry); + set_pte(page_table, entry); +} + +/* + * This function checks for proper alignment of input addr and len parameters. + */ +int is_aligned_hugepage_range(unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + return 0; +} + +int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma) +{ + pte_t *src_pte, *dst_pte, entry; + struct page *ptepage; + unsigned long addr = vma->vm_start; + unsigned long end = vma->vm_end; + + while (addr < end) { + dst_pte = huge_pte_alloc(dst, addr); + if (!dst_pte) + goto nomem; + src_pte = huge_pte_offset(src, addr); + entry = *src_pte; + ptepage = pte_page(entry); + get_page(ptepage); + set_pte(dst_pte, entry); + add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE); + addr += HPAGE_SIZE; + } + return 0; + +nomem: + return -ENOMEM; +} + +int +follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, + struct page **pages, struct vm_area_struct **vmas, + unsigned long *position, int *length, int i) +{ + unsigned long vpfn, vaddr = *position; + int remainder = *length; + + WARN_ON(!is_vm_hugetlb_page(vma)); + + vpfn = vaddr/PAGE_SIZE; + while (vaddr < vma->vm_end && remainder) { + + if (pages) { + pte_t *pte; + struct page *page; + + pte = huge_pte_offset(mm, vaddr); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + + WARN_ON(!PageCompound(page)); + + get_page(page); + pages[i] = page; + } + + if (vmas) + vmas[i] = vma; + + vaddr += PAGE_SIZE; + ++vpfn; + --remainder; + ++i; + } + + *length = remainder; + *position = vaddr; + + return i; +} + +#if 0 /* This is just for testing */ +struct page * +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +{ + unsigned long start = address; + int length = 1; + int nr; + struct page *page; + struct vm_area_struct *vma; + + vma = find_vma(mm, addr); + if (!vma || !is_vm_hugetlb_page(vma)) + return ERR_PTR(-EINVAL); + + pte = huge_pte_offset(mm, address); + + /* hugetlb should be locked, and hence, prefaulted */ + WARN_ON(!pte || pte_none(*pte)); + + page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)]; + + WARN_ON(!PageCompound(page)); + + return page; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + return NULL; +} + +#else + +struct page * +follow_huge_addr(struct mm_struct *mm, unsigned long address, int write) +{ + return ERR_PTR(-EINVAL); +} + +int pmd_huge(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_PSE); +} + +struct page * +follow_huge_pmd(struct mm_struct *mm, unsigned long address, + pmd_t *pmd, int write) +{ + struct page *page; + + page = pte_page(*(pte_t *)pmd); + if (page) + page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); + return page; +} +#endif + +void unmap_hugepage_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long address; + pte_t pte, *ptep; + struct page *page; + + BUG_ON(start & (HPAGE_SIZE - 1)); + BUG_ON(end & (HPAGE_SIZE - 1)); + + for (address = start; address < end; address += HPAGE_SIZE) { + ptep = huge_pte_offset(mm, address); + if (!ptep) + continue; + pte = ptep_get_and_clear(mm, address, ptep); + if (pte_none(pte)) + continue; + page = pte_page(pte); + put_page(page); + } + add_mm_counter(mm ,rss, -((end - start) >> PAGE_SHIFT)); + flush_tlb_range(vma, start, end); +} + +int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma) +{ + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret = 0; + + BUG_ON(vma->vm_start & ~HPAGE_MASK); + BUG_ON(vma->vm_end & ~HPAGE_MASK); + + spin_lock(&mm->page_table_lock); + for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) { + unsigned long idx; + pte_t *pte = huge_pte_alloc(mm, addr); + struct page *page; + + if (!pte) { + ret = -ENOMEM; + goto out; + } + + if (!pte_none(*pte)) { + pmd_t *pmd = (pmd_t *) pte; + + page = pmd_page(*pmd); + pmd_clear(pmd); + mm->nr_ptes--; + dec_page_state(nr_page_table_pages); + page_cache_release(page); + } + + idx = ((addr - vma->vm_start) >> HPAGE_SHIFT) + + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT)); + page = find_get_page(mapping, idx); + if (!page) { + /* charge the fs quota first */ + if (hugetlb_get_quota(mapping)) { + ret = -ENOMEM; + goto out; + } + page = alloc_huge_page(); + if (!page) { + hugetlb_put_quota(mapping); + ret = -ENOMEM; + goto out; + } + ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC); + if (! ret) { + unlock_page(page); + } else { + hugetlb_put_quota(mapping); + free_huge_page(page); + goto out; + } + } + set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE); + } +out: + spin_unlock(&mm->page_table_lock); + return ret; +} + +/* x86_64 also uses this file */ + +#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA +static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, + unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + unsigned long start_addr; + + start_addr = mm->free_area_cache; + +full_search: + addr = ALIGN(start_addr, HPAGE_SIZE); + + for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { + /* At this point: (!vma || addr < vma->vm_end). */ + if (TASK_SIZE - len < addr) { + /* + * Start a new search - just in case we missed + * some holes. + */ + if (start_addr != TASK_UNMAPPED_BASE) { + start_addr = TASK_UNMAPPED_BASE; + goto full_search; + } + return -ENOMEM; + } + if (!vma || addr + len <= vma->vm_start) { + mm->free_area_cache = addr + len; + return addr; + } + addr = ALIGN(vma->vm_end, HPAGE_SIZE); + } +} + +static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, + unsigned long addr0, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma, *prev_vma; + unsigned long base = mm->mmap_base, addr = addr0; + int first_time = 1; + + /* don't allow allocations above current base */ + if (mm->free_area_cache > base) + mm->free_area_cache = base; + +try_again: + /* make sure it can fit in the remaining address space */ + if (mm->free_area_cache < len) + goto fail; + + /* either no address requested or cant fit in requested address hole */ + addr = (mm->free_area_cache - len) & HPAGE_MASK; + do { + /* + * Lookup failure means no vma is above this address, + * i.e. return with success: + */ + if (!(vma = find_vma_prev(mm, addr, &prev_vma))) + return addr; + + /* + * new region fits between prev_vma->vm_end and + * vma->vm_start, use it: + */ + if (addr + len <= vma->vm_start && + (!prev_vma || (addr >= prev_vma->vm_end))) + /* remember the address as a hint for next time */ + return (mm->free_area_cache = addr); + else + /* pull free_area_cache down to the first hole */ + if (mm->free_area_cache == vma->vm_end) + mm->free_area_cache = vma->vm_start; + + /* try just below the current vma->vm_start */ + addr = (vma->vm_start - len) & HPAGE_MASK; + } while (len <= vma->vm_start); + +fail: + /* + * if hint left us with no space for the requested + * mapping then try again: + */ + if (first_time) { + mm->free_area_cache = base; + first_time = 0; + goto try_again; + } + /* + * A failed mmap() very likely causes application failure, + * so fall back to the bottom-up function here. This scenario + * can happen with large stack limits and large mmap() + * allocations. + */ + mm->free_area_cache = TASK_UNMAPPED_BASE; + addr = hugetlb_get_unmapped_area_bottomup(file, addr0, + len, pgoff, flags); + + /* + * Restore the topdown base: + */ + mm->free_area_cache = base; + + return addr; +} + +unsigned long +hugetlb_get_unmapped_area(struct file *file, unsigned long addr, + unsigned long len, unsigned long pgoff, unsigned long flags) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + + if (len & ~HPAGE_MASK) + return -EINVAL; + if (len > TASK_SIZE) + return -ENOMEM; + + if (addr) { + addr = ALIGN(addr, HPAGE_SIZE); + vma = find_vma(mm, addr); + if (TASK_SIZE - len >= addr && + (!vma || addr + len <= vma->vm_start)) + return addr; + } + if (mm->get_unmapped_area == arch_get_unmapped_area) + return hugetlb_get_unmapped_area_bottomup(file, addr, len, + pgoff, flags); + else + return hugetlb_get_unmapped_area_topdown(file, addr, len, + pgoff, flags); +} + +#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ + |