diff options
author | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2009-09-11 10:28:57 +0200 |
---|---|---|
committer | Martin Schwidefsky <schwidefsky@de.ibm.com> | 2009-09-11 10:29:53 +0200 |
commit | 50aa98bad056a17655864a4d71ebc32d95c629a7 (patch) | |
tree | bf8d22851d99583e2ea388766697bf64672d7926 /arch | |
parent | c4de0c1a18237c2727dde8ad392e333539b0af3c (diff) |
[S390] fix recursive locking on page_table_lock
Suzuki Poulose reported the following recursive locking bug on s390:
Here is the stack trace (see Appendix I for more info):
[<0000000000406ed6>] _spin_lock+0x52/0x94
[<0000000000103bde>] crst_table_free+0x14e/0x1a4
[<00000000001ba684>] __pmd_alloc+0x114/0x1ec
[<00000000001be8d0>] handle_mm_fault+0x2cc/0xb80
[<0000000000407d62>] do_dat_exception+0x2b6/0x3a0
[<0000000000114f8c>] sysc_return+0x0/0x8
[<00000200001642b2>] 0x200001642b2
__pmd_alloc (mm/memory.c) already holds the page_table_lock when it tries
to populate the pud/pgd with the newly allocated pmd. If another
thread has populated it before we get a chance, we free the pmd using
pmd_free().
On s390x, pmd_free() (and likewise pud_free()) is #defined to crst_table_free(),
which acquires the page_table_lock to protect the crst_table index updates.
Hence this ends up in recursive locking of the page_table_lock.
The solution suggested by Dave Hansen is to use a new spin lock in the mmu
context to protect the access to the crst_list and the pgtable_list.
Reported-by: Suzuki Poulose <suzuki@in.ibm.com>
Cc: Dave Hansen <dave@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch')
-rw-r--r-- | arch/s390/include/asm/mmu.h | 1 | ||||
-rw-r--r-- | arch/s390/include/asm/pgalloc.h | 1 | ||||
-rw-r--r-- | arch/s390/mm/pgtable.c | 24 | ||||
-rw-r--r-- | arch/s390/mm/vmem.c | 1 |
4 files changed, 15 insertions, 12 deletions
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h index 3b59216e628..03be99919d6 100644 --- a/arch/s390/include/asm/mmu.h +++ b/arch/s390/include/asm/mmu.h @@ -2,6 +2,7 @@ #define __MMU_H typedef struct { + spinlock_t list_lock; struct list_head crst_list; struct list_head pgtable_list; unsigned long asce_bits; diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index b2658b9220f..ddad5903341 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -140,6 +140,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) static inline pgd_t *pgd_alloc(struct mm_struct *mm) { + spin_lock_init(&mm->context.list_lock); INIT_LIST_HEAD(&mm->context.crst_list); INIT_LIST_HEAD(&mm->context.pgtable_list); return (pgd_t *) crst_table_alloc(mm, s390_noexec); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 56566720798..c7021524707 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -78,9 +78,9 @@ unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) } page->index = page_to_phys(shadow); } - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_add(&page->lru, &mm->context.crst_list); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); return (unsigned long *) page_to_phys(page); } @@ -89,9 +89,9 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table) unsigned long *shadow = get_shadow_table(table); struct page *page = virt_to_page(table); - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_del(&page->lru); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); if (shadow) free_pages((unsigned long) shadow, ALLOC_ORDER); free_pages((unsigned long) table, ALLOC_ORDER); @@ -182,7 +182,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long bits; bits = (mm->context.noexec || mm->context.has_pgste) ? 
3UL : 1UL; - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { page = list_first_entry(&mm->context.pgtable_list, @@ -191,7 +191,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page = NULL; } if (!page) { - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); page = alloc_page(GFP_KERNEL|__GFP_REPEAT); if (!page) return NULL; @@ -202,7 +202,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) clear_table_pgstes(table); else clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); list_add(&page->lru, &mm->context.pgtable_list); } table = (unsigned long *) page_to_phys(page); @@ -213,7 +213,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) page->flags |= bits; if ((page->flags & FRAG_MASK) == ((1UL << TABLES_PER_PAGE) - 1)) list_move_tail(&page->lru, &mm->context.pgtable_list); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); return table; } @@ -225,7 +225,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) bits = (mm->context.noexec || mm->context.has_pgste) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); page->flags ^= bits; if (page->flags & FRAG_MASK) { /* Page now has some free pgtable fragments. */ @@ -234,7 +234,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) } else /* All fragments of the 4K page have been freed. 
*/ list_del(&page->lru); - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); if (page) { pgtable_page_dtor(page); __free_page(page); @@ -245,7 +245,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) { struct page *page; - spin_lock(&mm->page_table_lock); + spin_lock(&mm->context.list_lock); /* Free shadow region and segment tables. */ list_for_each_entry(page, &mm->context.crst_list, lru) if (page->index) { @@ -255,7 +255,7 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) /* "Free" second halves of page tables. */ list_for_each_entry(page, &mm->context.pgtable_list, lru) page->flags &= ~SECOND_HALVES; - spin_unlock(&mm->page_table_lock); + spin_unlock(&mm->context.list_lock); mm->context.noexec = 0; update_mm(mm, tsk); } diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index e4868bfc672..5f91a38d759 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -331,6 +331,7 @@ void __init vmem_map_init(void) unsigned long start, end; int i; + spin_lock_init(&init_mm.context.list_lock); INIT_LIST_HEAD(&init_mm.context.crst_list); INIT_LIST_HEAD(&init_mm.context.pgtable_list); init_mm.context.noexec = 0; |