diff options
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/Makefile | 7 | ||||
-rw-r--r-- | arch/s390/mm/cmm.c | 443 | ||||
-rw-r--r-- | arch/s390/mm/extmem.c | 588 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 586 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 310 | ||||
-rw-r--r-- | arch/s390/mm/ioremap.c | 138 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 86 |
7 files changed, 2158 insertions, 0 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile new file mode 100644 index 00000000000..aa9a42b6e62 --- /dev/null +++ b/arch/s390/mm/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the linux s390-specific parts of the memory manager. +# + +obj-y := init.o fault.o ioremap.o extmem.o mmap.o +obj-$(CONFIG_CMM) += cmm.o + diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c new file mode 100644 index 00000000000..d30cdb4248a --- /dev/null +++ b/arch/s390/mm/cmm.c @@ -0,0 +1,443 @@ +/* + * arch/s390/mm/cmm.c + * + * S390 version + * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Collaborative memory management interface. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/sysctl.h> +#include <linux/ctype.h> + +#include <asm/pgalloc.h> +#include <asm/uaccess.h> + +#include "../../../drivers/s390/net/smsgiucv.h" + +#define CMM_NR_PAGES ((PAGE_SIZE / sizeof(unsigned long)) - 2) + +struct cmm_page_array { + struct cmm_page_array *next; + unsigned long index; + unsigned long pages[CMM_NR_PAGES]; +}; + +static long cmm_pages = 0; +static long cmm_timed_pages = 0; +static volatile long cmm_pages_target = 0; +static volatile long cmm_timed_pages_target = 0; +static long cmm_timeout_pages = 0; +static long cmm_timeout_seconds = 0; + +static struct cmm_page_array *cmm_page_list = 0; +static struct cmm_page_array *cmm_timed_page_list = 0; + +static unsigned long cmm_thread_active = 0; +static struct work_struct cmm_thread_starter; +static wait_queue_head_t cmm_thread_wait; +static struct timer_list cmm_timer; + +static void cmm_timer_fn(unsigned long); +static void cmm_set_timer(void); + +static long +cmm_strtoul(const char *cp, char **endp) +{ + unsigned int base = 10; + + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x' || *cp == 'X') && isxdigit(cp[1])) { + base = 16; + cp++; + } + } + return simple_strtoul(cp, endp, base); +} + +static long +cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) +{ + struct cmm_page_array *pa; + unsigned long page; + + pa = *list; + while (pages) { + page = __get_free_page(GFP_NOIO); + if (!page) + break; + if (!pa || pa->index >= CMM_NR_PAGES) { + /* Need a new page for the page list. */ + pa = (struct cmm_page_array *) + __get_free_page(GFP_NOIO); + if (!pa) { + free_page(page); + break; + } + pa->next = *list; + pa->index = 0; + *list = pa; + } + diag10(page); + pa->pages[pa->index++] = page; + (*counter)++; + pages--; + } + return pages; +} + +static void +cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) +{ + struct cmm_page_array *pa; + unsigned long page; + + pa = *list; + while (pages) { + if (!pa || pa->index <= 0) + break; + page = pa->pages[--pa->index]; + if (pa->index == 0) { + pa = pa->next; + free_page((unsigned long) *list); + *list = pa; + } + free_page(page); + (*counter)--; + pages--; + } +} + +static int +cmm_thread(void *dummy) +{ + int rc; + + daemonize("cmmthread"); + while (1) { + rc = wait_event_interruptible(cmm_thread_wait, + (cmm_pages != cmm_pages_target || + cmm_timed_pages != cmm_timed_pages_target)); + if (rc == -ERESTARTSYS) { + /* Got kill signal. End thread. */ + clear_bit(0, &cmm_thread_active); + cmm_pages_target = cmm_pages; + cmm_timed_pages_target = cmm_timed_pages; + break; + } + if (cmm_pages_target > cmm_pages) { + if (cmm_alloc_pages(1, &cmm_pages, &cmm_page_list)) + cmm_pages_target = cmm_pages; + } else if (cmm_pages_target < cmm_pages) { + cmm_free_pages(1, &cmm_pages, &cmm_page_list); + } + if (cmm_timed_pages_target > cmm_timed_pages) { + if (cmm_alloc_pages(1, &cmm_timed_pages, + &cmm_timed_page_list)) + cmm_timed_pages_target = cmm_timed_pages; + } else if (cmm_timed_pages_target < cmm_timed_pages) { + cmm_free_pages(1, &cmm_timed_pages, + &cmm_timed_page_list); + } + if (cmm_timed_pages > 0 && !timer_pending(&cmm_timer)) + cmm_set_timer(); + } + return 0; +} + +static void +cmm_start_thread(void) +{ + kernel_thread(cmm_thread, 0, 0); +} + +static void +cmm_kick_thread(void) +{ + if (!test_and_set_bit(0, &cmm_thread_active)) + schedule_work(&cmm_thread_starter); + wake_up(&cmm_thread_wait); +} + +static void +cmm_set_timer(void) +{ + if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { + if (timer_pending(&cmm_timer)) + del_timer(&cmm_timer); + return; + } + if (timer_pending(&cmm_timer)) { + if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ)) + return; + } + cmm_timer.function = cmm_timer_fn; + cmm_timer.data = 0; + cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ; + add_timer(&cmm_timer); +} + +static void +cmm_timer_fn(unsigned long ignored) +{ + long pages; + + pages = cmm_timed_pages_target - cmm_timeout_pages; + if (pages < 0) + cmm_timed_pages_target = 0; + else + cmm_timed_pages_target = pages; + cmm_kick_thread(); + cmm_set_timer(); +} + +void +cmm_set_pages(long pages) +{ + cmm_pages_target = pages; + cmm_kick_thread(); +} + +long +cmm_get_pages(void) +{ + return cmm_pages; +} + +void +cmm_add_timed_pages(long pages) +{ + cmm_timed_pages_target += pages; + cmm_kick_thread(); +} + +long +cmm_get_timed_pages(void) +{ + return cmm_timed_pages; +} + +void +cmm_set_timeout(long pages, long seconds) +{ + cmm_timeout_pages = pages; + cmm_timeout_seconds = seconds; + cmm_set_timer(); +} + +static inline int +cmm_skip_blanks(char *cp, char **endp) +{ + char *str; + + for (str = cp; *str == ' ' || *str == '\t'; str++); + *endp = str; + return str != cp; +} + +#ifdef CONFIG_CMM_PROC +/* These will someday get removed. */ +#define VM_CMM_PAGES 1111 +#define VM_CMM_TIMED_PAGES 1112 +#define VM_CMM_TIMEOUT 1113 + +static struct ctl_table cmm_table[]; + +static int +cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, + void *buffer, size_t *lenp, loff_t *ppos) +{ + char buf[16], *p; + long pages; + int len; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + len = *lenp; + if (copy_from_user(buf, buffer, + len > sizeof(buf) ? sizeof(buf) : len)) + return -EFAULT; + buf[sizeof(buf) - 1] = '\0'; + cmm_skip_blanks(buf, &p); + pages = cmm_strtoul(p, &p); + if (ctl == &cmm_table[0]) + cmm_set_pages(pages); + else + cmm_add_timed_pages(pages); + } else { + if (ctl == &cmm_table[0]) + pages = cmm_get_pages(); + else + pages = cmm_get_timed_pages(); + len = sprintf(buf, "%ld\n", pages); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + } + *lenp = len; + *ppos += len; + return 0; +} + +static int +cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, + void *buffer, size_t *lenp, loff_t *ppos) +{ + char buf[64], *p; + long pages, seconds; + int len; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + len = *lenp; + if (copy_from_user(buf, buffer, + len > sizeof(buf) ? sizeof(buf) : len)) + return -EFAULT; + buf[sizeof(buf) - 1] = '\0'; + cmm_skip_blanks(buf, &p); + pages = cmm_strtoul(p, &p); + cmm_skip_blanks(p, &p); + seconds = cmm_strtoul(p, &p); + cmm_set_timeout(pages, seconds); + } else { + len = sprintf(buf, "%ld %ld\n", + cmm_timeout_pages, cmm_timeout_seconds); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + } + *lenp = len; + *ppos += len; + return 0; +} + +static struct ctl_table cmm_table[] = { + { + .ctl_name = VM_CMM_PAGES, + .procname = "cmm_pages", + .mode = 0600, + .proc_handler = &cmm_pages_handler, + }, + { + .ctl_name = VM_CMM_TIMED_PAGES, + .procname = "cmm_timed_pages", + .mode = 0600, + .proc_handler = &cmm_pages_handler, + }, + { + .ctl_name = VM_CMM_TIMEOUT, + .procname = "cmm_timeout", + .mode = 0600, + .proc_handler = &cmm_timeout_handler, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table cmm_dir_table[] = { + { + .ctl_name = CTL_VM, + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = cmm_table, + }, + { .ctl_name = 0 } +}; +#endif + +#ifdef CONFIG_CMM_IUCV +#define SMSG_PREFIX "CMM" +static void +cmm_smsg_target(char *msg) +{ + long pages, seconds; + + if (!cmm_skip_blanks(msg + strlen(SMSG_PREFIX), &msg)) + return; + if (strncmp(msg, "SHRINK", 6) == 0) { + if (!cmm_skip_blanks(msg + 6, &msg)) + return; + pages = cmm_strtoul(msg, &msg); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_set_pages(pages); + } else if (strncmp(msg, "RELEASE", 7) == 0) { + if (!cmm_skip_blanks(msg + 7, &msg)) + return; + pages = cmm_strtoul(msg, &msg); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_add_timed_pages(pages); + } else if (strncmp(msg, "REUSE", 5) == 0) { + if (!cmm_skip_blanks(msg + 5, &msg)) + return; + pages = cmm_strtoul(msg, &msg); + if (!cmm_skip_blanks(msg, &msg)) + return; + seconds = cmm_strtoul(msg, &msg); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_set_timeout(pages, seconds); + } +} +#endif + +struct ctl_table_header *cmm_sysctl_header; + +static int +cmm_init (void) +{ +#ifdef CONFIG_CMM_PROC + cmm_sysctl_header = register_sysctl_table(cmm_dir_table, 1); +#endif +#ifdef CONFIG_CMM_IUCV + smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); +#endif + INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, 0); + init_waitqueue_head(&cmm_thread_wait); + init_timer(&cmm_timer); + return 0; +} + +static void +cmm_exit(void) +{ + cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); + cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); +#ifdef CONFIG_CMM_PROC + unregister_sysctl_table(cmm_sysctl_header); +#endif +#ifdef CONFIG_CMM_IUCV + smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); +#endif +} + +module_init(cmm_init); +module_exit(cmm_exit); + +EXPORT_SYMBOL(cmm_set_pages); +EXPORT_SYMBOL(cmm_get_pages); +EXPORT_SYMBOL(cmm_add_timed_pages); +EXPORT_SYMBOL(cmm_get_timed_pages); +EXPORT_SYMBOL(cmm_set_timeout); + +MODULE_LICENSE("GPL"); diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c new file mode 100644 index 00000000000..648deed17e2 --- /dev/null +++ b/arch/s390/mm/extmem.c @@ -0,0 +1,588 @@ +/* + * File...........: arch/s390/mm/extmem.c + * Author(s)......: Carsten Otte <cotte@de.ibm.com> + * Rob M van der Heij <rvdheij@nl.ibm.com> + * Steven Shultz <shultzss@us.ibm.com> + * Bugreports.to..: <Linux390@de.ibm.com> + * (C) IBM Corporation 2002-2004 + */ + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/bootmem.h> +#include <asm/page.h> +#include <asm/ebcdic.h> +#include <asm/errno.h> +#include <asm/extmem.h> +#include <asm/cpcmd.h> +#include <linux/ctype.h> + +#define DCSS_DEBUG /* Debug messages on/off */ + +#define DCSS_NAME "extmem" +#ifdef DCSS_DEBUG +#define PRINT_DEBUG(x...) printk(KERN_DEBUG DCSS_NAME " debug:" x) +#else +#define PRINT_DEBUG(x...) do {} while (0) +#endif +#define PRINT_INFO(x...) printk(KERN_INFO DCSS_NAME " info:" x) +#define PRINT_WARN(x...) printk(KERN_WARNING DCSS_NAME " warning:" x) +#define PRINT_ERR(x...) printk(KERN_ERR DCSS_NAME " error:" x) + + +#define DCSS_LOADSHR 0x00 +#define DCSS_LOADNSR 0x04 +#define DCSS_PURGESEG 0x08 +#define DCSS_FINDSEG 0x0c +#define DCSS_LOADNOLY 0x10 +#define DCSS_SEGEXT 0x18 +#define DCSS_FINDSEGA 0x0c + +struct qrange { + unsigned int start; // 3byte start address, 1 byte type + unsigned int end; // 3byte end address, 1 byte reserved +}; + +struct qout64 { + int segstart; + int segend; + int segcnt; + int segrcnt; + struct qrange range[6]; +}; + +struct qin64 { + char qopcode; + char rsrv1[3]; + char qrcode; + char rsrv2[3]; + char qname[8]; + unsigned int qoutptr; + short int qoutlen; +}; + +struct dcss_segment { + struct list_head list; + char dcss_name[8]; + unsigned long start_addr; + unsigned long end; + atomic_t ref_count; + int do_nonshared; + unsigned int vm_segtype; + struct qrange range[6]; + int segcnt; +}; + +static DEFINE_SPINLOCK(dcss_lock); +static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list); +static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", + "EW/EN-MIXED" }; + +extern struct { + unsigned long addr, size, type; +} memory_chunk[MEMORY_CHUNKS]; + +/* + * Create the 8 bytes, ebcdic VM segment name from + * an ascii name. + */ +static void inline +dcss_mkname(char *name, char *dcss_name) +{ + int i; + + for (i = 0; i < 8; i++) { + if (name[i] == '\0') + break; + dcss_name[i] = toupper(name[i]); + }; + for (; i < 8; i++) + dcss_name[i] = ' '; + ASCEBC(dcss_name, 8); +} + + +/* + * search all segments in dcss_list, and return the one + * namend *name. If not found, return NULL. + */ +static struct dcss_segment * +segment_by_name (char *name) +{ + char dcss_name[9]; + struct list_head *l; + struct dcss_segment *tmp, *retval = NULL; + + assert_spin_locked(&dcss_lock); + dcss_mkname (name, dcss_name); + list_for_each (l, &dcss_list) { + tmp = list_entry (l, struct dcss_segment, list); + if (memcmp(tmp->dcss_name, dcss_name, 8) == 0) { + retval = tmp; + break; + } + } + return retval; +} + + +/* + * Perform a function on a dcss segment. + */ +static inline int +dcss_diag (__u8 func, void *parameter, + unsigned long *ret1, unsigned long *ret2) +{ + unsigned long rx, ry; + int rc; + + rx = (unsigned long) parameter; + ry = (unsigned long) func; + __asm__ __volatile__( +#ifdef CONFIG_ARCH_S390X + " sam31\n" // switch to 31 bit + " diag %0,%1,0x64\n" + " sam64\n" // switch back to 64 bit +#else + " diag %0,%1,0x64\n" +#endif + " ipm %2\n" + " srl %2,28\n" + : "+d" (rx), "+d" (ry), "=d" (rc) : : "cc" ); + *ret1 = rx; + *ret2 = ry; + return rc; +} + +static inline int +dcss_diag_translate_rc (int vm_rc) { + if (vm_rc == 44) + return -ENOENT; + return -EIO; +} + + +/* do a diag to get info about a segment. + * fills start_address, end and vm_segtype fields + */ +static int +query_segment_type (struct dcss_segment *seg) +{ + struct qin64 *qin = kmalloc (sizeof(struct qin64), GFP_DMA); + struct qout64 *qout = kmalloc (sizeof(struct qout64), GFP_DMA); + + int diag_cc, rc, i; + unsigned long dummy, vmrc; + + if ((qin == NULL) || (qout == NULL)) { + rc = -ENOMEM; + goto out_free; + } + + /* initialize diag input parameters */ + qin->qopcode = DCSS_FINDSEGA; + qin->qoutptr = (unsigned long) qout; + qin->qoutlen = sizeof(struct qout64); + memcpy (qin->qname, seg->dcss_name, 8); + + diag_cc = dcss_diag (DCSS_SEGEXT, qin, &dummy, &vmrc); + + if (diag_cc > 1) { + rc = dcss_diag_translate_rc (vmrc); + goto out_free; + } + + if (qout->segcnt > 6) { + rc = -ENOTSUPP; + goto out_free; + } + + if (qout->segcnt == 1) { + seg->vm_segtype = qout->range[0].start & 0xff; + } else { + /* multi-part segment. only one type supported here: + - all parts are contiguous + - all parts are either EW or EN type + - maximum 6 parts allowed */ + unsigned long start = qout->segstart >> PAGE_SHIFT; + for (i=0; i<qout->segcnt; i++) { + if (((qout->range[i].start & 0xff) != SEG_TYPE_EW) && + ((qout->range[i].start & 0xff) != SEG_TYPE_EN)) { + rc = -ENOTSUPP; + goto out_free; + } + if (start != qout->range[i].start >> PAGE_SHIFT) { + rc = -ENOTSUPP; + goto out_free; + } + start = (qout->range[i].end >> PAGE_SHIFT) + 1; + } + seg->vm_segtype = SEG_TYPE_EWEN; + } + + /* analyze diag output and update seg */ + seg->start_addr = qout->segstart; + seg->end = qout->segend; + + memcpy (seg->range, qout->range, 6*sizeof(struct qrange)); + seg->segcnt = qout->segcnt; + + rc = 0; + + out_free: + if (qin) kfree(qin); + if (qout) kfree(qout); + return rc; +} + +/* + * check if the given segment collides with guest storage. + * returns 1 if this is the case, 0 if no collision was found + */ +static int +segment_overlaps_storage(struct dcss_segment *seg) +{ + int i; + + for (i=0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) { + if (memory_chunk[i].type != 0) + continue; + if ((memory_chunk[i].addr >> 20) > (seg->end >> 20)) + continue; + if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20) + < (seg->start_addr >> 20)) + continue; + return 1; + } + return 0; +} + +/* + * check if segment collides with other segments that are currently loaded + * returns 1 if this is the case, 0 if no collision was found + */ +static int +segment_overlaps_others (struct dcss_segment *seg) +{ + struct list_head *l; + struct dcss_segment *tmp; + + assert_spin_locked(&dcss_lock); + list_for_each(l, &dcss_list) { + tmp = list_entry(l, struct dcss_segment, list); + if ((tmp->start_addr >> 20) > (seg->end >> 20)) + continue; + if ((tmp->end >> 20) < (seg->start_addr >> 20)) + continue; + if (seg == tmp) + continue; + return 1; + } + return 0; +} + +/* + * check if segment exceeds the kernel mapping range (detected or set via mem=) + * returns 1 if this is the case, 0 if segment fits into the range + */ +static inline int +segment_exceeds_range (struct dcss_segment *seg) +{ + int seg_last_pfn = (seg->end) >> PAGE_SHIFT; + if (seg_last_pfn > max_pfn) + return 1; + return 0; +} + +/* + * get info about a segment + * possible return values: + * -ENOSYS : we are not running on VM + * -EIO : could not perform query diagnose + * -ENOENT : no such segment + * -ENOTSUPP: multi-part segment cannot be used with linux + * -ENOSPC : segment cannot be used (overlaps with storage) + * -ENOMEM : out of memory + * 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h + */ +int +segment_type (char* name) +{ + int rc; + struct dcss_segment seg; + + if (!MACHINE_IS_VM) + return -ENOSYS; + + dcss_mkname(name, seg.dcss_name); + rc = query_segment_type (&seg); + if (rc < 0) + return rc; + return seg.vm_segtype; +} + +/* + * real segment loading function, called from segment_load + */ +static int +__segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end) +{ + struct dcss_segment *seg = kmalloc(sizeof(struct dcss_segment), + GFP_DMA); + int dcss_command, rc, diag_cc; + + if (seg == NULL) { + rc = -ENOMEM; + goto out; + } + dcss_mkname (name, seg->dcss_name); + rc = query_segment_type (seg); + if (rc < 0) + goto out_free; + if (segment_exceeds_range(seg)) { + PRINT_WARN ("segment_load: not loading segment %s - exceeds" + " kernel mapping range\n",name); + rc = -ERANGE; + goto out_free; + } + if (segment_overlaps_storage(seg)) { + PRINT_WARN ("segment_load: not loading segment %s - overlaps" + " storage\n",name); + rc = -ENOSPC; + goto out_free; + } + if (segment_overlaps_others(seg)) { + PRINT_WARN ("segment_load: not loading segment %s - overlaps" + " other segments\n",name); + rc = -EBUSY; + goto out_free; + } + if (do_nonshared) + dcss_command = DCSS_LOADNSR; + else + dcss_command = DCSS_LOADNOLY; + + diag_cc = dcss_diag(dcss_command, seg->dcss_name, + &seg->start_addr, &seg->end); + if (diag_cc > 1) { + PRINT_WARN ("segment_load: could not load segment %s - " + "diag returned error (%ld)\n",name,seg->end); + rc = dcss_diag_translate_rc (seg->end); + dcss_diag(DCSS_PURGESEG, seg->dcss_name, + &seg->start_addr, &seg->end); + goto out_free; + } + seg->do_nonshared = do_nonshared; + atomic_set(&seg->ref_count, 1); + list_add(&seg->list, &dcss_list); + rc = seg->vm_segtype; + *addr = seg->start_addr; + *end = seg->end; + if (do_nonshared) + PRINT_INFO ("segment_load: loaded segment %s range %p .. %p " + "type %s in non-shared mode\n", name, + (void*)seg->start_addr, (void*)seg->end, + segtype_string[seg->vm_segtype]); + else + PRINT_INFO ("segment_load: loaded segment %s range %p .. %p " + "type %s in shared mode\n", name, + (void*)seg->start_addr, (void*)seg->end, + segtype_string[seg->vm_segtype]); + goto out; + out_free: + kfree (seg); + out: + return rc; +} + +/* + * this function loads a DCSS segment + * name : name of the DCSS + * do_nonshared : 0 indicates that the dcss should be shared with other linux images + * 1 indicates that the dcss should be exclusive for this linux image + * addr : will be filled with start address of the segment + * end : will be filled with end address of the segment + * return values: + * -ENOSYS : we are not running on VM + * -EIO : could not perform query or load diagnose + * -ENOENT : no such segment + * -ENOTSUPP: multi-part segment cannot be used with linux + * -ENOSPC : segment cannot be used (overlaps with storage) + * -EBUSY : segment can temporarily not be used (overlaps with dcss) + * -ERANGE : segment cannot be used (exceeds kernel mapping range) + * -EPERM : segment is currently loaded with incompatible permissions + * -ENOMEM : out of memory + * 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h + */ +int +segment_load (char *name, int do_nonshared, unsigned long *addr, + unsigned long *end) +{ + struct dcss_segment *seg; + int rc; + + if (!MACHINE_IS_VM) + return -ENOSYS; + + spin_lock (&dcss_lock); + seg = segment_by_name (name); + if (seg == NULL) + rc = __segment_load (name, do_nonshared, addr, end); + else { + if (do_nonshared == seg->do_nonshared) { + atomic_inc(&seg->ref_count); + *addr = seg->start_addr; + *end = seg->end; + rc = seg->vm_segtype; + } else { + *addr = *end = 0; + rc = -EPERM; + } + } + spin_unlock (&dcss_lock); + return rc; +} + +/* + * this function modifies the shared state of a DCSS segment. note that + * name : name of the DCSS + * do_nonshared : 0 indicates that the dcss should be shared with other linux images + * 1 indicates that the dcss should be exclusive for this linux image + * return values: + * -EIO : could not perform load diagnose (segment gone!) + * -ENOENT : no such segment (segment gone!) + * -EAGAIN : segment is in use by other exploiters, try later + * -EINVAL : no segment with the given name is currently loaded - name invalid + * 0 : operation succeeded + */ +int +segment_modify_shared (char *name, int do_nonshared) +{ + struct dcss_segment *seg; + unsigned long dummy; + int dcss_command, rc, diag_cc; + + spin_lock (&dcss_lock); + seg = segment_by_name (name); + if (seg == NULL) { + rc = -EINVAL; + goto out_unlock; + } + if (do_nonshared == seg->do_nonshared) { + PRINT_INFO ("segment_modify_shared: not reloading segment %s" + " - already in requested mode\n",name); + rc = 0; + goto out_unlock; + } + if (atomic_read (&seg->ref_count) != 1) { + PRINT_WARN ("segment_modify_shared: not reloading segment %s - " + "segment is in use by other driver(s)\n",name); + rc = -EAGAIN; + goto out_unlock; + } + dcss_diag(DCSS_PURGESEG, seg->dcss_name, + &dummy, &dummy); + if (do_nonshared) + dcss_command = DCSS_LOADNSR; + else + dcss_command = DCSS_LOADNOLY; + diag_cc = dcss_diag(dcss_command, seg->dcss_name, + &seg->start_addr, &seg->end); + if (diag_cc > 1) { + PRINT_WARN ("segment_modify_shared: could not reload segment %s" + " - diag returned error (%ld)\n",name,seg->end); + rc = dcss_diag_translate_rc (seg->end); + goto out_del; + } + seg->do_nonshared = do_nonshared; + rc = 0; + goto out_unlock; + out_del: + list_del(&seg->list); + dcss_diag(DCSS_PURGESEG, seg->dcss_name, + &dummy, &dummy); + kfree (seg); + out_unlock: + spin_unlock(&dcss_lock); + return rc; +} + +/* + * Decrease the use count of a DCSS segment and remove + * it from the address space if nobody is using it + * any longer. + */ +void +segment_unload(char *name) +{ + unsigned long dummy; + struct dcss_segment *seg; + + if (!MACHINE_IS_VM) + return; + + spin_lock(&dcss_lock); + seg = segment_by_name (name); + if (seg == NULL) { + PRINT_ERR ("could not find segment %s in segment_unload, " + "please report to linux390@de.ibm.com\n",name); + goto out_unlock; + } + if (atomic_dec_return(&seg->ref_count) == 0) { + list_del(&seg->list); + dcss_diag(DCSS_PURGESEG, seg->dcss_name, + &dummy, &dummy); + kfree(seg); + } +out_unlock: + spin_unlock(&dcss_lock); +} + +/* + * save segment content permanently + */ +void +segment_save(char *name) +{ + struct dcss_segment *seg; + int startpfn = 0; + int endpfn = 0; + char cmd1[160]; + char cmd2[80]; + int i; + + if (!MACHINE_IS_VM) + return; + + spin_lock(&dcss_lock); + seg = segment_by_name (name); + + if (seg == NULL) { + PRINT_ERR ("could not find segment %s in segment_save, please report to linux390@de.ibm.com\n",name); + return; + } + + startpfn = seg->start_addr >> PAGE_SHIFT; + endpfn = (seg->end) >> PAGE_SHIFT; + sprintf(cmd1, "DEFSEG %s", name); + for (i=0; i<seg->segcnt; i++) { + sprintf(cmd1+strlen(cmd1), " %X-%X %s", + seg->range[i].start >> PAGE_SHIFT, + seg->range[i].end >> PAGE_SHIFT, + segtype_string[seg->range[i].start & 0xff]); + } + sprintf(cmd2, "SAVESEG %s", name); + cpcmd(cmd1, NULL, 0); + cpcmd(cmd2, NULL, 0); + spin_unlock(&dcss_lock); +} + +EXPORT_SYMBOL(segment_load); +EXPORT_SYMBOL(segment_unload); +EXPORT_SYMBOL(segment_save); +EXPORT_SYMBOL(segment_type); +EXPORT_SYMBOL(segment_modify_shared); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c new file mode 100644 index 00000000000..80306bc8c79 --- /dev/null +++ b/arch/s390/mm/fault.c @@ -0,0 +1,586 @@ +/* + * arch/s390/mm/fault.c + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + * Ulrich Weigand (uweigand@de.ibm.com) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1995 Linus Torvalds + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/console.h> +#include <linux/module.h> +#include <linux/hardirq.h> + +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> + +#ifndef CONFIG_ARCH_S390X +#define __FAIL_ADDR_MASK 0x7ffff000 +#define __FIXUP_MASK 0x7fffffff +#define __SUBCODE_MASK 0x0200 +#define __PF_RES_FIELD 0ULL +#else /* CONFIG_ARCH_S390X */ +#define __FAIL_ADDR_MASK -4096L +#define __FIXUP_MASK ~0L +#define __SUBCODE_MASK 0x0600 +#define __PF_RES_FIELD 0x8000000000000000ULL +#endif /* CONFIG_ARCH_S390X */ + +#ifdef CONFIG_SYSCTL +extern int sysctl_userprocess_debug; +#endif + +extern void die(const char *,struct pt_regs *,long); + +extern spinlock_t timerlist_lock; + +/* + * Unlock any spinlocks which will prevent us from getting the + * message out (timerlist_lock is acquired through the + * console unblank code) + */ +void bust_spinlocks(int yes) +{ + if (yes) { + oops_in_progress = 1; + } else { + int loglevel_save = console_loglevel; + console_unblank(); + oops_in_progress = 0; + /* + * OK, the message is on the console. Now we call printk() + * without oops_in_progress set so that printk will give klogd + * a poke. Hold onto your hats... + */ + console_loglevel = 15; + printk(" "); + console_loglevel = loglevel_save; + } +} + +/* + * Check which address space is addressed by the access + * register in S390_lowcore.exc_access_id. + * Returns 1 for user space and 0 for kernel space. + */ +static int __check_access_register(struct pt_regs *regs, int error_code) +{ + int areg = S390_lowcore.exc_access_id; + + if (areg == 0) + /* Access via access register 0 -> kernel address */ + return 0; + save_access_regs(current->thread.acrs); + if (regs && areg < NUM_ACRS && current->thread.acrs[areg] <= 1) + /* + * access register contains 0 -> kernel address, + * access register contains 1 -> user space address + */ + return current->thread.acrs[areg]; + + /* Something unhealthy was done with the access registers... */ + die("page fault via unknown access register", regs, error_code); + do_exit(SIGKILL); + return 0; +} + +/* + * Check which address space the address belongs to. + * Returns 1 for user space and 0 for kernel space. + */ +static inline int check_user_space(struct pt_regs *regs, int error_code) +{ + /* + * The lowest two bits of S390_lowcore.trans_exc_code indicate + * which paging table was used: + * 0: Primary Segment Table Descriptor + * 1: STD determined via access register + * 2: Secondary Segment Table Descriptor + * 3: Home Segment Table Descriptor + */ + int descriptor = S390_lowcore.trans_exc_code & 3; + if (unlikely(descriptor == 1)) + return __check_access_register(regs, error_code); + if (descriptor == 2) + return current->thread.mm_segment.ar4; + return descriptor != 0; +} + +/* + * Send SIGSEGV to task. This is an external routine + * to keep the stack usage of do_page_fault small. + */ +static void do_sigsegv(struct pt_regs *regs, unsigned long error_code, + int si_code, unsigned long address) +{ + struct siginfo si; + +#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG) +#if defined(CONFIG_SYSCTL) + if (sysctl_userprocess_debug) +#endif + { + printk("User process fault: interruption code 0x%lX\n", + error_code); + printk("failing address: %lX\n", address); + show_regs(regs); + } +#endif + si.si_signo = SIGSEGV; + si.si_code = si_code; + si.si_addr = (void *) address; + force_sig_info(SIGSEGV, &si, current); +} + +/* + * This routine handles page faults. It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + * + * error_code: + * 04 Protection -> Write-Protection (suprression) + * 10 Segment translation -> Not present (nullification) + * 11 Page translation -> Not present (nullification) + * 3b Region third trans. -> Not present (nullification) + */ +extern inline void +do_exception(struct pt_regs *regs, unsigned long error_code, int is_protection) +{ + struct task_struct *tsk; + struct mm_struct *mm; + struct vm_area_struct * vma; + unsigned long address; + int user_address; + const struct exception_table_entry *fixup; + int si_code = SEGV_MAPERR; + + tsk = current; + mm = tsk->mm; + + /* + * Check for low-address protection. This needs to be treated + * as a special case because the translation exception code + * field is not guaranteed to contain valid data in this case. + */ + if (is_protection && !(S390_lowcore.trans_exc_code & 4)) { + + /* Low-address protection hit in kernel mode means + NULL pointer write access in kernel mode. */ + if (!(regs->psw.mask & PSW_MASK_PSTATE)) { + address = 0; + user_address = 0; + goto no_context; + } + + /* Low-address protection hit in user mode 'cannot happen'. */ + die ("Low-address protection", regs, error_code); + do_exit(SIGKILL); + } + + /* + * get the failing address + * more specific the segment and page table portion of + * the address + */ + address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK; + user_address = check_user_space(regs, error_code); + + /* + * Verify that the fault happened in user space, that + * we are not in an interrupt and that there is a + * user context. + */ + if (user_address == 0 || in_interrupt() || !mm) + goto no_context; + + /* + * When we get here, the fault happened in the current + * task's user address space, so we can switch on the + * interrupts again and then search the VMAs + */ + local_irq_enable(); + + down_read(&mm->mmap_sem); + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + if (expand_stack(vma, address)) + goto bad_area; +/* + * Ok, we have a good vm_area for this memory access, so + * we can handle it.. + */ +good_area: + si_code = SEGV_ACCERR; + if (!is_protection) { + /* page not present, check vm flags */ + if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) + goto bad_area; + } else { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + } + +survive: + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + switch (handle_mm_fault(mm, vma, address, is_protection)) { + case VM_FAULT_MINOR: + tsk->min_flt++; + break; + case VM_FAULT_MAJOR: + tsk->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + /* + * The instruction that caused the program check will + * be repeated. Don't signal single step via SIGTRAP. + */ + clear_tsk_thread_flag(current, TIF_SINGLE_STEP); + return; + +/* + * Something tried to access memory that isn't in our memory map.. + * Fix it, but check if it's kernel or user first.. + */ +bad_area: + up_read(&mm->mmap_sem); + + /* User mode accesses just cause a SIGSEGV */ + if (regs->psw.mask & PSW_MASK_PSTATE) { + tsk->thread.prot_addr = address; + tsk->thread.trap_no = error_code; + do_sigsegv(regs, error_code, si_code, address); + return; + } + +no_context: + /* Are we prepared to handle this kernel fault? */ + fixup = search_exception_tables(regs->psw.addr & __FIXUP_MASK); + if (fixup) { + regs->psw.addr = fixup->fixup | PSW_ADDR_AMODE; + return; + } + +/* + * Oops. The kernel tried to access some bad page. We'll have to + * terminate things with extreme prejudice. + */ + if (user_address == 0) + printk(KERN_ALERT "Unable to handle kernel pointer dereference" + " at virtual kernel address %p\n", (void *)address); + else + printk(KERN_ALERT "Unable to handle kernel paging request" + " at virtual user address %p\n", (void *)address); + + die("Oops", regs, error_code); + do_exit(SIGKILL); + + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. +*/ +out_of_memory: + up_read(&mm->mmap_sem); + if (tsk->pid == 1) { + yield(); + goto survive; + } + printk("VM: killing process %s\n", tsk->comm); + if (regs->psw.mask & PSW_MASK_PSTATE) + do_exit(SIGKILL); + goto no_context; + +do_sigbus: + up_read(&mm->mmap_sem); + + /* + * Send a sigbus, regardless of whether we were in kernel + * or user mode. + */ + tsk->thread.prot_addr = address; + tsk->thread.trap_no = error_code; + force_sig(SIGBUS, tsk); + + /* Kernel mode? Handle exceptions or die */ + if (!(regs->psw.mask & PSW_MASK_PSTATE)) + goto no_context; +} + +void do_protection_exception(struct pt_regs *regs, unsigned long error_code) +{ + regs->psw.addr -= (error_code >> 16); + do_exception(regs, 4, 1); +} + +void do_dat_exception(struct pt_regs *regs, unsigned long error_code) +{ + do_exception(regs, error_code & 0xff, 0); +} + +#ifndef CONFIG_ARCH_S390X + +typedef struct _pseudo_wait_t { + struct _pseudo_wait_t *next; + wait_queue_head_t queue; + unsigned long address; + int resolved; +} pseudo_wait_t; + +static pseudo_wait_t *pseudo_lock_queue = NULL; +static spinlock_t pseudo_wait_spinlock; /* spinlock to protect lock queue */ + +/* + * This routine handles 'pagex' pseudo page faults. + */ +asmlinkage void +do_pseudo_page_fault(struct pt_regs *regs, unsigned long error_code) +{ + pseudo_wait_t wait_struct; + pseudo_wait_t *ptr, *last, *next; + unsigned long address; + + /* + * get the failing address + * more specific the segment and page table portion of + * the address + */ + address = S390_lowcore.trans_exc_code & 0xfffff000; + + if (address & 0x80000000) { + /* high bit set -> a page has been swapped in by VM */ + address &= 0x7fffffff; + spin_lock(&pseudo_wait_spinlock); + last = NULL; + ptr = pseudo_lock_queue; + while (ptr != NULL) { + next = ptr->next; + if (address == ptr->address) { + /* + * This is one of the processes waiting + * for the page. Unchain from the queue. + * There can be more than one process + * waiting for the same page. VM presents + * an initial and a completion interrupt for + * every process that tries to access a + * page swapped out by VM. + */ + if (last == NULL) + pseudo_lock_queue = next; + else + last->next = next; + /* now wake up the process */ + ptr->resolved = 1; + wake_up(&ptr->queue); + } else + last = ptr; + ptr = next; + } + spin_unlock(&pseudo_wait_spinlock); + } else { + /* Pseudo page faults in kernel mode is a bad idea */ + if (!(regs->psw.mask & PSW_MASK_PSTATE)) { + /* + * VM presents pseudo page faults if the interrupted + * state was not disabled for interrupts. So we can + * get pseudo page fault interrupts while running + * in kernel mode. We simply access the page here + * while we are running disabled. VM will then swap + * in the page synchronously. + */ + if (check_user_space(regs, error_code) == 0) + /* dereference a virtual kernel address */ + __asm__ __volatile__ ( + " ic 0,0(%0)" + : : "a" (address) : "0"); + else + /* dereference a virtual user address */ + __asm__ __volatile__ ( + " la 2,0(%0)\n" + " sacf 512\n" + " ic 2,0(2)\n" + "0:sacf 0\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b,0b\n" + ".previous" + : : "a" (address) : "2" ); + + return; + } + /* initialize and add element to pseudo_lock_queue */ + init_waitqueue_head (&wait_struct.queue); + wait_struct.address = address; + wait_struct.resolved = 0; + spin_lock(&pseudo_wait_spinlock); + wait_struct.next = pseudo_lock_queue; + pseudo_lock_queue = &wait_struct; + spin_unlock(&pseudo_wait_spinlock); + /* + * The instruction that caused the program check will + * be repeated. Don't signal single step via SIGTRAP. + */ + clear_tsk_thread_flag(current, TIF_SINGLE_STEP); + /* go to sleep */ + wait_event(wait_struct.queue, wait_struct.resolved); + } +} +#endif /* CONFIG_ARCH_S390X */ + +#ifdef CONFIG_PFAULT +/* + * 'pfault' pseudo page faults routines. + */ +static int pfault_disable = 0; + +static int __init nopfault(char *str) +{ + pfault_disable = 1; + return 1; +} + +__setup("nopfault", nopfault); + +typedef struct { + __u16 refdiagc; + __u16 reffcode; + __u16 refdwlen; + __u16 refversn; + __u64 refgaddr; + __u64 refselmk; + __u64 refcmpmk; + __u64 reserved; +} __attribute__ ((packed)) pfault_refbk_t; + +int pfault_init(void) +{ + pfault_refbk_t refbk = + { 0x258, 0, 5, 2, __LC_CURRENT, 1ULL << 48, 1ULL << 48, + __PF_RES_FIELD }; + int rc; + + if (pfault_disable) + return -1; + __asm__ __volatile__( + " diag %1,%0,0x258\n" + "0: j 2f\n" + "1: la %0,8\n" + "2:\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" +#ifndef CONFIG_ARCH_S390X + " .long 0b,1b\n" +#else /* CONFIG_ARCH_S390X */ + " .quad 0b,1b\n" +#endif /* CONFIG_ARCH_S390X */ + ".previous" + : "=d" (rc) : "a" (&refbk) : "cc" ); + __ctl_set_bit(0, 9); + return rc; +} + +void pfault_fini(void) +{ + pfault_refbk_t refbk = + { 0x258, 1, 5, 2, 0ULL, 0ULL, 0ULL, 0ULL }; + + if (pfault_disable) + return; + __ctl_clear_bit(0,9); + __asm__ __volatile__( + " diag %0,0,0x258\n" + "0:\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" +#ifndef CONFIG_ARCH_S390X + " .long 0b,0b\n" +#else /* CONFIG_ARCH_S390X */ + " .quad 0b,0b\n" +#endif /* CONFIG_ARCH_S390X */ + ".previous" + : : "a" (&refbk) : "cc" ); +} + +asmlinkage void +pfault_interrupt(struct pt_regs *regs, __u16 error_code) +{ + struct task_struct *tsk; + __u16 subcode; + + /* + * Get the external interruption subcode & pfault + * initial/completion signal bit. VM stores this + * in the 'cpu address' field associated with the + * external interrupt. + */ + subcode = S390_lowcore.cpu_addr; + if ((subcode & 0xff00) != __SUBCODE_MASK) + return; + + /* + * Get the token (= address of the task structure of the affected task). + */ + tsk = *(struct task_struct **) __LC_PFAULT_INTPARM; + + if (subcode & 0x0080) { + /* signal bit is set -> a page has been swapped in by VM */ + if (xchg(&tsk->thread.pfault_wait, -1) != 0) { + /* Initial interrupt was faster than the completion + * interrupt. pfault_wait is valid. Set pfault_wait + * back to zero and wake up the process. This can + * safely be done because the task is still sleeping + * and can't procude new pfaults. */ + tsk->thread.pfault_wait = 0; + wake_up_process(tsk); + } + } else { + /* signal bit not set -> a real page is missing. */ + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (xchg(&tsk->thread.pfault_wait, 1) != 0) { + /* Completion interrupt was faster than the initial + * interrupt (swapped in a -1 for pfault_wait). Set + * pfault_wait back to zero and exit. This can be + * done safely because tsk is running in kernel + * mode and can't produce new pfaults. */ + tsk->thread.pfault_wait = 0; + set_task_state(tsk, TASK_RUNNING); + } else + set_tsk_need_resched(tsk); + } +} +#endif + diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c new file mode 100644 index 00000000000..8e723bc7f79 --- /dev/null +++ b/arch/s390/mm/init.c @@ -0,0 +1,310 @@ +/* + * arch/s390/mm/init.c + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1995 Linus Torvalds + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/pagemap.h> +#include <linux/bootmem.h> + +#include <asm/processor.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/dma.h> +#include <asm/lowcore.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE))); +char empty_zero_page[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); + +void diag10(unsigned long addr) +{ + if (addr >= 0x7ff00000) + return; +#ifdef __s390x__ + asm volatile ( + " sam31\n" + " diag %0,%0,0x10\n" + "0: sam64\n" + ".section __ex_table,\"a\"\n" + " .align 8\n" + " .quad 0b, 0b\n" + ".previous\n" + : : "a" (addr)); +#else + asm volatile ( + " diag %0,%0,0x10\n" + "0:\n" + ".section __ex_table,\"a\"\n" + " .align 4\n" + " .long 0b, 0b\n" + ".previous\n" + : : "a" (addr)); +#endif +} + +void show_mem(void) +{ + int i, total = 0, reserved = 0; + int shared = 0, cached = 0; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + i = max_mapnr; + while (i-- > 0) { + total++; + if (PageReserved(mem_map+i)) + reserved++; + else if (PageSwapCache(mem_map+i)) + cached++; + else if (page_count(mem_map+i)) + shared += page_count(mem_map+i) - 1; + } + printk("%d pages of RAM\n",total); + printk("%d reserved pages\n",reserved); + printk("%d pages shared\n",shared); + printk("%d pages swap cached\n",cached); +} + +/* References to section boundaries */ + +extern unsigned long _text; +extern unsigned long _etext; +extern unsigned long _edata; +extern unsigned long __bss_start; +extern unsigned long _end; + +extern unsigned long __init_begin; +extern unsigned long __init_end; + +/* + * paging_init() sets up the page tables + */ + +#ifndef CONFIG_ARCH_S390X +void __init paging_init(void) +{ + pgd_t * pg_dir; + pte_t * pg_table; + pte_t pte; + int i; + unsigned long tmp; + unsigned long pfn = 0; + unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE; + static const int ssm_mask = 0x04000000L; + + /* unmap whole virtual address space */ + + pg_dir = swapper_pg_dir; + + for (i=0;i<KERNEL_PGD_PTRS;i++) + pmd_clear((pmd_t*)pg_dir++); + + /* + * map whole physical memory to virtual memory (identity mapping) + */ + + pg_dir = swapper_pg_dir; + + while (pfn < max_low_pfn) { + /* + * pg_table is physical at this point + */ + pg_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + + pg_dir->pgd0 = (_PAGE_TABLE | __pa(pg_table)); + pg_dir->pgd1 = (_PAGE_TABLE | (__pa(pg_table)+1024)); + pg_dir->pgd2 = (_PAGE_TABLE | (__pa(pg_table)+2048)); + pg_dir->pgd3 = (_PAGE_TABLE | (__pa(pg_table)+3072)); + pg_dir++; + + for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) { + pte = pfn_pte(pfn, PAGE_KERNEL); + if (pfn >= max_low_pfn) + pte_clear(&init_mm, 0, &pte); + set_pte(pg_table, pte); + pfn++; + } + } + + S390_lowcore.kernel_asce = pgdir_k; + + /* enable virtual mapping in kernel mode */ + __asm__ __volatile__(" LCTL 1,1,%0\n" + " LCTL 7,7,%0\n" + " LCTL 13,13,%0\n" + " SSM %1" + : : "m" (pgdir_k), "m" (ssm_mask)); + + local_flush_tlb(); + + { + unsigned long zones_size[MAX_NR_ZONES] = { 0, 0, 0}; + + zones_size[ZONE_DMA] = max_low_pfn; + free_area_init(zones_size); + } + return; +} + +#else /* CONFIG_ARCH_S390X */ +void __init paging_init(void) +{ + pgd_t * pg_dir; + pmd_t * pm_dir; + pte_t * pt_dir; + pte_t pte; + int i,j,k; + unsigned long pfn = 0; + unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | + _KERN_REGION_TABLE; + static const int ssm_mask = 0x04000000L; + + unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0}; + unsigned long dma_pfn, high_pfn; + + dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT; + high_pfn = max_low_pfn; + + if (dma_pfn > high_pfn) + zones_size[ZONE_DMA] = high_pfn; + else { + zones_size[ZONE_DMA] = dma_pfn; + zones_size[ZONE_NORMAL] = high_pfn - dma_pfn; + } + + /* Initialize mem_map[]. */ + free_area_init(zones_size); + + + /* + * map whole physical memory to virtual memory (identity mapping) + */ + + pg_dir = swapper_pg_dir; + + for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) { + + if (pfn >= max_low_pfn) { + pgd_clear(pg_dir); + continue; + } + + pm_dir = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE*4); + pgd_populate(&init_mm, pg_dir, pm_dir); + + for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) { + if (pfn >= max_low_pfn) { + pmd_clear(pm_dir); + continue; + } + + pt_dir = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pmd_populate_kernel(&init_mm, pm_dir, pt_dir); + + for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) { + pte = pfn_pte(pfn, PAGE_KERNEL); + if (pfn >= max_low_pfn) { + pte_clear(&init_mm, 0, &pte); + continue; + } + set_pte(pt_dir, pte); + pfn++; + } + } + } + + S390_lowcore.kernel_asce = pgdir_k; + + /* enable virtual mapping in kernel mode */ + __asm__ __volatile__("lctlg 1,1,%0\n\t" + "lctlg 7,7,%0\n\t" + "lctlg 13,13,%0\n\t" + "ssm %1" + : :"m" (pgdir_k), "m" (ssm_mask)); + + local_flush_tlb(); + + return; +} +#endif /* CONFIG_ARCH_S390X */ + +void __init mem_init(void) +{ + unsigned long codesize, reservedpages, datasize, initsize; + + max_mapnr = num_physpages = max_low_pfn; + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + + /* clear the zero-page */ + memset(empty_zero_page, 0, PAGE_SIZE); + + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + + reservedpages = 0; + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; + initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin; + printk("Memory: %luk/%luk available (%ldk kernel code, %ldk reserved, %ldk data, %ldk init)\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10), + max_mapnr << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >>10, + initsize >> 10); +} + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)(&__init_begin); + for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk ("Freeing unused kernel memory: %ldk freed\n", + ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif diff --git a/arch/s390/mm/ioremap.c b/arch/s390/mm/ioremap.c new file mode 100644 index 00000000000..c6c39d868bc --- /dev/null +++ b/arch/s390/mm/ioremap.c @@ -0,0 +1,138 @@ +/* + * arch/s390/mm/ioremap.c + * + * S390 version + * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Hartmut Penner (hp@de.ibm.com) + * + * Derived from "arch/i386/mm/extable.c" + * (C) Copyright 1995 1996 Linus Torvalds + * + * Re-map IO memory to kernel address space so that we can access it. + * This is needed for high PCI addresses that aren't mapped in the + * 640k-1MB IO memory area on PC's + */ + +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <asm/io.h> +#include <asm/pgalloc.h> +#include <asm/cacheflush.h> +#include <asm/tlbflush.h> + +static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, + unsigned long phys_addr, unsigned long flags) +{ + unsigned long end; + unsigned long pfn; + + address &= ~PMD_MASK; + end = address + size; + if (end > PMD_SIZE) + end = PMD_SIZE; + if (address >= end) + BUG(); + pfn = phys_addr >> PAGE_SHIFT; + do { + if (!pte_none(*pte)) { + printk("remap_area_pte: page already exists\n"); + BUG(); + } + set_pte(pte, pfn_pte(pfn, __pgprot(flags))); + address += PAGE_SIZE; + pfn++; + pte++; + } while (address && (address < end)); +} + +static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, + unsigned long phys_addr, unsigned long flags) +{ + unsigned long end; + + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + phys_addr -= address; + if (address >= end) + BUG(); + do { + pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); + if (!pte) + return -ENOMEM; + remap_area_pte(pte, address, end - address, address + phys_addr, flags); + address = (address + PMD_SIZE) & PMD_MASK; + pmd++; + } while (address && (address < end)); + return 0; +} + +static int remap_area_pages(unsigned long address, unsigned long phys_addr, + unsigned long size, unsigned long flags) +{ + int error; + pgd_t * dir; + unsigned long end = address + size; + + phys_addr -= address; + dir = pgd_offset(&init_mm, address); + flush_cache_all(); + if (address >= end) + BUG(); + spin_lock(&init_mm.page_table_lock); + do { + pmd_t *pmd; + pmd = pmd_alloc(&init_mm, dir, address); + error = -ENOMEM; + if (!pmd) + break; + if (remap_area_pmd(pmd, address, end - address, + phys_addr + address, flags)) + break; + error = 0; + address = (address + PGDIR_SIZE) & PGDIR_MASK; + dir++; + } while (address && (address < end)); + spin_unlock(&init_mm.page_table_lock); + flush_tlb_all(); + return 0; +} + +/* + * Generic mapping function (not visible outside): + */ + +/* + * Remap an arbitrary physical address space into the kernel virtual + * address space. Needed when the kernel wants to access high addresses + * directly. + */ +void * __ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags) +{ + void * addr; + struct vm_struct * area; + + if (phys_addr < virt_to_phys(high_memory)) + return phys_to_virt(phys_addr); + if (phys_addr & ~PAGE_MASK) + return NULL; + size = PAGE_ALIGN(size); + if (!size || size > phys_addr + size) + return NULL; + area = get_vm_area(size, VM_IOREMAP); + if (!area) + return NULL; + addr = area->addr; + if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { + vfree(addr); + return NULL; + } + return addr; +} + +void iounmap(void *addr) +{ + if (addr > high_memory) + vfree(addr); +} diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c new file mode 100644 index 00000000000..fb187e5a54b --- /dev/null +++ b/arch/s390/mm/mmap.c @@ -0,0 +1,86 @@ +/* + * linux/arch/s390/mm/mmap.c + * + * flexible mmap layout support + * + * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Started by Ingo Molnar <mingo@elte.hu> + */ + +#include <linux/personality.h> +#include <linux/mm.h> +#include <linux/module.h> + +/* + * Top of mmap area (just below the process stack). + * + * Leave an at least ~128 MB hole. + */ +#define MIN_GAP (128*1024*1024) +#define MAX_GAP (TASK_SIZE/6*5) + +static inline unsigned long mmap_base(void) +{ + unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; + + if (gap < MIN_GAP) + gap = MIN_GAP; + else if (gap > MAX_GAP) + gap = MAX_GAP; + + return TASK_SIZE - (gap & PAGE_MASK); +} + +static inline int mmap_is_legacy(void) +{ +#ifdef CONFIG_ARCH_S390X + /* + * Force standard allocation for 64 bit programs. + */ + if (!test_thread_flag(TIF_31BIT)) + return 1; +#endif + return sysctl_legacy_va_layout || + (current->personality & ADDR_COMPAT_LAYOUT) || + current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY; +} + +/* + * This function, called very early during the creation of a new + * process VM image, sets up which VM layout function to use: + */ +void arch_pick_mmap_layout(struct mm_struct *mm) +{ + /* + * Fall back to the standard layout if the personality + * bit is set, or if the expected stack growth is unlimited: + */ + if (mmap_is_legacy()) { + mm->mmap_base = TASK_UNMAPPED_BASE; + mm->get_unmapped_area = arch_get_unmapped_area; + mm->unmap_area = arch_unmap_area; + } else { + mm->mmap_base = mmap_base(); + mm->get_unmapped_area = arch_get_unmapped_area_topdown; + mm->unmap_area = arch_unmap_area_topdown; + } +} +EXPORT_SYMBOL_GPL(arch_pick_mmap_layout); + |