diff options
431 files changed, 42908 insertions, 4762 deletions
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl index 6367bba32d2..582032eea87 100644 --- a/Documentation/DocBook/kernel-hacking.tmpl +++ b/Documentation/DocBook/kernel-hacking.tmpl @@ -1105,7 +1105,7 @@ static struct block_device_operations opt_fops = { </listitem> <listitem> <para> - Function names as strings (__func__). + Function names as strings (__FUNCTION__). </para> </listitem> <listitem> diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt new file mode 100644 index 00000000000..dca274ff400 --- /dev/null +++ b/Documentation/device-mapper/snapshot.txt @@ -0,0 +1,73 @@ +Device-mapper snapshot support +============================== + +Device-mapper allows you, without massive data copying: + +*) To create snapshots of any block device i.e. mountable, saved states of +the block device which are also writable without interfering with the +original content; +*) To create device "forks", i.e. multiple different versions of the +same data stream. + + +In both cases, dm copies only the chunks of data that get changed and +uses a separate copy-on-write (COW) block device for storage. + + +There are two dm targets available: snapshot and snapshot-origin. + +*) snapshot-origin <origin> + +which will normally have one or more snapshots based on it. +You must create the snapshot-origin device before you can create snapshots. +Reads will be mapped directly to the backing device. For each write, the +original data will be saved in the <COW device> of each snapshot to keep +its visible content unchanged, at least until the <COW device> fills up. + + +*) snapshot <origin> <COW device> <persistent?> <chunksize> + +A snapshot is created of the <origin> block device. Changed chunks of +<chunksize> sectors will be stored on the <COW device>. Writes will +only go to the <COW device>. Reads will come from the <COW device> or +from <origin> for unchanged data. <COW device> will often be +smaller than the origin and if it fills up the snapshot will become +useless and be disabled, returning errors. So it is important to monitor +the amount of free space and expand the <COW device> before it fills up. + +<persistent?> is P (Persistent) or N (Not persistent - will not survive +after reboot). + + +How this is used by LVM2 +======================== +When you create the first LVM2 snapshot of a volume, four dm devices are used: + +1) a device containing the original mapping table of the source volume; +2) a device used as the <COW device>; +3) a "snapshot" device, combining #1 and #2, which is the visible snapshot + volume; +4) the "original" volume (which uses the device number used by the original + source volume), whose table is replaced by a "snapshot-origin" mapping + from device #1. + +A fixed naming scheme is used, so with the following commands: + +lvcreate -L 1G -n base volumeGroup +lvcreate -L 100M --snapshot -n snap volumeGroup/base + +we'll have this situation (with volumes in above order): + +# dmsetup table|grep volumeGroup + +volumeGroup-base-real: 0 2097152 linear 8:19 384 +volumeGroup-snap-cow: 0 204800 linear 8:19 2097536 +volumeGroup-snap: 0 2097152 snapshot 254:11 254:12 P 16 +volumeGroup-base: 0 2097152 snapshot-origin 254:11 + +# ls -lL /dev/mapper/volumeGroup-* +brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real +brw------- 1 root root 254, 12 29 ago 18:15 /dev/mapper/volumeGroup-snap-cow +brw------- 1 root root 254, 13 29 ago 18:15 /dev/mapper/volumeGroup-snap +brw------- 1 root root 254, 10 29 ago 18:14 /dev/mapper/volumeGroup-base + diff --git a/Documentation/sparse.txt b/Documentation/sparse.txt index 5df44dc894e..1829009db77 100644 --- a/Documentation/sparse.txt +++ b/Documentation/sparse.txt @@ -51,9 +51,9 @@ or you don't get any checking at all. Where to get sparse ~~~~~~~~~~~~~~~~~~~ -With BK, you can just get it from +With git, you can just get it from - bk://sparse.bkbits.net/sparse + rsync://rsync.kernel.org/pub/scm/devel/sparse/sparse.git and DaveJ has tar-balls at diff --git a/Documentation/usb/URB.txt b/Documentation/usb/URB.txt index d59b95cc6f1..a49e5f2c2b4 100644 --- a/Documentation/usb/URB.txt +++ b/Documentation/usb/URB.txt @@ -1,5 +1,6 @@ Revised: 2000-Dec-05. Again: 2002-Jul-06 +Again: 2005-Sep-19 NOTE: @@ -18,8 +19,8 @@ called USB Request Block, or URB for short. and deliver the data and status back. - Execution of an URB is inherently an asynchronous operation, i.e. the - usb_submit_urb(urb) call returns immediately after it has successfully queued - the requested action. + usb_submit_urb(urb) call returns immediately after it has successfully + queued the requested action. - Transfers for one URB can be canceled with usb_unlink_urb(urb) at any time. @@ -94,8 +95,9 @@ To free an URB, use void usb_free_urb(struct urb *urb) -You may not free an urb that you've submitted, but which hasn't yet been -returned to you in a completion callback. +You may free an urb that you've submitted, but which hasn't yet been +returned to you in a completion callback. It will automatically be +deallocated when it is no longer in use. 1.4. What has to be filled in? @@ -145,30 +147,36 @@ to get seamless ISO streaming. 1.6. How to cancel an already running URB? -For an URB which you've submitted, but which hasn't been returned to -your driver by the host controller, call +There are two ways to cancel an URB you've submitted but which hasn't +been returned to your driver yet. For an asynchronous cancel, call int usb_unlink_urb(struct urb *urb) It removes the urb from the internal list and frees all allocated -HW descriptors. The status is changed to reflect unlinking. After -usb_unlink_urb() returns with that status code, you can free the URB -with usb_free_urb(). +HW descriptors. The status is changed to reflect unlinking. Note +that the URB will not normally have finished when usb_unlink_urb() +returns; you must still wait for the completion handler to be called. -There is also an asynchronous unlink mode. To use this, set the -the URB_ASYNC_UNLINK flag in urb->transfer flags before calling -usb_unlink_urb(). When using async unlinking, the URB will not -normally be unlinked when usb_unlink_urb() returns. Instead, wait -for the completion handler to be called. +To cancel an URB synchronously, call + + void usb_kill_urb(struct urb *urb) + +It does everything usb_unlink_urb does, and in addition it waits +until after the URB has been returned and the completion handler +has finished. It also marks the URB as temporarily unusable, so +that if the completion handler or anyone else tries to resubmit it +they will get a -EPERM error. Thus you can be sure that when +usb_kill_urb() returns, the URB is totally idle. 1.7. What about the completion handler? The handler is of the following type: - typedef void (*usb_complete_t)(struct urb *); + typedef void (*usb_complete_t)(struct urb *, struct pt_regs *) -i.e. it gets just the URB that caused the completion call. +I.e., it gets the URB that caused the completion call, plus the +register values at the time of the corresponding interrupt (if any). In the completion handler, you should have a look at urb->status to detect any USB errors. Since the context parameter is included in the URB, you can pass information to the completion handler. @@ -176,17 +184,11 @@ you can pass information to the completion handler. Note that even when an error (or unlink) is reported, data may have been transferred. That's because USB transfers are packetized; it might take sixteen packets to transfer your 1KByte buffer, and ten of them might -have transferred succesfully before the completion is called. +have transferred succesfully before the completion was called. NOTE: ***** WARNING ***** -Don't use urb->dev field in your completion handler; it's cleared -as part of giving urbs back to drivers. (Addressing an issue with -ownership of periodic URBs, which was otherwise ambiguous.) Instead, -use urb->context to hold all the data your driver needs. - -NOTE: ***** WARNING ***** -Also, NEVER SLEEP IN A COMPLETION HANDLER. These are normally called +NEVER SLEEP IN A COMPLETION HANDLER. These are normally called during hardware interrupt processing. If you can, defer substantial work to a tasklet (bottom half) to keep system latencies low. You'll probably need to use spinlocks to protect data structures you manipulate @@ -229,24 +231,10 @@ ISO data with some other event stream. Interrupt transfers, like isochronous transfers, are periodic, and happen in intervals that are powers of two (1, 2, 4 etc) units. Units are frames for full and low speed devices, and microframes for high speed ones. - -Currently, after you submit one interrupt URB, that urb is owned by the -host controller driver until you cancel it with usb_unlink_urb(). You -may unlink interrupt urbs in their completion handlers, if you need to. - -After a transfer completion is called, the URB is automagically resubmitted. -THIS BEHAVIOR IS EXPECTED TO BE REMOVED!! - -Interrupt transfers may only send (or receive) the "maxpacket" value for -the given interrupt endpoint; if you need more data, you will need to -copy that data out of (or into) another buffer. Similarly, you can't -queue interrupt transfers. -THESE RESTRICTIONS ARE EXPECTED TO BE REMOVED!! - -Note that this automagic resubmission model does make it awkward to use -interrupt OUT transfers. The portable solution involves unlinking those -OUT urbs after the data is transferred, and perhaps submitting a final -URB for a short packet. - The usb_submit_urb() call modifies urb->interval to the implemented interval value that is less than or equal to the requested interval value. + +In Linux 2.6, unlike earlier versions, interrupt URBs are not automagically +restarted when they complete. They end when the completion handler is +called, just like other URBs. If you want an interrupt URB to be restarted, +your completion handler must resubmit it. diff --git a/MAINTAINERS b/MAINTAINERS index dc8f3babcab..7d1dd5bad39 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1063,8 +1063,6 @@ M: wli@holomorphy.com S: Maintained I2C SUBSYSTEM -P: Greg Kroah-Hartman -M: greg@kroah.com P: Jean Delvare M: khali@linux-fr.org L: lm-sensors@lm-sensors.org @@ -1404,6 +1402,18 @@ L: linux-kernel@vger.kernel.org L: fastboot@osdl.org S: Maintained +KPROBES +P: Prasanna S Panchamukhi +M: prasanna@in.ibm.com +P: Ananth N Mavinakayanahalli +M: ananth@in.ibm.com +P: Anil S Keshavamurthy +M: anil.s.keshavamurthy@intel.com +P: David S. Miller +M: davem@davemloft.net +L: linux-kernel@vger.kernel.org +S: Maintained + LANMEDIA WAN CARD DRIVER P: Andrew Stanley-Jones M: asj@lanmedia.com @@ -2266,6 +2276,12 @@ M: kristen.c.accardi@intel.com L: pcihpd-discuss@lists.sourceforge.net S: Maintained +SKGE, SKY2 10/100/1000 GIGABIT ETHERNET DRIVERS +P: Stephen Hemminger +M: shemminger@osdl.org +L: netdev@vger.kernel.org +S: Maintained + SPARC (sparc32): P: William L. Irwin M: wli@holomorphy.com @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 14 -EXTRAVERSION =-rc1 +EXTRAVERSION =-rc2 NAME=Affluent Albatross # *DOCUMENTATION* @@ -149,6 +149,9 @@ CONFIGURING the kernel: "make gconfig" X windows (Gtk) based configuration tool. "make oldconfig" Default all questions based on the contents of your existing ./.config file. + "make silentoldconfig" + Like above, but avoids cluttering the screen + with questions already answered. NOTES on "make config": - having unnecessary drivers will make the kernel bigger, and can @@ -169,9 +172,6 @@ CONFIGURING the kernel: should probably answer 'n' to the questions for "development", "experimental", or "debugging" features. - - Check the top Makefile for further site-dependent configuration - (default SVGA mode etc). - COMPILING the kernel: - Make sure you have gcc 2.95.3 available. @@ -199,6 +199,9 @@ COMPILING the kernel: are installing a new kernel with the same version number as your working kernel, make a backup of your modules directory before you do a "make modules_install". + Alternatively, before compiling, use the kernel config option + "LOCALVERSION" to append a unique suffix to the regular kernel version. + LOCALVERSION can be set in the "General Setup" menu. - In order to boot your new kernel, you'll need to copy the kernel image (e.g. .../linux/arch/i386/boot/bzImage after compilation) diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index fa98dae3cd9..eb20c3afff5 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -127,6 +127,10 @@ common_shutdown_1(void *generic_ptr) /* If booted from SRM, reset some of the original environment. */ if (alpha_using_srm) { #ifdef CONFIG_DUMMY_CONSOLE + /* If we've gotten here after SysRq-b, leave interrupt + context before taking over the console. */ + if (in_interrupt()) + irq_exit(); /* This has the effect of resetting the VGA video origin. */ take_over_console(&dummy_con, 0, MAX_NR_CONSOLES-1, 1); #endif diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index 9e36b07fa94..d5da6b1b28e 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -395,6 +395,22 @@ clipper_init_irq(void) */ static int __init +isa_irq_fixup(struct pci_dev *dev, int irq) +{ + u8 irq8; + + if (irq > 0) + return irq; + + /* This interrupt is routed via ISA bridge, so we'll + just have to trust whatever value the console might + have assigned. */ + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq8); + + return irq8 & 0xf; +} + +static int __init dp264_map_irq(struct pci_dev *dev, u8 slot, u8 pin) { static char irq_tab[6][5] __initdata = { @@ -407,25 +423,13 @@ dp264_map_irq(struct pci_dev *dev, u8 slot, u8 pin) { 16+ 3, 16+ 3, 16+ 2, 16+ 1, 16+ 0} /* IdSel 10 slot 3 */ }; const long min_idsel = 5, max_idsel = 10, irqs_per_slot = 5; - struct pci_controller *hose = dev->sysdata; int irq = COMMON_TABLE_LOOKUP; - if (irq > 0) { + if (irq > 0) irq += 16 * hose->index; - } else { - /* ??? The Contaq IDE controller on the ISA bridge uses - "legacy" interrupts 14 and 15. I don't know if anything - can wind up at the same slot+pin on hose1, so we'll - just have to trust whatever value the console might - have assigned. */ - - u8 irq8; - pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq8); - irq = irq8; - } - return irq; + return isa_irq_fixup(dev, irq); } static int __init @@ -453,7 +457,8 @@ monet_map_irq(struct pci_dev *dev, u8 slot, u8 pin) { 24, 24, 25, 26, 27} /* IdSel 15 slot 5 PCI2*/ }; const long min_idsel = 3, max_idsel = 15, irqs_per_slot = 5; - return COMMON_TABLE_LOOKUP; + + return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); } static u8 __init @@ -507,7 +512,8 @@ webbrick_map_irq(struct pci_dev *dev, u8 slot, u8 pin) { 47, 47, 46, 45, 44}, /* IdSel 17 slot 3 */ }; const long min_idsel = 7, max_idsel = 17, irqs_per_slot = 5; - return COMMON_TABLE_LOOKUP; + + return isa_irq_fixup(dev, COMMON_TABLE_LOOKUP); } static int __init @@ -524,14 +530,13 @@ clipper_map_irq(struct pci_dev *dev, u8 slot, u8 pin) { -1, -1, -1, -1, -1} /* IdSel 7 ISA Bridge */ }; const long min_idsel = 1, max_idsel = 7, irqs_per_slot = 5; - struct pci_controller *hose = dev->sysdata; int irq = COMMON_TABLE_LOOKUP; if (irq > 0) irq += 16 * hose->index; - return irq; + return isa_irq_fixup(dev, irq); } static void __init diff --git a/arch/arm/boot/compressed/ofw-shark.c b/arch/arm/boot/compressed/ofw-shark.c index 7f6f5db0d06..465c54b6b12 100644 --- a/arch/arm/boot/compressed/ofw-shark.c +++ b/arch/arm/boot/compressed/ofw-shark.c @@ -256,5 +256,5 @@ asmlinkage void ofw_init(ofw_handle_t o, int *nomr, int *pointer) temp[11]='\0'; mem_len = OF_getproplen(o,phandle, temp); OF_getprop(o,phandle, temp, buffer, mem_len); - (unsigned char) pointer[32] = ((unsigned char *) buffer)[mem_len-2]; + * ((unsigned char *) &pointer[32]) = ((unsigned char *) buffer)[mem_len-2]; } diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7152bfbee58..93b5e8e5292 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -537,7 +537,7 @@ ENTRY(__switch_to) #ifdef CONFIG_CPU_MPCORE clrex #else - strex r3, r4, [ip] @ Clear exclusive monitor + strex r5, r4, [ip] @ Clear exclusive monitor #endif #endif #if defined(CONFIG_CPU_XSCALE) && !defined(CONFIG_IWMMXT) diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c index 6c20c1188b6..1f6822dfae7 100644 --- a/arch/arm/kernel/io.c +++ b/arch/arm/kernel/io.c @@ -7,7 +7,7 @@ * Copy data from IO memory space to "real" memory space. * This needs to be optimized. */ -void _memcpy_fromio(void *to, void __iomem *from, size_t count) +void _memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { unsigned char *t = to; while (count) { @@ -22,7 +22,7 @@ void _memcpy_fromio(void *to, void __iomem *from, size_t count) * Copy data from "real" memory space to IO memory space. * This needs to be optimized. */ -void _memcpy_toio(void __iomem *to, const void *from, size_t count) +void _memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { const unsigned char *f = from; while (count) { @@ -37,7 +37,7 @@ void _memcpy_toio(void __iomem *to, const void *from, size_t count) * "memset" on IO memory space. * This needs to be optimized. */ -void _memset_io(void __iomem *dst, int c, size_t count) +void _memset_io(volatile void __iomem *dst, int c, size_t count) { while (count) { count--; diff --git a/arch/arm/kernel/semaphore.c b/arch/arm/kernel/semaphore.c index ac423e3e224..4c31f292305 100644 --- a/arch/arm/kernel/semaphore.c +++ b/arch/arm/kernel/semaphore.c @@ -178,7 +178,7 @@ int __down_trylock(struct semaphore * sem) * registers (r0 to r3 and lr), but not ip, as we use it as a return * value in some cases.. */ -asm(" .section .sched.text,\"ax\" \n\ +asm(" .section .sched.text,\"ax\",%progbits \n\ .align 5 \n\ .globl __down_failed \n\ __down_failed: \n\ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 4554c961251..e7d22dbcb69 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -624,6 +624,9 @@ void __attribute__((noreturn)) __bug(const char *file, int line, void *data) printk(" - extra data = %p", data); printk("\n"); *(int *)0 = 0; + + /* Avoid "noreturn function does return" */ + for (;;); } EXPORT_SYMBOL(__bug); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index ad2d66c93a5..08e58ecd44b 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -23,20 +23,20 @@ SECTIONS *(.init.text) _einittext = .; __proc_info_begin = .; - *(.proc.info) + *(.proc.info.init) __proc_info_end = .; __arch_info_begin = .; - *(.arch.info) + *(.arch.info.init) __arch_info_end = .; __tagtable_begin = .; - *(.taglist) + *(.taglist.init) __tagtable_end = .; . = ALIGN(16); __setup_start = .; *(.init.setup) __setup_end = .; __early_begin = .; - *(__early_param) + *(.early_param.init) __early_end = .; __initcall_start = .; *(.initcall1.init) diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index ae1fa099d5f..39b06ed8064 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -123,6 +123,7 @@ static void __init ixdp425_init(void) platform_add_devices(ixdp425_devices, ARRAY_SIZE(ixdp425_devices)); } +#ifdef CONFIG_ARCH_IXDP465 MACHINE_START(IXDP425, "Intel IXDP425 Development Platform") /* Maintainer: MontaVista Software, Inc. */ .phys_ram = PHYS_OFFSET, @@ -134,7 +135,9 @@ MACHINE_START(IXDP425, "Intel IXDP425 Development Platform") .boot_params = 0x0100, .init_machine = ixdp425_init, MACHINE_END +#endif +#ifdef CONFIG_MACH_IXDP465 MACHINE_START(IXDP465, "Intel IXDP465 Development Platform") /* Maintainer: MontaVista Software, Inc. */ .phys_ram = PHYS_OFFSET, @@ -146,7 +149,9 @@ MACHINE_START(IXDP465, "Intel IXDP465 Development Platform") .boot_params = 0x0100, .init_machine = ixdp425_init, MACHINE_END +#endif +#ifdef CONFIG_ARCH_PRPMC1100 MACHINE_START(IXCDP1100, "Intel IXCDP1100 Development Platform") /* Maintainer: MontaVista Software, Inc. */ .phys_ram = PHYS_OFFSET, @@ -158,6 +163,7 @@ MACHINE_START(IXCDP1100, "Intel IXCDP1100 Development Platform") .boot_params = 0x0100, .init_machine = ixdp425_init, MACHINE_END +#endif /* * Avila is functionally equivalent to IXDP425 except that it adds diff --git a/arch/arm/mach-s3c2410/mach-anubis.c b/arch/arm/mach-s3c2410/mach-anubis.c index f87aa0b669a..7c05f27fe1d 100644 --- a/arch/arm/mach-s3c2410/mach-anubis.c +++ b/arch/arm/mach-s3c2410/mach-anubis.c @@ -12,6 +12,7 @@ * * Modifications: * 02-May-2005 BJD Copied from mach-bast.c + * 20-Sep-2005 BJD Added static to non-exported items */ #include <linux/kernel.h> @@ -232,7 +233,7 @@ static struct s3c24xx_board anubis_board __initdata = { .clocks_count = ARRAY_SIZE(anubis_clocks) }; -void __init anubis_map_io(void) +static void __init anubis_map_io(void) { /* initialise the clocks */ diff --git a/arch/arm/mach-s3c2410/mach-bast.c b/arch/arm/mach-s3c2410/mach-bast.c index 1a3367da640..ed1f07d7252 100644 --- a/arch/arm/mach-s3c2410/mach-bast.c +++ b/arch/arm/mach-s3c2410/mach-bast.c @@ -31,6 +31,7 @@ * 17-Jul-2005 BJD Changed to platform device for SuperIO 16550s * 25-Jul-2005 BJD Removed ASIX static mappings * 27-Jul-2005 BJD Ensure maximum frequency of i2c bus + * 20-Sep-2005 BJD Added static to non-exported items */ #include <linux/kernel.h> @@ -428,7 +429,7 @@ static struct s3c24xx_board bast_board __initdata = { .clocks_count = ARRAY_SIZE(bast_clocks) }; -void __init bast_map_io(void) +static void __init bast_map_io(void) { /* initialise the clocks */ diff --git a/arch/arm/mach-s3c2410/mach-h1940.c b/arch/arm/mach-s3c2410/mach-h1940.c index 6ff1889fbd2..fb3cb01266e 100644 --- a/arch/arm/mach-s3c2410/mach-h1940.c +++ b/arch/arm/mach-s3c2410/mach-h1940.c @@ -24,6 +24,7 @@ * 10-Jan-2005 BJD Removed include of s3c2410.h * 14-Jan-2005 BJD Added clock init * 10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA + * 20-Sep-2005 BJD Added static to non-exported items */ #include <linux/kernel.h> @@ -147,7 +148,7 @@ static struct s3c24xx_board h1940_board __initdata = { .devices_count = ARRAY_SIZE(h1940_devices) }; -void __init h1940_map_io(void) +static void __init h1940_map_io(void) { s3c24xx_init_io(h1940_iodesc, ARRAY_SIZE(h1940_iodesc)); s3c24xx_init_clocks(0); @@ -155,13 +156,13 @@ void __init h1940_map_io(void) s3c24xx_set_board(&h1940_board); } -void __init h1940_init_irq(void) +static void __init h1940_init_irq(void) { s3c24xx_init_irq(); } -void __init h1940_init(void) +static void __init h1940_init(void) { set_s3c2410fb_info(&h1940_lcdcfg); } diff --git a/arch/arm/mach-s3c2410/mach-n30.c b/arch/arm/mach-s3c2410/mach-n30.c index 66bf5bb2b3d..5c0f2b091f9 100644 --- a/arch/arm/mach-s3c2410/mach-n30.c +++ b/arch/arm/mach-s3c2410/mach-n30.c @@ -97,7 +97,7 @@ static struct s3c24xx_board n30_board __initdata = { .devices_count = ARRAY_SIZE(n30_devices) }; -void __init n30_map_io(void) +static void __init n30_map_io(void) { s3c24xx_init_io(n30_iodesc, ARRAY_SIZE(n30_iodesc)); s3c24xx_init_clocks(0); @@ -105,14 +105,14 @@ void __init n30_map_io(void) s3c24xx_set_board(&n30_board); } -void __init n30_init_irq(void) +static void __init n30_init_irq(void) { s3c24xx_init_irq(); } /* GPB3 is the line that controls the pull-up for the USB D+ line */ -void __init n30_init(void) +static void __init n30_init(void) { s3c_device_i2c.dev.platform_data = &n30_i2ccfg; diff --git a/arch/arm/mach-s3c2410/mach-nexcoder.c b/arch/arm/mach-s3c2410/mach-nexcoder.c index d24c242414c..c22f8216032 100644 --- a/arch/arm/mach-s3c2410/mach-nexcoder.c +++ b/arch/arm/mach-s3c2410/mach-nexcoder.c @@ -136,7 +136,7 @@ static void __init nexcoder_sensorboard_init(void) s3c2410_gpio_cfgpin(S3C2410_GPF2, S3C2410_GPF2_OUTP); // CAM_GPIO6 => CAM_PWRDN } -void __init nexcoder_map_io(void) +static void __init nexcoder_map_io(void) { s3c24xx_init_io(nexcoder_iodesc, ARRAY_SIZE(nexcoder_iodesc)); s3c24xx_init_clocks(0); diff --git a/arch/arm/mach-s3c2410/mach-otom.c b/arch/arm/mach-s3c2410/mach-otom.c index d901ed492ff..ad1459e402e 100644 --- a/arch/arm/mach-s3c2410/mach-otom.c +++ b/arch/arm/mach-s3c2410/mach-otom.c @@ -105,7 +105,7 @@ static struct s3c24xx_board otom11_board __initdata = { }; -void __init otom11_map_io(void) +static void __init otom11_map_io(void) { s3c24xx_init_io(otom11_iodesc, ARRAY_SIZE(otom11_iodesc)); s3c24xx_init_clocks(0); diff --git a/arch/arm/mach-s3c2410/mach-rx3715.c b/arch/arm/mach-s3c2410/mach-rx3715.c index a73d61c1de4..22d9e070fd6 100644 --- a/arch/arm/mach-s3c2410/mach-rx3715.c +++ b/arch/arm/mach-s3c2410/mach-rx3715.c @@ -16,6 +16,7 @@ * 14-Jan-2005 BJD Added new clock init * 10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA * 14-Mar-2005 BJD Fixed __iomem warnings + * 20-Sep-2005 BJD Added static to non-exported items */ #include <linux/kernel.h> @@ -108,7 +109,7 @@ static struct s3c24xx_board rx3715_board __initdata = { .devices_count = ARRAY_SIZE(rx3715_devices) }; -void __init rx3715_map_io(void) +static void __init rx3715_map_io(void) { s3c24xx_init_io(rx3715_iodesc, ARRAY_SIZE(rx3715_iodesc)); s3c24xx_init_clocks(16934000); @@ -116,7 +117,7 @@ void __init rx3715_map_io(void) s3c24xx_set_board(&rx3715_board); } -void __init rx3715_init_irq(void) +static void __init rx3715_init_irq(void) { s3c24xx_init_irq(); } diff --git a/arch/arm/mach-s3c2410/mach-smdk2410.c b/arch/arm/mach-s3c2410/mach-smdk2410.c index 67e903a700d..2eda55a6b67 100644 --- a/arch/arm/mach-s3c2410/mach-smdk2410.c +++ b/arch/arm/mach-s3c2410/mach-smdk2410.c @@ -28,6 +28,7 @@ * Ben Dooks <ben@simtec.co.uk> * * 10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA + * 20-Sep-2005 BJD Added static to non-exported items * ***********************************************************************/ @@ -97,7 +98,7 @@ static struct s3c24xx_board smdk2410_board __initdata = { .devices_count = ARRAY_SIZE(smdk2410_devices) }; -void __init smdk2410_map_io(void) +static void __init smdk2410_map_io(void) { s3c24xx_init_io(smdk2410_iodesc, ARRAY_SIZE(smdk2410_iodesc)); s3c24xx_init_clocks(0); @@ -105,7 +106,7 @@ void __init smdk2410_map_io(void) s3c24xx_set_board(&smdk2410_board); } -void __init smdk2410_init_irq(void) +static void __init smdk2410_init_irq(void) { s3c24xx_init_irq(); } diff --git a/arch/arm/mach-s3c2410/mach-smdk2440.c b/arch/arm/mach-s3c2410/mach-smdk2440.c index 357522106f6..722ef46b630 100644 --- a/arch/arm/mach-s3c2410/mach-smdk2440.c +++ b/arch/arm/mach-s3c2410/mach-smdk2440.c @@ -18,6 +18,7 @@ * 22-Feb-2005 BJD Updated for 2.6.11-rc5 relesa * 10-Mar-2005 LCVR Replaced S3C2410_VA by S3C24XX_VA * 14-Mar-2005 BJD void __iomem fixes + * 20-Sep-2005 BJD Added static to non-exported items */ #include <linux/kernel.h> @@ -98,7 +99,7 @@ static struct s3c24xx_board smdk2440_board __initdata = { .devices_count = ARRAY_SIZE(smdk2440_devices) }; -void __init smdk2440_map_io(void) +static void __init smdk2440_map_io(void) { s3c24xx_init_io(smdk2440_iodesc, ARRAY_SIZE(smdk2440_iodesc)); s3c24xx_init_clocks(16934400); @@ -106,7 +107,7 @@ void __init smdk2440_map_io(void) s3c24xx_set_board(&smdk2440_board); } -void __init smdk2440_machine_init(void) +static void __init smdk2440_machine_init(void) { /* Configure the LEDs (even if we have no LED support)*/ diff --git a/arch/arm/mach-s3c2410/mach-vr1000.c b/arch/arm/mach-s3c2410/mach-vr1000.c index 8f9ab2893df..663a7f98fc0 100644 --- a/arch/arm/mach-s3c2410/mach-vr1000.c +++ b/arch/arm/mach-s3c2410/mach-vr1000.c @@ -28,6 +28,7 @@ * 10-Mar-2005 LCVR Changed S3C2410_VA to S3C24XX_VA * 14-Mar-2006 BJD void __iomem fixes * 22-Jun-2006 BJD Added DM9000 platform information + * 20-Sep-2005 BJD Added static to non-exported items */ #include <linux/kernel.h> @@ -347,7 +348,7 @@ static void vr1000_power_off(void) s3c2410_gpio_setpin(S3C2410_GPB9, 1); } -void __init vr1000_map_io(void) +static void __init vr1000_map_io(void) { /* initialise clock sources */ diff --git a/arch/arm/mach-sa1100/generic.h b/arch/arm/mach-sa1100/generic.h index 279e3afa3c3..f085d68e568 100644 --- a/arch/arm/mach-sa1100/generic.h +++ b/arch/arm/mach-sa1100/generic.h @@ -39,3 +39,6 @@ extern void sa11x0_set_ssp_data(struct sa11x0_ssp_plat_ops *ops); struct irda_platform_data; void sa11x0_set_irda_data(struct irda_platform_data *irda); + +struct mcp_plat_data; +void sa11x0_set_mcp_data(struct mcp_plat_data *data); diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 0b6c4db44e0..4a884baf3b9 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -233,7 +233,17 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (in_interrupt() || !mm) goto no_context; - down_read(&mm->mmap_sem); + /* + * As per x86, we may deadlock here. However, since the kernel only + * validly references user space from well defined areas of the code, + * we can bug out early if this is from code which shouldn't. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (!user_mode(regs) && !search_exception_tables(regs->ARM_pc)) + goto no_context; + down_read(&mm->mmap_sem); + } + fault = __do_page_fault(mm, addr, fsr, tsk); up_read(&mm->mmap_sem); diff --git a/arch/arm/mm/proc-arm1020.S b/arch/arm/mm/proc-arm1020.S index 1d739d282a4..82ec954e45b 100644 --- a/arch/arm/mm/proc-arm1020.S +++ b/arch/arm/mm/proc-arm1020.S @@ -509,7 +509,7 @@ cpu_arm1020_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm1020_proc_info,#object __arm1020_proc_info: diff --git a/arch/arm/mm/proc-arm1020e.S b/arch/arm/mm/proc-arm1020e.S index 9b725665b5c..7375fe930f7 100644 --- a/arch/arm/mm/proc-arm1020e.S +++ b/arch/arm/mm/proc-arm1020e.S @@ -491,7 +491,7 @@ cpu_arm1020e_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm1020e_proc_info,#object __arm1020e_proc_info: diff --git a/arch/arm/mm/proc-arm1022.S b/arch/arm/mm/proc-arm1022.S index 37b70fa21c7..6ca639094d6 100644 --- a/arch/arm/mm/proc-arm1022.S +++ b/arch/arm/mm/proc-arm1022.S @@ -473,7 +473,7 @@ cpu_arm1022_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm1022_proc_info,#object __arm1022_proc_info: diff --git a/arch/arm/mm/proc-arm1026.S b/arch/arm/mm/proc-arm1026.S index 931b690d1be..10317e4f55d 100644 --- a/arch/arm/mm/proc-arm1026.S +++ b/arch/arm/mm/proc-arm1026.S @@ -469,7 +469,7 @@ cpu_arm1026_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm1026_proc_info,#object __arm1026_proc_info: diff --git a/arch/arm/mm/proc-arm6_7.S b/arch/arm/mm/proc-arm6_7.S index d0f1bbb48f6..8e7e1e70ab0 100644 --- a/arch/arm/mm/proc-arm6_7.S +++ b/arch/arm/mm/proc-arm6_7.S @@ -332,7 +332,7 @@ cpu_arm710_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm6_proc_info, #object __arm6_proc_info: diff --git a/arch/arm/mm/proc-arm720.S b/arch/arm/mm/proc-arm720.S index c69c9de3239..a13e0184d34 100644 --- a/arch/arm/mm/proc-arm720.S +++ b/arch/arm/mm/proc-arm720.S @@ -222,7 +222,7 @@ cpu_arm720_name: * See linux/include/asm-arm/procinfo.h for a definition of this structure. */ - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm710_proc_info, #object __arm710_proc_info: diff --git a/arch/arm/mm/proc-arm920.S b/arch/arm/mm/proc-arm920.S index 0f490a0fcb7..d1651389999 100644 --- a/arch/arm/mm/proc-arm920.S +++ b/arch/arm/mm/proc-arm920.S @@ -452,7 +452,7 @@ cpu_arm920_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm920_proc_info,#object __arm920_proc_info: diff --git a/arch/arm/mm/proc-arm922.S b/arch/arm/mm/proc-arm922.S index 62bc34a139e..23b8ed97f4e 100644 --- a/arch/arm/mm/proc-arm922.S +++ b/arch/arm/mm/proc-arm922.S @@ -456,7 +456,7 @@ cpu_arm922_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm922_proc_info,#object __arm922_proc_info: diff --git a/arch/arm/mm/proc-arm925.S b/arch/arm/mm/proc-arm925.S index ee49aa2ca78..ee95c52db51 100644 --- a/arch/arm/mm/proc-arm925.S +++ b/arch/arm/mm/proc-arm925.S @@ -521,7 +521,7 @@ cpu_arm925_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm925_proc_info,#object __arm925_proc_info: diff --git a/arch/arm/mm/proc-arm926.S b/arch/arm/mm/proc-arm926.S index bb95cc9fed0..7d042dc20c4 100644 --- a/arch/arm/mm/proc-arm926.S +++ b/arch/arm/mm/proc-arm926.S @@ -471,7 +471,7 @@ cpu_arm926_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __arm926_proc_info,#object __arm926_proc_info: diff --git a/arch/arm/mm/proc-sa110.S b/arch/arm/mm/proc-sa110.S index 34f7e7d3f41..bd330c4075a 100644 --- a/arch/arm/mm/proc-sa110.S +++ b/arch/arm/mm/proc-sa110.S @@ -249,7 +249,7 @@ cpu_sa110_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __sa110_proc_info,#object __sa110_proc_info: diff --git a/arch/arm/mm/proc-sa1100.S b/arch/arm/mm/proc-sa1100.S index ca14f80d5ab..91b89124c0d 100644 --- a/arch/arm/mm/proc-sa1100.S +++ b/arch/arm/mm/proc-sa1100.S @@ -280,7 +280,7 @@ cpu_sa1110_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __sa1100_proc_info,#object __sa1100_proc_info: diff --git a/arch/arm/mm/proc-v6.S b/arch/arm/mm/proc-v6.S index eb34823c9db..caf3b19b167 100644 --- a/arch/arm/mm/proc-v6.S +++ b/arch/arm/mm/proc-v6.S @@ -240,7 +240,7 @@ cpu_elf_name: .size cpu_elf_name, . - cpu_elf_name .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr /* * Match any ARMv6 processor core. diff --git a/arch/arm/mm/proc-xscale.S b/arch/arm/mm/proc-xscale.S index b88de270014..861b3594728 100644 --- a/arch/arm/mm/proc-xscale.S +++ b/arch/arm/mm/proc-xscale.S @@ -578,7 +578,7 @@ cpu_pxa270_name: .align - .section ".proc.info", #alloc, #execinstr + .section ".proc.info.init", #alloc, #execinstr .type __80200_proc_info,#object __80200_proc_info: diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c index 56405dbfd73..a18983a3c93 100644 --- a/arch/ia64/hp/sim/simscsi.c +++ b/arch/ia64/hp/sim/simscsi.c @@ -233,6 +233,23 @@ simscsi_readwrite10 (struct scsi_cmnd *sc, int mode) simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512); } +static void simscsi_fillresult(struct scsi_cmnd *sc, char *buf, unsigned len) +{ + + int scatterlen = sc->use_sg; + struct scatterlist *slp; + + if (scatterlen == 0) + memcpy(sc->request_buffer, buf, len); + else for (slp = (struct scatterlist *)sc->buffer; scatterlen-- > 0 && len > 0; slp++) { + unsigned thislen = min(len, slp->length); + + memcpy(page_address(slp->page) + slp->offset, buf, thislen); + slp++; + len -= thislen; + } +} + static int simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) { @@ -240,6 +257,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) char fname[MAX_ROOT_LEN+16]; size_t disk_size; char *buf; + char localbuf[36]; #if DEBUG_SIMSCSI register long sp asm ("sp"); @@ -263,7 +281,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) /* disk doesn't exist... */ break; } - buf = sc->request_buffer; + buf = localbuf; buf[0] = 0; /* magnetic disk */ buf[1] = 0; /* not a removable medium */ buf[2] = 2; /* SCSI-2 compliant device */ @@ -273,6 +291,7 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) buf[6] = 0; /* reserved */ buf[7] = 0; /* various flags */ memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28); + simscsi_fillresult(sc, buf, 36); sc->result = GOOD; break; @@ -304,16 +323,13 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) simscsi_readwrite10(sc, SSC_WRITE); break; - case READ_CAPACITY: if (desc[target_id] < 0 || sc->request_bufflen < 8) { break; } - buf = sc->request_buffer; - + buf = localbuf; disk_size = simscsi_get_disk_size(desc[target_id]); - /* pretend to be a 1GB disk (partition table contains real stuff): */ buf[0] = (disk_size >> 24) & 0xff; buf[1] = (disk_size >> 16) & 0xff; buf[2] = (disk_size >> 8) & 0xff; @@ -323,13 +339,14 @@ simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) buf[5] = 0; buf[6] = 2; buf[7] = 0; + simscsi_fillresult(sc, buf, 8); sc->result = GOOD; break; case MODE_SENSE: case MODE_SENSE_10: /* sd.c uses this to determine whether disk does write-caching. */ - memset(sc->request_buffer, 0, 128); + simscsi_fillresult(sc, (char *)empty_zero_page, sc->request_bufflen); sc->result = GOOD; break; diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S index 499a065f4e6..db32fc1d393 100644 --- a/arch/ia64/kernel/mca_asm.S +++ b/arch/ia64/kernel/mca_asm.S @@ -489,24 +489,27 @@ ia64_state_save: ;; st8 [temp1]=r17,16 // pal_min_state st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT + mov r6=IA64_KR(CURRENT_STACK) + ;; + st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK + st8 [temp2]=r0,16 // prev_task, starts off as NULL mov r6=cr.ifa ;; - st8 [temp1]=r0,16 // prev_task, starts off as NULL - st8 [temp2]=r12,16 // cr.isr + st8 [temp1]=r12,16 // cr.isr + st8 [temp2]=r6,16 // cr.ifa mov r12=cr.itir ;; - st8 [temp1]=r6,16 // cr.ifa - st8 [temp2]=r12,16 // cr.itir + st8 [temp1]=r12,16 // cr.itir + st8 [temp2]=r11,16 // cr.iipa mov r12=cr.iim ;; - st8 [temp1]=r11,16 // cr.iipa - st8 [temp2]=r12,16 // cr.iim - mov r6=cr.iha + st8 [temp1]=r12,16 // cr.iim (p1) mov r12=IA64_MCA_COLD_BOOT (p2) mov r12=IA64_INIT_WARM_BOOT + mov r6=cr.iha ;; - st8 [temp1]=r6,16 // cr.iha - st8 [temp2]=r12 // os_status, default is cold boot + st8 [temp2]=r6,16 // cr.iha + st8 [temp1]=r12 // os_status, default is cold boot mov r6=IA64_MCA_SAME_CONTEXT ;; st8 [temp1]=r6 // context, default is same context @@ -823,9 +826,12 @@ ia64_state_restore: ld8 r12=[temp1],16 // sal_ra ld8 r9=[temp2],16 // sal_gp ;; - ld8 r22=[temp1],24 // pal_min_state, virtual. skip prev_task + ld8 r22=[temp1],16 // pal_min_state, virtual ld8 r21=[temp2],16 // prev_IA64_KR_CURRENT ;; + ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK + ld8 r20=[temp2],16 // prev_task + ;; ld8 temp3=[temp1],16 // cr.isr ld8 temp4=[temp2],16 // cr.ifa ;; @@ -846,6 +852,45 @@ ia64_state_restore: ld8 r8=[temp1] // os_status ld8 r10=[temp2] // context + /* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To + * avoid any dependencies on the algorithm in ia64_switch_to(), just + * purge any existing CURRENT_STACK mapping and insert the new one. + * + * r16 contains prev_IA64_KR_CURRENT_STACK, r21 contains + * prev_IA64_KR_CURRENT, these values may have been changed by the C + * code. Do not use r8, r9, r10, r22, they contain values ready for + * the return to SAL. + */ + + mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r15=r15,IA64_GRANULE_SHIFT + ;; + dep r15=-1,r15,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r15,r18 + ;; + srlz.d + + extr.u r19=r21,61,3 // r21 = prev_IA64_KR_CURRENT + shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region? + or r21=r20,r21 // construct PA | page properties +(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:( + ;; + mov cr.itir=r18 + mov cr.ifa=r21 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d +1: + br.sptk b0 //EndStub////////////////////////////////////////////////////////////////////// @@ -982,6 +1027,7 @@ ia64_set_kernel_registers: add temp4=temp4, temp1 // &struct ia64_sal_os_state.os_gp add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack add r13=temp1, r3 // set current to start of MCA/INIT stack + add r20=temp1, r3 // physical start of MCA/INIT stack ;; ld8 r1=[temp4] // OS GP from SAL OS state ;; @@ -991,7 +1037,35 @@ ia64_set_kernel_registers: ;; mov IA64_KR(CURRENT)=r13 - // FIXME: do I need to wire IA64_KR_CURRENT_STACK and IA64_TR_CURRENT_STACK? + /* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid + * any dependencies on the algorithm in ia64_switch_to(), just purge + * any existing CURRENT_STACK mapping and insert the new one. + */ + + mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r16=r16,IA64_GRANULE_SHIFT + ;; + dep r16=-1,r16,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r16,r18 + ;; + srlz.d + + shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + or r21=r20,r21 // construct PA | page properties + ;; + mov cr.itir=r18 + mov cr.ifa=r13 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d br.sptk b0 diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c index 80f83d6cdbf..f081c60ab20 100644 --- a/arch/ia64/kernel/mca_drv.c +++ b/arch/ia64/kernel/mca_drv.c @@ -56,8 +56,9 @@ static struct page *page_isolate[MAX_PAGE_ISOLATE]; static int num_page_isolate = 0; typedef enum { - ISOLATE_NG = 0, - ISOLATE_OK = 1 + ISOLATE_NG, + ISOLATE_OK, + ISOLATE_NONE } isolate_status_t; /* @@ -74,7 +75,7 @@ static struct { * @paddr: poisoned memory location * * Return value: - * ISOLATE_OK / ISOLATE_NG + * one of isolate_status_t, ISOLATE_OK/NG/NONE. */ static isolate_status_t @@ -85,7 +86,10 @@ mca_page_isolate(unsigned long paddr) /* whether physical address is valid or not */ if (!ia64_phys_addr_valid(paddr)) - return ISOLATE_NG; + return ISOLATE_NONE; + + if (!pfn_valid(paddr)) + return ISOLATE_NONE; /* convert physical address to physical page number */ p = pfn_to_page(paddr>>PAGE_SHIFT); @@ -122,10 +126,15 @@ mca_handler_bh(unsigned long paddr) current->pid, current->comm); spin_lock(&mca_bh_lock); - if (mca_page_isolate(paddr) == ISOLATE_OK) { + switch (mca_page_isolate(paddr)) { + case ISOLATE_OK: printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); - } else { + break; + case ISOLATE_NG: printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr); + break; + default: + break; } spin_unlock(&mca_bh_lock); diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig new file mode 100644 index 00000000000..edfac467b9e --- /dev/null +++ b/arch/powerpc/Kconfig @@ -0,0 +1,861 @@ +# For a description of the syntax of this configuration file, +# see Documentation/kbuild/kconfig-language.txt. +# + +mainmenu "Linux/PowerPC Kernel Configuration" + +config PPC64 + bool "64-bit kernel" + default n + help + This option selects whether a 32-bit or a 64-bit kernel + will be built. + +config PPC32 + bool + default y if !PPC64 + +config 64BIT + bool + default y if PPC64 + +config PPC_MERGE + def_bool y + +config MMU + bool + default y + +config UID16 + bool + +config GENERIC_HARDIRQS + bool + default y + +config RWSEM_GENERIC_SPINLOCK + bool + +config RWSEM_XCHGADD_ALGORITHM + bool + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config PPC + bool + default y + +config EARLY_PRINTK + bool + default y if PPC64 + +config COMPAT + bool + default y if PPC64 + +config SYSVIPC_COMPAT + bool + depends on COMPAT && SYSVIPC + default y + +# All PPC32s use generic nvram driver through ppc_md +config GENERIC_NVRAM + bool + default y if PPC32 + +config SCHED_NO_NO_OMIT_FRAME_POINTER + bool + default y + +config ARCH_MAY_HAVE_PC_FDC + bool + default y + +menu "Processor support" +choice + prompt "Processor Type" + depends on PPC32 + default 6xx + +config 6xx + bool "6xx/7xx/74xx" + select PPC_FPU + help + There are four families of PowerPC chips supported. The more common + types (601, 603, 604, 740, 750, 7400), the Motorola embedded + versions (821, 823, 850, 855, 860, 52xx, 82xx, 83xx), the AMCC + embedded versions (403 and 405) and the high end 64 bit Power + processors (POWER 3, POWER4, and IBM PPC970 also known as G5). + + Unless you are building a kernel for one of the embedded processor + systems, 64 bit IBM RS/6000 or an Apple G5, choose 6xx. + Note that the kernel runs in 32-bit mode even on 64-bit chips. + +config PPC_52xx + bool "Freescale 52xx" + +config PPC_82xx + bool "Freescale 82xx" + +config PPC_83xx + bool "Freescale 83xx" + +config 40x + bool "AMCC 40x" + +config 44x + bool "AMCC 44x" + +config PPC64BRIDGE + select PPC_FPU + bool "POWER3, POWER4 and PPC970 (G5)" + +config 8xx + bool "Freescale 8xx" + +config E200 + bool "Freescale e200" + +config E500 + bool "Freescale e500" +endchoice + +config POWER4_ONLY + bool "Optimize for POWER4" + depends on PPC64 || PPC64BRIDGE + default n + ---help--- + Cause the compiler to optimize for POWER4/POWER5/PPC970 processors. + The resulting binary will not work on POWER3 or RS64 processors + when compiled with binutils 2.15 or later. + +config POWER3 + bool + depends on PPC64 || PPC64BRIDGE + default y if !POWER4_ONLY + +config POWER4 + depends on PPC64 || PPC64BRIDGE + def_bool y + +config PPC_FPU + bool + default y if PPC64 + +config BOOKE + bool + depends on E200 || E500 + default y + +config FSL_BOOKE + bool + depends on E200 || E500 + default y + +config PTE_64BIT + bool + depends on 44x || E500 + default y if 44x + default y if E500 && PHYS_64BIT + +config PHYS_64BIT + bool 'Large physical address support' if E500 + depends on 44x || E500 + default y if 44x + ---help--- + This option enables kernel support for larger than 32-bit physical + addresses. This features is not be available on all e500 cores. + + If in doubt, say N here. + +config ALTIVEC + bool "AltiVec Support" + depends on 6xx || POWER4 + ---help--- + This option enables kernel support for the Altivec extensions to the + PowerPC processor. The kernel currently supports saving and restoring + altivec registers, and turning on the 'altivec enable' bit so user + processes can execute altivec instructions. + + This option is only usefully if you have a processor that supports + altivec (G4, otherwise known as 74xx series), but does not have + any affect on a non-altivec cpu (it does, however add code to the + kernel). + + If in doubt, say Y here. + +config SPE + bool "SPE Support" + depends on E200 || E500 + ---help--- + This option enables kernel support for the Signal Processing + Extensions (SPE) to the PowerPC processor. The kernel currently + supports saving and restoring SPE registers, and turning on the + 'spe enable' bit so user processes can execute SPE instructions. + + This option is only useful if you have a processor that supports + SPE (e500, otherwise known as 85xx series), but does not have any + effect on a non-spe cpu (it does, however add code to the kernel). + + If in doubt, say Y here. + +config PPC_STD_MMU + bool + depends on 6xx || POWER3 || POWER4 || PPC64 + default y + +config PPC_STD_MMU_32 + def_bool y + depends on PPC_STD_MMU && PPC32 + +config SMP + depends on PPC_STD_MMU + bool "Symmetric multi-processing support" + ---help--- + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. Note that the kernel does not currently + support SMP machines with 603/603e/603ev or PPC750 ("G3") processors + since they have inadequate hardware support for multiprocessor + operation. + + If you say N here, the kernel will run on single and multiprocessor + machines, but will use only one CPU of a multiprocessor machine. If + you say Y here, the kernel will run on single-processor machines. + On a single-processor machine, the kernel will run faster if you say + N here. + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-32)" + range 2 128 + depends on SMP + default "32" if PPC64 + default "4" + +config NOT_COHERENT_CACHE + bool + depends on 4xx || 8xx || E200 + default y +endmenu + +source "init/Kconfig" + +menu "Platform support" + depends on PPC64 || 6xx + +choice + prompt "Machine type" + default PPC_MULTIPLATFORM + +config PPC_MULTIPLATFORM + bool "Generic desktop/server/laptop" + help + Select this option if configuring for an IBM pSeries or + RS/6000 machine, an Apple machine, or a PReP, CHRP, + Maple or Cell-based machine. + +config PPC_ISERIES + bool "IBM Legacy iSeries" + depends on PPC64 + +config EMBEDDED6xx + bool "Embedded 6xx/7xx/7xxx-based board" + depends on PPC32 + +config APUS + bool "Amiga-APUS" + depends on PPC32 && BROKEN + help + Select APUS if configuring for a PowerUP Amiga. + More information is available at: + <http://linux-apus.sourceforge.net/>. +endchoice + +config PPC_PSERIES + depends on PPC_MULTIPLATFORM && PPC64 + bool " IBM pSeries & new (POWER5-based) iSeries" + default y + +config PPC_CHRP + bool " Common Hardware Reference Platform (CHRP) based machines" + depends on PPC_MULTIPLATFORM && PPC32 + default y + +config PPC_PMAC + bool " Apple PowerMac based machines" + depends on PPC_MULTIPLATFORM + default y + +config PPC_PMAC64 + bool + depends on PPC_PMAC && POWER4 + default y + +config PPC_PREP + bool " PowerPC Reference Platform (PReP) based machines" + depends on PPC_MULTIPLATFORM && PPC32 + default y + +config PPC_MAPLE + depends on PPC_MULTIPLATFORM && PPC64 + bool " Maple 970FX Evaluation Board" + select U3_DART + select MPIC_BROKEN_U3 + default n + help + This option enables support for the Maple 970FX Evaluation Board. + For more informations, refer to <http://www.970eval.com> + +config PPC_BPA + bool " Broadband Processor Architecture" + depends on PPC_MULTIPLATFORM && PPC64 + +config PPC_OF + bool + depends on PPC_MULTIPLATFORM # for now + default y + +config XICS + depends on PPC_PSERIES + bool + default y + +config U3_DART + bool + depends on PPC_MULTIPLATFORM && PPC64 + default n + +config MPIC + depends on PPC_PSERIES || PPC_PMAC || PPC_MAPLE + bool + default y + +config MPIC_BROKEN_U3 + bool + depends on PPC_MAPLE + default y + +config BPA_IIC + depends on PPC_BPA + bool + default y + +config IBMVIO + depends on PPC_PSERIES || PPC_ISERIES + bool + default y + +source "drivers/cpufreq/Kconfig" + +config CPU_FREQ_PMAC + bool "Support for Apple PowerBooks" + depends on CPU_FREQ && ADB_PMU && PPC32 + select CPU_FREQ_TABLE + help + This adds support for frequency switching on Apple PowerBooks, + this currently includes some models of iBook & Titanium + PowerBook. + +config PPC601_SYNC_FIX + bool "Workarounds for PPC601 bugs" + depends on 6xx && (PPC_PREP || PPC_PMAC) + help + Some versions of the PPC601 (the first PowerPC chip) have bugs which + mean that extra synchronization instructions are required near + certain instructions, typically those that make major changes to the + CPU state. These extra instructions reduce performance slightly. + If you say N here, these extra instructions will not be included, + resulting in a kernel which will run faster but may not run at all + on some systems with the PPC601 chip. + + If in doubt, say Y here. + +config TAU + bool "Thermal Management Support" + depends on 6xx + help + G3 and G4 processors have an on-chip temperature sensor called the + 'Thermal Assist Unit (TAU)', which, in theory, can measure the on-die + temperature within 2-4 degrees Celsius. This option shows the current + on-die temperature in /proc/cpuinfo if the cpu supports it. + + Unfortunately, on some chip revisions, this sensor is very inaccurate + and in some cases, does not work at all, so don't assume the cpu + temp is actually what /proc/cpuinfo says it is. + +config TAU_INT + bool "Interrupt driven TAU driver (DANGEROUS)" + depends on TAU + ---help--- + The TAU supports an interrupt driven mode which causes an interrupt + whenever the temperature goes out of range. This is the fastest way + to get notified the temp has exceeded a range. With this option off, + a timer is used to re-check the temperature periodically. + + However, on some cpus it appears that the TAU interrupt hardware + is buggy and can cause a situation which would lead unexplained hard + lockups. + + Unless you are extending the TAU driver, or enjoy kernel/hardware + debugging, leave this option off. + +config TAU_AVERAGE + bool "Average high and low temp" + depends on TAU + ---help--- + The TAU hardware can compare the temperature to an upper and lower + bound. The default behavior is to show both the upper and lower + bound in /proc/cpuinfo. If the range is large, the temperature is + either changing a lot, or the TAU hardware is broken (likely on some + G4's). If the range is small (around 4 degrees), the temperature is + relatively stable. If you say Y here, a single temperature value, + halfway between the upper and lower bounds, will be reported in + /proc/cpuinfo. + + If in doubt, say N here. +endmenu + +source arch/powerpc/platforms/embedded6xx/Kconfig +source arch/powerpc/platforms/4xx/Kconfig +source arch/powerpc/platforms/85xx/Kconfig +source arch/powerpc/platforms/8xx/Kconfig + +menu "Kernel options" + +config HIGHMEM + bool "High memory support" + depends on PPC32 + +source kernel/Kconfig.hz +source kernel/Kconfig.preempt +source "fs/Kconfig.binfmt" + +# We optimistically allocate largepages from the VM, so make the limit +# large enough (16MB). This badly named config option is actually +# max order + 1 +config FORCE_MAX_ZONEORDER + int + depends on PPC64 + default "13" + +config MATH_EMULATION + bool "Math emulation" + depends on 4xx || 8xx || E200 || E500 + ---help--- + Some PowerPC chips designed for embedded applications do not have + a floating-point unit and therefore do not implement the + floating-point instructions in the PowerPC instruction set. If you + say Y here, the kernel will include code to emulate a floating-point + unit, which will allow programs that use floating-point + instructions to run. + +config IOMMU_VMERGE + bool "Enable IOMMU virtual merging (EXPERIMENTAL)" + depends on EXPERIMENTAL && PPC64 + default n + help + Cause IO segments sent to a device for DMA to be merged virtually + by the IOMMU when they happen to have been allocated contiguously. + This doesn't add pressure to the IOMMU allocator. However, some + drivers don't support getting large merged segments coming back + from *_map_sg(). Say Y if you know the drivers you are using are + properly handling this case. + +config HOTPLUG_CPU + bool "Support for enabling/disabling CPUs" + depends on SMP && HOTPLUG && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) + ---help--- + Say Y here to be able to disable and re-enable individual + CPUs at runtime on SMP machines. + + Say N if you are unsure. + +config KEXEC + bool "kexec system call (EXPERIMENTAL)" + depends on PPC_MULTIPLATFORM && EXPERIMENTAL + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is indepedent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similiarity to the exec system call. + + It is an ongoing process to be certain the hardware in a machine + is properly shutdown, so do not be surprised if this code does not + initially work for you. It may help to enable device hotplugging + support. As of this writing the exact hardware interface is + strongly in flux, so no good recommendation can be made. + +config EMBEDDEDBOOT + bool + depends on 8xx || 8260 + default y + +config PC_KEYBOARD + bool "PC PS/2 style Keyboard" + depends on 4xx || CPM2 + +config PPCBUG_NVRAM + bool "Enable reading PPCBUG NVRAM during boot" if PPLUS || LOPEC + default y if PPC_PREP + +config IRQ_ALL_CPUS + bool "Distribute interrupts on all CPUs by default" + depends on SMP && !MV64360 + help + This option gives the kernel permission to distribute IRQs across + multiple CPUs. Saying N here will route all IRQs to the first + CPU. Generally saying Y is safe, although some problems have been + reported with SMP Power Macintoshes with this option enabled. + +source "arch/powerpc/platforms/pseries/Kconfig" + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + depends on PPC64 + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on PPC64 && !NUMA + +config ARCH_DISCONTIGMEM_ENABLE + def_bool y + depends on SMP && PPC_PSERIES + +config ARCH_DISCONTIGMEM_DEFAULT + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +config ARCH_FLATMEM_ENABLE + def_bool y + depends on PPC64 + +config ARCH_SPARSEMEM_ENABLE + def_bool y + depends on ARCH_DISCONTIGMEM_ENABLE + +source "mm/Kconfig" + +config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool y + depends on NEED_MULTIPLE_NODES + +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. +# +# This is a relatively temporary hack that should +# be able to go away when sparsemem is fully in +# place + +config NODES_SPAN_OTHER_NODES + def_bool y + depends on NEED_MULTIPLE_NODES + +config NUMA + bool "NUMA support" + default y if DISCONTIGMEM || SPARSEMEM + +config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on PPC64 && SMP + default off + help + SMT scheduler support improves the CPU scheduler's decision making + when dealing with POWER5 cpus at a cost of slightly increased + overhead in some places. If unsure say N here. + +config PROC_DEVICETREE + bool "Support for Open Firmware device tree in /proc" + depends on PPC_OF && PROC_FS + help + This option adds a device-tree directory under /proc which contains + an image of the device tree that the kernel copies from Open + Firmware. If unsure, say Y here. + +source "arch/powerpc/platforms/prep/Kconfig" + +config CMDLINE_BOOL + bool "Default bootloader kernel arguments" + depends on !PPC_ISERIES + +config CMDLINE + string "Initial kernel command string" + depends on CMDLINE_BOOL + default "console=ttyS0,9600 console=tty0 root=/dev/sda2" + help + On some platforms, there is currently no way for the boot loader to + pass arguments to the kernel. For these platforms, you can supply + some command-line options at build time by entering them here. In + most cases you will need to specify the root device here. + +if !44x || BROKEN +source kernel/power/Kconfig +endif + +config SECCOMP + bool "Enable seccomp to safely compute untrusted bytecode" + depends on PROC_FS + default y + help + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to + the process as file descriptors supporting the read/write + syscalls, it's possible to isolate those applications in + their own address space using seccomp. Once seccomp is + enabled via /proc/<pid>/seccomp, it cannot be disabled + and the task is only allowed to execute a few safe syscalls + defined by each seccomp mode. + + If unsure, say Y. Only embedded should say N here. + +endmenu + +config ISA_DMA_API + bool + default y + +menu "Bus options" + +config ISA + bool "Support for ISA-bus hardware" + depends on PPC_PREP || PPC_CHRP + help + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. If you have an Apple machine, say N here; if you + have an IBM RS/6000 or pSeries machine or a PReP machine, say Y. If + you have an embedded board, consult your board documentation. + +config GENERIC_ISA_DMA + bool + depends on PPC64 || POWER4 || 6xx && !CPM2 + default y + +config EISA + bool + +config SBUS + bool + +# Yes MCA RS/6000s exist but Linux-PPC does not currently support any +config MCA + bool + +config PCI + bool "PCI support" if 40x || CPM2 || 83xx || 85xx || PPC_MPC52xx || (EMBEDDED && PPC_ISERIES) + default y if !40x && !CPM2 && !8xx && !APUS && !83xx && !85xx + default PCI_PERMEDIA if !4xx && !CPM2 && !8xx && APUS + default PCI_QSPAN if !4xx && !CPM2 && 8xx + help + Find out whether your system includes a PCI bus. PCI is the name of + a bus system, i.e. the way the CPU talks to the other stuff inside + your box. If you say Y here, the kernel will include drivers and + infrastructure code to support PCI bus devices. + +config PCI_DOMAINS + bool + default PCI + +config MPC83xx_PCI2 + bool " Supprt for 2nd PCI host controller" + depends on PCI && MPC834x + default y if MPC834x_SYS + +config PCI_QSPAN + bool "QSpan PCI" + depends on !4xx && !CPM2 && 8xx + help + Say Y here if you have a system based on a Motorola 8xx-series + embedded processor with a QSPAN PCI interface, otherwise say N. + +config PCI_8260 + bool + depends on PCI && 8260 + default y + +config 8260_PCI9 + bool " Enable workaround for MPC826x erratum PCI 9" + depends on PCI_8260 && !ADS8272 + default y + +choice + prompt " IDMA channel for PCI 9 workaround" + depends on 8260_PCI9 + +config 8260_PCI9_IDMA1 + bool "IDMA1" + +config 8260_PCI9_IDMA2 + bool "IDMA2" + +config 8260_PCI9_IDMA3 + bool "IDMA3" + +config 8260_PCI9_IDMA4 + bool "IDMA4" + +endchoice + +source "drivers/pci/Kconfig" + +source "drivers/pcmcia/Kconfig" + +source "drivers/pci/hotplug/Kconfig" + +endmenu + +menu "Advanced setup" + depends on PPC32 + +config ADVANCED_OPTIONS + bool "Prompt for advanced kernel configuration options" + help + This option will enable prompting for a variety of advanced kernel + configuration options. These options can cause the kernel to not + work if they are set incorrectly, but can be used to optimize certain + aspects of kernel memory management. + + Unless you know what you are doing, say N here. + +comment "Default settings for advanced configuration options are used" + depends on !ADVANCED_OPTIONS + +config HIGHMEM_START_BOOL + bool "Set high memory pool address" + depends on ADVANCED_OPTIONS && HIGHMEM + help + This option allows you to set the base address of the kernel virtual + area used to map high memory pages. This can be useful in + optimizing the layout of kernel virtual memory. + + Say N here unless you know what you are doing. + +config HIGHMEM_START + hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL + default "0xfe000000" + +config LOWMEM_SIZE_BOOL + bool "Set maximum low memory" + depends on ADVANCED_OPTIONS + help + This option allows you to set the maximum amount of memory which + will be used as "low memory", that is, memory which the kernel can + access directly, without having to set up a kernel virtual mapping. + This can be useful in optimizing the layout of kernel virtual + memory. + + Say N here unless you know what you are doing. + +config LOWMEM_SIZE + hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL + default "0x30000000" + +config KERNEL_START_BOOL + bool "Set custom kernel base address" + depends on ADVANCED_OPTIONS + help + This option allows you to set the kernel virtual address at which + the kernel will map low memory (the kernel image will be linked at + this address). This can be useful in optimizing the virtual memory + layout of the system. + + Say N here unless you know what you are doing. + +config KERNEL_START + hex "Virtual address of kernel base" if KERNEL_START_BOOL + default "0xc0000000" + +config TASK_SIZE_BOOL + bool "Set custom user task size" + depends on ADVANCED_OPTIONS + help + This option allows you to set the amount of virtual address space + allocated to user tasks. This can be useful in optimizing the + virtual memory layout of the system. + + Say N here unless you know what you are doing. + +config TASK_SIZE + hex "Size of user task space" if TASK_SIZE_BOOL + default "0x80000000" + +config CONSISTENT_START_BOOL + bool "Set custom consistent memory pool address" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the base virtual address + of the the consistent memory pool. This pool of virtual + memory is used to make consistent memory allocations. + +config CONSISTENT_START + hex "Base virtual address of consistent memory pool" if CONSISTENT_START_BOOL + default "0xff100000" if NOT_COHERENT_CACHE + +config CONSISTENT_SIZE_BOOL + bool "Set custom consistent memory pool size" + depends on ADVANCED_OPTIONS && NOT_COHERENT_CACHE + help + This option allows you to set the size of the the + consistent memory pool. This pool of virtual memory + is used to make consistent memory allocations. + +config CONSISTENT_SIZE + hex "Size of consistent memory pool" if CONSISTENT_SIZE_BOOL + default "0x00200000" if NOT_COHERENT_CACHE + +config BOOT_LOAD_BOOL + bool "Set the boot link/load address" + depends on ADVANCED_OPTIONS && !PPC_MULTIPLATFORM + help + This option allows you to set the initial load address of the zImage + or zImage.initrd file. This can be useful if you are on a board + which has a small amount of memory. + + Say N here unless you know what you are doing. + +config BOOT_LOAD + hex "Link/load address for booting" if BOOT_LOAD_BOOL + default "0x00400000" if 40x || 8xx || 8260 + default "0x01000000" if 44x + default "0x00800000" + +config PIN_TLB + bool "Pinned Kernel TLBs (860 ONLY)" + depends on ADVANCED_OPTIONS && 8xx +endmenu + +source "net/Kconfig" + +source "drivers/Kconfig" + +source "fs/Kconfig" + +# XXX source "arch/ppc/8xx_io/Kconfig" + +# XXX source "arch/ppc/8260_io/Kconfig" + +source "arch/powerpc/platforms/iseries/Kconfig" + +source "lib/Kconfig" + +source "arch/powerpc/oprofile/Kconfig" + +source "arch/powerpc/Kconfig.debug" + +source "security/Kconfig" + +config KEYS_COMPAT + bool + depends on COMPAT && KEYS + default y + +source "crypto/Kconfig" diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug new file mode 100644 index 00000000000..61653cb60c4 --- /dev/null +++ b/arch/powerpc/Kconfig.debug @@ -0,0 +1,73 @@ +menu "Kernel hacking" + +source "lib/Kconfig.debug" + +config KGDB + bool "Include kgdb kernel debugger" + depends on DEBUG_KERNEL && (BROKEN || PPC_GEN550 || 4xx) + select DEBUG_INFO + help + Include in-kernel hooks for kgdb, the Linux kernel source level + debugger. See <http://kgdb.sourceforge.net/> for more information. + Unless you are intending to debug the kernel, say N here. + +choice + prompt "Serial Port" + depends on KGDB + default KGDB_TTYS1 + +config KGDB_TTYS0 + bool "ttyS0" + +config KGDB_TTYS1 + bool "ttyS1" + +config KGDB_TTYS2 + bool "ttyS2" + +config KGDB_TTYS3 + bool "ttyS3" + +endchoice + +config KGDB_CONSOLE + bool "Enable serial console thru kgdb port" + depends on KGDB && 8xx || CPM2 + help + If you enable this, all serial console messages will be sent + over the gdb stub. + If unsure, say N. + +config XMON + bool "Include xmon kernel debugger" + depends on DEBUG_KERNEL + help + Include in-kernel hooks for the xmon kernel monitor/debugger. + Unless you are intending to debug the kernel, say N here. + +config BDI_SWITCH + bool "Include BDI-2000 user context switcher" + depends on DEBUG_KERNEL + help + Include in-kernel support for the Abatron BDI2000 debugger. + Unless you are intending to debug the kernel with one of these + machines, say N here. + +config BOOTX_TEXT + bool "Support for early boot text console (BootX or OpenFirmware only)" + depends PPC_OF + help + Say Y here to see progress messages from the boot firmware in text + mode. Requires either BootX or Open Firmware. + +config SERIAL_TEXT_DEBUG + bool "Support for early boot texts over serial port" + depends on 4xx || LOPEC || MV64X60 || PPLUS || PRPMC800 || \ + PPC_GEN550 || PPC_MPC52xx + +config PPC_OCP + bool + depends on IBM_OCP || XILINX_OCP + default y + +endmenu diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile new file mode 100644 index 00000000000..8a65e112211 --- /dev/null +++ b/arch/powerpc/Makefile @@ -0,0 +1,222 @@ +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. Remember to do have actions +# for "archclean" and "archdep" for cleaning up and making dependencies for +# this architecture. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1994 by Linus Torvalds +# Changes for PPC by Gary Thomas +# Rewritten by Cort Dougan and Paul Mackerras +# + +# This must match PAGE_OFFSET in include/asm-powerpc/page.h. +KERNELLOAD := $(CONFIG_KERNEL_START) + +HAS_BIARCH := $(call cc-option-yn, -m32) + +ifeq ($(CONFIG_PPC64),y) +SZ := 64 + +# Set default 32 bits cross compilers for vdso and boot wrapper +CROSS32_COMPILE ?= + +CROSS32CC := $(CROSS32_COMPILE)gcc +CROSS32AS := $(CROSS32_COMPILE)as +CROSS32LD := $(CROSS32_COMPILE)ld +CROSS32OBJCOPY := $(CROSS32_COMPILE)objcopy + +ifeq ($(HAS_BIARCH),y) +ifeq ($(CROSS32_COMPILE),) +CROSS32CC := $(CC) -m32 +CROSS32AS := $(AS) -a32 +CROSS32LD := $(LD) -m elf32ppc +CROSS32OBJCOPY := $(OBJCOPY) +endif +endif + +export CROSS32CC CROSS32AS CROSS32LD CROSS32OBJCOPY + +new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi) + +ifeq ($(new_nm),y) +NM := $(NM) --synthetic +endif + +else +SZ := 32 +endif + +ifeq ($(HAS_BIARCH),y) +override AS += -a$(SZ) +override LD += -m elf$(SZ)ppc +override CC += -m$(SZ) +endif + +LDFLAGS_vmlinux := -Ttext $(KERNELLOAD) -Bstatic -e $(KERNELLOAD) + +# The -Iarch/$(ARCH)/include is temporary while we are merging +CPPFLAGS += -Iarch/$(ARCH) -Iarch/$(ARCH)/include +AFLAGS += -Iarch/$(ARCH) +CFLAGS += -Iarch/$(ARCH) -msoft-float -pipe +ifeq ($(CONFIG_PPC64),y) +CFLAGS += -mminimal-toc -mtraceback=none -mcall-aixdesc +else +CFLAGS += -ffixed-r2 -mmultiple +endif +CPP = $(CC) -E $(CFLAGS) +# Temporary hack until we have migrated to asm-powerpc +LINUXINCLUDE += -Iarch/$(ARCH)/include + +CHECKFLAGS += -m$(SZ) -D__powerpc__ -D__powerpc$(SZ)__ + +ifeq ($(CONFIG_PPC64),y) +GCC_VERSION := $(call cc-version) +GCC_BROKEN_VEC := $(shell if [ $(GCC_VERSION) -lt 0400 ] ; then echo "y"; fi) + +ifeq ($(CONFIG_POWER4_ONLY),y) +ifeq ($(CONFIG_ALTIVEC),y) +ifeq ($(GCC_BROKEN_VEC),y) + CFLAGS += $(call cc-option,-mcpu=970) +else + CFLAGS += $(call cc-option,-mcpu=power4) +endif +else + CFLAGS += $(call cc-option,-mcpu=power4) +endif +else + CFLAGS += $(call cc-option,-mtune=power4) +endif +endif + +# Enable unit-at-a-time mode when possible. It shrinks the +# kernel considerably. +CFLAGS += $(call cc-option,-funit-at-a-time) + +ifndef CONFIG_FSL_BOOKE +CFLAGS += -mstring +endif + +cpu-as-$(CONFIG_PPC64BRIDGE) += -Wa,-mppc64bridge +cpu-as-$(CONFIG_4xx) += -Wa,-m405 +cpu-as-$(CONFIG_6xx) += -Wa,-maltivec +cpu-as-$(CONFIG_POWER4) += -Wa,-maltivec +cpu-as-$(CONFIG_E500) += -Wa,-me500 +cpu-as-$(CONFIG_E200) += -Wa,-me200 + +AFLAGS += $(cpu-as-y) +CFLAGS += $(cpu-as-y) + +# Default to the common case. +KBUILD_DEFCONFIG := common_defconfig + +head-y := arch/powerpc/kernel/head.o +head-$(CONFIG_PPC64) := arch/powerpc/kernel/head_64.o +head-$(CONFIG_8xx) := arch/powerpc/kernel/head_8xx.o +head-$(CONFIG_4xx) := arch/powerpc/kernel/head_4xx.o +head-$(CONFIG_44x) := arch/powerpc/kernel/head_44x.o +head-$(CONFIG_FSL_BOOKE) := arch/powerpc/kernel/head_fsl_booke.o + +ifeq ($(CONFIG_PPC32),y) +head-$(CONFIG_6xx) += arch/powerpc/kernel/idle_6xx.o +head-$(CONFIG_POWER4) += arch/powerpc/kernel/idle_power4.o +head-$(CONFIG_PPC_FPU) += arch/powerpc/kernel/fpu.o +endif + +core-y += arch/powerpc/kernel/ \ + arch/powerpc/mm/ \ + arch/powerpc/lib/ \ + arch/powerpc/sysdev/ +core-$(CONFIG_PPC32) += arch/ppc/kernel/ \ + arch/ppc/syslib/ +core-$(CONFIG_PPC64) += arch/ppc64/kernel/ +core-$(CONFIG_PPC_PMAC) += arch/powerpc/platforms/powermac/ +core-$(CONFIG_4xx) += arch/ppc/platforms/4xx/ +core-$(CONFIG_83xx) += arch/ppc/platforms/83xx/ +core-$(CONFIG_85xx) += arch/ppc/platforms/85xx/ +core-$(CONFIG_MATH_EMULATION) += arch/ppc/math-emu/ +core-$(CONFIG_XMON) += arch/powerpc/xmon/ +core-$(CONFIG_APUS) += arch/ppc/amiga/ +drivers-$(CONFIG_8xx) += arch/ppc/8xx_io/ +drivers-$(CONFIG_4xx) += arch/ppc/4xx_io/ +drivers-$(CONFIG_CPM2) += arch/ppc/8260_io/ + +drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ + +BOOT_TARGETS = zImage zImage.initrd znetboot znetboot.initrd vmlinux.sm + +.PHONY: $(BOOT_TARGETS) + +all: uImage zImage + +CPPFLAGS_vmlinux.lds := -Upowerpc + +# All the instructions talk about "make bzImage". +bzImage: zImage + +boot := arch/$(ARCH)/boot + +$(BOOT_TARGETS): vmlinux + $(Q)$(MAKE) $(build)=$(boot) $@ + +uImage: vmlinux + $(Q)$(MAKE) $(build)=$(boot)/images $(boot)/images/$@ + +define archhelp + @echo '* zImage - Compressed kernel image (arch/$(ARCH)/boot/images/zImage.*)' + @echo ' uImage - Create a bootable image for U-Boot / PPCBoot' + @echo ' install - Install kernel using' + @echo ' (your) ~/bin/installkernel or' + @echo ' (distribution) /sbin/installkernel or' + @echo ' install to $$(INSTALL_PATH) and run lilo' + @echo ' *_defconfig - Select default config from arch/$(ARCH)/ppc/configs' +endef + +archclean: + $(Q)$(MAKE) $(clean)=arch/ppc/boot + # Temporary hack until we have migrated to asm-powerpc + $(Q)rm -rf arch/$(ARCH)/include + +archprepare: checkbin + +# Temporary hack until we have migrated to asm-powerpc +ifeq ($(CONFIG_PPC64),y) +include/asm: arch/$(ARCH)/include/asm +arch/$(ARCH)/include/asm: + $(Q)if [ ! -d arch/$(ARCH)/include ]; then mkdir -p arch/$(ARCH)/include; fi + $(Q)ln -fsn $(srctree)/include/asm-ppc64 arch/$(ARCH)/include/asm +else +include/asm: arch/$(ARCH)/include/asm +arch/$(ARCH)/include/asm: + $(Q)if [ ! -d arch/$(ARCH)/include ]; then mkdir -p arch/$(ARCH)/include; fi + $(Q)ln -fsn $(srctree)/include/asm-ppc arch/$(ARCH)/include/asm +endif + +# Use the file '.tmp_gas_check' for binutils tests, as gas won't output +# to stdout and these checks are run even on install targets. +TOUT := .tmp_gas_check +# Ensure this is binutils 2.12.1 (or 2.12.90.0.7) or later for altivec +# instructions. +# gcc-3.4 and binutils-2.14 are a fatal combination. +GCC_VERSION := $(call cc-version) + +checkbin: + @if test "$(GCC_VERSION)" = "0304" ; then \ + if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \ + echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \ + echo 'correctly with gcc-3.4 and your version of binutils.'; \ + echo '*** Please upgrade your binutils or downgrade your gcc'; \ + false; \ + fi ; \ + fi + @if ! /bin/echo dssall | $(AS) -many -o $(TOUT) >/dev/null 2>&1 ; then \ + echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build ' ; \ + echo 'correctly with old versions of binutils.' ; \ + echo '*** Please upgrade your binutils to 2.12.1 or newer' ; \ + false ; \ + fi + +CLEAN_FILES += $(TOUT) + diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile new file mode 100644 index 00000000000..62c4a51a23d --- /dev/null +++ b/arch/powerpc/kernel/Makefile @@ -0,0 +1,18 @@ +# +# Makefile for the linux kernel. +# + +extra-$(CONFIG_PPC_STD_MMU) := head.o +extra_$(CONFIG_PPC64) := head_64.o +extra-$(CONFIG_40x) := head_4xx.o +extra-$(CONFIG_44x) := head_44x.o +extra-$(CONFIG_FSL_BOOKE) := head_fsl_booke.o +extra-$(CONFIG_8xx) := head_8xx.o +extra-$(CONFIG_6xx) += idle_6xx.o +extra-$(CONFIG_POWER4) += idle_power4.o +extra-$(CONFIG_PPC_FPU) += fpu.o +extra-y += vmlinux.lds + +obj-y := semaphore.o traps.o process.o + +obj-$(CONFIG_ALTIVEC) += vecemu.o vector.o diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c new file mode 100644 index 00000000000..16cf0b7ee2b --- /dev/null +++ b/arch/powerpc/kernel/asm-offsets.c @@ -0,0 +1,262 @@ +/* + * This program is used to generate definitions needed by + * assembly language modules. + * + * We use the technique used in the OSF Mach kernel code: + * generate asm statements containing #defines, + * compile this file to assembler, and then extract the + * #defines from the assembly-language output. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/suspend.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/time.h> +#include <linux/hardirq.h> +#include <asm/io.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/processor.h> + +#include <asm/cputable.h> +#include <asm/thread_info.h> +#ifdef CONFIG_PPC64 +#include <asm/paca.h> +#include <asm/lppaca.h> +#include <asm/iSeries/HvLpEvent.h> +#include <asm/rtas.h> +#include <asm/cache.h> +#include <asm/systemcfg.h> +#include <asm/compat.h> +#endif + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : : ) + +int main(void) +{ + /* thread struct on stack */ + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); +#ifdef CONFIG_PPC32 + DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); +#endif +#ifdef CONFIG_PPC64 + DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror)); + DEFINE(THREAD_SHIFT, THREAD_SHIFT); +#endif + DEFINE(THREAD_SIZE, THREAD_SIZE); + + /* task_struct->thread */ + DEFINE(THREAD, offsetof(struct task_struct, thread)); + DEFINE(THREAD_INFO, offsetof(struct task_struct, thread_info)); + DEFINE(MM, offsetof(struct task_struct, mm)); + DEFINE(PTRACE, offsetof(struct task_struct, ptrace)); + DEFINE(KSP, offsetof(struct thread_struct, ksp)); + DEFINE(PGDIR, offsetof(struct thread_struct, pgdir)); + DEFINE(LAST_SYSCALL, offsetof(struct thread_struct, last_syscall)); + DEFINE(PT_REGS, offsetof(struct thread_struct, regs)); + DEFINE(THREAD_FPEXC_MODE, offsetof(struct thread_struct, fpexc_mode)); + DEFINE(THREAD_FPR0, offsetof(struct thread_struct, fpr[0])); + DEFINE(THREAD_FPSCR, offsetof(struct thread_struct, fpscr)); +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + DEFINE(THREAD_DBCR0, offsetof(struct thread_struct, dbcr0)); + DEFINE(PT_PTRACED, PT_PTRACED); +#endif +#ifdef CONFIG_PPC64 + DEFINE(KSP_VSID, offsetof(struct thread_struct, ksp_vsid)); +#endif + +#ifdef CONFIG_ALTIVEC + DEFINE(THREAD_VR0, offsetof(struct thread_struct, vr[0])); + DEFINE(THREAD_VRSAVE, offsetof(struct thread_struct, vrsave)); + DEFINE(THREAD_VSCR, offsetof(struct thread_struct, vscr)); + DEFINE(THREAD_USED_VR, offsetof(struct thread_struct, used_vr)); +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + DEFINE(THREAD_EVR0, offsetof(struct thread_struct, evr[0])); + DEFINE(THREAD_ACC, offsetof(struct thread_struct, acc)); + DEFINE(THREAD_SPEFSCR, offsetof(struct thread_struct, spefscr)); + DEFINE(THREAD_USED_SPE, offsetof(struct thread_struct, used_spe)); +#endif /* CONFIG_SPE */ + /* Interrupt register frame */ + DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); +#ifndef CONFIG_PPC64 + DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); +#else + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + + /* 288 = # of volatile regs, int & fp, for leaf routines */ + /* which do not stack a frame. See the PPC64 ABI. */ + DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288); +#endif + /* in fact we only use gpr0 - gpr9 and gpr20 - gpr23 */ + DEFINE(GPR0, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[0])); + DEFINE(GPR1, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[1])); + DEFINE(GPR2, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[2])); + DEFINE(GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[3])); + DEFINE(GPR4, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[4])); + DEFINE(GPR5, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[5])); + DEFINE(GPR6, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[6])); + DEFINE(GPR7, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[7])); + DEFINE(GPR8, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[8])); + DEFINE(GPR9, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[9])); + DEFINE(GPR10, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[10])); + DEFINE(GPR11, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[11])); + DEFINE(GPR12, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[12])); + DEFINE(GPR13, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[13])); + DEFINE(GPR14, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[14])); + DEFINE(GPR15, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[15])); + DEFINE(GPR16, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[16])); + DEFINE(GPR17, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[17])); + DEFINE(GPR18, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[18])); + DEFINE(GPR19, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[19])); + DEFINE(GPR20, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[20])); + DEFINE(GPR21, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[21])); + DEFINE(GPR22, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[22])); + DEFINE(GPR23, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[23])); + DEFINE(GPR24, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[24])); + DEFINE(GPR25, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[25])); + DEFINE(GPR26, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[26])); + DEFINE(GPR27, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[27])); + DEFINE(GPR28, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[28])); + DEFINE(GPR29, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[29])); + DEFINE(GPR30, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[30])); + DEFINE(GPR31, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, gpr[31])); + /* + * Note: these symbols include _ because they overlap with special + * register names + */ + DEFINE(_NIP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, nip)); + DEFINE(_MSR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, msr)); + DEFINE(_CTR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ctr)); + DEFINE(_LINK, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, link)); + DEFINE(_CCR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, ccr)); + DEFINE(_MQ, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, mq)); + DEFINE(_XER, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, xer)); + DEFINE(_DAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); + DEFINE(_DSISR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + /* The PowerPC 400-class & Book-E processors have neither the DAR nor the DSISR + * SPRs. Hence, we overload them to hold the similar DEAR and ESR SPRs + * for such processors. For critical interrupts we use them to + * hold SRR0 and SRR1. + */ + DEFINE(_DEAR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dar)); + DEFINE(_ESR, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, dsisr)); + DEFINE(ORIG_GPR3, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, orig_gpr3)); + DEFINE(RESULT, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, result)); + DEFINE(TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); + DEFINE(CLONE_VM, CLONE_VM); + DEFINE(CLONE_UNTRACED, CLONE_UNTRACED); + DEFINE(MM_PGD, offsetof(struct mm_struct, pgd)); + + /* About the CPU features table */ + DEFINE(CPU_SPEC_ENTRY_SIZE, sizeof(struct cpu_spec)); + DEFINE(CPU_SPEC_PVR_MASK, offsetof(struct cpu_spec, pvr_mask)); + DEFINE(CPU_SPEC_PVR_VALUE, offsetof(struct cpu_spec, pvr_value)); + DEFINE(CPU_SPEC_FEATURES, offsetof(struct cpu_spec, cpu_features)); + DEFINE(CPU_SPEC_SETUP, offsetof(struct cpu_spec, cpu_setup)); + +#ifdef CONFIG_PPC64 + DEFINE(MM, offsetof(struct task_struct, mm)); + DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); + + DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); + DEFINE(DCACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_dline_size)); + DEFINE(DCACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, dlines_per_page)); + DEFINE(ICACHEL1LINESIZE, offsetof(struct ppc64_caches, iline_size)); + DEFINE(ICACHEL1LOGLINESIZE, offsetof(struct ppc64_caches, log_iline_size)); + DEFINE(ICACHEL1LINESPERPAGE, offsetof(struct ppc64_caches, ilines_per_page)); + DEFINE(PLATFORM, offsetof(struct systemcfg, platform)); + + /* paca */ + DEFINE(PACA_SIZE, sizeof(struct paca_struct)); + DEFINE(PACAPACAINDEX, offsetof(struct paca_struct, paca_index)); + DEFINE(PACAPROCSTART, offsetof(struct paca_struct, cpu_start)); + DEFINE(PACAKSAVE, offsetof(struct paca_struct, kstack)); + DEFINE(PACACURRENT, offsetof(struct paca_struct, __current)); + DEFINE(PACASAVEDMSR, offsetof(struct paca_struct, saved_msr)); + DEFINE(PACASTABREAL, offsetof(struct paca_struct, stab_real)); + DEFINE(PACASTABVIRT, offsetof(struct paca_struct, stab_addr)); + DEFINE(PACASTABRR, offsetof(struct paca_struct, stab_rr)); + DEFINE(PACAR1, offsetof(struct paca_struct, saved_r1)); + DEFINE(PACATOC, offsetof(struct paca_struct, kernel_toc)); + DEFINE(PACAPROCENABLED, offsetof(struct paca_struct, proc_enabled)); + DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); + DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); + DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +#ifdef CONFIG_HUGETLB_PAGE + DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); + DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); +#endif /* CONFIG_HUGETLB_PAGE */ + DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); + DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); + DEFINE(PACA_EXMC, offsetof(struct paca_struct, exmc)); + DEFINE(PACA_EXSLB, offsetof(struct paca_struct, exslb)); + DEFINE(PACA_EXDSI, offsetof(struct paca_struct, exdsi)); + DEFINE(PACAEMERGSP, offsetof(struct paca_struct, emergency_sp)); + DEFINE(PACALPPACA, offsetof(struct paca_struct, lppaca)); + DEFINE(PACAHWCPUID, offsetof(struct paca_struct, hw_cpu_id)); + DEFINE(LPPACASRR0, offsetof(struct lppaca, saved_srr0)); + DEFINE(LPPACASRR1, offsetof(struct lppaca, saved_srr1)); + DEFINE(LPPACAANYINT, offsetof(struct lppaca, int_dword.any_int)); + DEFINE(LPPACADECRINT, offsetof(struct lppaca, int_dword.fields.decr_int)); + + /* RTAS */ + DEFINE(RTASBASE, offsetof(struct rtas_t, base)); + DEFINE(RTASENTRY, offsetof(struct rtas_t, entry)); + + DEFINE(_TRAP, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, trap)); + DEFINE(SOFTE, STACK_FRAME_OVERHEAD+offsetof(struct pt_regs, softe)); + + /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ + DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); + DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); + + /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */ + DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)); + DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8); + + /* systemcfg offsets for use by vdso */ + DEFINE(CFG_TB_ORIG_STAMP, offsetof(struct systemcfg, tb_orig_stamp)); + DEFINE(CFG_TB_TICKS_PER_SEC, offsetof(struct systemcfg, tb_ticks_per_sec)); + DEFINE(CFG_TB_TO_XS, offsetof(struct systemcfg, tb_to_xs)); + DEFINE(CFG_STAMP_XSEC, offsetof(struct systemcfg, stamp_xsec)); + DEFINE(CFG_TB_UPDATE_COUNT, offsetof(struct systemcfg, tb_update_count)); + DEFINE(CFG_TZ_MINUTEWEST, offsetof(struct systemcfg, tz_minuteswest)); + DEFINE(CFG_TZ_DSTTIME, offsetof(struct systemcfg, tz_dsttime)); + DEFINE(CFG_SYSCALL_MAP32, offsetof(struct systemcfg, syscall_map_32)); + DEFINE(CFG_SYSCALL_MAP64, offsetof(struct systemcfg, syscall_map_64)); + + /* timeval/timezone offsets for use by vdso */ + DEFINE(TVAL64_TV_SEC, offsetof(struct timeval, tv_sec)); + DEFINE(TVAL64_TV_USEC, offsetof(struct timeval, tv_usec)); + DEFINE(TVAL32_TV_SEC, offsetof(struct compat_timeval, tv_sec)); + DEFINE(TVAL32_TV_USEC, offsetof(struct compat_timeval, tv_usec)); + DEFINE(TZONE_TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); + DEFINE(TZONE_TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); +#endif + + DEFINE(pbe_address, offsetof(struct pbe, address)); + DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); + DEFINE(pbe_next, offsetof(struct pbe, next)); + + DEFINE(NUM_USER_SEGMENTS, TASK_SIZE>>28); + return 0; +} diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S new file mode 100644 index 00000000000..665d7d34304 --- /dev/null +++ b/arch/powerpc/kernel/fpu.S @@ -0,0 +1,133 @@ +/* + * FPU support code, moved here from head.S so that it can be used + * by chips which use other head-whatever.S files. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* + * This task wants to use the FPU now. + * On UP, disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Load up this task's FP registers from its thread_struct, + * enable the FPU for the current task and return to the task. + */ + .globl load_up_fpu +load_up_fpu: + mfmsr r5 + ori r5,r5,MSR_FP +#ifdef CONFIG_PPC64BRIDGE + clrldi r5,r5,1 /* turn off 64-bit mode */ +#endif /* CONFIG_PPC64BRIDGE */ + SYNC + MTMSRD(r5) /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. + */ +#ifndef CONFIG_SMP + tophys(r6,0) /* get __pa constant */ + addis r3,r6,last_task_used_math@ha + lwz r4,last_task_used_math@l(r3) + cmpwi 0,r4,0 + beq 1f + add r4,r4,r6 + addi r4,r4,THREAD /* want last_task_used_math->thread */ + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR-4(r4) + lwz r5,PT_REGS(r4) + add r5,r5,r6 + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r10,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r10 /* disable FP for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ + mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + lwz r4,THREAD_FPEXC_MODE(r5) + ori r9,r9,MSR_FP /* enable FP for current */ + or r9,r9,r4 + lfd fr0,THREAD_FPSCR-4(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + sub r4,r4,r6 + stw r4,last_task_used_math@l(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + /* we haven't used ctr or xer or lr */ + b fast_exception_return + +/* + * FP unavailable trap from kernel - print a message, but let + * the task use FP in the kernel until it returns to user mode. + */ + .globl KernelFP +KernelFP: + lwz r3,_MSR(r1) + ori r3,r3,MSR_FP + stw r3,_MSR(r1) /* enable use of FP after return */ + lis r3,86f@h + ori r3,r3,86f@l + mr r4,r2 /* current */ + lwz r5,_NIP(r1) + bl printk + b ret_from_except +86: .string "floating point used in kernel (task=%p, pc=%x)\n" + .align 4,0 + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + */ + .globl giveup_fpu +giveup_fpu: + mfmsr r5 + ori r5,r5,MSR_FP + SYNC_601 + ISYNC_601 + MTMSRD(r5) /* enable use of fpu now */ + SYNC_601 + isync + cmpwi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpwi 0,r5,0 + SAVE_32FPRS(0, r3) + mffs fr0 + stfd fr0,THREAD_FPSCR-4(r3) + beq 1f + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r3 /* disable FP for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + lis r4,last_task_used_math@ha + stw r5,last_task_used_math@l(r4) +#endif /* CONFIG_SMP */ + blr diff --git a/arch/powerpc/kernel/head.S b/arch/powerpc/kernel/head.S new file mode 100644 index 00000000000..d05509f197d --- /dev/null +++ b/arch/powerpc/kernel/head.S @@ -0,0 +1,1545 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * This file contains the low-level support and setup for the + * PowerPC platform, including trap and interrupt dispatch. + * (The PPC 8xx embedded CPUs use head_8xx.S instead.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +#ifdef CONFIG_APUS +#include <asm/amigappc.h> +#endif + +#ifdef CONFIG_PPC64BRIDGE +#define LOAD_BAT(n, reg, RA, RB) \ + ld RA,(n*32)+0(reg); \ + ld RB,(n*32)+8(reg); \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_IBAT##n##L,RB; \ + ld RA,(n*32)+16(reg); \ + ld RB,(n*32)+24(reg); \ + mtspr SPRN_DBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##L,RB; \ + +#else /* CONFIG_PPC64BRIDGE */ + +/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */ +#define LOAD_BAT(n, reg, RA, RB) \ + /* see the comment for clear_bats() -- Cort */ \ + li RA,0; \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##U,RA; \ + lwz RA,(n*16)+0(reg); \ + lwz RB,(n*16)+4(reg); \ + mtspr SPRN_IBAT##n##U,RA; \ + mtspr SPRN_IBAT##n##L,RB; \ + beq 1f; \ + lwz RA,(n*16)+8(reg); \ + lwz RB,(n*16)+12(reg); \ + mtspr SPRN_DBAT##n##U,RA; \ + mtspr SPRN_DBAT##n##L,RB; \ +1: +#endif /* CONFIG_PPC64BRIDGE */ + + .text + .stabs "arch/ppc/kernel/",N_SO,0,0,0f + .stabs "head.S",N_SO,0,0,0f +0: + .globl _stext +_stext: + +/* + * _start is defined this way because the XCOFF loader in the OpenFirmware + * on the powermac expects the entry point to be a procedure descriptor. + */ + .text + .globl _start +_start: + /* + * These are here for legacy reasons, the kernel used to + * need to look like a coff function entry for the pmac + * but we're always started by some kind of bootloader now. + * -- Cort + */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop /* used by __secondary_hold on prep (mtx) and chrp smp */ + nop + +/* PMAC + * Enter here with the kernel text, data and bss loaded starting at + * 0, running with virtual == physical mapping. + * r5 points to the prom entry point (the client interface handler + * address). Address translation is turned on, with the prom + * managing the hash table. Interrupts are disabled. The stack + * pointer (r1) points to just below the end of the half-meg region + * from 0x380000 - 0x400000, which is mapped in already. + * + * If we are booted from MacOS via BootX, we enter with the kernel + * image loaded somewhere, and the following values in registers: + * r3: 'BooX' (0x426f6f58) + * r4: virtual address of boot_infos_t + * r5: 0 + * + * APUS + * r3: 'APUS' + * r4: physical address of memory base + * Linux/m68k style BootInfo structure at &_end. + * + * PREP + * This is jumped to on prep systems right after the kernel is relocated + * to its proper place in memory by the boot loader. The expected layout + * of the regs is: + * r3: ptr to residual data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * This just gets a minimal mmu environment setup so we can call + * start_here() to do the real work. + * -- Cort + */ + + .globl __start +__start: +/* + * We have to do any OF calls before we map ourselves to KERNELBASE, + * because OF may have I/O devices mapped into that area + * (particularly on CHRP). + */ + mr r31,r3 /* save parameters */ + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + li r24,0 /* cpu # */ + +/* + * early_init() does the early machine identification and does + * the necessary low-level setup and clears the BSS + * -- Cort <cort@fsmlabs.com> + */ + bl early_init + +/* + * On POWER4, we first need to tweak some CPU configuration registers + * like real mode cache inhibit or exception base + */ +#ifdef CONFIG_POWER4 + bl __970_cpu_preinit +#endif /* CONFIG_POWER4 */ + +#ifdef CONFIG_APUS +/* On APUS the __va/__pa constants need to be set to the correct + * values before continuing. + */ + mr r4,r30 + bl fix_mem_constants +#endif /* CONFIG_APUS */ + +/* Switch MMU off, clear BATs and flush TLB. At this point, r3 contains + * the physical address we are running at, returned by early_init() + */ + bl mmu_off +__after_mmu_off: +#ifndef CONFIG_POWER4 + bl clear_bats + bl flush_tlbs + + bl initial_bats +#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) + bl setup_disp_bat +#endif +#else /* CONFIG_POWER4 */ + bl reloc_offset + bl initial_mm_power4 +#endif /* CONFIG_POWER4 */ + +/* + * Call setup_cpu for CPU 0 and initialize 6xx Idle + */ + bl reloc_offset + li r24,0 /* cpu# */ + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_6xx + bl reloc_offset + bl init_idle_6xx +#endif /* CONFIG_6xx */ +#ifdef CONFIG_POWER4 + bl reloc_offset + bl init_idle_power4 +#endif /* CONFIG_POWER4 */ + + +#ifndef CONFIG_APUS +/* + * We need to run with _start at physical address 0. + * On CHRP, we are loaded at 0x10000 since OF on CHRP uses + * the exception vectors at 0 (and therefore this copy + * overwrites OF's exception vectors with our own). + * If the MMU is already turned on, we copy stuff to KERNELBASE, + * otherwise we copy it to 0. + */ + bl reloc_offset + mr r26,r3 + addis r4,r3,KERNELBASE@h /* current address of _start */ + cmpwi 0,r4,0 /* are we already running at 0? */ + bne relocate_kernel +#endif /* CONFIG_APUS */ +/* + * we now have the 1st 16M of ram mapped with the bats. + * prep needs the mmu to be turned on here, but pmac already has it on. + * this shouldn't bother the pmac since it just gets turned on again + * as we jump to our code at KERNELBASE. -- Cort + * Actually no, pmac doesn't have it on any more. BootX enters with MMU + * off, and in other cases, we now turn it off before changing BATs above. + */ +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + RFI /* enables MMU */ + +/* + * We need __secondary_hold as a place to hold the other cpus on + * an SMP machine, even when we are running a UP kernel. + */ + . = 0xc0 /* for prep bootloader */ + li r3,1 /* MTX only has 1 cpu */ + .globl __secondary_hold +__secondary_hold: + /* tell the master we're here */ + stw r3,4(0) +#ifdef CONFIG_SMP +100: lwz r4,0(0) + /* wait until we're told to start */ + cmpw 0,r4,r3 + bne 100b + /* our cpu # was at addr 0 - go */ + mr r24,r3 /* cpu # */ + b __secondary_start +#else + b . +#endif /* CONFIG_SMP */ + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ +#define EXCEPTION_PROLOG \ + mtspr SPRN_SPRG0,r10; \ + mtspr SPRN_SPRG1,r11; \ + mfcr r10; \ + EXCEPTION_PROLOG_1; \ + EXCEPTION_PROLOG_2 + +#define EXCEPTION_PROLOG_1 \ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ + andi. r11,r11,MSR_PR; \ + tophys(r11,r1); /* use tophys(r1) if kernel */ \ + beq 1f; \ + mfspr r11,SPRN_SPRG3; \ + lwz r11,THREAD_INFO-THREAD(r11); \ + addi r11,r11,THREAD_SIZE; \ + tophys(r11,r11); \ +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ + + +#define EXCEPTION_PROLOG_2 \ + CLR_TOP32(r11); \ + stw r10,_CCR(r11); /* save registers */ \ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_SRR0; \ + mfspr r9,SPRN_SRR1; \ + stw r1,GPR1(r11); \ + stw r1,0(r11); \ + tovirt(r1,r11); /* set new kernel sp */ \ + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ + MTMSRD(r10); /* (except for mach check in rtas) */ \ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#define EXCEPTION(n, label, hdlr, xfer) \ + . = n; \ +label: \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,TRAP(r11); \ + li r10,MSR_KERNEL; \ + copyee(r10, r9); \ + bl tfer; \ +i##n: \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ + ret_from_except) + +/* System reset */ +/* core99 pmac starts the seconary here by changing the vector, and + putting it back to what it was (UnknownException) when done. */ +#if defined(CONFIG_GEMINI) && defined(CONFIG_SMP) + . = 0x100 + b __secondary_start_gemini +#else + EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) +#endif + +/* Machine check */ +/* + * On CHRP, this is complicated by the fact that we could get a + * machine check inside RTAS, and we have no guarantee that certain + * critical registers will have the values we expect. The set of + * registers that might have bad values includes all the GPRs + * and all the BATs. We indicate that we are in RTAS by putting + * a non-zero value, the address of the exception frame to use, + * in SPRG2. The machine check handler checks SPRG2 and uses its + * value if it is non-zero. If we ever needed to free up SPRG2, + * we could use a field in the thread_info or thread_struct instead. + * (Other exception handlers assume that r1 is a valid kernel stack + * pointer when we take an exception from supervisor mode.) + * -- paulus. + */ + . = 0x200 + mtspr SPRN_SPRG0,r10 + mtspr SPRN_SPRG1,r11 + mfcr r10 +#ifdef CONFIG_PPC_CHRP + mfspr r11,SPRN_SPRG2 + cmpwi 0,r11,0 + bne 7f +#endif /* CONFIG_PPC_CHRP */ + EXCEPTION_PROLOG_1 +7: EXCEPTION_PROLOG_2 + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_PPC_CHRP + mfspr r4,SPRN_SPRG2 + cmpwi cr1,r4,0 + bne cr1,1f +#endif + EXC_XFER_STD(0x200, MachineCheckException) +#ifdef CONFIG_PPC_CHRP +1: b machine_check_in_rtas +#endif + +/* Data access exception. */ + . = 0x300 +#ifdef CONFIG_PPC64BRIDGE + b DataAccess +DataAccessCont: +#else +DataAccess: + EXCEPTION_PROLOG +#endif /* CONFIG_PPC64BRIDGE */ + mfspr r10,SPRN_DSISR + andis. r0,r10,0xa470 /* weird error? */ + bne 1f /* if not, try to put a PTE */ + mfspr r4,SPRN_DAR /* into the hash table */ + rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */ + bl hash_page +1: stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + EXC_XFER_EE_LITE(0x300, handle_page_fault) + +#ifdef CONFIG_PPC64BRIDGE +/* SLB fault on data access. */ + . = 0x380 + b DataSegment +#endif /* CONFIG_PPC64BRIDGE */ + +/* Instruction access exception. */ + . = 0x400 +#ifdef CONFIG_PPC64BRIDGE + b InstructionAccess +InstructionAccessCont: +#else +InstructionAccess: + EXCEPTION_PROLOG +#endif /* CONFIG_PPC64BRIDGE */ + andis. r0,r9,0x4000 /* no pte found? */ + beq 1f /* if so, try to put a PTE */ + li r3,0 /* into the hash table */ + mr r4,r12 /* SRR0 is fault address */ + bl hash_page +1: mr r4,r12 + mr r5,r9 + EXC_XFER_EE_LITE(0x400, handle_page_fault) + +#ifdef CONFIG_PPC64BRIDGE +/* SLB fault on instruction access. */ + . = 0x480 + b InstructionSegment +#endif /* CONFIG_PPC64BRIDGE */ + +/* External interrupt */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* Alignment exception */ + . = 0x600 +Alignment: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, AlignmentException) + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + +/* Floating-point unavailable */ + . = 0x800 +FPUnavailable: + EXCEPTION_PROLOG + bne load_up_fpu /* if from user, just load it up */ + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x800, KernelFP) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + +/* System call */ + . = 0xc00 +SystemCall: + EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + +/* Single step - not used on 601 */ + EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) + +/* + * The Altivec unavailable trap is at 0x0f20. Foo. + * We effectively remap it to 0x3000. + * We include an altivec unavailable exception vector even if + * not configured for Altivec, so that you can't panic a + * non-altivec kernel running on a machine with altivec just + * by executing an altivec instruction. + */ + . = 0xf00 + b Trap_0f + + . = 0xf20 + b AltiVecUnavailable + +Trap_0f: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0xf00, UnknownException) + +/* + * Handle TLB miss for instruction on 603/603e. + * Note: we get an alternate set of r0 - r3 to use automatically. + */ + . = 0x1000 +InstructionTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_IMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- InstructionAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- InstructionAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed bit) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r1,r1,r2 /* writable if _RW and _DIRTY */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe14 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_IMISS + tlbli r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +InstructionAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 /* (shouldn't be needed) */ + mtctr r0 /* Restore CTR */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + or r2,r2,r1 + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_IMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + rlwimi r2,r2,1,30,30 /* change 1 -> 3 */ + xor r1,r1,r2 + mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b InstructionAccess + +/* + * Handle TLB miss for DATA Load operation on 603/603e + */ + . = 0x1100 +DataLoadTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_USER|_PAGE_PRESENT /* low addresses tested as user */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED /* set _PAGE_ACCESSED in pte */ + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed bit) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwinm r1,r3,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r2,r3,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r1,r1,r2 /* writable if _RW and _DIRTY */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r3,r3,32-1,31,31 /* _PAGE_USER -> PP lsb */ + ori r1,r1,0xe14 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? (rw&dirty? 2: 3): 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_DMISS + tlbld r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi +DataAddressInvalid: + mfspr r3,SPRN_SRR1 + rlwinm r1,r3,9,6,6 /* Get load/store bit */ + addis r1,r1,0x2000 + mtspr SPRN_DSISR,r1 + mtctr r0 /* Restore CTR */ + andi. r2,r3,0xFFFF /* Clear upper bits of SRR1 */ + mtspr SPRN_SRR1,r2 + mfspr r1,SPRN_DMISS /* Get failing address */ + rlwinm. r2,r2,0,31,31 /* Check for little endian access */ + beq 20f /* Jump if big endian */ + xori r1,r1,3 +20: mtspr SPRN_DAR,r1 /* Set fault address */ + mfmsr r0 /* Restore "normal" registers */ + xoris r0,r0,MSR_TGPR>>16 + mtcrf 0x80,r3 /* Restore CR0 */ + mtmsr r0 + b DataAccess + +/* + * Handle TLB miss for DATA Store on 603/603e + */ + . = 0x1200 +DataStoreTLBMiss: +/* + * r0: stored ctr + * r1: linux style pte ( later becomes ppc hardware pte ) + * r2: ptr to linux-style pte + * r3: scratch + */ + mfctr r0 + /* Get PTE (linux-style) and check access */ + mfspr r3,SPRN_DMISS + lis r1,KERNELBASE@h /* check if kernel address */ + cmplw 0,r3,r1 + mfspr r2,SPRN_SPRG3 + li r1,_PAGE_RW|_PAGE_USER|_PAGE_PRESENT /* access flags */ + lwz r2,PGDIR(r2) + blt+ 112f + lis r2,swapper_pg_dir@ha /* if kernel address, use */ + addi r2,r2,swapper_pg_dir@l /* kernel page table */ + mfspr r1,SPRN_SRR1 /* and MSR_PR bit from SRR1 */ + rlwinm r1,r1,32-12,29,29 /* shift MSR_PR to _PAGE_USER posn */ +112: tophys(r2,r2) + rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ + lwz r2,0(r2) /* get pmd entry */ + rlwinm. r2,r2,0,0,19 /* extract address of pte page */ + beq- DataAddressInvalid /* return if no mapping */ + rlwimi r2,r3,22,20,29 /* insert next 10 bits of address */ + lwz r3,0(r2) /* get linux-style pte */ + andc. r1,r1,r3 /* check access & ~permission */ + bne- DataAddressInvalid /* return if access not permitted */ + ori r3,r3,_PAGE_ACCESSED|_PAGE_DIRTY + /* + * NOTE! We are assuming this is not an SMP system, otherwise + * we would need to update the pte atomically with lwarx/stwcx. + */ + stw r3,0(r2) /* update PTE (accessed/dirty bits) */ + /* Convert linux-style PTE to low word of PPC-style PTE */ + rlwimi r3,r3,32-1,30,30 /* _PAGE_USER -> PP msb */ + li r1,0xe15 /* clear out reserved bits and M */ + andc r1,r3,r1 /* PP = user? 2: 0 */ + mtspr SPRN_RPA,r1 + mfspr r3,SPRN_DMISS + tlbld r3 + mfspr r3,SPRN_SRR1 /* Need to restore CR0 */ + mtcrf 0x80,r3 + rfi + +#ifndef CONFIG_ALTIVEC +#define AltivecAssistException UnknownException +#endif + + EXCEPTION(0x1300, Trap_13, InstructionBreakpoint, EXC_XFER_EE) + EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) +#ifdef CONFIG_POWER4 + EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, TAUException, EXC_XFER_STD) +#else /* !CONFIG_POWER4 */ + EXCEPTION(0x1600, Trap_16, AltivecAssistException, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD) + EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) +#endif /* CONFIG_POWER4 */ + EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE) + EXCEPTION(0x2100, Trap_21, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2200, Trap_22, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2300, Trap_23, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2400, Trap_24, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2500, Trap_25, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2600, Trap_26, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2700, Trap_27, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2800, Trap_28, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2900, Trap_29, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2a00, Trap_2a, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2b00, Trap_2b, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2c00, Trap_2c, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2d00, Trap_2d, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2e00, Trap_2e, UnknownException, EXC_XFER_EE) + EXCEPTION(0x2f00, MOLTrampoline, UnknownException, EXC_XFER_EE_LITE) + + .globl mol_trampoline + .set mol_trampoline, i0x2f00 + + . = 0x3000 + +AltiVecUnavailable: + EXCEPTION_PROLOG +#ifdef CONFIG_ALTIVEC + bne load_up_altivec /* if from user, just load it up */ +#endif /* CONFIG_ALTIVEC */ + EXC_XFER_EE_LITE(0xf20, AltivecUnavailException) + +#ifdef CONFIG_PPC64BRIDGE +DataAccess: + EXCEPTION_PROLOG + b DataAccessCont + +InstructionAccess: + EXCEPTION_PROLOG + b InstructionAccessCont + +DataSegment: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + EXC_XFER_STD(0x380, UnknownException) + +InstructionSegment: + EXCEPTION_PROLOG + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0x480, UnknownException) +#endif /* CONFIG_PPC64BRIDGE */ + +#ifdef CONFIG_ALTIVEC +/* Note that the AltiVec support is closely modeled after the FP + * support. Changes to one are likely to be applicable to the + * other! */ +load_up_altivec: +/* + * Disable AltiVec for the task which had AltiVec previously, + * and save its AltiVec registers in its thread_struct. + * Enables AltiVec for use in the kernel on return. + * On SMP we know the AltiVec units are free, since we give it up every + * switch. -- Kumar + */ + mfmsr r5 + oris r5,r5,MSR_VEC@h + MTMSRD(r5) /* enable use of AltiVec now */ + isync +/* + * For SMP, we don't do lazy AltiVec switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altivec in switch_to. + */ +#ifndef CONFIG_SMP + tophys(r6,0) + addis r3,r6,last_task_used_altivec@ha + lwz r4,last_task_used_altivec@l(r3) + cmpwi 0,r4,0 + beq 1f + add r4,r4,r6 + addi r4,r4,THREAD /* want THREAD of last_task_used_altivec */ + SAVE_32VRS(0,r10,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + lwz r5,PT_REGS(r4) + add r5,r5,r6 + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r10,MSR_VEC@h + andc r4,r4,r10 /* disable altivec for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of AltiVec after return */ + oris r9,r9,MSR_VEC@h + mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r10,r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + sub r4,r4,r6 + stw r4,last_task_used_altivec@l(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + /* we haven't used ctr or xer or lr */ + b fast_exception_return + +/* + * AltiVec unavailable trap from kernel - print a message, but let + * the task use AltiVec in the kernel until it returns to user mode. + */ +KernelAltiVec: + lwz r3,_MSR(r1) + oris r3,r3,MSR_VEC@h + stw r3,_MSR(r1) /* enable use of AltiVec after return */ + lis r3,87f@h + ori r3,r3,87f@l + mr r4,r2 /* current */ + lwz r5,_NIP(r1) + bl printk + b ret_from_except +87: .string "AltiVec used in kernel (task=%p, pc=%x) \n" + .align 4,0 + +/* + * giveup_altivec(tsk) + * Disable AltiVec for the task given as the argument, + * and save the AltiVec registers in its thread_struct. + * Enables AltiVec for use in the kernel on return. + */ + + .globl giveup_altivec +giveup_altivec: + mfmsr r5 + oris r5,r5,MSR_VEC@h + SYNC + MTMSRD(r5) /* enable use of AltiVec now */ + isync + cmpwi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpwi 0,r5,0 + SAVE_32VRS(0, r4, r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VEC@h + andc r4,r4,r3 /* disable AltiVec for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + lis r4,last_task_used_altivec@ha + stw r5,last_task_used_altivec@l(r4) +#endif /* CONFIG_SMP */ + blr +#endif /* CONFIG_ALTIVEC */ + +/* + * This code is jumped to from the startup code to copy + * the kernel image to physical address 0. + */ +relocate_kernel: + addis r9,r26,klimit@ha /* fetch klimit */ + lwz r25,klimit@l(r9) + addis r25,r25,-KERNELBASE@h + li r3,0 /* Destination base address */ + li r6,0 /* Destination offset */ + li r5,0x4000 /* # bytes of memory to copy */ + bl copy_and_flush /* copy the first 0x4000 bytes */ + addi r0,r3,4f@l /* jump to the address of 4f */ + mtctr r0 /* in copy and do the rest. */ + bctr /* jump to the copy */ +4: mr r5,r25 + bl copy_and_flush /* copy the rest */ + b turn_on_mmu + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. + * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + */ +copy_and_flush: + addi r5,r5,-4 + addi r6,r6,-4 +4: li r0,L1_CACHE_LINE_SIZE/4 + mtctr r0 +3: addi r6,r6,4 /* copy a cache line */ + lwzx r0,r6,r4 + stwx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmplw 0,r6,r5 + blt 4b + sync /* additional sync needed on g4 */ + isync + addi r5,r5,4 + addi r6,r6,4 + blr + +#ifdef CONFIG_APUS +/* + * On APUS the physical base address of the kernel is not known at compile + * time, which means the __pa/__va constants used are incorrect. In the + * __init section is recorded the virtual addresses of instructions using + * these constants, so all that has to be done is fix these before + * continuing the kernel boot. + * + * r4 = The physical address of the kernel base. + */ +fix_mem_constants: + mr r10,r4 + addis r10,r10,-KERNELBASE@h /* virt_to_phys constant */ + neg r11,r10 /* phys_to_virt constant */ + + lis r12,__vtop_table_begin@h + ori r12,r12,__vtop_table_begin@l + add r12,r12,r10 /* table begin phys address */ + lis r13,__vtop_table_end@h + ori r13,r13,__vtop_table_end@l + add r13,r13,r10 /* table end phys address */ + subi r12,r12,4 + subi r13,r13,4 +1: lwzu r14,4(r12) /* virt address of instruction */ + add r14,r14,r10 /* phys address of instruction */ + lwz r15,0(r14) /* instruction, now insert top */ + rlwimi r15,r10,16,16,31 /* half of vp const in low half */ + stw r15,0(r14) /* of instruction and restore. */ + dcbst r0,r14 /* write it to memory */ + sync + icbi r0,r14 /* flush the icache line */ + cmpw r12,r13 + bne 1b + sync /* additional sync needed on g4 */ + isync + +/* + * Map the memory where the exception handlers will + * be copied to when hash constants have been patched. + */ +#ifdef CONFIG_APUS_FAST_EXCEPT + lis r8,0xfff0 +#else + lis r8,0 +#endif + ori r8,r8,0x2 /* 128KB, supervisor */ + mtspr SPRN_DBAT3U,r8 + mtspr SPRN_DBAT3L,r8 + + lis r12,__ptov_table_begin@h + ori r12,r12,__ptov_table_begin@l + add r12,r12,r10 /* table begin phys address */ + lis r13,__ptov_table_end@h + ori r13,r13,__ptov_table_end@l + add r13,r13,r10 /* table end phys address */ + subi r12,r12,4 + subi r13,r13,4 +1: lwzu r14,4(r12) /* virt address of instruction */ + add r14,r14,r10 /* phys address of instruction */ + lwz r15,0(r14) /* instruction, now insert top */ + rlwimi r15,r11,16,16,31 /* half of pv const in low half*/ + stw r15,0(r14) /* of instruction and restore. */ + dcbst r0,r14 /* write it to memory */ + sync + icbi r0,r14 /* flush the icache line */ + cmpw r12,r13 + bne 1b + + sync /* additional sync needed on g4 */ + isync /* No speculative loading until now */ + blr + +/*********************************************************************** + * Please note that on APUS the exception handlers are located at the + * physical address 0xfff0000. For this reason, the exception handlers + * cannot use relative branches to access the code below. + ***********************************************************************/ +#endif /* CONFIG_APUS */ + +#ifdef CONFIG_SMP +#ifdef CONFIG_GEMINI + .globl __secondary_start_gemini +__secondary_start_gemini: + mfspr r4,SPRN_HID0 + ori r4,r4,HID0_ICFI + li r3,0 + ori r3,r3,HID0_ICE + andc r4,r4,r3 + mtspr SPRN_HID0,r4 + sync + b __secondary_start +#endif /* CONFIG_GEMINI */ + + .globl __secondary_start_pmac_0 +__secondary_start_pmac_0: + /* NB the entries for cpus 0, 1, 2 must each occupy 8 bytes. */ + li r24,0 + b 1f + li r24,1 + b 1f + li r24,2 + b 1f + li r24,3 +1: + /* on powersurge, we come in here with IR=0 and DR=1, and DBAT 0 + set to map the 0xf0000000 - 0xffffffff region */ + mfmsr r0 + rlwinm r0,r0,0,28,26 /* clear DR (0x10) */ + SYNC + mtmsr r0 + isync + + .globl __secondary_start +__secondary_start: +#ifdef CONFIG_PPC64BRIDGE + mfmsr r0 + clrldi r0,r0,1 /* make sure it's in 32-bit mode */ + SYNC + MTMSRD(r0) + isync +#endif + /* Copy some CPU settings from CPU 0 */ + bl __restore_cpu_setup + + lis r3,-KERNELBASE@h + mr r4,r24 + bl identify_cpu + bl call_setup_cpu /* Call setup_cpu for this CPU */ +#ifdef CONFIG_6xx + lis r3,-KERNELBASE@h + bl init_idle_6xx +#endif /* CONFIG_6xx */ +#ifdef CONFIG_POWER4 + lis r3,-KERNELBASE@h + bl init_idle_power4 +#endif /* CONFIG_POWER4 */ + + /* get current_thread_info and current */ + lis r1,secondary_ti@ha + tophys(r1,r1) + lwz r1,secondary_ti@l(r1) + tophys(r2,r1) + lwz r2,TI_TASK(r2) + + /* stack */ + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + li r0,0 + tophys(r3,r1) + stw r0,0(r3) + + /* load up the MMU */ + bl load_up_mmu + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* phys address of our thread_struct */ + CLR_TOP32(r4) + mtspr SPRN_SPRG3,r4 + li r3,0 + mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */ + + /* enable MMU and jump to start_secondary */ + li r4,MSR_KERNEL + FIX_SRR1(r4,r5) + lis r3,start_secondary@h + ori r3,r3,start_secondary@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + SYNC + RFI +#endif /* CONFIG_SMP */ + +/* + * Those generic dummy functions are kept for CPUs not + * included in CONFIG_6xx + */ +_GLOBAL(__setup_cpu_power3) + blr +_GLOBAL(__setup_cpu_generic) + blr + +#if !defined(CONFIG_6xx) && !defined(CONFIG_POWER4) +_GLOBAL(__save_cpu_setup) + blr +_GLOBAL(__restore_cpu_setup) + blr +#endif /* !defined(CONFIG_6xx) && !defined(CONFIG_POWER4) */ + + +/* + * Load stuff into the MMU. Intended to be called with + * IR=0 and DR=0. + */ +load_up_mmu: + sync /* Force all PTE updates to finish */ + isync + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + /* Load the SDR1 register (hash table base & size) */ + lis r6,_SDR1@ha + tophys(r6,r6) + lwz r6,_SDR1@l(r6) + mtspr SPRN_SDR1,r6 +#ifdef CONFIG_PPC64BRIDGE + /* clear the ASR so we only use the pseudo-segment registers. */ + li r6,0 + mtasr r6 +#endif /* CONFIG_PPC64BRIDGE */ + li r0,16 /* load up segment register values */ + mtctr r0 /* for context 0 */ + lis r3,0x2000 /* Ku = 1, VSID = 0 */ + li r4,0 +3: mtsrin r3,r4 + addi r3,r3,0x111 /* increment VSID */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b +#ifndef CONFIG_POWER4 +/* Load the BAT registers with the values set up by MMU_init. + MMU_init takes care of whether we're on a 601 or not. */ + mfpvr r3 + srwi r3,r3,16 + cmpwi r3,1 + lis r3,BATS@ha + addi r3,r3,BATS@l + tophys(r3,r3) + LOAD_BAT(0,r3,r4,r5) + LOAD_BAT(1,r3,r4,r5) + LOAD_BAT(2,r3,r4,r5) + LOAD_BAT(3,r3,r4,r5) +#endif /* CONFIG_POWER4 */ + blr + +/* + * This is where the main kernel code starts. + */ +start_here: + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + /* Set up for using our exception vectors */ + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + CLR_TOP32(r4) + mtspr SPRN_SPRG3,r4 + li r3,0 + mtspr SPRN_SPRG2,r3 /* 0 => not in RTAS */ + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) +/* + * Do early bootinfo parsing, platform-specific initialization, + * and set up the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + +#ifdef CONFIG_APUS + /* Copy exception code to exception vector base on APUS. */ + lis r4,KERNELBASE@h +#ifdef CONFIG_APUS_FAST_EXCEPT + lis r3,0xfff0 /* Copy to 0xfff00000 */ +#else + lis r3,0 /* Copy to 0x00000000 */ +#endif + li r5,0x4000 /* # bytes of memory to copy */ + li r6,0 + bl copy_and_flush /* copy the first 0x4000 bytes */ +#endif /* CONFIG_APUS */ + +/* + * Go back to running unmapped so we can load up new values + * for SDR1 (hash table pointer) and the segment registers + * and change to using our exception vectors. + */ + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + FIX_SRR1(r3,r5) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + SYNC + RFI +/* Load up the kernel context */ +2: bl load_up_mmu + +#ifdef CONFIG_BDI_SWITCH + /* Add helper information for the Abatron bdiGDB debugger. + * We do this here because we know the mmu is disabled, and + * will be enabled for real in just a few instructions. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* This much match your Abatron config */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + tophys(r5, r5) + stw r6, 0(r5) +#endif /* CONFIG_BDI_SWITCH */ + +/* Now turn on the MMU for real! */ + li r4,MSR_KERNEL + FIX_SRR1(r4,r5) + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + SYNC + RFI + +/* + * Set up the segment registers for a new context. + */ +_GLOBAL(set_context) + mulli r3,r3,897 /* multiply context by skew factor */ + rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */ + addis r3,r3,0x6000 /* Set Ks, Ku bits */ + li r0,NUM_USER_SEGMENTS + mtctr r0 + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is passed as second argument. + */ + lis r5, KERNELBASE@h + lwz r5, 0xf0(r5) + stw r4, 0x4(r5) +#endif + li r4,0 + isync +3: +#ifdef CONFIG_PPC64BRIDGE + slbie r4 +#endif /* CONFIG_PPC64BRIDGE */ + mtsrin r3,r4 + addi r3,r3,0x111 /* next VSID */ + rlwinm r3,r3,0,8,3 /* clear out any overflow from VSID field */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + sync + isync + blr + +/* + * An undocumented "feature" of 604e requires that the v bit + * be cleared before changing BAT values. + * + * Also, newer IBM firmware does not clear bat3 and 4 so + * this makes sure it's done. + * -- Cort + */ +clear_bats: + li r10,0 + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi r9, 1 + beq 1f + + mtspr SPRN_DBAT0U,r10 + mtspr SPRN_DBAT0L,r10 + mtspr SPRN_DBAT1U,r10 + mtspr SPRN_DBAT1L,r10 + mtspr SPRN_DBAT2U,r10 + mtspr SPRN_DBAT2L,r10 + mtspr SPRN_DBAT3U,r10 + mtspr SPRN_DBAT3L,r10 +1: + mtspr SPRN_IBAT0U,r10 + mtspr SPRN_IBAT0L,r10 + mtspr SPRN_IBAT1U,r10 + mtspr SPRN_IBAT1L,r10 + mtspr SPRN_IBAT2U,r10 + mtspr SPRN_IBAT2L,r10 + mtspr SPRN_IBAT3U,r10 + mtspr SPRN_IBAT3L,r10 +BEGIN_FTR_SECTION + /* Here's a tweak: at this point, CPU setup have + * not been called yet, so HIGH_BAT_EN may not be + * set in HID0 for the 745x processors. However, it + * seems that doesn't affect our ability to actually + * write to these SPRs. + */ + mtspr SPRN_DBAT4U,r10 + mtspr SPRN_DBAT4L,r10 + mtspr SPRN_DBAT5U,r10 + mtspr SPRN_DBAT5L,r10 + mtspr SPRN_DBAT6U,r10 + mtspr SPRN_DBAT6L,r10 + mtspr SPRN_DBAT7U,r10 + mtspr SPRN_DBAT7L,r10 + mtspr SPRN_IBAT4U,r10 + mtspr SPRN_IBAT4L,r10 + mtspr SPRN_IBAT5U,r10 + mtspr SPRN_IBAT5L,r10 + mtspr SPRN_IBAT6U,r10 + mtspr SPRN_IBAT6L,r10 + mtspr SPRN_IBAT7U,r10 + mtspr SPRN_IBAT7L,r10 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS) + blr + +flush_tlbs: + lis r10, 0x40 +1: addic. r10, r10, -0x1000 + tlbie r10 + blt 1b + sync + blr + +mmu_off: + addi r4, r3, __after_mmu_off - _start + mfmsr r3 + andi. r0,r3,MSR_DR|MSR_IR /* MMU enabled? */ + beqlr + andc r3,r3,r0 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + RFI + +#ifndef CONFIG_POWER4 +/* + * Use the first pair of BAT registers to map the 1st 16MB + * of RAM to KERNELBASE. From this point on we can't safely + * call OF any more. + */ +initial_bats: + lis r11,KERNELBASE@h +#ifndef CONFIG_PPC64BRIDGE + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + bne 4f + ori r11,r11,4 /* set up BAT registers for 601 */ + li r8,0x7f /* valid, block length = 8MB */ + oris r9,r11,0x800000@h /* set up BAT reg for 2nd 8M */ + oris r10,r8,0x800000@h /* set up BAT reg for 2nd 8M */ + mtspr SPRN_IBAT0U,r11 /* N.B. 601 has valid bit in */ + mtspr SPRN_IBAT0L,r8 /* lower BAT register */ + mtspr SPRN_IBAT1U,r9 + mtspr SPRN_IBAT1L,r10 + isync + blr +#endif /* CONFIG_PPC64BRIDGE */ + +4: tophys(r8,r11) +#ifdef CONFIG_SMP + ori r8,r8,0x12 /* R/W access, M=1 */ +#else + ori r8,r8,2 /* R/W access */ +#endif /* CONFIG_SMP */ +#ifdef CONFIG_APUS + ori r11,r11,BL_8M<<2|0x2 /* set up 8MB BAT registers for 604 */ +#else + ori r11,r11,BL_256M<<2|0x2 /* set up BAT registers for 604 */ +#endif /* CONFIG_APUS */ + +#ifdef CONFIG_PPC64BRIDGE + /* clear out the high 32 bits in the BAT */ + clrldi r11,r11,32 + clrldi r8,r8,32 +#endif /* CONFIG_PPC64BRIDGE */ + mtspr SPRN_DBAT0L,r8 /* N.B. 6xx (not 601) have valid */ + mtspr SPRN_DBAT0U,r11 /* bit in upper BAT register */ + mtspr SPRN_IBAT0L,r8 + mtspr SPRN_IBAT0U,r11 + isync + blr + +#if !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) +setup_disp_bat: + /* + * setup the display bat prepared for us in prom.c + */ + mflr r8 + bl reloc_offset + mtlr r8 + addis r8,r3,disp_BAT@ha + addi r8,r8,disp_BAT@l + lwz r11,0(r8) + lwz r8,4(r8) + mfspr r9,SPRN_PVR + rlwinm r9,r9,16,16,31 /* r9 = 1 for 601, 4 for 604 */ + cmpwi 0,r9,1 + beq 1f + mtspr SPRN_DBAT3L,r8 + mtspr SPRN_DBAT3U,r11 + blr +1: mtspr SPRN_IBAT3L,r8 + mtspr SPRN_IBAT3U,r11 + blr + +#endif /* !defined(CONFIG_APUS) && defined(CONFIG_BOOTX_TEXT) */ + +#else /* CONFIG_POWER4 */ +/* + * Load up the SDR1 and segment register values now + * since we don't have the BATs. + * Also make sure we are running in 32-bit mode. + */ + +initial_mm_power4: + addis r14,r3,_SDR1@ha /* get the value from _SDR1 */ + lwz r14,_SDR1@l(r14) /* assume hash table below 4GB */ + mtspr SPRN_SDR1,r14 + slbia + lis r4,0x2000 /* set pseudo-segment reg 12 */ + ori r5,r4,0x0ccc + mtsr 12,r5 +#if 0 + ori r5,r4,0x0888 /* set pseudo-segment reg 8 */ + mtsr 8,r5 /* (for access to serial port) */ +#endif +#ifdef CONFIG_BOOTX_TEXT + ori r5,r4,0x0999 /* set pseudo-segment reg 9 */ + mtsr 9,r5 /* (for access to screen) */ +#endif + mfmsr r0 + clrldi r0,r0,1 + sync + mtmsr r0 + isync + blr + +#endif /* CONFIG_POWER4 */ + +#ifdef CONFIG_8260 +/* Jump into the system reset for the rom. + * We first disable the MMU, and then jump to the ROM reset address. + * + * r3 is the board info structure, r4 is the location for starting. + * I use this for building a small kernel that can load other kernels, + * rather than trying to write or rely on a rom monitor that can tftp load. + */ + .globl m8260_gorom +m8260_gorom: + mfmsr r0 + rlwinm r0,r0,0,17,15 /* clear MSR_EE in r0 */ + sync + mtmsr r0 + sync + mfspr r11, SPRN_HID0 + lis r10, 0 + ori r10,r10,HID0_ICE|HID0_DCE + andc r11, r11, r10 + mtspr SPRN_HID0, r11 + isync + li r5, MSR_ME|MSR_RI + lis r6,2f@h + addis r6,r6,-KERNELBASE@h + ori r6,r6,2f@l + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r5 + isync + sync + rfi +2: + mtlr r4 + blr +#endif + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + + .globl swapper_pg_dir +swapper_pg_dir: + .space 4096 + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. + */ + .globl cmd_line +cmd_line: + .space 512 + + .globl intercept_table +intercept_table: + .long 0, 0, i0x200, i0x300, i0x400, 0, i0x600, i0x700 + .long i0x800, 0, 0, 0, 0, i0xd00, 0, 0 + .long 0, 0, 0, i0x1300, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + .long 0, 0, 0, 0, 0, 0, 0, 0 + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S new file mode 100644 index 00000000000..599245b0407 --- /dev/null +++ b/arch/powerpc/kernel/head_44x.S @@ -0,0 +1,778 @@ +/* + * arch/ppc/kernel/head_44x.S + * + * Kernel execution entry point code. + * + * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * Copyright 2002-2005 MontaVista Software, Inc. + * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/ibm4xx.h> +#include <asm/ibm44x.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include "head_booke.h" + + +/* As with the other PowerPC ports, it is expected that when code + * execution begins here, the following registers contain valid, yet + * optional, information: + * + * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) + * r4 - Starting address of the init RAM disk + * r5 - Ending address of the init RAM disk + * r6 - Start of kernel command line string (e.g. "mem=128") + * r7 - End of kernel command line string + * + */ + .text +_GLOBAL(_stext) +_GLOBAL(_start) + /* + * Reserve a word at a fixed location to store the address + * of abatron_pteptrs + */ + nop +/* + * Save parameters we are passed + */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + li r24,0 /* CPU number */ + +/* + * Set up the initial MMU state + * + * We are still executing code at the virtual address + * mappings set by the firmware for the base of RAM. + * + * We first invalidate all TLB entries but the one + * we are running from. We then load the KERNELBASE + * mappings so we can begin to use kernel addresses + * natively and so the interrupt vector locations are + * permanently pinned (necessary since Book E + * implementations always have translation enabled). + * + * TODO: Use the known TLB entry we are running from to + * determine which physical region we are located + * in. This can be used to determine where in RAM + * (on a shared CPU system) or PCI memory space + * (on a DRAMless system) we are located. + * For now, we assume a perfect world which means + * we are located at the base of DRAM (physical 0). + */ + +/* + * Search TLB for entry that we are currently using. + * Invalidate all entries but the one we are using. + */ + /* Load our current PID->MMUCR TID and MSR IS->MMUCR STS */ + mfspr r3,SPRN_PID /* Get PID */ + mfmsr r4 /* Get MSR */ + andi. r4,r4,MSR_IS@l /* TS=1? */ + beq wmmucr /* If not, leave STS=0 */ + oris r3,r3,PPC44x_MMUCR_STS@h /* Set STS=1 */ +wmmucr: mtspr SPRN_MMUCR,r3 /* Put MMUCR */ + sync + + bl invstr /* Find our address */ +invstr: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skpinv /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skpinv: addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync /* If so, context change */ + +/* + * Configure and load pinned entry into TLB slot 63. + */ + + lis r3,KERNELBASE@h /* Load the kernel virtual address */ + ori r3,r3,KERNELBASE@l + + /* Kernel is at the base of RAM */ + li r4, 0 /* Load the kernel physical address */ + + /* Load the kernel PID = 0 */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Initialize MMUCR */ + li r5,0 + mtspr SPRN_MMUCR,r5 + sync + + /* pageid fields */ + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_256M + + /* xlat fields */ + clrrwi r4,r4,10 /* Mask off the real page number */ + /* ERPN is 0 for first 4GB page */ + + /* attrib fields */ + /* Added guarded bit to protect against speculative loads/stores */ + li r5,0 + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + li r0,63 /* TLB slot 63 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + + /* Force context change */ + mfmsr r0 + mtspr SPRN_SRR1, r0 + lis r0,3f@h + ori r0,r0,3f@l + mtspr SPRN_SRR0,r0 + sync + rfi + + /* If necessary, invalidate original entry we used */ +3: cmpwi r23,63 + beq 4f + li r6,0 + tlbwe r6,r23,PPC44x_TLB_PAGEID + isync + +4: +#ifdef CONFIG_SERIAL_TEXT_DEBUG + /* + * Add temporary UART mapping for early debug. + * We can map UART registers wherever we want as long as they don't + * interfere with other system mappings (e.g. with pinned entries). + * For an example of how we handle this - see ocotea.h. --ebs + */ + /* pageid fields */ + lis r3,UART0_IO_BASE@h + ori r3,r3,PPC44x_TLB_VALID | PPC44x_TLB_4K + + /* xlat fields */ + lis r4,UART0_PHYS_IO_BASE@h /* RPN depends on SoC */ +#ifndef CONFIG_440EP + ori r4,r4,0x0001 /* ERPN is 1 for second 4GB page */ +#endif + + /* attrib fields */ + li r5,0 + ori r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_I | PPC44x_TLB_G) + + li r0,0 /* TLB slot 0 */ + + tlbwe r3,r0,PPC44x_TLB_PAGEID /* Load the pageid fields */ + tlbwe r4,r0,PPC44x_TLB_XLAT /* Load the translation fields */ + tlbwe r5,r0,PPC44x_TLB_ATTRIB /* Load the attrib/access fields */ + + /* Force context change */ + isync +#endif /* CONFIG_SERIAL_TEXT_DEBUG */ + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheck); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError); + SET_IVOR(14, InstructionTLBError); + SET_IVOR(15, Debug); + + /* Establish the interrupt vector base */ + lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ + mtspr SPRN_IVPR,r4 + +#ifdef CONFIG_440EP + /* Clear DAPUIB flag in CCR0 (enable APU between CPU and FPU) */ + mfspr r2,SPRN_CCR0 + lis r3,0xffef + ori r3,r3,0xffff + and r2,r2,r3 + mtspr SPRN_CCR0,r2 + isync +#endif + + /* + * This is where the main kernel code starts. + */ + + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to current thread */ + addi r4,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + /* stack */ + lis r1,init_thread_union@h + ori r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + + /* Setup PTE pointers for the Abatron bdiGDB */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + lis r4, KERNELBASE@h + ori r4, r4, KERNELBASE@l + stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */ + stw r6, 0(r5) + + /* Let's move on */ + lis r4,start_kernel@h + ori r4,r4,start_kernel@l + lis r3,MSR_KERNEL@h + ori r3,r3,MSR_KERNEL@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi /* change context and jump to start_kernel */ + +/* + * Interrupt vector entry code + * + * The Book E MMUs are always on so we don't need to handle + * interrupts in real mode as with previous PPC processors. In + * this case we handle interrupts in the kernel virtual address + * space. + * + * Interrupt vectors are dynamically placed relative to the + * interrupt prefix as determined by the address of interrupt_base. + * The interrupt vectors offsets are programmed using the labels + * for each interrupt vector entry. + * + * Interrupt vectors must be aligned on a 16 byte boundary. + * We align on a 32 byte cache line boundary for good measure. + */ + +interrupt_base: + /* Critical Input Interrupt */ + CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + + /* Machine Check Interrupt */ +#ifdef CONFIG_440A + MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) +#else + CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) +#endif + + /* Data Storage Interrupt */ + START_EXCEPTION(DataStorage) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + + /* + * Check if it was a store fault, if not then bail + * because a user tried to access a kernel or + * read-protected page. Otherwise, get the + * offending address and handle it. + */ + mfspr r10, SPRN_ESR + andis. r10, r10, ESR_ST@h + beq 2f + + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MMUCR + rlwinm r12,r12,0,0,23 /* Clear TID */ + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + + /* Load PID into MMUCR TID */ + mfspr r12,SPRN_MMUCR /* Get MMUCR */ + mfspr r13,SPRN_PID /* Get PID */ + rlwimi r12,r13,0,24,31 /* Set TID */ + +4: + mtspr SPRN_MMUCR,r12 + + rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + lwzx r11, r12, r11 /* Get pgd/pmd entry */ + rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + lwz r11, 4(r12) /* Get pte entry */ + + andi. r13, r11, _PAGE_RW /* Is it writeable? */ + beq 2f /* Bail if not */ + + /* Update 'changed'. + */ + ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + stw r11, 4(r12) /* Update Linux page table */ + + li r13, PPC44x_TLB_SR@l /* Set SR */ + rlwimi r13, r11, 29, 29, 29 /* SX = _PAGE_HWEXEC */ + rlwimi r13, r11, 0, 30, 30 /* SW = _PAGE_RW */ + rlwimi r13, r11, 29, 28, 28 /* UR = _PAGE_USER */ + rlwimi r12, r11, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */ + rlwimi r12, r11, 29, 30, 30 /* (_PAGE_USER>>3)->r12 */ + and r12, r12, r11 /* HWEXEC/RW & USER */ + rlwimi r13, r12, 0, 26, 26 /* UX = HWEXEC & USER */ + rlwimi r13, r12, 3, 27, 27 /* UW = RW & USER */ + + rlwimi r11,r13,0,26,31 /* Insert static perms */ + + rlwinm r11,r11,0,20,15 /* Clear U0-U3 */ + + /* find the TLB index that caused the fault. It has to be here. */ + tlbsx r10, 0, r10 + + tlbwe r11, r10, PPC44x_TLB_ATTRIB /* Write ATTRIB */ + + /* Done...restore registers and get out of here. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + rfi /* Force context change */ + +2: + /* + * The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b data_access + + /* Instruction Storage Interrupt */ + INSTRUCTION_STORAGE_EXCEPTION + + /* External Input Interrupt */ + EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + + /* Alignment Interrupt */ + ALIGNMENT_EXCEPTION + + /* Program Interrupt */ + PROGRAM_EXCEPTION + + /* Floating Point Unavailable Interrupt */ +#ifdef CONFIG_PPC_FPU + FP_UNAVAILABLE_EXCEPTION +#else + EXCEPTION(0x2010, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) +#endif + + /* System Call Interrupt */ + START_EXCEPTION(SystemCall) + NORMAL_EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0x0c00, DoSyscall) + + /* Auxillary Processor Unavailable Interrupt */ + EXCEPTION(0x2020, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + + /* Decrementer Interrupt */ + DECREMENTER_EXCEPTION + + /* Fixed Internal Timer Interrupt */ + /* TODO: Add FIT support */ + EXCEPTION(0x1010, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + + /* Watchdog Timer Interrupt */ + /* TODO: Add watchdog support */ +#ifdef CONFIG_BOOKE_WDT + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, WatchdogException) +#else + CRITICAL_EXCEPTION(0x1020, WatchdogTimer, UnknownException) +#endif + + /* Data TLB Error Interrupt */ + START_EXCEPTION(DataTLBError) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MMUCR + rlwinm r12,r12,0,0,23 /* Clear TID */ + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + + /* Load PID into MMUCR TID */ + mfspr r12,SPRN_MMUCR + mfspr r13,SPRN_PID /* Get PID */ + rlwimi r12,r13,0,24,31 /* Set TID */ + +4: + mtspr SPRN_MMUCR,r12 + + rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + lwzx r11, r12, r11 /* Get pgd/pmd entry */ + rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + lwz r11, 4(r12) /* Get pte entry */ + andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ + beq 2f /* Bail if not present */ + + ori r11, r11, _PAGE_ACCESSED + stw r11, 4(r12) + + /* Jump to common tlb load */ + b finish_tlb_load + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b data_access + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. + */ + START_EXCEPTION(InstructionTLBError) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + mfspr r10, SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MMUCR + rlwinm r12,r12,0,0,23 /* Clear TID */ + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + + /* Load PID into MMUCR TID */ + mfspr r12,SPRN_MMUCR + mfspr r13,SPRN_PID /* Get PID */ + rlwimi r12,r13,0,24,31 /* Set TID */ + +4: + mtspr SPRN_MMUCR,r12 + + rlwinm r12, r10, 13, 19, 29 /* Compute pgdir/pmd offset */ + lwzx r11, r12, r11 /* Get pgd/pmd entry */ + rlwinm. r12, r11, 0, 0, 20 /* Extract pt base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 23, 20, 28 /* Compute pte address */ + lwz r11, 4(r12) /* Get pte entry */ + andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ + beq 2f /* Bail if not present */ + + ori r11, r11, _PAGE_ACCESSED + stw r11, 4(r12) + + /* Jump to common TLB load point */ + b finish_tlb_load + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b InstructionStorage + + /* Debug Interrupt */ + DEBUG_EXCEPTION + +/* + * Local functions + */ + /* + * Data TLB exceptions will bail out to this point + * if they can't resolve the lightweight TLB fault. + */ +data_access: + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + EXC_XFER_EE_LITE(0x0300, handle_page_fault) + +/* + + * Both the instruction and data TLB miss get to this + * point to load the TLB. + * r10 - EA of fault + * r11 - available to use + * r12 - Pointer to the 64-bit PTE + * r13 - available to use + * MMUCR - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + */ +finish_tlb_load: + /* + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * If shared is set, we cause a zero PID->TID load. + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + + /* Load the next available TLB index */ + lis r13, tlb_44x_index@ha + lwz r13, tlb_44x_index@l(r13) + /* Load the TLB high watermark */ + lis r11, tlb_44x_hwater@ha + lwz r11, tlb_44x_hwater@l(r11) + + /* Increment, rollover, and store TLB index */ + addi r13, r13, 1 + cmpw 0, r13, r11 /* reserve entries */ + ble 7f + li r13, 0 +7: + /* Store the next available TLB index */ + lis r11, tlb_44x_index@ha + stw r13, tlb_44x_index@l(r11) + + lwz r11, 0(r12) /* Get MS word of PTE */ + lwz r12, 4(r12) /* Get LS word of PTE */ + rlwimi r11, r12, 0, 0 , 19 /* Insert RPN */ + tlbwe r11, r13, PPC44x_TLB_XLAT /* Write XLAT */ + + /* + * Create PAGEID. This is the faulting address, + * page size, and valid flag. + */ + li r11, PPC44x_TLB_VALID | PPC44x_TLB_4K + rlwimi r10, r11, 0, 20, 31 /* Insert valid and page size */ + tlbwe r10, r13, PPC44x_TLB_PAGEID /* Write PAGEID */ + + li r10, PPC44x_TLB_SR@l /* Set SR */ + rlwimi r10, r12, 0, 30, 30 /* Set SW = _PAGE_RW */ + rlwimi r10, r12, 29, 29, 29 /* SX = _PAGE_HWEXEC */ + rlwimi r10, r12, 29, 28, 28 /* UR = _PAGE_USER */ + rlwimi r11, r12, 31, 26, 26 /* (_PAGE_USER>>1)->r12 */ + and r11, r12, r11 /* HWEXEC & USER */ + rlwimi r10, r11, 0, 26, 26 /* UX = HWEXEC & USER */ + + rlwimi r12, r10, 0, 26, 31 /* Insert static perms */ + rlwinm r12, r12, 0, 20, 15 /* Clear U0-U3 */ + tlbwe r12, r13, PPC44x_TLB_ATTRIB /* Write ATTRIB */ + + /* Done...restore registers and get out of here. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + rfi /* Force context change */ + +/* + * Global functions + */ + +/* + * extern void giveup_altivec(struct task_struct *prev) + * + * The 44x core does not have an AltiVec unit. + */ +_GLOBAL(giveup_altivec) + blr + +/* + * extern void giveup_fpu(struct task_struct *prev) + * + * The 44x core does not have an FPU. + */ +#ifndef CONFIG_PPC_FPU +_GLOBAL(giveup_fpu) + blr +#endif + +/* + * extern void abort(void) + * + * At present, this routine just applies a system reset. + */ +_GLOBAL(abort) + mfspr r13,SPRN_DBCR0 + oris r13,r13,DBCR0_RST_SYSTEM@h + mtspr SPRN_DBCR0,r13 + +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r4, 0x4(r5) +#endif + mtspr SPRN_PID,r3 + isync /* Force context change */ + blr + +/* + * We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. + */ + .data +_GLOBAL(sdata) +_GLOBAL(empty_zero_page) + .space 4096 + +/* + * To support >32-bit physical addresses, we use an 8KB pgdir. + */ +_GLOBAL(swapper_pg_dir) + .space 8192 + +/* Reserved 4k for the critical exception stack & 4k for the machine + * check stack per CPU for kernel mode exceptions */ + .section .bss + .align 12 +exception_stack_bottom: + .space BOOKE_EXCEPTION_STACK_SIZE +_GLOBAL(exception_stack_top) + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * which is used to pass parameters into the kernel like root=/dev/sda1, etc. + */ +_GLOBAL(cmd_line) + .space 512 + +/* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 + + diff --git a/arch/powerpc/kernel/head_4xx.S b/arch/powerpc/kernel/head_4xx.S new file mode 100644 index 00000000000..8562b807b37 --- /dev/null +++ b/arch/powerpc/kernel/head_4xx.S @@ -0,0 +1,1016 @@ +/* + * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * + * + * Module name: head_4xx.S + * + * Description: + * Kernel execution entry point code. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/ibm4xx.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* As with the other PowerPC ports, it is expected that when code + * execution begins here, the following registers contain valid, yet + * optional, information: + * + * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) + * r4 - Starting address of the init RAM disk + * r5 - Ending address of the init RAM disk + * r6 - Start of kernel command line string (e.g. "mem=96m") + * r7 - End of kernel command line string + * + * This is all going to change RSN when we add bi_recs....... -- Dan + */ + .text +_GLOBAL(_stext) +_GLOBAL(_start) + + /* Save parameters we are passed. + */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + + /* We have to turn on the MMU right away so we get cache modes + * set correctly. + */ + bl initial_mmu + +/* We now have the lower 16 Meg mapped into TLB entries, and the caches + * ready to work. + */ +turn_on_mmu: + lis r0,MSR_KERNEL@h + ori r0,r0,MSR_KERNEL@l + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + rfi /* enables MMU */ + b . /* prevent prefetch past rfi */ + +/* + * This area is used for temporarily saving registers during the + * critical exception prolog. + */ + . = 0xc0 +crit_save: +_GLOBAL(crit_r10) + .space 4 +_GLOBAL(crit_r11) + .space 4 + +/* + * Exception vector entry code. This code runs with address translation + * turned off (i.e. using physical addresses). We assume SPRG3 has the + * physical address of the current task thread_struct. + * Note that we have to have decremented r1 before we write to any fields + * of the exception frame, since a critical interrupt could occur at any + * time, and it will write to the area immediately below the current r1. + */ +#define NORMAL_EXCEPTION_PROLOG \ + mtspr SPRN_SPRG0,r10; /* save two registers to work with */\ + mtspr SPRN_SPRG1,r11; \ + mtspr SPRN_SPRG2,r1; \ + mfcr r10; /* save CR in r10 for now */\ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + beq 1f; \ + mfspr r1,SPRN_SPRG3; /* if from user, start at top of */\ + lwz r1,THREAD_INFO-THREAD(r1); /* this thread's kernel stack */\ + addi r1,r1,THREAD_SIZE; \ +1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\ + tophys(r11,r1); \ + stw r10,_CCR(r11); /* save various registers */\ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r10,SPRN_SPRG2; \ + mfspr r12,SPRN_SRR0; \ + stw r10,GPR1(r11); \ + mfspr r9,SPRN_SRR1; \ + stw r10,0(r11); \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Exception prolog for critical exceptions. This is a little different + * from the normal exception prolog above since a critical exception + * can potentially occur at any point during normal exception processing. + * Thus we cannot use the same SPRG registers as the normal prolog above. + * Instead we use a couple of words of memory at low physical addresses. + * This is OK since we don't support SMP on these processors. + */ +#define CRITICAL_EXCEPTION_PROLOG \ + stw r10,crit_r10@l(0); /* save two registers to work with */\ + stw r11,crit_r11@l(0); \ + mfcr r10; /* save CR in r10 for now */\ + mfspr r11,SPRN_SRR3; /* check whether user or kernel */\ + andi. r11,r11,MSR_PR; \ + lis r11,critical_stack_top@h; \ + ori r11,r11,critical_stack_top@l; \ + beq 1f; \ + /* COMING FROM USER MODE */ \ + mfspr r11,SPRN_SPRG3; /* if from user, start at top of */\ + lwz r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\ + addi r11,r11,THREAD_SIZE; \ +1: subi r11,r11,INT_FRAME_SIZE; /* Allocate an exception frame */\ + tophys(r11,r11); \ + stw r10,_CCR(r11); /* save various registers */\ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_DEAR; /* save DEAR and ESR in the frame */\ + stw r12,_DEAR(r11); /* since they may have had stuff */\ + mfspr r9,SPRN_ESR; /* in them at the point where the */\ + stw r9,_ESR(r11); /* exception was taken */\ + mfspr r12,SPRN_SRR2; \ + stw r1,GPR1(r11); \ + mfspr r9,SPRN_SRR3; \ + stw r1,0(r11); \ + tovirt(r1,r11); \ + rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + + /* + * State at this point: + * r9 saved in stack frame, now saved SRR3 & ~MSR_WE + * r10 saved in crit_r10 and in stack frame, trashed + * r11 saved in crit_r11 and in stack frame, + * now phys stack/exception frame pointer + * r12 saved in stack frame, now saved SRR2 + * CR saved in stack frame, CR0.EQ = !SRR3.PR + * LR, DEAR, ESR in stack frame + * r1 saved in stack frame, now virt stack/excframe pointer + * r0, r3-r8 saved in stack frame + */ + +/* + * Exception vectors. + */ +#define START_EXCEPTION(n, label) \ + . = n; \ +label: + +#define EXCEPTION(n, label, hdlr, xfer) \ + START_EXCEPTION(n, label); \ + NORMAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define CRITICAL_EXCEPTION(n, label, hdlr) \ + START_EXCEPTION(n, label); \ + CRITICAL_EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, crit_transfer_to_handler, \ + ret_from_crit_exc) + +#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,TRAP(r11); \ + lis r10,msr@h; \ + ori r10,r10,msr@l; \ + copyee(r10, r9); \ + bl tfer; \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \ + ret_from_except) + + +/* + * 0x0100 - Critical Interrupt Exception + */ + CRITICAL_EXCEPTION(0x0100, CriticalInterrupt, UnknownException) + +/* + * 0x0200 - Machine Check Exception + */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) + +/* + * 0x0300 - Data Storage Exception + * This happens for just a few reasons. U0 set (but we don't do that), + * or zone protection fault (user violation, write to protected page). + * If this is just an update of modified status, we do that quickly + * and exit. Otherwise, we call heavywight functions to do the work. + */ + START_EXCEPTION(0x0300, DataStorage) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG4, r12 + mtspr SPRN_SPRG5, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG7, r11 + mtspr SPRN_SPRG6, r12 +#endif + + /* First, check if it was a zone fault (which means a user + * tried to access a kernel or read-protected page - always + * a SEGV). All other faults here must be stores, so no + * need to check ESR_DST as well. */ + mfspr r10, SPRN_ESR + andis. r10, r10, ESR_DIZ@h + bne 2f + + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r11, 0(r11) /* Get L1 entry */ + rlwinm. r12, r11, 0, 0, 19 /* Extract L2 (pte) base address */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + + andi. r9, r11, _PAGE_RW /* Is it writeable? */ + beq 2f /* Bail if not */ + + /* Update 'changed'. + */ + ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + stw r11, 0(r12) /* Update Linux page table */ + + /* Most of the Linux PTE is ready to load into the TLB LO. + * We set ZSEL, where only the LS-bit determines user access. + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * If shared is set, we cause a zero PID->TID load. + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + li r12, 0x0ce2 + andc r11, r11, r12 /* Make sure 20, 21 are zero */ + + /* find the TLB index that caused the fault. It has to be here. + */ + tlbsx r9, 0, r10 + + tlbwe r11, r9, TLB_DATA /* Load TLB LO */ + + /* Done...restore registers and get out of here. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + PPC405_ERR77_SYNC + rfi /* Should sync shadow TLBs */ + b . /* prevent prefetch past rfi */ + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b DataAccess + +/* + * 0x0400 - Instruction Storage Exception + * This is caused by a fetch from non-execute or guarded pages. + */ + START_EXCEPTION(0x0400, InstructionAccess) + NORMAL_EXCEPTION_PROLOG + mr r4,r12 /* Pass SRR0 as arg2 */ + li r5,0 /* Pass zero as arg3 */ + EXC_XFER_EE_LITE(0x400, handle_page_fault) + +/* 0x0500 - External Interrupt Exception */ + EXCEPTION(0x0500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* 0x0600 - Alignment Exception */ + START_EXCEPTION(0x0600, Alignment) + NORMAL_EXCEPTION_PROLOG + mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */ + stw r4,_DEAR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, AlignmentException) + +/* 0x0700 - Program Exception */ + START_EXCEPTION(0x0700, ProgramCheck) + NORMAL_EXCEPTION_PROLOG + mfspr r4,SPRN_ESR /* Grab the ESR and save it */ + stw r4,_ESR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0x700, ProgramCheckException) + + EXCEPTION(0x0800, Trap_08, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0900, Trap_09, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0A00, Trap_0A, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0B00, Trap_0B, UnknownException, EXC_XFER_EE) + +/* 0x0C00 - System Call Exception */ + START_EXCEPTION(0x0C00, SystemCall) + NORMAL_EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + + EXCEPTION(0x0D00, Trap_0D, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0E00, Trap_0E, UnknownException, EXC_XFER_EE) + EXCEPTION(0x0F00, Trap_0F, UnknownException, EXC_XFER_EE) + +/* 0x1000 - Programmable Interval Timer (PIT) Exception */ + START_EXCEPTION(0x1000, Decrementer) + NORMAL_EXCEPTION_PROLOG + lis r0,TSR_PIS@h + mtspr SPRN_TSR,r0 /* Clear the PIT exception */ + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_LITE(0x1000, timer_interrupt) + +#if 0 +/* NOTE: + * FIT and WDT handlers are not implemented yet. + */ + +/* 0x1010 - Fixed Interval Timer (FIT) Exception +*/ + STND_EXCEPTION(0x1010, FITException, UnknownException) + +/* 0x1020 - Watchdog Timer (WDT) Exception +*/ +#ifdef CONFIG_BOOKE_WDT + CRITICAL_EXCEPTION(0x1020, WDTException, WatchdogException) +#else + CRITICAL_EXCEPTION(0x1020, WDTException, UnknownException) +#endif +#endif + +/* 0x1100 - Data TLB Miss Exception + * As the name implies, translation is not in the MMU, so search the + * page tables and fix it. The only purpose of this function is to + * load TLB entries from the page table if they exist. + */ + START_EXCEPTION(0x1100, DTLBMiss) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG4, r12 + mtspr SPRN_SPRG5, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG7, r11 + mtspr SPRN_SPRG6, r12 +#endif + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r12, 0(r11) /* Get L1 entry */ + andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + andi. r9, r11, _PAGE_PRESENT + beq 5f + + ori r11, r11, _PAGE_ACCESSED + stw r11, 0(r12) + + /* Create TLB tag. This is the faulting address plus a static + * set of bits. These are size, valid, E, U0. + */ + li r12, 0x00c0 + rlwimi r10, r12, 0, 20, 31 + + b finish_tlb_load + +2: /* Check for possible large-page pmd entry */ + rlwinm. r9, r12, 2, 22, 24 + beq 5f + + /* Create TLB tag. This is the faulting address, plus a static + * set of bits (valid, E, U0) plus the size from the PMD. + */ + ori r9, r9, 0x40 + rlwimi r10, r9, 0, 20, 31 + mr r11, r12 + + b finish_tlb_load + +5: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b DataAccess + +/* 0x1200 - Instruction TLB Miss Exception + * Nearly the same as above, except we get our information from different + * registers and bailout to a different point. + */ + START_EXCEPTION(0x1200, ITLBMiss) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 +#ifdef CONFIG_403GCX + stw r12, 0(r0) + stw r9, 4(r0) + mfcr r11 + mfspr r12, SPRN_PID + stw r11, 8(r0) + stw r12, 12(r0) +#else + mtspr SPRN_SPRG4, r12 + mtspr SPRN_SPRG5, r9 + mfcr r11 + mfspr r12, SPRN_PID + mtspr SPRN_SPRG7, r11 + mtspr SPRN_SPRG6, r12 +#endif + mfspr r10, SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + cmplw r10, r11 + blt+ 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + li r9, 0 + mtspr SPRN_PID, r9 /* TLB will have 0 TID */ + b 4f + + /* Get the PGD for the current thread. + */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) +4: + tophys(r11, r11) + rlwimi r11, r10, 12, 20, 29 /* Create L1 (pgdir/pmd) address */ + lwz r12, 0(r11) /* Get L1 entry */ + andi. r9, r12, _PMD_PRESENT /* Check if it points to a PTE page */ + beq 2f /* Bail if no table */ + + rlwimi r12, r10, 22, 20, 29 /* Compute PTE address */ + lwz r11, 0(r12) /* Get Linux PTE */ + andi. r9, r11, _PAGE_PRESENT + beq 5f + + ori r11, r11, _PAGE_ACCESSED + stw r11, 0(r12) + + /* Create TLB tag. This is the faulting address plus a static + * set of bits. These are size, valid, E, U0. + */ + li r12, 0x00c0 + rlwimi r10, r12, 0, 20, 31 + + b finish_tlb_load + +2: /* Check for possible large-page pmd entry */ + rlwinm. r9, r12, 2, 22, 24 + beq 5f + + /* Create TLB tag. This is the faulting address, plus a static + * set of bits (valid, E, U0) plus the size from the PMD. + */ + ori r9, r9, 0x40 + rlwimi r10, r9, 0, 20, 31 + mr r11, r12 + + b finish_tlb_load + +5: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b InstructionAccess + + EXCEPTION(0x1300, Trap_13, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1400, Trap_14, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) +#ifdef CONFIG_IBM405_ERR51 + /* 405GP errata 51 */ + START_EXCEPTION(0x1700, Trap_17) + b DTLBMiss +#else + EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) +#endif + EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1A00, Trap_1A, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1B00, Trap_1B, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1C00, Trap_1C, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1D00, Trap_1D, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1E00, Trap_1E, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1F00, Trap_1F, UnknownException, EXC_XFER_EE) + +/* Check for a single step debug exception while in an exception + * handler before state has been saved. This is to catch the case + * where an instruction that we are trying to single step causes + * an exception (eg ITLB/DTLB miss) and thus the first instruction of + * the exception handler generates a single step debug exception. + * + * If we get a debug trap on the first instruction of an exception handler, + * we reset the MSR_DE in the _exception handler's_ MSR (the debug trap is + * a critical exception, so we are using SPRN_CSRR1 to manipulate the MSR). + * The exception handler was handling a non-critical interrupt, so it will + * save (and later restore) the MSR via SPRN_SRR1, which will still have + * the MSR_DE bit set. + */ + /* 0x2000 - Debug Exception */ + START_EXCEPTION(0x2000, DebugTrap) + CRITICAL_EXCEPTION_PROLOG + + /* + * If this is a single step or branch-taken exception in an + * exception entry sequence, it was probably meant to apply to + * the code where the exception occurred (since exception entry + * doesn't turn off DE automatically). We simulate the effect + * of turning off DE on entry to an exception handler by turning + * off DE in the SRR3 value and clearing the debug status. + */ + mfspr r10,SPRN_DBSR /* check single-step/branch taken */ + andis. r10,r10,DBSR_IC@h + beq+ 2f + + andi. r10,r9,MSR_IR|MSR_PR /* check supervisor + MMU off */ + beq 1f /* branch and fix it up */ + + mfspr r10,SPRN_SRR2 /* Faulting instruction address */ + cmplwi r10,0x2100 + bgt+ 2f /* address above exception vectors */ + + /* here it looks like we got an inappropriate debug exception. */ +1: rlwinm r9,r9,0,~MSR_DE /* clear DE in the SRR3 value */ + lis r10,DBSR_IC@h /* clear the IC event */ + mtspr SPRN_DBSR,r10 + /* restore state and get out */ + lwz r10,_CCR(r11) + lwz r0,GPR0(r11) + lwz r1,GPR1(r11) + mtcrf 0x80,r10 + mtspr SPRN_SRR2,r12 + mtspr SPRN_SRR3,r9 + lwz r9,GPR9(r11) + lwz r12,GPR12(r11) + lwz r10,crit_r10@l(0) + lwz r11,crit_r11@l(0) + PPC405_ERR77_SYNC + rfci + b . + + /* continue normal handling for a critical exception... */ +2: mfspr r4,SPRN_DBSR + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_TEMPLATE(DebugException, 0x2002, \ + (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \ + NOCOPY, crit_transfer_to_handler, ret_from_crit_exc) + +/* + * The other Data TLB exceptions bail out to this point + * if they can't resolve the lightweight TLB fault. + */ +DataAccess: + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + EXC_XFER_EE_LITE(0x300, handle_page_fault) + +/* Other PowerPC processors, namely those derived from the 6xx-series + * have vectors from 0x2100 through 0x2F00 defined, but marked as reserved. + * However, for the 4xx-series processors these are neither defined nor + * reserved. + */ + + /* Damn, I came up one instruction too many to fit into the + * exception space :-). Both the instruction and data TLB + * miss get to this point to load the TLB. + * r10 - TLB_TAG value + * r11 - Linux PTE + * r12, r9 - avilable to use + * PID - loaded with proper value when we get here + * Upon exit, we reload everything and RFI. + * Actually, it will fit now, but oh well.....a common place + * to load the TLB. + */ +tlb_4xx_index: + .long 0 +finish_tlb_load: + /* load the next available TLB index. + */ + lwz r9, tlb_4xx_index@l(0) + addi r9, r9, 1 + andi. r9, r9, (PPC4XX_TLB_SIZE-1) + stw r9, tlb_4xx_index@l(0) + +6: + /* + * Clear out the software-only bits in the PTE to generate the + * TLB_DATA value. These are the bottom 2 bits of the RPM, the + * top 3 bits of the zone field, and M. + */ + li r12, 0x0ce2 + andc r11, r11, r12 + + tlbwe r11, r9, TLB_DATA /* Load TLB LO */ + tlbwe r10, r9, TLB_TAG /* Load TLB HI */ + + /* Done...restore registers and get out of here. + */ +#ifdef CONFIG_403GCX + lwz r12, 12(r0) + lwz r11, 8(r0) + mtspr SPRN_PID, r12 + mtcr r11 + lwz r9, 4(r0) + lwz r12, 0(r0) +#else + mfspr r12, SPRN_SPRG6 + mfspr r11, SPRN_SPRG7 + mtspr SPRN_PID, r12 + mtcr r11 + mfspr r9, SPRN_SPRG5 + mfspr r12, SPRN_SPRG4 +#endif + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + PPC405_ERR77_SYNC + rfi /* Should sync shadow TLBs */ + b . /* prevent prefetch past rfi */ + +/* extern void giveup_fpu(struct task_struct *prev) + * + * The PowerPC 4xx family of processors do not have an FPU, so this just + * returns. + */ +_GLOBAL(giveup_fpu) + blr + +/* This is where the main kernel code starts. + */ +start_here: + + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init /* We have to do this with MMU on */ + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + +/* Go back to running unmapped so we can load up new values + * and change to using our exception vectors. + * On the 4xx, all we have to do is invalidate the TLB to clear + * the old 16M byte TLB mappings. + */ + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + lis r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@h + ori r3,r3,(MSR_KERNEL & ~(MSR_IR|MSR_DR))@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi + b . /* prevent prefetch past rfi */ + +/* Load up the kernel context */ +2: + sync /* Flush to memory before changing TLB */ + tlbia + isync /* Flush shadow TLBs */ + + /* set up the PTE pointers for the Abatron bdiGDB. + */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* Must match your Abatron config file */ + tophys(r5,r5) + stw r6, 0(r5) + +/* Now turn on the MMU for real! */ + lis r4,MSR_KERNEL@h + ori r4,r4,MSR_KERNEL@l + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfi /* enable MMU and jump to start_kernel */ + b . /* prevent prefetch past rfi */ + +/* Set up the initial MMU state so we can do the first level of + * kernel initialization. This maps the first 16 MBytes of memory 1:1 + * virtual to physical and more importantly sets the cache mode. + */ +initial_mmu: + tlbia /* Invalidate all TLB entries */ + isync + + /* We should still be executing code at physical address 0x0000xxxx + * at this point. However, start_here is at virtual address + * 0xC000xxxx. So, set up a TLB mapping to cover this once + * translation is enabled. + */ + + lis r3,KERNELBASE@h /* Load the kernel virtual address */ + ori r3,r3,KERNELBASE@l + tophys(r4,r3) /* Load the kernel physical address */ + + iccci r0,r3 /* Invalidate the i-cache before use */ + + /* Load the kernel PID. + */ + li r0,0 + mtspr SPRN_PID,r0 + sync + + /* Configure and load two entries into TLB slots 62 and 63. + * In case we are pinning TLBs, these are reserved in by the + * other TLB functions. If not reserving, then it doesn't + * matter where they are loaded. + */ + clrrwi r4,r4,10 /* Mask off the real page number */ + ori r4,r4,(TLB_WR | TLB_EX) /* Set the write and execute bits */ + + clrrwi r3,r3,10 /* Mask off the effective page number */ + ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_16M)) + + li r0,63 /* TLB slot 63 */ + + tlbwe r4,r0,TLB_DATA /* Load the data portion of the entry */ + tlbwe r3,r0,TLB_TAG /* Load the tag portion of the entry */ + +#if defined(CONFIG_SERIAL_TEXT_DEBUG) && defined(SERIAL_DEBUG_IO_BASE) + + /* Load a TLB entry for the UART, so that ppc4xx_progress() can use + * the UARTs nice and early. We use a 4k real==virtual mapping. */ + + lis r3,SERIAL_DEBUG_IO_BASE@h + ori r3,r3,SERIAL_DEBUG_IO_BASE@l + mr r4,r3 + clrrwi r4,r4,12 + ori r4,r4,(TLB_WR|TLB_I|TLB_M|TLB_G) + + clrrwi r3,r3,12 + ori r3,r3,(TLB_VALID | TLB_PAGESZ(PAGESZ_4K)) + + li r0,0 /* TLB slot 0 */ + tlbwe r4,r0,TLB_DATA + tlbwe r3,r0,TLB_TAG +#endif /* CONFIG_SERIAL_DEBUG_TEXT && SERIAL_DEBUG_IO_BASE */ + + isync + + /* Establish the exception vector base + */ + lis r4,KERNELBASE@h /* EVPR only uses the high 16-bits */ + tophys(r0,r4) /* Use the physical address */ + mtspr SPRN_EVPR,r0 + + blr + +_GLOBAL(abort) + mfspr r13,SPRN_DBCR0 + oris r13,r13,DBCR0_RST_SYSTEM@h + mtspr SPRN_DBCR0,r13 + +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, KERNELBASE@h + lwz r5, 0xf0(r5) + stw r4, 0x4(r5) +#endif + sync + mtspr SPRN_PID,r3 + isync /* Need an isync to flush shadow */ + /* TLBs after changing PID */ + blr + +/* We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. + */ + .data +_GLOBAL(sdata) +_GLOBAL(empty_zero_page) + .space 4096 +_GLOBAL(swapper_pg_dir) + .space 4096 + + +/* Stack for handling critical exceptions from kernel mode */ + .section .bss + .align 12 +exception_stack_bottom: + .space 4096 +critical_stack_top: +_GLOBAL(exception_stack_top) + +/* This space gets a copy of optional info passed to us by the bootstrap + * which is used to pass parameters into the kernel like root=/dev/sda1, etc. + */ +_GLOBAL(cmd_line) + .space 512 + +/* Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S new file mode 100644 index 00000000000..22a5ee07e1e --- /dev/null +++ b/arch/powerpc/kernel/head_64.S @@ -0,0 +1,2011 @@ +/* + * arch/ppc64/kernel/head.S + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * Adapted for 64bit PowerPC by Dave Engebretsen, Peter Bergner, and + * Mike Corrigan {engebret|bergner|mikejc}@us.ibm.com + * + * This file contains the low-level support and setup for the + * PowerPC-64 platform, including trap and interrupt dispatch. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/systemcfg.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/bug.h> +#include <asm/cputable.h> +#include <asm/setup.h> +#include <asm/hvcall.h> +#include <asm/iSeries/LparMap.h> + +#ifdef CONFIG_PPC_ISERIES +#define DO_SOFT_DISABLE +#endif + +/* + * We layout physical memory as follows: + * 0x0000 - 0x00ff : Secondary processor spin code + * 0x0100 - 0x2fff : pSeries Interrupt prologs + * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs + * 0x6000 - 0x6fff : Initial (CPU0) segment table + * 0x7000 - 0x7fff : FWNMI data area + * 0x8000 - : Early init and support code + */ + +/* + * SPRG Usage + * + * Register Definition + * + * SPRG0 reserved for hypervisor + * SPRG1 temp - used to save gpr + * SPRG2 temp - used to save gpr + * SPRG3 virt addr of paca + */ + +/* + * Entering into this code we make the following assumptions: + * For pSeries: + * 1. The MMU is off & open firmware is running in real mode. + * 2. The kernel is entered at __start + * + * For iSeries: + * 1. The MMU is on (as it always is for iSeries) + * 2. The kernel is entered at system_reset_iSeries + */ + + .text + .globl _stext +_stext: +#ifdef CONFIG_PPC_MULTIPLATFORM +_GLOBAL(__start) + /* NOP this out unconditionally */ +BEGIN_FTR_SECTION + b .__start_initialization_multiplatform +END_FTR_SECTION(0, 1) +#endif /* CONFIG_PPC_MULTIPLATFORM */ + + /* Catch branch to 0 in real mode */ + trap + +#ifdef CONFIG_PPC_ISERIES + /* + * At offset 0x20, there is a pointer to iSeries LPAR data. + * This is required by the hypervisor + */ + . = 0x20 + .llong hvReleaseData-KERNELBASE + + /* + * At offset 0x28 and 0x30 are offsets to the mschunks_map + * array (used by the iSeries LPAR debugger to do translation + * between physical addresses and absolute addresses) and + * to the pidhash table (also used by the debugger) + */ + .llong mschunks_map-KERNELBASE + .llong 0 /* pidhash-KERNELBASE SFRXXX */ + + /* Offset 0x38 - Pointer to start of embedded System.map */ + .globl embedded_sysmap_start +embedded_sysmap_start: + .llong 0 + /* Offset 0x40 - Pointer to end of embedded System.map */ + .globl embedded_sysmap_end +embedded_sysmap_end: + .llong 0 + +#endif /* CONFIG_PPC_ISERIES */ + + /* Secondary processors spin on this value until it goes to 1. */ + .globl __secondary_hold_spinloop +__secondary_hold_spinloop: + .llong 0x0 + + /* Secondary processors write this value with their cpu # */ + /* after they enter the spin loop immediately below. */ + .globl __secondary_hold_acknowledge +__secondary_hold_acknowledge: + .llong 0x0 + + . = 0x60 +/* + * The following code is used on pSeries to hold secondary processors + * in a spin loop after they have been freed from OpenFirmware, but + * before the bulk of the kernel has been relocated. This code + * is relocated to physical address 0x60 before prom_init is run. + * All of it must fit below the first exception vector at 0x100. + */ +_GLOBAL(__secondary_hold) + mfmsr r24 + ori r24,r24,MSR_RI + mtmsrd r24 /* RI on */ + + /* Grab our linux cpu number */ + mr r24,r3 + + /* Tell the master cpu we're here */ + /* Relocation is off & we are located at an address less */ + /* than 0x100, so only need to grab low order offset. */ + std r24,__secondary_hold_acknowledge@l(0) + sync + + /* All secondary cpus wait here until told to start. */ +100: ld r4,__secondary_hold_spinloop@l(0) + cmpdi 0,r4,1 + bne 100b + +#ifdef CONFIG_HMT + b .hmt_init +#else +#ifdef CONFIG_SMP + mr r3,r24 + b .pSeries_secondary_smp_init +#else + BUG_OPCODE +#endif +#endif + +/* This value is used to mark exception frames on the stack. */ + .section ".toc","aw" +exception_marker: + .tc ID_72656773_68657265[TC],0x7265677368657265 + .text + +/* + * The following macros define the code that appears as + * the prologue to each of the exception handlers. They + * are split into two parts to allow a single kernel binary + * to be used for pSeries and iSeries. + * LOL. One day... - paulus + */ + +/* + * We make as much of the exception code common between native + * exception handlers (including pSeries LPAR) and iSeries LPAR + * implementations as possible. + */ + +/* + * This is the start of the interrupt handlers for pSeries + * This code runs with relocation off. + */ +#define EX_R9 0 +#define EX_R10 8 +#define EX_R11 16 +#define EX_R12 24 +#define EX_R13 32 +#define EX_SRR0 40 +#define EX_R3 40 /* SLB miss saves R3, but not SRR0 */ +#define EX_DAR 48 +#define EX_LR 48 /* SLB miss saves LR, but not DAR */ +#define EX_DSISR 56 +#define EX_CCR 60 + +#define EXCEPTION_PROLOG_PSERIES(area, label) \ + mfspr r13,SPRG3; /* get paca address into r13 */ \ + std r9,area+EX_R9(r13); /* save r9 - r12 */ \ + std r10,area+EX_R10(r13); \ + std r11,area+EX_R11(r13); \ + std r12,area+EX_R12(r13); \ + mfspr r9,SPRG1; \ + std r9,area+EX_R13(r13); \ + mfcr r9; \ + clrrdi r12,r13,32; /* get high part of &label */ \ + mfmsr r10; \ + mfspr r11,SRR0; /* save SRR0 */ \ + ori r12,r12,(label)@l; /* virt addr of handler */ \ + ori r10,r10,MSR_IR|MSR_DR|MSR_RI; \ + mtspr SRR0,r12; \ + mfspr r12,SRR1; /* and SRR1 */ \ + mtspr SRR1,r10; \ + rfid; \ + b . /* prevent speculative execution */ + +/* + * This is the start of the interrupt handlers for iSeries + * This code runs with relocation on. + */ +#define EXCEPTION_PROLOG_ISERIES_1(area) \ + mfspr r13,SPRG3; /* get paca address into r13 */ \ + std r9,area+EX_R9(r13); /* save r9 - r12 */ \ + std r10,area+EX_R10(r13); \ + std r11,area+EX_R11(r13); \ + std r12,area+EX_R12(r13); \ + mfspr r9,SPRG1; \ + std r9,area+EX_R13(r13); \ + mfcr r9 + +#define EXCEPTION_PROLOG_ISERIES_2 \ + mfmsr r10; \ + ld r11,PACALPPACA+LPPACASRR0(r13); \ + ld r12,PACALPPACA+LPPACASRR1(r13); \ + ori r10,r10,MSR_RI; \ + mtmsrd r10,1 + +/* + * The common exception prolog is used for all except a few exceptions + * such as a segment miss on a kernel address. We have to be prepared + * to take another exception from the point where we first touch the + * kernel stack onwards. + * + * On entry r13 points to the paca, r9-r13 are saved in the paca, + * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and + * SRR1, and relocation is on. + */ +#define EXCEPTION_PROLOG_COMMON(n, area) \ + andi. r10,r12,MSR_PR; /* See if coming from user */ \ + mr r10,r1; /* Save r1 */ \ + subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ + beq- 1f; \ + ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ +1: cmpdi cr1,r1,0; /* check if r1 is in userspace */ \ + bge- cr1,bad_stack; /* abort if it is */ \ + std r9,_CCR(r1); /* save CR in stackframe */ \ + std r11,_NIP(r1); /* save SRR0 in stackframe */ \ + std r12,_MSR(r1); /* save SRR1 in stackframe */ \ + std r10,0(r1); /* make stack chain pointer */ \ + std r0,GPR0(r1); /* save r0 in stackframe */ \ + std r10,GPR1(r1); /* save r1 in stackframe */ \ + std r2,GPR2(r1); /* save r2 in stackframe */ \ + SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ + SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe */ \ + ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \ + ld r10,area+EX_R10(r13); \ + std r9,GPR9(r1); \ + std r10,GPR10(r1); \ + ld r9,area+EX_R11(r13); /* move r11 - r13 to stackframe */ \ + ld r10,area+EX_R12(r13); \ + ld r11,area+EX_R13(r13); \ + std r9,GPR11(r1); \ + std r10,GPR12(r1); \ + std r11,GPR13(r1); \ + ld r2,PACATOC(r13); /* get kernel TOC into r2 */ \ + mflr r9; /* save LR in stackframe */ \ + std r9,_LINK(r1); \ + mfctr r10; /* save CTR in stackframe */ \ + std r10,_CTR(r1); \ + mfspr r11,XER; /* save XER in stackframe */ \ + std r11,_XER(r1); \ + li r9,(n)+1; \ + std r9,_TRAP(r1); /* set trap number */ \ + li r10,0; \ + ld r11,exception_marker@toc(r2); \ + std r10,RESULT(r1); /* clear regs->result */ \ + std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ + +/* + * Exception vectors. + */ +#define STD_EXCEPTION_PSERIES(n, label) \ + . = n; \ + .globl label##_pSeries; \ +label##_pSeries: \ + HMT_MEDIUM; \ + mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, label##_common) + +#define STD_EXCEPTION_ISERIES(n, label, area) \ + .globl label##_iSeries; \ +label##_iSeries: \ + HMT_MEDIUM; \ + mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ + EXCEPTION_PROLOG_ISERIES_1(area); \ + EXCEPTION_PROLOG_ISERIES_2; \ + b label##_common + +#define MASKABLE_EXCEPTION_ISERIES(n, label) \ + .globl label##_iSeries; \ +label##_iSeries: \ + HMT_MEDIUM; \ + mtspr SPRG1,r13; /* save r13 */ \ + RUNLATCH_ON(r13); \ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN); \ + lbz r10,PACAPROCENABLED(r13); \ + cmpwi 0,r10,0; \ + beq- label##_iSeries_masked; \ + EXCEPTION_PROLOG_ISERIES_2; \ + b label##_common; \ + +#ifdef DO_SOFT_DISABLE +#define DISABLE_INTS \ + lbz r10,PACAPROCENABLED(r13); \ + li r11,0; \ + std r10,SOFTE(r1); \ + mfmsr r10; \ + stb r11,PACAPROCENABLED(r13); \ + ori r10,r10,MSR_EE; \ + mtmsrd r10,1 + +#define ENABLE_INTS \ + lbz r10,PACAPROCENABLED(r13); \ + mfmsr r11; \ + std r10,SOFTE(r1); \ + ori r11,r11,MSR_EE; \ + mtmsrd r11,1 + +#else /* hard enable/disable interrupts */ +#define DISABLE_INTS + +#define ENABLE_INTS \ + ld r12,_MSR(r1); \ + mfmsr r11; \ + rlwimi r11,r12,0,MSR_EE; \ + mtmsrd r11,1 + +#endif + +#define STD_EXCEPTION_COMMON(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + DISABLE_INTS; \ + bl .save_nvgprs; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except + +#define STD_EXCEPTION_COMMON_LITE(trap, label, hdlr) \ + .align 7; \ + .globl label##_common; \ +label##_common: \ + EXCEPTION_PROLOG_COMMON(trap, PACA_EXGEN); \ + DISABLE_INTS; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b .ret_from_except_lite + +/* + * Start of pSeries system interrupt routines + */ + . = 0x100 + .globl __start_interrupts +__start_interrupts: + + STD_EXCEPTION_PSERIES(0x100, system_reset) + + . = 0x200 +_machine_check_pSeries: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + + . = 0x300 + .globl data_access_pSeries +data_access_pSeries: + HMT_MEDIUM + mtspr SPRG1,r13 +BEGIN_FTR_SECTION + mtspr SPRG2,r12 + mfspr r13,DAR + mfspr r12,DSISR + srdi r13,r13,60 + rlwimi r13,r12,16,0x20 + mfcr r12 + cmpwi r13,0x2c + beq .do_stab_bolted_pSeries + mtcrf 0x80,r12 + mfspr r12,SPRG2 +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, data_access_common) + + . = 0x380 + .globl data_access_slb_pSeries +data_access_slb_pSeries: + HMT_MEDIUM + mtspr SPRG1,r13 + RUNLATCH_ON(r13) + mfspr r13,SPRG3 /* get paca address into r13 */ + std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r9,SPRG1 + std r9,PACA_EXSLB+EX_R13(r13) + mfcr r9 + mfspr r12,SRR1 /* and SRR1 */ + mfspr r3,DAR + b .do_slb_miss /* Rel. branch works in real mode */ + + STD_EXCEPTION_PSERIES(0x400, instruction_access) + + . = 0x480 + .globl instruction_access_slb_pSeries +instruction_access_slb_pSeries: + HMT_MEDIUM + mtspr SPRG1,r13 + RUNLATCH_ON(r13) + mfspr r13,SPRG3 /* get paca address into r13 */ + std r9,PACA_EXSLB+EX_R9(r13) /* save r9 - r12 */ + std r10,PACA_EXSLB+EX_R10(r13) + std r11,PACA_EXSLB+EX_R11(r13) + std r12,PACA_EXSLB+EX_R12(r13) + std r3,PACA_EXSLB+EX_R3(r13) + mfspr r9,SPRG1 + std r9,PACA_EXSLB+EX_R13(r13) + mfcr r9 + mfspr r12,SRR1 /* and SRR1 */ + mfspr r3,SRR0 /* SRR0 is faulting address */ + b .do_slb_miss /* Rel. branch works in real mode */ + + STD_EXCEPTION_PSERIES(0x500, hardware_interrupt) + STD_EXCEPTION_PSERIES(0x600, alignment) + STD_EXCEPTION_PSERIES(0x700, program_check) + STD_EXCEPTION_PSERIES(0x800, fp_unavailable) + STD_EXCEPTION_PSERIES(0x900, decrementer) + STD_EXCEPTION_PSERIES(0xa00, trap_0a) + STD_EXCEPTION_PSERIES(0xb00, trap_0b) + + . = 0xc00 + .globl system_call_pSeries +system_call_pSeries: + HMT_MEDIUM + RUNLATCH_ON(r9) + mr r9,r13 + mfmsr r10 + mfspr r13,SPRG3 + mfspr r11,SRR0 + clrrdi r12,r13,32 + oris r12,r12,system_call_common@h + ori r12,r12,system_call_common@l + mtspr SRR0,r12 + ori r10,r10,MSR_IR|MSR_DR|MSR_RI + mfspr r12,SRR1 + mtspr SRR1,r10 + rfid + b . /* prevent speculative execution */ + + STD_EXCEPTION_PSERIES(0xd00, single_step) + STD_EXCEPTION_PSERIES(0xe00, trap_0e) + + /* We need to deal with the Altivec unavailable exception + * here which is at 0xf20, thus in the middle of the + * prolog code of the PerformanceMonitor one. A little + * trickery is thus necessary + */ + . = 0xf00 + b performance_monitor_pSeries + + STD_EXCEPTION_PSERIES(0xf20, altivec_unavailable) + + STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) + STD_EXCEPTION_PSERIES(0x1700, altivec_assist) + + . = 0x3000 + +/*** pSeries interrupt support ***/ + + /* moved from 0xf00 */ + STD_EXCEPTION_PSERIES(., performance_monitor) + + .align 7 +_GLOBAL(do_stab_bolted_pSeries) + mtcrf 0x80,r12 + mfspr r12,SPRG2 + EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) + +/* + * Vectors for the FWNMI option. Share common code. + */ + .globl system_reset_fwnmi +system_reset_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + +#ifdef CONFIG_PPC_ISERIES +/*** ISeries-LPAR interrupt handlers ***/ + + STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) + + .globl data_access_iSeries +data_access_iSeries: + mtspr SPRG1,r13 +BEGIN_FTR_SECTION + mtspr SPRG2,r12 + mfspr r13,DAR + mfspr r12,DSISR + srdi r13,r13,60 + rlwimi r13,r12,16,0x20 + mfcr r12 + cmpwi r13,0x2c + beq .do_stab_bolted_iSeries + mtcrf 0x80,r12 + mfspr r12,SPRG2 +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + EXCEPTION_PROLOG_ISERIES_1(PACA_EXGEN) + EXCEPTION_PROLOG_ISERIES_2 + b data_access_common + +.do_stab_bolted_iSeries: + mtcrf 0x80,r12 + mfspr r12,SPRG2 + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + EXCEPTION_PROLOG_ISERIES_2 + b .do_stab_bolted + + .globl data_access_slb_iSeries +data_access_slb_iSeries: + mtspr SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + std r3,PACA_EXSLB+EX_R3(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + mfspr r3,DAR + b .do_slb_miss + + STD_EXCEPTION_ISERIES(0x400, instruction_access, PACA_EXGEN) + + .globl instruction_access_slb_iSeries +instruction_access_slb_iSeries: + mtspr SPRG1,r13 /* save r13 */ + EXCEPTION_PROLOG_ISERIES_1(PACA_EXSLB) + std r3,PACA_EXSLB+EX_R3(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + ld r3,PACALPPACA+LPPACASRR0(r13) + b .do_slb_miss + + MASKABLE_EXCEPTION_ISERIES(0x500, hardware_interrupt) + STD_EXCEPTION_ISERIES(0x600, alignment, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0x700, program_check, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0x800, fp_unavailable, PACA_EXGEN) + MASKABLE_EXCEPTION_ISERIES(0x900, decrementer) + STD_EXCEPTION_ISERIES(0xa00, trap_0a, PACA_EXGEN) + STD_EXCEPTION_ISERIES(0xb00, trap_0b, PACA_EXGEN) + + .globl system_call_iSeries +system_call_iSeries: + mr r9,r13 + mfspr r13,SPRG3 + EXCEPTION_PROLOG_ISERIES_2 + b system_call_common + + STD_EXCEPTION_ISERIES( 0xd00, single_step, PACA_EXGEN) + STD_EXCEPTION_ISERIES( 0xe00, trap_0e, PACA_EXGEN) + STD_EXCEPTION_ISERIES( 0xf00, performance_monitor, PACA_EXGEN) + + .globl system_reset_iSeries +system_reset_iSeries: + mfspr r13,SPRG3 /* Get paca address */ + mfmsr r24 + ori r24,r24,MSR_RI + mtmsrd r24 /* RI on */ + lhz r24,PACAPACAINDEX(r13) /* Get processor # */ + cmpwi 0,r24,0 /* Are we processor 0? */ + beq .__start_initialization_iSeries /* Start up the first processor */ + mfspr r4,SPRN_CTRLF + li r5,CTRL_RUNLATCH /* Turn off the run light */ + andc r4,r4,r5 + mtspr SPRN_CTRLT,r4 + +1: + HMT_LOW +#ifdef CONFIG_SMP + lbz r23,PACAPROCSTART(r13) /* Test if this processor + * should start */ + sync + LOADADDR(r3,current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r3,r3,r28 + addi r1,r3,THREAD_SIZE + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpwi 0,r23,0 + beq iSeries_secondary_smp_loop /* Loop until told to go */ + bne .__secondary_start /* Loop until told to go */ +iSeries_secondary_smp_loop: + /* Let the Hypervisor know we are alive */ + /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ + lis r3,0x8002 + rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ +#else /* CONFIG_SMP */ + /* Yield the processor. This is required for non-SMP kernels + which are running on multi-threaded machines. */ + lis r3,0x8000 + rldicr r3,r3,32,15 /* r3 = (r3 << 32) & 0xffff000000000000 */ + addi r3,r3,18 /* r3 = 0x8000000000000012 which is "yield" */ + li r4,0 /* "yield timed" */ + li r5,-1 /* "yield forever" */ +#endif /* CONFIG_SMP */ + li r0,-1 /* r0=-1 indicates a Hypervisor call */ + sc /* Invoke the hypervisor via a system call */ + mfspr r13,SPRG3 /* Put r13 back ???? */ + b 1b /* If SMP not configured, secondaries + * loop forever */ + + .globl decrementer_iSeries_masked +decrementer_iSeries_masked: + li r11,1 + stb r11,PACALPPACA+LPPACADECRINT(r13) + lwz r12,PACADEFAULTDECR(r13) + mtspr SPRN_DEC,r12 + /* fall through */ + + .globl hardware_interrupt_iSeries_masked +hardware_interrupt_iSeries_masked: + mtcrf 0x80,r9 /* Restore regs */ + ld r11,PACALPPACA+LPPACASRR0(r13) + ld r12,PACALPPACA+LPPACASRR1(r13) + mtspr SRR0,r11 + mtspr SRR1,r12 + ld r9,PACA_EXGEN+EX_R9(r13) + ld r10,PACA_EXGEN+EX_R10(r13) + ld r11,PACA_EXGEN+EX_R11(r13) + ld r12,PACA_EXGEN+EX_R12(r13) + ld r13,PACA_EXGEN+EX_R13(r13) + rfid + b . /* prevent speculative execution */ +#endif /* CONFIG_PPC_ISERIES */ + +/*** Common interrupt handlers ***/ + + STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) + + /* + * Machine check is different because we use a different + * save area: PACA_EXMC instead of PACA_EXGEN. + */ + .align 7 + .globl machine_check_common +machine_check_common: + EXCEPTION_PROLOG_COMMON(0x200, PACA_EXMC) + DISABLE_INTS + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_exception + b .ret_from_except + + STD_EXCEPTION_COMMON_LITE(0x900, decrementer, .timer_interrupt) + STD_EXCEPTION_COMMON(0xa00, trap_0a, .unknown_exception) + STD_EXCEPTION_COMMON(0xb00, trap_0b, .unknown_exception) + STD_EXCEPTION_COMMON(0xd00, single_step, .single_step_exception) + STD_EXCEPTION_COMMON(0xe00, trap_0e, .unknown_exception) + STD_EXCEPTION_COMMON(0xf00, performance_monitor, .performance_monitor_exception) + STD_EXCEPTION_COMMON(0x1300, instruction_breakpoint, .instruction_breakpoint_exception) +#ifdef CONFIG_ALTIVEC + STD_EXCEPTION_COMMON(0x1700, altivec_assist, .altivec_assist_exception) +#else + STD_EXCEPTION_COMMON(0x1700, altivec_assist, .unknown_exception) +#endif + +/* + * Here we have detected that the kernel stack pointer is bad. + * R9 contains the saved CR, r13 points to the paca, + * r10 contains the (bad) kernel stack pointer, + * r11 and r12 contain the saved SRR0 and SRR1. + * We switch to using an emergency stack, save the registers there, + * and call kernel_bad_stack(), which panics. + */ +bad_stack: + ld r1,PACAEMERGSP(r13) + subi r1,r1,64+INT_FRAME_SIZE + std r9,_CCR(r1) + std r10,GPR1(r1) + std r11,_NIP(r1) + std r12,_MSR(r1) + mfspr r11,DAR + mfspr r12,DSISR + std r11,_DAR(r1) + std r12,_DSISR(r1) + mflr r10 + mfctr r11 + mfxer r12 + std r10,_LINK(r1) + std r11,_CTR(r1) + std r12,_XER(r1) + SAVE_GPR(0,r1) + SAVE_GPR(2,r1) + SAVE_4GPRS(3,r1) + SAVE_2GPRS(7,r1) + SAVE_10GPRS(12,r1) + SAVE_10GPRS(22,r1) + addi r11,r1,INT_FRAME_SIZE + std r11,0(r1) + li r12,0 + std r12,0(r11) + ld r2,PACATOC(r13) +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .kernel_bad_stack + b 1b + +/* + * Return from an exception with minimal checks. + * The caller is assumed to have done EXCEPTION_PROLOG_COMMON. + * If interrupts have been enabled, or anything has been + * done that might have changed the scheduling status of + * any task or sent any task a signal, you should use + * ret_from_except or ret_from_except_lite instead of this. + */ +fast_exception_return: + ld r12,_MSR(r1) + ld r11,_NIP(r1) + andi. r3,r12,MSR_RI /* check if RI is set */ + beq- unrecov_fer + ld r3,_CCR(r1) + ld r4,_LINK(r1) + ld r5,_CTR(r1) + ld r6,_XER(r1) + mtcr r3 + mtlr r4 + mtctr r5 + mtxer r6 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + + mfmsr r10 + clrrdi r10,r10,2 /* clear RI (LE is 0 already) */ + mtmsrd r10,1 + + mtspr SRR1,r12 + mtspr SRR0,r11 + REST_4GPRS(10, r1) + ld r1,GPR1(r1) + rfid + b . /* prevent speculative execution */ + +unrecov_fer: + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +/* + * Here r13 points to the paca, r9 contains the saved CR, + * SRR0 and SRR1 are saved in r11 and r12, + * r9 - r13 are saved in paca->exgen. + */ + .align 7 + .globl data_access_common +data_access_common: + RUNLATCH_ON(r10) /* It wont fit in the 0x300 handler */ + mfspr r10,DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x300, PACA_EXGEN) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + li r5,0x300 + b .do_hash_page /* Try to handle as hpte fault */ + + .align 7 + .globl instruction_access_common +instruction_access_common: + EXCEPTION_PROLOG_COMMON(0x400, PACA_EXGEN) + ld r3,_NIP(r1) + andis. r4,r12,0x5820 + li r5,0x400 + b .do_hash_page /* Try to handle as hpte fault */ + + .align 7 + .globl hardware_interrupt_common + .globl hardware_interrupt_entry +hardware_interrupt_common: + EXCEPTION_PROLOG_COMMON(0x500, PACA_EXGEN) +hardware_interrupt_entry: + DISABLE_INTS + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_IRQ + b .ret_from_except_lite + + .align 7 + .globl alignment_common +alignment_common: + mfspr r10,DAR + std r10,PACA_EXGEN+EX_DAR(r13) + mfspr r10,DSISR + stw r10,PACA_EXGEN+EX_DSISR(r13) + EXCEPTION_PROLOG_COMMON(0x600, PACA_EXGEN) + ld r3,PACA_EXGEN+EX_DAR(r13) + lwz r4,PACA_EXGEN+EX_DSISR(r13) + std r3,_DAR(r1) + std r4,_DSISR(r1) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .alignment_exception + b .ret_from_except + + .align 7 + .globl program_check_common +program_check_common: + EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN) + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .program_check_exception + b .ret_from_except + + .align 7 + .globl fp_unavailable_common +fp_unavailable_common: + EXCEPTION_PROLOG_COMMON(0x800, PACA_EXGEN) + bne .load_up_fpu /* if from user, just load it up */ + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .kernel_fp_unavailable_exception + BUG_OPCODE + +/* + * load_up_fpu(unused, unused, tsk) + * Disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + * On SMP we know the fpu is free, since we give it up every + * switch (ie, no lazy save of the FP registers). + * On entry: r13 == 'current' && last_task_used_math != 'current' + */ +_STATIC(load_up_fpu) + mfmsr r5 /* grab the current MSR */ + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. + * + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_math@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save FP state to last_task_used_math's THREAD struct */ + addi r4,r4,THREAD + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR(r4) + /* Disable FP for last_task_used_math */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r6,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + ld r4,THREAD_FPEXC_MODE(r5) + ori r12,r12,MSR_FP + or r12,r12,r4 + std r12,_MSR(r1) + lfd fr0,THREAD_FPSCR(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return + + .align 7 + .globl altivec_unavailable_common +altivec_unavailable_common: + EXCEPTION_PROLOG_COMMON(0xf20, PACA_EXGEN) +#ifdef CONFIG_ALTIVEC +BEGIN_FTR_SECTION + bne .load_up_altivec /* if from user, just load it up */ +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) +#endif + bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + ENABLE_INTS + bl .altivec_unavailable_exception + b .ret_from_except + +#ifdef CONFIG_ALTIVEC +/* + * load_up_altivec(unused, unused, tsk) + * Disable VMX for the task which had it previously, + * and save its vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + * On SMP we know the VMX is free, since we give it up every + * switch (ie, no lazy save of the vector registers). + * On entry: r13 == 'current' && last_task_used_altivec != 'current' + */ +_STATIC(load_up_altivec) + mfmsr r5 /* grab the current MSR */ + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + +/* + * For SMP, we don't do lazy VMX switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altvec in switch_to. + * VRSAVE isn't dealt with here, that is done in the normal context + * switch code. Note that we could rely on vrsave value to eventually + * avoid saving all of the VREGs here... + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_altivec@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save VMX state to last_task_used_altivec's THREAD struct */ + addi r4,r4,THREAD + SAVE_32VRS(0,r5,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + /* Disable VMX for last_task_used_altivec */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r6,MSR_VEC@h + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* Hack: if we get an altivec unavailable trap with VRSAVE + * set to all zeros, we assume this is a broken application + * that fails to set it properly, and thus we switch it to + * all 1's + */ + mfspr r4,SPRN_VRSAVE + cmpdi 0,r4,0 + bne+ 1f + li r4,-1 + mtspr SPRN_VRSAVE,r4 +1: + /* enable use of VMX after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + oris r12,r12,MSR_VEC@h + std r12,_MSR(r1) + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r4,r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return +#endif /* CONFIG_ALTIVEC */ + +/* + * Hash table stuff + */ + .align 7 +_GLOBAL(do_hash_page) + std r3,_DAR(r1) + std r4,_DSISR(r1) + + andis. r0,r4,0xa450 /* weird error? */ + bne- .handle_page_fault /* if not, try to insert a HPTE */ +BEGIN_FTR_SECTION + andis. r0,r4,0x0020 /* Is it a segment table fault? */ + bne- .do_ste_alloc /* If so handle it */ +END_FTR_SECTION_IFCLR(CPU_FTR_SLB) + + /* + * We need to set the _PAGE_USER bit if MSR_PR is set or if we are + * accessing a userspace segment (even from the kernel). We assume + * kernel addresses always have the high bit set. + */ + rlwinm r4,r4,32-25+9,31-9,31-9 /* DSISR_STORE -> _PAGE_RW */ + rotldi r0,r3,15 /* Move high bit into MSR_PR posn */ + orc r0,r12,r0 /* MSR_PR | ~high_bit */ + rlwimi r4,r0,32-13,30,30 /* becomes _PAGE_USER access bit */ + ori r4,r4,1 /* add _PAGE_PRESENT */ + rlwimi r4,r5,22+2,31-2,31-2 /* Set _PAGE_EXEC if trap is 0x400 */ + + /* + * On iSeries, we soft-disable interrupts here, then + * hard-enable interrupts so that the hash_page code can spin on + * the hash_table_lock without problems on a shared processor. + */ + DISABLE_INTS + + /* + * r3 contains the faulting address + * r4 contains the required access permissions + * r5 contains the trap number + * + * at return r3 = 0 for success + */ + bl .hash_page /* build HPTE if possible */ + cmpdi r3,0 /* see if hash_page succeeded */ + +#ifdef DO_SOFT_DISABLE + /* + * If we had interrupts soft-enabled at the point where the + * DSI/ISI occurred, and an interrupt came in during hash_page, + * handle it now. + * We jump to ret_from_except_lite rather than fast_exception_return + * because ret_from_except_lite will check for and handle pending + * interrupts if necessary. + */ + beq .ret_from_except_lite + /* For a hash failure, we don't bother re-enabling interrupts */ + ble- 12f + + /* + * hash_page couldn't handle it, set soft interrupt enable back + * to what it was before the trap. Note that .local_irq_restore + * handles any interrupts pending at this point. + */ + ld r3,SOFTE(r1) + bl .local_irq_restore + b 11f +#else + beq fast_exception_return /* Return from exception on success */ + ble- 12f /* Failure return from hash_page */ + + /* fall through */ +#endif + +/* Here we have a page fault that hash_page can't handle. */ +_GLOBAL(handle_page_fault) + ENABLE_INTS +11: ld r4,_DAR(r1) + ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_page_fault + cmpdi r3,0 + beq+ .ret_from_except_lite + bl .save_nvgprs + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl .bad_page_fault + b .ret_from_except + +/* We have a page fault that hash_page could handle but HV refused + * the PTE insertion + */ +12: bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + lwz r4,_DAR(r1) + bl .low_hash_fault + b .ret_from_except + + /* here we have a segment miss */ +_GLOBAL(do_ste_alloc) + bl .ste_allocate /* try to insert stab entry */ + cmpdi r3,0 + beq+ fast_exception_return + b .handle_page_fault + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r11 and r12 contain the saved SRR0 and SRR1. + * r9 - r13 are saved in paca->exslb. + * We assume we aren't going to take any exceptions during this procedure. + * We assume (DAR >> 60) == 0xc. + */ + .align 7 +_GLOBAL(do_stab_bolted) + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r11,PACA_EXSLB+EX_SRR0(r13) /* save SRR0 in exc. frame */ + + /* Hash to the primary group */ + ld r10,PACASTABVIRT(r13) + mfspr r11,DAR + srdi r11,r11,28 + rldimi r10,r11,7,52 /* r10 = first ste of the group */ + + /* Calculate VSID */ + /* This is a kernel address, so protovsid = ESID */ + ASM_VSID_SCRAMBLE(r11, r9) + rldic r9,r11,12,16 /* r9 = vsid << 12 */ + + /* Search the primary group for a free entry */ +1: ld r11,0(r10) /* Test valid bit of the current ste */ + andi. r11,r11,0x80 + beq 2f + addi r10,r10,16 + andi. r11,r10,0x70 + bne 1b + + /* Stick for only searching the primary group for now. */ + /* At least for now, we use a very simple random castout scheme */ + /* Use the TB as a random number ; OR in 1 to avoid entry 0 */ + mftb r11 + rldic r11,r11,4,57 /* r11 = (r11 << 4) & 0x70 */ + ori r11,r11,0x10 + + /* r10 currently points to an ste one past the group of interest */ + /* make it point to the randomly selected entry */ + subi r10,r10,128 + or r10,r10,r11 /* r10 is the entry to invalidate */ + + isync /* mark the entry invalid */ + ld r11,0(r10) + rldicl r11,r11,56,1 /* clear the valid bit */ + rotldi r11,r11,8 + std r11,0(r10) + sync + + clrrdi r11,r11,28 /* Get the esid part of the ste */ + slbie r11 + +2: std r9,8(r10) /* Store the vsid part of the ste */ + eieio + + mfspr r11,DAR /* Get the new esid */ + clrrdi r11,r11,28 /* Permits a full 32b of ESID */ + ori r11,r11,0x90 /* Turn on valid and kp */ + std r11,0(r10) /* Put new entry back into the stab */ + + sync + + /* All done -- return from exception. */ + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ + ld r11,PACA_EXSLB+EX_SRR0(r13) /* get saved SRR0 */ + + andi. r10,r12,MSR_RI + beq- unrecov_slb + + mtcrf 0x80,r9 /* restore CR */ + + mfmsr r10 + clrrdi r10,r10,2 + mtmsrd r10,1 + + mtspr SRR0,r11 + mtspr SRR1,r12 + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +/* + * r13 points to the PACA, r9 contains the saved CR, + * r11 and r12 contain the saved SRR0 and SRR1. + * r3 has the faulting address + * r9 - r13 are saved in paca->exslb. + * r3 is saved in paca->slb_r3 + * We assume we aren't going to take any exceptions during this procedure. + */ +_GLOBAL(do_slb_miss) + mflr r10 + + stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */ + std r10,PACA_EXSLB+EX_LR(r13) /* save LR */ + + bl .slb_allocate /* handle it */ + + /* All done -- return from exception. */ + + ld r10,PACA_EXSLB+EX_LR(r13) + ld r3,PACA_EXSLB+EX_R3(r13) + lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */ +#ifdef CONFIG_PPC_ISERIES + ld r11,PACALPPACA+LPPACASRR0(r13) /* get SRR0 value */ +#endif /* CONFIG_PPC_ISERIES */ + + mtlr r10 + + andi. r10,r12,MSR_RI /* check for unrecoverable exception */ + beq- unrecov_slb + +.machine push +.machine "power4" + mtcrf 0x80,r9 + mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */ +.machine pop + +#ifdef CONFIG_PPC_ISERIES + mtspr SRR0,r11 + mtspr SRR1,r12 +#endif /* CONFIG_PPC_ISERIES */ + ld r9,PACA_EXSLB+EX_R9(r13) + ld r10,PACA_EXSLB+EX_R10(r13) + ld r11,PACA_EXSLB+EX_R11(r13) + ld r12,PACA_EXSLB+EX_R12(r13) + ld r13,PACA_EXSLB+EX_R13(r13) + rfid + b . /* prevent speculative execution */ + +unrecov_slb: + EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB) + DISABLE_INTS + bl .save_nvgprs +1: addi r3,r1,STACK_FRAME_OVERHEAD + bl .unrecoverable_exception + b 1b + +/* + * Space for CPU0's segment table. + * + * On iSeries, the hypervisor must fill in at least one entry before + * we get control (with relocate on). The address is give to the hv + * as a page number (see xLparMap in LparData.c), so this must be at a + * fixed address (the linker can't compute (u64)&initial_stab >> + * PAGE_SHIFT). + */ + . = STAB0_PHYS_ADDR /* 0x6000 */ + .globl initial_stab +initial_stab: + .space 4096 + +/* + * Data area reserved for FWNMI option. + * This address (0x7000) is fixed by the RPA. + */ + .= 0x7000 + .globl fwnmi_data_area +fwnmi_data_area: + + /* iSeries does not use the FWNMI stuff, so it is safe to put + * this here, even if we later allow kernels that will boot on + * both pSeries and iSeries */ +#ifdef CONFIG_PPC_ISERIES + . = LPARMAP_PHYS +#include "lparmap.s" +/* + * This ".text" is here for old compilers that generate a trailing + * .note section when compiling .c files to .s + */ + .text +#endif /* CONFIG_PPC_ISERIES */ + + . = 0x8000 + +/* + * On pSeries, secondary processors spin in the following code. + * At entry, r3 = this processor's number (physical cpu id) + */ +_GLOBAL(pSeries_secondary_smp_init) + mr r24,r3 + + /* turn on 64-bit mode */ + bl .enable_64b_mode + isync + + /* Copy some CPU settings from CPU 0 */ + bl .__restore_cpu_setup + + /* Set up a paca value for this processor. Since we have the + * physical cpu id in r24, we need to search the pacas to find + * which logical id maps to our physical one. + */ + LOADADDR(r13, paca) /* Get base vaddr of paca array */ + li r5,0 /* logical cpu id */ +1: lhz r6,PACAHWCPUID(r13) /* Load HW procid from paca */ + cmpw r6,r24 /* Compare to our id */ + beq 2f + addi r13,r13,PACA_SIZE /* Loop to next PACA on miss */ + addi r5,r5,1 + cmpwi r5,NR_CPUS + blt 1b + + mr r3,r24 /* not found, copy phys to r3 */ + b .kexec_wait /* next kernel might do better */ + +2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ + /* From now on, r24 is expected to be logical cpuid */ + mr r24,r5 +3: HMT_LOW + lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ + /* start. */ + sync + + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpwi 0,r23,0 +#ifdef CONFIG_SMP + bne .__secondary_start +#endif + b 3b /* Loop until told to go */ + +#ifdef CONFIG_PPC_ISERIES +_STATIC(__start_initialization_iSeries) + /* Clear out the BSS */ + LOADADDR(r11,__bss_stop) + LOADADDR(r8,__bss_start) + sub r11,r11,r8 /* bss size */ + addi r11,r11,7 /* round up to an even double word */ + rldicl. r11,r11,61,3 /* shift right by 3 */ + beq 4f + addi r8,r8,-8 + li r0,0 + mtctr r11 /* zero this many doublewords */ +3: stdu r0,8(r8) + bdnz 3b +4: + LOADADDR(r1,init_thread_union) + addi r1,r1,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + LOADADDR(r3,cpu_specs) + LOADADDR(r4,cur_cpu_spec) + li r5,0 + bl .identify_cpu + + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + bl .iSeries_early_setup + + /* relocation is on at this point */ + + b .start_here_common +#endif /* CONFIG_PPC_ISERIES */ + +#ifdef CONFIG_PPC_MULTIPLATFORM + +_STATIC(__mmu_off) + mfmsr r3 + andi. r0,r3,MSR_IR|MSR_DR + beqlr + andc r3,r3,r0 + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + sync + rfid + b . /* prevent speculative execution */ + + +/* + * Here is our main kernel entry point. We support currently 2 kind of entries + * depending on the value of r5. + * + * r5 != NULL -> OF entry, we go to prom_init, "legacy" parameter content + * in r3...r7 + * + * r5 == NULL -> kexec style entry. r3 is a physical pointer to the + * DT block, r4 is a physical pointer to the kernel itself + * + */ +_GLOBAL(__start_initialization_multiplatform) + /* + * Are we booted from a PROM Of-type client-interface ? + */ + cmpldi cr0,r5,0 + bne .__boot_from_prom /* yes -> prom */ + + /* Save parameters */ + mr r31,r3 + mr r30,r4 + + /* Make sure we are running in 64 bits mode */ + bl .enable_64b_mode + + /* Setup some critical 970 SPRs before switching MMU off */ + bl .__970_cpu_preinit + + /* cpu # */ + li r24,0 + + /* Switch off MMU if not already */ + LOADADDR(r4, .__after_prom_start - KERNELBASE) + add r4,r4,r30 + bl .__mmu_off + b .__after_prom_start + +_STATIC(__boot_from_prom) + /* Save parameters */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + + /* Make sure we are running in 64 bits mode */ + bl .enable_64b_mode + + /* put a relocation offset into r3 */ + bl .reloc_offset + + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + + /* Relocate the TOC from a virt addr to a real addr */ + sub r2,r2,r3 + + /* Restore parameters */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + + /* Do all of the interaction with OF client interface */ + bl .prom_init + /* We never return */ + trap + +/* + * At this point, r3 contains the physical address we are running at, + * returned by prom_init() + */ +_STATIC(__after_prom_start) + +/* + * We need to run with __start at physical address 0. + * This will leave some code in the first 256B of + * real memory, which are reserved for software use. + * The remainder of the first page is loaded with the fixed + * interrupt vectors. The next two pages are filled with + * unknown exception placeholders. + * + * Note: This process overwrites the OF exception vectors. + * r26 == relocation offset + * r27 == KERNELBASE + */ + bl .reloc_offset + mr r26,r3 + SET_REG_TO_CONST(r27,KERNELBASE) + + li r3,0 /* target addr */ + + // XXX FIXME: Use phys returned by OF (r30) + sub r4,r27,r26 /* source addr */ + /* current address of _start */ + /* i.e. where we are running */ + /* the source addr */ + + LOADADDR(r5,copy_to_here) /* # bytes of memory to copy */ + sub r5,r5,r27 + + li r6,0x100 /* Start offset, the first 0x100 */ + /* bytes were copied earlier. */ + + bl .copy_and_flush /* copy the first n bytes */ + /* this includes the code being */ + /* executed here. */ + + LOADADDR(r0, 4f) /* Jump to the copy of this code */ + mtctr r0 /* that we just made/relocated */ + bctr + +4: LOADADDR(r5,klimit) + sub r5,r5,r26 + ld r5,0(r5) /* get the value of klimit */ + sub r5,r5,r27 + bl .copy_and_flush /* copy the rest */ + b .start_here_multiplatform + +#endif /* CONFIG_PPC_MULTIPLATFORM */ + +/* + * Copy routine used to copy the kernel to start at physical address 0 + * and flush and invalidate the caches as needed. + * r3 = dest addr, r4 = source addr, r5 = copy limit, r6 = start offset + * on exit, r3, r4, r5 are unchanged, r6 is updated to be >= r5. + * + * Note: this routine *only* clobbers r0, r6 and lr + */ +_GLOBAL(copy_and_flush) + addi r5,r5,-8 + addi r6,r6,-8 +4: li r0,16 /* Use the least common */ + /* denominator cache line */ + /* size. This results in */ + /* extra cache line flushes */ + /* but operation is correct. */ + /* Can't get cache line size */ + /* from NACA as it is being */ + /* moved too. */ + + mtctr r0 /* put # words/line in ctr */ +3: addi r6,r6,8 /* copy a cache line */ + ldx r0,r6,r4 + stdx r0,r6,r3 + bdnz 3b + dcbst r6,r3 /* write it to memory */ + sync + icbi r6,r3 /* flush the icache line */ + cmpld 0,r6,r5 + blt 4b + sync + addi r5,r5,8 + addi r6,r6,8 + blr + +.align 8 +copy_to_here: + +#ifdef CONFIG_SMP +#ifdef CONFIG_PPC_PMAC +/* + * On PowerMac, secondary processors starts from the reset vector, which + * is temporarily turned into a call to one of the functions below. + */ + .section ".text"; + .align 2 ; + + .globl pmac_secondary_start_1 +pmac_secondary_start_1: + li r24, 1 + b .pmac_secondary_start + + .globl pmac_secondary_start_2 +pmac_secondary_start_2: + li r24, 2 + b .pmac_secondary_start + + .globl pmac_secondary_start_3 +pmac_secondary_start_3: + li r24, 3 + b .pmac_secondary_start + +_GLOBAL(pmac_secondary_start) + /* turn on 64-bit mode */ + bl .enable_64b_mode + isync + + /* Copy some CPU settings from CPU 0 */ + bl .__restore_cpu_setup + + /* pSeries do that early though I don't think we really need it */ + mfmsr r3 + ori r3,r3,MSR_RI + mtmsrd r3 /* RI on */ + + /* Set up a paca value for this processor. */ + LOADADDR(r4, paca) /* Get base vaddr of paca array */ + mulli r13,r24,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r4 /* for this processor. */ + mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ + + /* Create a temp kernel stack for use before relocation is on. */ + ld r1,PACAEMERGSP(r13) + subi r1,r1,STACK_FRAME_OVERHEAD + + b .__secondary_start + +#endif /* CONFIG_PPC_PMAC */ + +/* + * This function is called after the master CPU has released the + * secondary processors. The execution environment is relocation off. + * The paca for this processor has the following fields initialized at + * this point: + * 1. Processor number + * 2. Segment table pointer (virtual address) + * On entry the following are set: + * r1 = stack pointer. vaddr for iSeries, raddr (temp stack) for pSeries + * r24 = cpu# (in Linux terms) + * r13 = paca virtual address + * SPRG3 = paca virtual address + */ +_GLOBAL(__secondary_start) + + HMT_MEDIUM /* Set thread priority to MEDIUM */ + + ld r2,PACATOC(r13) + li r6,0 + stb r6,PACAPROCENABLED(r13) + +#ifndef CONFIG_PPC_ISERIES + /* Initialize the page table pointer register. */ + LOADADDR(r6,_SDR1) + ld r6,0(r6) /* get the value of _SDR1 */ + mtspr SDR1,r6 /* set the htab location */ +#endif + /* Initialize the first segment table (or SLB) entry */ + ld r3,PACASTABVIRT(r13) /* get addr of segment table */ + bl .stab_initialize + + /* Initialize the kernel stack. Just a repeat for iSeries. */ + LOADADDR(r3,current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r1,r3,r28 + addi r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD + std r1,PACAKSAVE(r13) + + ld r3,PACASTABREAL(r13) /* get raddr of segment table */ + ori r4,r3,1 /* turn on valid bit */ + +#ifdef CONFIG_PPC_ISERIES + li r0,-1 /* hypervisor call */ + li r3,1 + sldi r3,r3,63 /* 0x8000000000000000 */ + ori r3,r3,4 /* 0x8000000000000004 */ + sc /* HvCall_setASR */ +#else + /* set the ASR */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + ld r3,0(r3) + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ + beq 98f /* branch if result is 0 */ + mfspr r3,PVR + srwi r3,r3,16 + cmpwi r3,0x37 /* SStar */ + beq 97f + cmpwi r3,0x36 /* IStar */ + beq 97f + cmpwi r3,0x34 /* Pulsar */ + bne 98f +97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ + HVSC /* Invoking hcall */ + b 99f +98: /* !(rpa hypervisor) || !(star) */ + mtasr r4 /* set the stab location */ +99: +#endif + li r7,0 + mtlr r7 + + /* enable MMU and jump to start_secondary */ + LOADADDR(r3,.start_secondary_prolog) + SET_REG_TO_CONST(r4, MSR_KERNEL) +#ifdef DO_SOFT_DISABLE + ori r4,r4,MSR_EE +#endif + mtspr SRR0,r3 + mtspr SRR1,r4 + rfid + b . /* prevent speculative execution */ + +/* + * Running with relocation on at this point. All we want to do is + * zero the stack back-chain pointer before going into C code. + */ +_GLOBAL(start_secondary_prolog) + li r3,0 + std r3,0(r1) /* Zero the stack frame pointer */ + bl .start_secondary +#endif + +/* + * This subroutine clobbers r11 and r12 + */ +_GLOBAL(enable_64b_mode) + mfmsr r11 /* grab the current MSR */ + li r12,1 + rldicr r12,r12,MSR_SF_LG,(63-MSR_SF_LG) + or r11,r11,r12 + li r12,1 + rldicr r12,r12,MSR_ISF_LG,(63-MSR_ISF_LG) + or r11,r11,r12 + mtmsrd r11 + isync + blr + +#ifdef CONFIG_PPC_MULTIPLATFORM +/* + * This is where the main kernel code starts. + */ +_STATIC(start_here_multiplatform) + /* get a new offset, now that the kernel has moved. */ + bl .reloc_offset + mr r26,r3 + + /* Clear out the BSS. It may have been done in prom_init, + * already but that's irrelevant since prom_init will soon + * be detached from the kernel completely. Besides, we need + * to clear it now for kexec-style entry. + */ + LOADADDR(r11,__bss_stop) + LOADADDR(r8,__bss_start) + sub r11,r11,r8 /* bss size */ + addi r11,r11,7 /* round up to an even double word */ + rldicl. r11,r11,61,3 /* shift right by 3 */ + beq 4f + addi r8,r8,-8 + li r0,0 + mtctr r11 /* zero this many doublewords */ +3: stdu r0,8(r8) + bdnz 3b +4: + + mfmsr r6 + ori r6,r6,MSR_RI + mtmsrd r6 /* RI on */ + +#ifdef CONFIG_HMT + /* Start up the second thread on cpu 0 */ + mfspr r3,PVR + srwi r3,r3,16 + cmpwi r3,0x34 /* Pulsar */ + beq 90f + cmpwi r3,0x36 /* Icestar */ + beq 90f + cmpwi r3,0x37 /* SStar */ + beq 90f + b 91f /* HMT not supported */ +90: li r3,0 + bl .hmt_start_secondary +91: +#endif + + /* The following gets the stack and TOC set up with the regs */ + /* pointing to the real addr of the kernel stack. This is */ + /* all done to support the C function call below which sets */ + /* up the htab. This is done because we have relocated the */ + /* kernel but are still running in real mode. */ + + LOADADDR(r3,init_thread_union) + sub r3,r3,r26 + + /* set up a stack pointer (physical address) */ + addi r1,r3,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* set up the TOC (physical address) */ + LOADADDR(r2,__toc_start) + addi r2,r2,0x4000 + addi r2,r2,0x4000 + sub r2,r2,r26 + + LOADADDR(r3,cpu_specs) + sub r3,r3,r26 + LOADADDR(r4,cur_cpu_spec) + sub r4,r4,r26 + mr r5,r26 + bl .identify_cpu + + /* Save some low level config HIDs of CPU0 to be copied to + * other CPUs later on, or used for suspend/resume + */ + bl .__save_cpu_setup + sync + + /* Setup a valid physical PACA pointer in SPRG3 for early_setup + * note that boot_cpuid can always be 0 nowadays since there is + * nowhere it can be initialized differently before we reach this + * code + */ + LOADADDR(r27, boot_cpuid) + sub r27,r27,r26 + lwz r27,0(r27) + + LOADADDR(r24, paca) /* Get base vaddr of paca array */ + mulli r13,r27,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r24 /* for this processor. */ + sub r13,r13,r26 /* convert to physical addr */ + mtspr SPRG3,r13 /* PPPBBB: Temp... -Peter */ + + /* Do very early kernel initializations, including initial hash table, + * stab and slb setup before we turn on relocation. */ + + /* Restore parameters passed from prom_init/kexec */ + mr r3,r31 + bl .early_setup + + /* set the ASR */ + ld r3,PACASTABREAL(r13) + ori r4,r3,1 /* turn on valid bit */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + ld r3,0(r3) + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + andi. r3,r3,PLATFORM_LPAR /* Test if bit 0 is set (LPAR bit) */ + beq 98f /* branch if result is 0 */ + mfspr r3,PVR + srwi r3,r3,16 + cmpwi r3,0x37 /* SStar */ + beq 97f + cmpwi r3,0x36 /* IStar */ + beq 97f + cmpwi r3,0x34 /* Pulsar */ + bne 98f +97: li r3,H_SET_ASR /* hcall = H_SET_ASR */ + HVSC /* Invoking hcall */ + b 99f +98: /* !(rpa hypervisor) || !(star) */ + mtasr r4 /* set the stab location */ +99: + /* Set SDR1 (hash table pointer) */ + ld r3,systemcfg@got(r2) /* r3 = ptr to systemcfg */ + ld r3,0(r3) + lwz r3,PLATFORM(r3) /* r3 = platform flags */ + /* Test if bit 0 is set (LPAR bit) */ + andi. r3,r3,PLATFORM_LPAR + bne 98f /* branch if result is !0 */ + LOADADDR(r6,_SDR1) /* Only if NOT LPAR */ + sub r6,r6,r26 + ld r6,0(r6) /* get the value of _SDR1 */ + mtspr SDR1,r6 /* set the htab location */ +98: + LOADADDR(r3,.start_here_common) + SET_REG_TO_CONST(r4, MSR_KERNEL) + mtspr SRR0,r3 + mtspr SRR1,r4 + rfid + b . /* prevent speculative execution */ +#endif /* CONFIG_PPC_MULTIPLATFORM */ + + /* This is where all platforms converge execution */ +_STATIC(start_here_common) + /* relocation is on at this point */ + + /* The following code sets up the SP and TOC now that we are */ + /* running with translation enabled. */ + + LOADADDR(r3,init_thread_union) + + /* set up the stack */ + addi r1,r3,THREAD_SIZE + li r0,0 + stdu r0,-STACK_FRAME_OVERHEAD(r1) + + /* Apply the CPUs-specific fixups (nop out sections not relevant + * to this CPU + */ + li r3,0 + bl .do_cpu_ftr_fixups + + LOADADDR(r26, boot_cpuid) + lwz r26,0(r26) + + LOADADDR(r24, paca) /* Get base vaddr of paca array */ + mulli r13,r26,PACA_SIZE /* Calculate vaddr of right paca */ + add r13,r13,r24 /* for this processor. */ + mtspr SPRG3,r13 + + /* ptr to current */ + LOADADDR(r4,init_task) + std r4,PACACURRENT(r13) + + /* Load the TOC */ + ld r2,PACATOC(r13) + std r1,PACAKSAVE(r13) + + bl .setup_system + + /* Load up the kernel context */ +5: +#ifdef DO_SOFT_DISABLE + li r5,0 + stb r5,PACAPROCENABLED(r13) /* Soft Disabled */ + mfmsr r5 + ori r5,r5,MSR_EE /* Hard Enabled */ + mtmsrd r5 +#endif + + bl .start_kernel + +_GLOBAL(hmt_init) +#ifdef CONFIG_HMT + LOADADDR(r5, hmt_thread_data) + mfspr r7,PVR + srwi r7,r7,16 + cmpwi r7,0x34 /* Pulsar */ + beq 90f + cmpwi r7,0x36 /* Icestar */ + beq 91f + cmpwi r7,0x37 /* SStar */ + beq 91f + b 101f +90: mfspr r6,PIR + andi. r6,r6,0x1f + b 92f +91: mfspr r6,PIR + andi. r6,r6,0x3ff +92: sldi r4,r24,3 + stwx r6,r5,r4 + bl .hmt_start_secondary + b 101f + +__hmt_secondary_hold: + LOADADDR(r5, hmt_thread_data) + clrldi r5,r5,4 + li r7,0 + mfspr r6,PIR + mfspr r8,PVR + srwi r8,r8,16 + cmpwi r8,0x34 + bne 93f + andi. r6,r6,0x1f + b 103f +93: andi. r6,r6,0x3f + +103: lwzx r8,r5,r7 + cmpw r8,r6 + beq 104f + addi r7,r7,8 + b 103b + +104: addi r7,r7,4 + lwzx r9,r5,r7 + mr r24,r9 +101: +#endif + mr r3,r24 + b .pSeries_secondary_smp_init + +#ifdef CONFIG_HMT +_GLOBAL(hmt_start_secondary) + LOADADDR(r4,__hmt_secondary_hold) + clrldi r4,r4,4 + mtspr NIADORM, r4 + mfspr r4, MSRDORM + li r5, -65 + and r4, r4, r5 + mtspr MSRDORM, r4 + lis r4,0xffef + ori r4,r4,0x7403 + mtspr TSC, r4 + li r4,0x1f4 + mtspr TST, r4 + mfspr r4, HID0 + ori r4, r4, 0x1 + mtspr HID0, r4 + mfspr r4, SPRN_CTRLF + oris r4, r4, 0x40 + mtspr SPRN_CTRLT, r4 + blr +#endif + +#if defined(CONFIG_KEXEC) || (defined(CONFIG_SMP) && !defined(CONFIG_PPC_ISERIES)) +_GLOBAL(smp_release_cpus) + /* All secondary cpus are spinning on a common + * spinloop, release them all now so they can start + * to spin on their individual paca spinloops. + * For non SMP kernels, the secondary cpus never + * get out of the common spinloop. + */ + li r3,1 + LOADADDR(r5,__secondary_hold_spinloop) + std r3,0(r5) + sync + blr +#endif /* CONFIG_SMP && !CONFIG_PPC_ISERIES */ + + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the bss, which is page-aligned. + */ + .section ".bss" + + .align PAGE_SHIFT + + .globl empty_zero_page +empty_zero_page: + .space PAGE_SIZE + + .globl swapper_pg_dir +swapper_pg_dir: + .space PAGE_SIZE + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. + */ + .globl cmd_line +cmd_line: + .space COMMAND_LINE_SIZE diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S new file mode 100644 index 00000000000..cb1a3a54a02 --- /dev/null +++ b/arch/powerpc/kernel/head_8xx.S @@ -0,0 +1,860 @@ +/* + * arch/ppc/kernel/except_8xx.S + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * MPC8xx modifications by Dan Malek + * Copyright (C) 1997 Dan Malek (dmalek@jlc.net). + * + * This file contains low-level support and setup for PowerPC 8xx + * embedded processors, including trap and interrupt dispatch. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/cache.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* Macro to make the code more readable. */ +#ifdef CONFIG_8xx_CPU6 +#define DO_8xx_CPU6(val, reg) \ + li reg, val; \ + stw reg, 12(r0); \ + lwz reg, 12(r0); +#else +#define DO_8xx_CPU6(val, reg) +#endif + .text + .globl _stext +_stext: + .text + .globl _start +_start: + +/* MPC8xx + * This port was done on an MBX board with an 860. Right now I only + * support an ELF compressed (zImage) boot from EPPC-Bug because the + * code there loads up some registers before calling us: + * r3: ptr to board info data + * r4: initrd_start or if no initrd then 0 + * r5: initrd_end - unused if r4 is 0 + * r6: Start of command line string + * r7: End of command line string + * + * I decided to use conditional compilation instead of checking PVR and + * adding more processor specific branches around code I don't need. + * Since this is an embedded processor, I also appreciate any memory + * savings I can get. + * + * The MPC8xx does not have any BATs, but it supports large page sizes. + * We first initialize the MMU to support 8M byte pages, then load one + * entry into each of the instruction and data TLBs to map the first + * 8M 1:1. I also mapped an additional I/O space 1:1 so we can get to + * the "internal" processor registers before MMU_init is called. + * + * The TLB code currently contains a major hack. Since I use the condition + * code register, I have to save and restore it. I am out of registers, so + * I just store it in memory location 0 (the TLB handlers are not reentrant). + * To avoid making any decisions, I need to use the "segment" valid bit + * in the first level table, but that would require many changes to the + * Linux page directory/table functions that I don't want to do right now. + * + * I used to use SPRG2 for a temporary register in the TLB handler, but it + * has since been put to other uses. I now use a hack to save a register + * and the CCR at memory location 0.....Someday I'll fix this..... + * -- Dan + */ + .globl __start +__start: + mr r31,r3 /* save parameters */ + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + + /* We have to turn on the MMU right away so we get cache modes + * set correctly. + */ + bl initial_mmu + +/* We now have the lower 8 Meg mapped into TLB entries, and the caches + * ready to work. + */ + +turn_on_mmu: + mfmsr r0 + ori r0,r0,MSR_DR|MSR_IR + mtspr SPRN_SRR1,r0 + lis r0,start_here@h + ori r0,r0,start_here@l + mtspr SPRN_SRR0,r0 + SYNC + rfi /* enables MMU */ + +/* + * Exception entry code. This code runs with address translation + * turned off, i.e. using physical addresses. + * We assume sprg3 has the physical address of the current + * task's thread_struct. + */ +#define EXCEPTION_PROLOG \ + mtspr SPRN_SPRG0,r10; \ + mtspr SPRN_SPRG1,r11; \ + mfcr r10; \ + EXCEPTION_PROLOG_1; \ + EXCEPTION_PROLOG_2 + +#define EXCEPTION_PROLOG_1 \ + mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \ + andi. r11,r11,MSR_PR; \ + tophys(r11,r1); /* use tophys(r1) if kernel */ \ + beq 1f; \ + mfspr r11,SPRN_SPRG3; \ + lwz r11,THREAD_INFO-THREAD(r11); \ + addi r11,r11,THREAD_SIZE; \ + tophys(r11,r11); \ +1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */ + + +#define EXCEPTION_PROLOG_2 \ + CLR_TOP32(r11); \ + stw r10,_CCR(r11); /* save registers */ \ + stw r12,GPR12(r11); \ + stw r9,GPR9(r11); \ + mfspr r10,SPRN_SPRG0; \ + stw r10,GPR10(r11); \ + mfspr r12,SPRN_SPRG1; \ + stw r12,GPR11(r11); \ + mflr r10; \ + stw r10,_LINK(r11); \ + mfspr r12,SPRN_SRR0; \ + mfspr r9,SPRN_SRR1; \ + stw r1,GPR1(r11); \ + stw r1,0(r11); \ + tovirt(r1,r11); /* set new kernel sp */ \ + li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \ + MTMSRD(r10); /* (except for mach check in rtas) */ \ + stw r0,GPR0(r11); \ + SAVE_4GPRS(3, r11); \ + SAVE_2GPRS(7, r11) + +/* + * Note: code which follows this uses cr0.eq (set if from kernel), + * r11, r12 (SRR0), and r9 (SRR1). + * + * Note2: once we have set r1 we are in a position to take exceptions + * again, and we could thus set MSR:RI at that point. + */ + +/* + * Exception vectors. + */ +#define EXCEPTION(n, label, hdlr, xfer) \ + . = n; \ +label: \ + EXCEPTION_PROLOG; \ + addi r3,r1,STACK_FRAME_OVERHEAD; \ + xfer(n, hdlr) + +#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \ + li r10,trap; \ + stw r10,TRAP(r11); \ + li r10,MSR_KERNEL; \ + copyee(r10, r9); \ + bl tfer; \ +i##n: \ + .long hdlr; \ + .long ret + +#define COPY_EE(d, s) rlwimi d,s,0,16,16 +#define NOCOPY(d, s) + +#define EXC_XFER_STD(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \ + ret_from_except) + +#define EXC_XFER_EE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \ + ret_from_except_full) + +#define EXC_XFER_EE_LITE(n, hdlr) \ + EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \ + ret_from_except) + +/* System reset */ + EXCEPTION(0x100, Reset, UnknownException, EXC_XFER_STD) + +/* Machine check */ + . = 0x200 +MachineCheck: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_STD(0x200, MachineCheckException) + +/* Data access exception. + * This is "never generated" by the MPC8xx. We jump to it for other + * translation errors. + */ + . = 0x300 +DataAccess: + EXCEPTION_PROLOG + mfspr r10,SPRN_DSISR + stw r10,_DSISR(r11) + mr r5,r10 + mfspr r4,SPRN_DAR + EXC_XFER_EE_LITE(0x300, handle_page_fault) + +/* Instruction access exception. + * This is "never generated" by the MPC8xx. We jump to it for other + * translation errors. + */ + . = 0x400 +InstructionAccess: + EXCEPTION_PROLOG + mr r4,r12 + mr r5,r9 + EXC_XFER_EE_LITE(0x400, handle_page_fault) + +/* External interrupt */ + EXCEPTION(0x500, HardwareInterrupt, do_IRQ, EXC_XFER_LITE) + +/* Alignment exception */ + . = 0x600 +Alignment: + EXCEPTION_PROLOG + mfspr r4,SPRN_DAR + stw r4,_DAR(r11) + mfspr r5,SPRN_DSISR + stw r5,_DSISR(r11) + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE(0x600, AlignmentException) + +/* Program check exception */ + EXCEPTION(0x700, ProgramCheck, ProgramCheckException, EXC_XFER_STD) + +/* No FPU on MPC8xx. This exception is not supposed to happen. +*/ + EXCEPTION(0x800, FPUnavailable, UnknownException, EXC_XFER_STD) + +/* Decrementer */ + EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE) + + EXCEPTION(0xa00, Trap_0a, UnknownException, EXC_XFER_EE) + EXCEPTION(0xb00, Trap_0b, UnknownException, EXC_XFER_EE) + +/* System call */ + . = 0xc00 +SystemCall: + EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0xc00, DoSyscall) + +/* Single step - not used on 601 */ + EXCEPTION(0xd00, SingleStep, SingleStepException, EXC_XFER_STD) + EXCEPTION(0xe00, Trap_0e, UnknownException, EXC_XFER_EE) + EXCEPTION(0xf00, Trap_0f, UnknownException, EXC_XFER_EE) + +/* On the MPC8xx, this is a software emulation interrupt. It occurs + * for all unimplemented and illegal instructions. + */ + EXCEPTION(0x1000, SoftEmu, SoftwareEmulation, EXC_XFER_STD) + + . = 0x1100 +/* + * For the MPC8xx, this is a software tablewalk to load the instruction + * TLB. It is modelled after the example in the Motorola manual. The task + * switch loads the M_TWB register with the pointer to the first level table. + * If we discover there is no second level table (value is zero) or if there + * is an invalid pte, we load that into the TLB, which causes another fault + * into the TLB Error interrupt where we can handle such problems. + * We have to use the MD_xxx registers for the tablewalk because the + * equivalent MI_xxx registers only perform the attribute functions. + */ +InstructionTLBMiss: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 + stw r10, 0(r0) + stw r11, 4(r0) + mfspr r10, SPRN_SRR0 /* Get effective address of fault */ + DO_8xx_CPU6(0x3780, r3) + mtspr SPRN_MD_EPN, r10 /* Have to use MD_EPN for walk, MI_EPN can't */ + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + andi. r11, r10, 0x0800 /* Address >= 0x80000000 */ + beq 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + rlwimi r10, r11, 0, 2, 19 +3: + lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ + beq 2f /* If zero, don't try to find a pte */ + + /* We have a pte table, so load the MI_TWC with the attributes + * for this "segment." + */ + ori r11,r11,1 /* Set valid bit */ + DO_8xx_CPU6(0x2b80, r3) + mtspr SPRN_MI_TWC, r11 /* Set segment attributes */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ + lwz r10, 0(r11) /* Get the pte */ + + ori r10, r10, _PAGE_ACCESSED + stw r10, 0(r11) + + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 24, 25, 26, and 27 must be + * set. All other Linux PTE bits control the behavior + * of the MMU. + */ +2: li r11, 0x00f0 + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + DO_8xx_CPU6(0x2d80, r3) + mtspr SPRN_MI_RPN, r10 /* Update TLB entry */ + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + rfi + + . = 0x1200 +DataStoreTLBMiss: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 + stw r10, 0(r0) + stw r11, 4(r0) + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + andi. r11, r10, 0x0800 + beq 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + rlwimi r10, r11, 0, 2, 19 +3: + lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ + beq 2f /* If zero, don't try to find a pte */ + + /* We have a pte table, so load fetch the pte from the table. + */ + ori r11, r11, 1 /* Set valid bit in physical L2 page */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r10, SPRN_MD_TWC /* ....and get the pte address */ + lwz r10, 0(r10) /* Get the pte */ + + /* Insert the Guarded flag into the TWC from the Linux PTE. + * It is bit 27 of both the Linux PTE and the TWC (at least + * I got that right :-). It will be better when we can put + * this into the Linux pgd/pmd and load it in the operation + * above. + */ + rlwimi r11, r10, 0, 27, 27 + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 + + mfspr r11, SPRN_MD_TWC /* get the pte address again */ + ori r10, r10, _PAGE_ACCESSED + stw r10, 0(r11) + + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 24, 25, 26, and 27 must be + * set. All other Linux PTE bits control the behavior + * of the MMU. + */ +2: li r11, 0x00f0 + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + DO_8xx_CPU6(0x3d80, r3) + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + rfi + +/* This is an instruction TLB error on the MPC8xx. This could be due + * to many reasons, such as executing guarded memory or illegal instruction + * addresses. There is nothing to do but handle a big time error fault. + */ + . = 0x1300 +InstructionTLBError: + b InstructionAccess + +/* This is the data TLB error on the MPC8xx. This could be due to + * many reasons, including a dirty update to a pte. We can catch that + * one here, but anything else is an error. First, we track down the + * Linux pte. If it is valid, write access is allowed, but the + * page dirty bit is not set, we will set it and reload the TLB. For + * any other case, we bail out to a higher level function that can + * handle it. + */ + . = 0x1400 +DataTLBError: +#ifdef CONFIG_8xx_CPU6 + stw r3, 8(r0) +#endif + DO_8xx_CPU6(0x3f80, r3) + mtspr SPRN_M_TW, r10 /* Save a couple of working registers */ + mfcr r10 + stw r10, 0(r0) + stw r11, 4(r0) + + /* First, make sure this was a store operation. + */ + mfspr r10, SPRN_DSISR + andis. r11, r10, 0x0200 /* If set, indicates store op */ + beq 2f + + /* The EA of a data TLB miss is automatically stored in the MD_EPN + * register. The EA of a data TLB error is automatically stored in + * the DAR, but not the MD_EPN register. We must copy the 20 most + * significant bits of the EA from the DAR to MD_EPN before we + * start walking the page tables. We also need to copy the CASID + * value from the M_CASID register. + * Addendum: The EA of a data TLB error is _supposed_ to be stored + * in DAR, but it seems that this doesn't happen in some cases, such + * as when the error is due to a dcbi instruction to a page with a + * TLB that doesn't have the changed bit set. In such cases, there + * does not appear to be any way to recover the EA of the error + * since it is neither in DAR nor MD_EPN. As a workaround, the + * _PAGE_HWWRITE bit is set for all kernel data pages when the PTEs + * are initialized in mapin_ram(). This will avoid the problem, + * assuming we only use the dcbi instruction on kernel addresses. + */ + mfspr r10, SPRN_DAR + rlwinm r11, r10, 0, 0, 19 + ori r11, r11, MD_EVALID + mfspr r10, SPRN_M_CASID + rlwimi r11, r10, 0, 28, 31 + DO_8xx_CPU6(0x3780, r3) + mtspr SPRN_MD_EPN, r11 + + mfspr r10, SPRN_M_TWB /* Get level 1 table entry address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + andi. r11, r10, 0x0800 + beq 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + rlwimi r10, r11, 0, 2, 19 +3: + lwz r11, 0(r10) /* Get the level 1 entry */ + rlwinm. r10, r11,0,0,19 /* Extract page descriptor page address */ + beq 2f /* If zero, bail */ + + /* We have a pte table, so fetch the pte from the table. + */ + ori r11, r11, 1 /* Set valid bit in physical L2 page */ + DO_8xx_CPU6(0x3b80, r3) + mtspr SPRN_MD_TWC, r11 /* Load pte table base address */ + mfspr r11, SPRN_MD_TWC /* ....and get the pte address */ + lwz r10, 0(r11) /* Get the pte */ + + andi. r11, r10, _PAGE_RW /* Is it writeable? */ + beq 2f /* Bail out if not */ + + /* Update 'changed', among others. + */ + ori r10, r10, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + mfspr r11, SPRN_MD_TWC /* Get pte address again */ + stw r10, 0(r11) /* and update pte in table */ + + /* The Linux PTE won't go exactly into the MMU TLB. + * Software indicator bits 21, 22 and 28 must be clear. + * Software indicator bits 24, 25, 26, and 27 must be + * set. All other Linux PTE bits control the behavior + * of the MMU. + */ + li r11, 0x00f0 + rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */ + DO_8xx_CPU6(0x3d80, r3) + mtspr SPRN_MD_RPN, r10 /* Update TLB entry */ + + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + rfi +2: + mfspr r10, SPRN_M_TW /* Restore registers */ + lwz r11, 0(r0) + mtcr r11 + lwz r11, 4(r0) +#ifdef CONFIG_8xx_CPU6 + lwz r3, 8(r0) +#endif + b DataAccess + + EXCEPTION(0x1500, Trap_15, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1600, Trap_16, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1700, Trap_17, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1800, Trap_18, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1900, Trap_19, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1a00, Trap_1a, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1b00, Trap_1b, UnknownException, EXC_XFER_EE) + +/* On the MPC8xx, these next four traps are used for development + * support of breakpoints and such. Someday I will get around to + * using them. + */ + EXCEPTION(0x1c00, Trap_1c, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1d00, Trap_1d, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1e00, Trap_1e, UnknownException, EXC_XFER_EE) + EXCEPTION(0x1f00, Trap_1f, UnknownException, EXC_XFER_EE) + + . = 0x2000 + + .globl giveup_fpu +giveup_fpu: + blr + +/* + * This is where the main kernel code starts. + */ +start_here: + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to phys current thread */ + tophys(r4,r2) + addi r4,r4,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + li r3,0 + mtspr SPRN_SPRG2,r3 /* 0 => r1 has kernel sp */ + + /* stack */ + lis r1,init_thread_union@ha + addi r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init /* We have to do this with MMU on */ + +/* + * Decide what sort of machine this is and initialize the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + +/* + * Go back to running unmapped so we can load up new values + * and change to using our exception vectors. + * On the 8xx, all we have to do is invalidate the TLB to clear + * the old 8M byte TLB mappings and load the page table base register. + */ + /* The right way to do this would be to track it down through + * init's THREAD like the context switch code does, but this is + * easier......until someone changes init's static structures. + */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + tophys(r6,r6) +#ifdef CONFIG_8xx_CPU6 + lis r4, cpu6_errata_word@h + ori r4, r4, cpu6_errata_word@l + li r3, 0x3980 + stw r3, 12(r4) + lwz r3, 12(r4) +#endif + mtspr SPRN_M_TWB, r6 + lis r4,2f@h + ori r4,r4,2f@l + tophys(r4,r4) + li r3,MSR_KERNEL & ~(MSR_IR|MSR_DR) + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi +/* Load up the kernel context */ +2: + SYNC /* Force all PTE updates to finish */ + tlbia /* Clear all TLB entries */ + sync /* wait for tlbia/tlbie to finish */ + TLBSYNC /* ... on all CPUs */ + + /* set up the PTE pointers for the Abatron bdiGDB. + */ + tovirt(r6,r6) + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r5, 0xf0(r0) /* Must match your Abatron config file */ + tophys(r5,r5) + stw r6, 0(r5) + +/* Now turn on the MMU for real! */ + li r4,MSR_KERNEL + lis r3,start_kernel@h + ori r3,r3,start_kernel@l + mtspr SPRN_SRR0,r3 + mtspr SPRN_SRR1,r4 + rfi /* enable MMU and jump to start_kernel */ + +/* Set up the initial MMU state so we can do the first level of + * kernel initialization. This maps the first 8 MBytes of memory 1:1 + * virtual to physical. Also, set the cache mode since that is defined + * by TLB entries and perform any additional mapping (like of the IMMR). + * If configured to pin some TLBs, we pin the first 8 Mbytes of kernel, + * 24 Mbytes of data, and the 8M IMMR space. Anything not covered by + * these mappings is mapped by page tables. + */ +initial_mmu: + tlbia /* Invalidate all TLB entries */ +#ifdef CONFIG_PIN_TLB + lis r8, MI_RSV4I@h + ori r8, r8, 0x1c00 +#else + li r8, 0 +#endif + mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */ + +#ifdef CONFIG_PIN_TLB + lis r10, (MD_RSV4I | MD_RESETVAL)@h + ori r10, r10, 0x1c00 + mr r8, r10 +#else + lis r10, MD_RESETVAL@h +#endif +#ifndef CONFIG_8xx_COPYBACK + oris r10, r10, MD_WTDEF@h +#endif + mtspr SPRN_MD_CTR, r10 /* Set data TLB control */ + + /* Now map the lower 8 Meg into the TLBs. For this quick hack, + * we can load the instruction and data TLB registers with the + * same values. + */ + lis r8, KERNELBASE@h /* Create vaddr for TLB */ + ori r8, r8, MI_EVALID /* Mark it valid */ + mtspr SPRN_MI_EPN, r8 + mtspr SPRN_MD_EPN, r8 + li r8, MI_PS8MEG /* Set 8M byte page */ + ori r8, r8, MI_SVALID /* Make it valid */ + mtspr SPRN_MI_TWC, r8 + mtspr SPRN_MD_TWC, r8 + li r8, MI_BOOTINIT /* Create RPN for address 0 */ + mtspr SPRN_MI_RPN, r8 /* Store TLB entry */ + mtspr SPRN_MD_RPN, r8 + lis r8, MI_Kp@h /* Set the protection mode */ + mtspr SPRN_MI_AP, r8 + mtspr SPRN_MD_AP, r8 + + /* Map another 8 MByte at the IMMR to get the processor + * internal registers (among other things). + */ +#ifdef CONFIG_PIN_TLB + addi r10, r10, 0x0100 + mtspr SPRN_MD_CTR, r10 +#endif + mfspr r9, 638 /* Get current IMMR */ + andis. r9, r9, 0xff80 /* Get 8Mbyte boundary */ + + mr r8, r9 /* Create vaddr for TLB */ + ori r8, r8, MD_EVALID /* Mark it valid */ + mtspr SPRN_MD_EPN, r8 + li r8, MD_PS8MEG /* Set 8M byte page */ + ori r8, r8, MD_SVALID /* Make it valid */ + mtspr SPRN_MD_TWC, r8 + mr r8, r9 /* Create paddr for TLB */ + ori r8, r8, MI_BOOTINIT|0x2 /* Inhibit cache -- Cort */ + mtspr SPRN_MD_RPN, r8 + +#ifdef CONFIG_PIN_TLB + /* Map two more 8M kernel data pages. + */ + addi r10, r10, 0x0100 + mtspr SPRN_MD_CTR, r10 + + lis r8, KERNELBASE@h /* Create vaddr for TLB */ + addis r8, r8, 0x0080 /* Add 8M */ + ori r8, r8, MI_EVALID /* Mark it valid */ + mtspr SPRN_MD_EPN, r8 + li r9, MI_PS8MEG /* Set 8M byte page */ + ori r9, r9, MI_SVALID /* Make it valid */ + mtspr SPRN_MD_TWC, r9 + li r11, MI_BOOTINIT /* Create RPN for address 0 */ + addis r11, r11, 0x0080 /* Add 8M */ + mtspr SPRN_MD_RPN, r8 + + addis r8, r8, 0x0080 /* Add 8M */ + mtspr SPRN_MD_EPN, r8 + mtspr SPRN_MD_TWC, r9 + addis r11, r11, 0x0080 /* Add 8M */ + mtspr SPRN_MD_RPN, r8 +#endif + + /* Since the cache is enabled according to the information we + * just loaded into the TLB, invalidate and enable the caches here. + * We should probably check/set other modes....later. + */ + lis r8, IDC_INVALL@h + mtspr SPRN_IC_CST, r8 + mtspr SPRN_DC_CST, r8 + lis r8, IDC_ENABLE@h + mtspr SPRN_IC_CST, r8 +#ifdef CONFIG_8xx_COPYBACK + mtspr SPRN_DC_CST, r8 +#else + /* For a debug option, I left this here to easily enable + * the write through cache mode + */ + lis r8, DC_SFWT@h + mtspr SPRN_DC_CST, r8 + lis r8, IDC_ENABLE@h + mtspr SPRN_DC_CST, r8 +#endif + blr + + +/* + * Set up to use a given MMU context. + * r3 is context number, r4 is PGD pointer. + * + * We place the physical address of the new task page directory loaded + * into the MMU base register, and set the ASID compare register with + * the new "context." + */ +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is passed as second argument. + */ + lis r5, KERNELBASE@h + lwz r5, 0xf0(r5) + stw r4, 0x4(r5) +#endif + +#ifdef CONFIG_8xx_CPU6 + lis r6, cpu6_errata_word@h + ori r6, r6, cpu6_errata_word@l + tophys (r4, r4) + li r7, 0x3980 + stw r7, 12(r6) + lwz r7, 12(r6) + mtspr SPRN_M_TWB, r4 /* Update MMU base address */ + li r7, 0x3380 + stw r7, 12(r6) + lwz r7, 12(r6) + mtspr SPRN_M_CASID, r3 /* Update context */ +#else + mtspr SPRN_M_CASID,r3 /* Update context */ + tophys (r4, r4) + mtspr SPRN_M_TWB, r4 /* and pgd */ +#endif + SYNC + blr + +#ifdef CONFIG_8xx_CPU6 +/* It's here because it is unique to the 8xx. + * It is important we get called with interrupts disabled. I used to + * do that, but it appears that all code that calls this already had + * interrupt disabled. + */ + .globl set_dec_cpu6 +set_dec_cpu6: + lis r7, cpu6_errata_word@h + ori r7, r7, cpu6_errata_word@l + li r4, 0x2c00 + stw r4, 8(r7) + lwz r4, 8(r7) + mtspr 22, r3 /* Update Decrementer */ + SYNC + blr +#endif + +/* + * We put a few things here that have to be page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. + */ + .data + .globl sdata +sdata: + .globl empty_zero_page +empty_zero_page: + .space 4096 + + .globl swapper_pg_dir +swapper_pg_dir: + .space 4096 + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * Used to pass parameters into the kernel like root=/dev/sda1, etc. + */ + .globl cmd_line +cmd_line: + .space 512 + +/* Room for two PTE table poiners, usually the kernel and current user + * pointer to their respective root page table (pgdir). + */ +abatron_pteptrs: + .space 8 + +#ifdef CONFIG_8xx_CPU6 + .globl cpu6_errata_word +cpu6_errata_word: + .space 16 +#endif + diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S new file mode 100644 index 00000000000..eba5a5f8ff0 --- /dev/null +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -0,0 +1,1058 @@ +/* + * arch/ppc/kernel/head_fsl_booke.S + * + * Kernel execution entry point code. + * + * Copyright (c) 1995-1996 Gary Thomas <gdt@linuxppc.org> + * Initial PowerPC version. + * Copyright (c) 1996 Cort Dougan <cort@cs.nmt.edu> + * Rewritten for PReP + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Low-level exception handers, MMU support, and rewrite. + * Copyright (c) 1997 Dan Malek <dmalek@jlc.net> + * PowerPC 8xx modifications. + * Copyright (c) 1998-1999 TiVo, Inc. + * PowerPC 403GCX modifications. + * Copyright (c) 1999 Grant Erickson <grant@lcse.umn.edu> + * PowerPC 403GCX/405GP modifications. + * Copyright 2000 MontaVista Software Inc. + * PPC405 modifications + * PowerPC 403GCX/405GP modifications. + * Author: MontaVista Software, Inc. + * frank_rowand@mvista.com or source@mvista.com + * debbie_chu@mvista.com + * Copyright 2002-2004 MontaVista Software, Inc. + * PowerPC 44x support, Matt Porter <mporter@kernel.crashing.org> + * Copyright 2004 Freescale Semiconductor, Inc + * PowerPC e500 modifications, Kumar Gala <kumar.gala@freescale.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/mmu.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include "head_booke.h" + +/* As with the other PowerPC ports, it is expected that when code + * execution begins here, the following registers contain valid, yet + * optional, information: + * + * r3 - Board info structure pointer (DRAM, frequency, MAC address, etc.) + * r4 - Starting address of the init RAM disk + * r5 - Ending address of the init RAM disk + * r6 - Start of kernel command line string (e.g. "mem=128") + * r7 - End of kernel command line string + * + */ + .text +_GLOBAL(_stext) +_GLOBAL(_start) + /* + * Reserve a word at a fixed location to store the address + * of abatron_pteptrs + */ + nop +/* + * Save parameters we are passed + */ + mr r31,r3 + mr r30,r4 + mr r29,r5 + mr r28,r6 + mr r27,r7 + li r24,0 /* CPU number */ + +/* We try to not make any assumptions about how the boot loader + * setup or used the TLBs. We invalidate all mappings from the + * boot loader and load a single entry in TLB1[0] to map the + * first 16M of kernel memory. Any boot info passed from the + * bootloader needs to live in this first 16M. + * + * Requirement on bootloader: + * - The page we're executing in needs to reside in TLB1 and + * have IPROT=1. If not an invalidate broadcast could + * evict the entry we're currently executing in. + * + * r3 = Index of TLB1 were executing in + * r4 = Current MSR[IS] + * r5 = Index of TLB1 temp mapping + * + * Later in mapin_ram we will correctly map lowmem, and resize TLB1[0] + * if needed + */ + +/* 1. Find the index of the entry we're executing in */ + bl invstr /* Find our address */ +invstr: mflr r6 /* Make it accessible */ + mfmsr r7 + rlwinm r4,r7,27,31,31 /* extract MSR[IS] */ + mfspr r7, SPRN_PID0 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID0 */ +#ifndef CONFIG_E200 + mfspr r7,SPRN_MAS1 + andis. r7,r7,MAS1_VALID@h + bne match_TLB + mfspr r7,SPRN_PID1 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* search MSR[IS], SPID=PID1 */ + mfspr r7,SPRN_MAS1 + andis. r7,r7,MAS1_VALID@h + bne match_TLB + mfspr r7, SPRN_PID2 + slwi r7,r7,16 + or r7,r7,r4 + mtspr SPRN_MAS6,r7 + tlbsx 0,r6 /* Fall through, we had to match */ +#endif +match_TLB: + mfspr r7,SPRN_MAS0 + rlwinm r3,r7,16,20,31 /* Extract MAS0(Entry) */ + + mfspr r7,SPRN_MAS1 /* Insure IPROT set */ + oris r7,r7,MAS1_IPROT@h + mtspr SPRN_MAS1,r7 + tlbwe + +/* 2. Invalidate all entries except the entry we're executing in */ + mfspr r9,SPRN_TLB1CFG + andi. r9,r9,0xfff + li r6,0 /* Set Entry counter to 0 */ +1: lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r6,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r6) */ + mtspr SPRN_MAS0,r7 + tlbre + mfspr r7,SPRN_MAS1 + rlwinm r7,r7,0,2,31 /* Clear MAS1 Valid and IPROT */ + cmpw r3,r6 + beq skpinv /* Dont update the current execution TLB */ + mtspr SPRN_MAS1,r7 + tlbwe + isync +skpinv: addi r6,r6,1 /* Increment */ + cmpw r6,r9 /* Are we done? */ + bne 1b /* If not, repeat */ + + /* Invalidate TLB0 */ + li r6,0x04 + tlbivax 0,r6 +#ifdef CONFIG_SMP + tlbsync +#endif + /* Invalidate TLB1 */ + li r6,0x0c + tlbivax 0,r6 +#ifdef CONFIG_SMP + tlbsync +#endif + msync + +/* 3. Setup a temp mapping and jump to it */ + andi. r5, r3, 0x1 /* Find an entry not used and is non-zero */ + addi r5, r5, 0x1 + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r7 + tlbre + + /* Just modify the entry ID and EPN for the temp mapping */ + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ + mtspr SPRN_MAS0,r7 + xori r6,r4,1 /* Setup TMP mapping in the other Address space */ + slwi r6,r6,12 + oris r6,r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOKE_PAGESZ_4K))@l + mtspr SPRN_MAS1,r6 + mfspr r6,SPRN_MAS2 + li r7,0 /* temp EPN = 0 */ + rlwimi r7,r6,0,20,31 + mtspr SPRN_MAS2,r7 + tlbwe + + xori r6,r4,1 + slwi r6,r6,5 /* setup new context with other address space */ + bl 1f /* Find our address */ +1: mflr r9 + rlwimi r7,r9,0,20,31 + addi r7,r7,24 + mtspr SPRN_SRR0,r7 + mtspr SPRN_SRR1,r6 + rfi + +/* 4. Clear out PIDs & Search info */ + li r6,0 + mtspr SPRN_PID0,r6 +#ifndef CONFIG_E200 + mtspr SPRN_PID1,r6 + mtspr SPRN_PID2,r6 +#endif + mtspr SPRN_MAS6,r6 + +/* 5. Invalidate mapping we started in */ + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r3,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r3) */ + mtspr SPRN_MAS0,r7 + tlbre + li r6,0 + mtspr SPRN_MAS1,r6 + tlbwe + /* Invalidate TLB1 */ + li r9,0x0c + tlbivax 0,r9 +#ifdef CONFIG_SMP + tlbsync +#endif + msync + +/* 6. Setup KERNELBASE mapping in TLB1[0] */ + lis r6,0x1000 /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */ + mtspr SPRN_MAS0,r6 + lis r6,(MAS1_VALID|MAS1_IPROT)@h + ori r6,r6,(MAS1_TSIZE(BOOKE_PAGESZ_16M))@l + mtspr SPRN_MAS1,r6 + li r7,0 + lis r6,KERNELBASE@h + ori r6,r6,KERNELBASE@l + rlwimi r6,r7,0,20,31 + mtspr SPRN_MAS2,r6 + li r7,(MAS3_SX|MAS3_SW|MAS3_SR) + mtspr SPRN_MAS3,r7 + tlbwe + +/* 7. Jump to KERNELBASE mapping */ + lis r7,MSR_KERNEL@h + ori r7,r7,MSR_KERNEL@l + bl 1f /* Find our address */ +1: mflr r9 + rlwimi r6,r9,0,20,31 + addi r6,r6,24 + mtspr SPRN_SRR0,r6 + mtspr SPRN_SRR1,r7 + rfi /* start execution out of TLB1[0] entry */ + +/* 8. Clear out the temp mapping */ + lis r7,0x1000 /* Set MAS0(TLBSEL) = 1 */ + rlwimi r7,r5,16,4,15 /* Setup MAS0 = TLBSEL | ESEL(r5) */ + mtspr SPRN_MAS0,r7 + tlbre + mtspr SPRN_MAS1,r8 + tlbwe + /* Invalidate TLB1 */ + li r9,0x0c + tlbivax 0,r9 +#ifdef CONFIG_SMP + tlbsync +#endif + msync + + /* Establish the interrupt vector offsets */ + SET_IVOR(0, CriticalInput); + SET_IVOR(1, MachineCheck); + SET_IVOR(2, DataStorage); + SET_IVOR(3, InstructionStorage); + SET_IVOR(4, ExternalInput); + SET_IVOR(5, Alignment); + SET_IVOR(6, Program); + SET_IVOR(7, FloatingPointUnavailable); + SET_IVOR(8, SystemCall); + SET_IVOR(9, AuxillaryProcessorUnavailable); + SET_IVOR(10, Decrementer); + SET_IVOR(11, FixedIntervalTimer); + SET_IVOR(12, WatchdogTimer); + SET_IVOR(13, DataTLBError); + SET_IVOR(14, InstructionTLBError); + SET_IVOR(15, Debug); + SET_IVOR(32, SPEUnavailable); + SET_IVOR(33, SPEFloatingPointData); + SET_IVOR(34, SPEFloatingPointRound); +#ifndef CONFIG_E200 + SET_IVOR(35, PerformanceMonitor); +#endif + + /* Establish the interrupt vector base */ + lis r4,interrupt_base@h /* IVPR only uses the high 16-bits */ + mtspr SPRN_IVPR,r4 + + /* Setup the defaults for TLB entries */ + li r2,(MAS4_TSIZED(BOOKE_PAGESZ_4K))@l +#ifdef CONFIG_E200 + oris r2,r2,MAS4_TLBSELD(1)@h +#endif + mtspr SPRN_MAS4, r2 + +#if 0 + /* Enable DOZE */ + mfspr r2,SPRN_HID0 + oris r2,r2,HID0_DOZE@h + mtspr SPRN_HID0, r2 +#endif +#ifdef CONFIG_E200 + /* enable dedicated debug exception handling resources (Debug APU) */ + mfspr r2,SPRN_HID0 + ori r2,r2,HID0_DAPUEN@l + mtspr SPRN_HID0,r2 +#endif + +#if !defined(CONFIG_BDI_SWITCH) + /* + * The Abatron BDI JTAG debugger does not tolerate others + * mucking with the debug registers. + */ + lis r2,DBCR0_IDM@h + mtspr SPRN_DBCR0,r2 + /* clear any residual debug events */ + li r2,-1 + mtspr SPRN_DBSR,r2 +#endif + + /* + * This is where the main kernel code starts. + */ + + /* ptr to current */ + lis r2,init_task@h + ori r2,r2,init_task@l + + /* ptr to current thread */ + addi r4,r2,THREAD /* init task's THREAD */ + mtspr SPRN_SPRG3,r4 + + /* stack */ + lis r1,init_thread_union@h + ori r1,r1,init_thread_union@l + li r0,0 + stwu r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1) + + bl early_init + + mfspr r3,SPRN_TLB1CFG + andi. r3,r3,0xfff + lis r4,num_tlbcam_entries@ha + stw r3,num_tlbcam_entries@l(r4) +/* + * Decide what sort of machine this is and initialize the MMU. + */ + mr r3,r31 + mr r4,r30 + mr r5,r29 + mr r6,r28 + mr r7,r27 + bl machine_init + bl MMU_init + + /* Setup PTE pointers for the Abatron bdiGDB */ + lis r6, swapper_pg_dir@h + ori r6, r6, swapper_pg_dir@l + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + lis r4, KERNELBASE@h + ori r4, r4, KERNELBASE@l + stw r5, 0(r4) /* Save abatron_pteptrs at a fixed location */ + stw r6, 0(r5) + + /* Let's move on */ + lis r4,start_kernel@h + ori r4,r4,start_kernel@l + lis r3,MSR_KERNEL@h + ori r3,r3,MSR_KERNEL@l + mtspr SPRN_SRR0,r4 + mtspr SPRN_SRR1,r3 + rfi /* change context and jump to start_kernel */ + +/* Macros to hide the PTE size differences + * + * FIND_PTE -- walks the page tables given EA & pgdir pointer + * r10 -- EA of fault + * r11 -- PGDIR pointer + * r12 -- free + * label 2: is the bailout case + * + * if we find the pte (fall through): + * r11 is low pte word + * r12 is pointer to the pte + */ +#ifdef CONFIG_PTE_64BIT +#define PTE_FLAGS_OFFSET 4 +#define FIND_PTE \ + rlwinm r12, r10, 13, 19, 29; /* Compute pgdir/pmd offset */ \ + lwzx r11, r12, r11; /* Get pgd/pmd entry */ \ + rlwinm. r12, r11, 0, 0, 20; /* Extract pt base address */ \ + beq 2f; /* Bail if no table */ \ + rlwimi r12, r10, 23, 20, 28; /* Compute pte address */ \ + lwz r11, 4(r12); /* Get pte entry */ +#else +#define PTE_FLAGS_OFFSET 0 +#define FIND_PTE \ + rlwimi r11, r10, 12, 20, 29; /* Create L1 (pgdir/pmd) address */ \ + lwz r11, 0(r11); /* Get L1 entry */ \ + rlwinm. r12, r11, 0, 0, 19; /* Extract L2 (pte) base address */ \ + beq 2f; /* Bail if no table */ \ + rlwimi r12, r10, 22, 20, 29; /* Compute PTE address */ \ + lwz r11, 0(r12); /* Get Linux PTE */ +#endif + +/* + * Interrupt vector entry code + * + * The Book E MMUs are always on so we don't need to handle + * interrupts in real mode as with previous PPC processors. In + * this case we handle interrupts in the kernel virtual address + * space. + * + * Interrupt vectors are dynamically placed relative to the + * interrupt prefix as determined by the address of interrupt_base. + * The interrupt vectors offsets are programmed using the labels + * for each interrupt vector entry. + * + * Interrupt vectors must be aligned on a 16 byte boundary. + * We align on a 32 byte cache line boundary for good measure. + */ + +interrupt_base: + /* Critical Input Interrupt */ + CRITICAL_EXCEPTION(0x0100, CriticalInput, UnknownException) + + /* Machine Check Interrupt */ +#ifdef CONFIG_E200 + /* no RFMCI, MCSRRs on E200 */ + CRITICAL_EXCEPTION(0x0200, MachineCheck, MachineCheckException) +#else + MCHECK_EXCEPTION(0x0200, MachineCheck, MachineCheckException) +#endif + + /* Data Storage Interrupt */ + START_EXCEPTION(DataStorage) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + + /* + * Check if it was a store fault, if not then bail + * because a user tried to access a kernel or + * read-protected page. Otherwise, get the + * offending address and handle it. + */ + mfspr r10, SPRN_ESR + andis. r10, r10, ESR_ST@h + beq 2f + + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + ori r11, r11, TASK_SIZE@l + cmplw 0, r10, r11 + bge 2f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) +4: + FIND_PTE + + /* Are _PAGE_USER & _PAGE_RW set & _PAGE_HWWRITE not? */ + andi. r13, r11, _PAGE_RW|_PAGE_USER|_PAGE_HWWRITE + cmpwi 0, r13, _PAGE_RW|_PAGE_USER + bne 2f /* Bail if not */ + + /* Update 'changed'. */ + ori r11, r11, _PAGE_DIRTY|_PAGE_ACCESSED|_PAGE_HWWRITE + stw r11, PTE_FLAGS_OFFSET(r12) /* Update Linux page table */ + + /* MAS2 not updated as the entry does exist in the tlb, this + fault taken to detect state transition (eg: COW -> DIRTY) + */ + andi. r11, r11, _PAGE_HWEXEC + rlwimi r11, r11, 31, 27, 27 /* SX <- _PAGE_HWEXEC */ + ori r11, r11, (MAS3_UW|MAS3_SW|MAS3_UR|MAS3_SR)@l /* set static perms */ + + /* update search PID in MAS6, AS = 0 */ + mfspr r12, SPRN_PID0 + slwi r12, r12, 16 + mtspr SPRN_MAS6, r12 + + /* find the TLB index that caused the fault. It has to be here. */ + tlbsx 0, r10 + + /* only update the perm bits, assume the RPN is fine */ + mfspr r12, SPRN_MAS3 + rlwimi r12, r11, 0, 20, 31 + mtspr SPRN_MAS3,r12 + tlbwe + + /* Done...restore registers and get out of here. */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + rfi /* Force context change */ + +2: + /* + * The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b data_access + + /* Instruction Storage Interrupt */ + INSTRUCTION_STORAGE_EXCEPTION + + /* External Input Interrupt */ + EXCEPTION(0x0500, ExternalInput, do_IRQ, EXC_XFER_LITE) + + /* Alignment Interrupt */ + ALIGNMENT_EXCEPTION + + /* Program Interrupt */ + PROGRAM_EXCEPTION + + /* Floating Point Unavailable Interrupt */ +#ifdef CONFIG_PPC_FPU + FP_UNAVAILABLE_EXCEPTION +#else +#ifdef CONFIG_E200 + /* E200 treats 'normal' floating point instructions as FP Unavail exception */ + EXCEPTION(0x0800, FloatingPointUnavailable, ProgramCheckException, EXC_XFER_EE) +#else + EXCEPTION(0x0800, FloatingPointUnavailable, UnknownException, EXC_XFER_EE) +#endif +#endif + + /* System Call Interrupt */ + START_EXCEPTION(SystemCall) + NORMAL_EXCEPTION_PROLOG + EXC_XFER_EE_LITE(0x0c00, DoSyscall) + + /* Auxillary Processor Unavailable Interrupt */ + EXCEPTION(0x2900, AuxillaryProcessorUnavailable, UnknownException, EXC_XFER_EE) + + /* Decrementer Interrupt */ + DECREMENTER_EXCEPTION + + /* Fixed Internal Timer Interrupt */ + /* TODO: Add FIT support */ + EXCEPTION(0x3100, FixedIntervalTimer, UnknownException, EXC_XFER_EE) + + /* Watchdog Timer Interrupt */ +#ifdef CONFIG_BOOKE_WDT + CRITICAL_EXCEPTION(0x3200, WatchdogTimer, WatchdogException) +#else + CRITICAL_EXCEPTION(0x3200, WatchdogTimer, UnknownException) +#endif + + /* Data TLB Error Interrupt */ + START_EXCEPTION(DataTLBError) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + mfspr r10, SPRN_DEAR /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + ori r11, r11, TASK_SIZE@l + cmplw 5, r10, r11 + blt 5, 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MAS1 /* Set TID to 0 */ + rlwinm r12,r12,0,16,1 + mtspr SPRN_MAS1,r12 + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + +4: + FIND_PTE + andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ + beq 2f /* Bail if not present */ + +#ifdef CONFIG_PTE_64BIT + lwz r13, 0(r12) +#endif + ori r11, r11, _PAGE_ACCESSED + stw r11, PTE_FLAGS_OFFSET(r12) + + /* Jump to common tlb load */ + b finish_tlb_load +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b data_access + + /* Instruction TLB Error Interrupt */ + /* + * Nearly the same as above, except we get our + * information from different registers and bailout + * to a different point. + */ + START_EXCEPTION(InstructionTLBError) + mtspr SPRN_SPRG0, r10 /* Save some working registers */ + mtspr SPRN_SPRG1, r11 + mtspr SPRN_SPRG4W, r12 + mtspr SPRN_SPRG5W, r13 + mfcr r11 + mtspr SPRN_SPRG7W, r11 + mfspr r10, SPRN_SRR0 /* Get faulting address */ + + /* If we are faulting a kernel address, we have to use the + * kernel page tables. + */ + lis r11, TASK_SIZE@h + ori r11, r11, TASK_SIZE@l + cmplw 5, r10, r11 + blt 5, 3f + lis r11, swapper_pg_dir@h + ori r11, r11, swapper_pg_dir@l + + mfspr r12,SPRN_MAS1 /* Set TID to 0 */ + rlwinm r12,r12,0,16,1 + mtspr SPRN_MAS1,r12 + + b 4f + + /* Get the PGD for the current thread */ +3: + mfspr r11,SPRN_SPRG3 + lwz r11,PGDIR(r11) + +4: + FIND_PTE + andi. r13, r11, _PAGE_PRESENT /* Is the page present? */ + beq 2f /* Bail if not present */ + +#ifdef CONFIG_PTE_64BIT + lwz r13, 0(r12) +#endif + ori r11, r11, _PAGE_ACCESSED + stw r11, PTE_FLAGS_OFFSET(r12) + + /* Jump to common TLB load point */ + b finish_tlb_load + +2: + /* The bailout. Restore registers to pre-exception conditions + * and call the heavyweights to help us out. + */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + b InstructionStorage + +#ifdef CONFIG_SPE + /* SPE Unavailable */ + START_EXCEPTION(SPEUnavailable) + NORMAL_EXCEPTION_PROLOG + bne load_up_spe + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x2010, KernelSPE) +#else + EXCEPTION(0x2020, SPEUnavailable, UnknownException, EXC_XFER_EE) +#endif /* CONFIG_SPE */ + + /* SPE Floating Point Data */ +#ifdef CONFIG_SPE + EXCEPTION(0x2030, SPEFloatingPointData, SPEFloatingPointException, EXC_XFER_EE); +#else + EXCEPTION(0x2040, SPEFloatingPointData, UnknownException, EXC_XFER_EE) +#endif /* CONFIG_SPE */ + + /* SPE Floating Point Round */ + EXCEPTION(0x2050, SPEFloatingPointRound, UnknownException, EXC_XFER_EE) + + /* Performance Monitor */ + EXCEPTION(0x2060, PerformanceMonitor, PerformanceMonitorException, EXC_XFER_STD) + + + /* Debug Interrupt */ + DEBUG_EXCEPTION + +/* + * Local functions + */ + + /* + * Data TLB exceptions will bail out to this point + * if they can't resolve the lightweight TLB fault. + */ +data_access: + NORMAL_EXCEPTION_PROLOG + mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + stw r5,_ESR(r11) + mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + andis. r10,r5,(ESR_ILK|ESR_DLK)@h + bne 1f + EXC_XFER_EE_LITE(0x0300, handle_page_fault) +1: + addi r3,r1,STACK_FRAME_OVERHEAD + EXC_XFER_EE_LITE(0x0300, CacheLockingException) + +/* + + * Both the instruction and data TLB miss get to this + * point to load the TLB. + * r10 - EA of fault + * r11 - TLB (info from Linux PTE) + * r12, r13 - available to use + * CR5 - results of addr < TASK_SIZE + * MAS0, MAS1 - loaded with proper value when we get here + * MAS2, MAS3 - will need additional info from Linux PTE + * Upon exit, we reload everything and RFI. + */ +finish_tlb_load: + /* + * We set execute, because we don't have the granularity to + * properly set this at the page level (Linux problem). + * Many of these bits are software only. Bits we don't set + * here we (properly should) assume have the appropriate value. + */ + + mfspr r12, SPRN_MAS2 +#ifdef CONFIG_PTE_64BIT + rlwimi r12, r11, 26, 24, 31 /* extract ...WIMGE from pte */ +#else + rlwimi r12, r11, 26, 27, 31 /* extract WIMGE from pte */ +#endif + mtspr SPRN_MAS2, r12 + + bge 5, 1f + + /* is user addr */ + andi. r12, r11, (_PAGE_USER | _PAGE_HWWRITE | _PAGE_HWEXEC) + andi. r10, r11, _PAGE_USER /* Test for _PAGE_USER */ + srwi r10, r12, 1 + or r12, r12, r10 /* Copy user perms into supervisor */ + iseleq r12, 0, r12 + b 2f + + /* is kernel addr */ +1: rlwinm r12, r11, 31, 29, 29 /* Extract _PAGE_HWWRITE into SW */ + ori r12, r12, (MAS3_SX | MAS3_SR) + +#ifdef CONFIG_PTE_64BIT +2: rlwimi r12, r13, 24, 0, 7 /* grab RPN[32:39] */ + rlwimi r12, r11, 24, 8, 19 /* grab RPN[40:51] */ + mtspr SPRN_MAS3, r12 +BEGIN_FTR_SECTION + srwi r10, r13, 8 /* grab RPN[8:31] */ + mtspr SPRN_MAS7, r10 +END_FTR_SECTION_IFSET(CPU_FTR_BIG_PHYS) +#else +2: rlwimi r11, r12, 0, 20, 31 /* Extract RPN from PTE and merge with perms */ + mtspr SPRN_MAS3, r11 +#endif +#ifdef CONFIG_E200 + /* Round robin TLB1 entries assignment */ + mfspr r12, SPRN_MAS0 + + /* Extract TLB1CFG(NENTRY) */ + mfspr r11, SPRN_TLB1CFG + andi. r11, r11, 0xfff + + /* Extract MAS0(NV) */ + andi. r13, r12, 0xfff + addi r13, r13, 1 + cmpw 0, r13, r11 + addi r12, r12, 1 + + /* check if we need to wrap */ + blt 7f + + /* wrap back to first free tlbcam entry */ + lis r13, tlbcam_index@ha + lwz r13, tlbcam_index@l(r13) + rlwimi r12, r13, 0, 20, 31 +7: + mtspr SPRN_MAS0,r12 +#endif /* CONFIG_E200 */ + + tlbwe + + /* Done...restore registers and get out of here. */ + mfspr r11, SPRN_SPRG7R + mtcr r11 + mfspr r13, SPRN_SPRG5R + mfspr r12, SPRN_SPRG4R + mfspr r11, SPRN_SPRG1 + mfspr r10, SPRN_SPRG0 + rfi /* Force context change */ + +#ifdef CONFIG_SPE +/* Note that the SPE support is closely modeled after the AltiVec + * support. Changes to one are likely to be applicable to the + * other! */ +load_up_spe: +/* + * Disable SPE for the task which had SPE previously, + * and save its SPE registers in its thread_struct. + * Enables SPE for use in the kernel on return. + * On SMP we know the SPE units are free, since we give it up every + * switch. -- Kumar + */ + mfmsr r5 + oris r5,r5,MSR_SPE@h + mtmsr r5 /* enable use of SPE now */ + isync +/* + * For SMP, we don't do lazy SPE switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_spe in switch_to. + */ +#ifndef CONFIG_SMP + lis r3,last_task_used_spe@ha + lwz r4,last_task_used_spe@l(r3) + cmpi 0,r4,0 + beq 1f + addi r4,r4,THREAD /* want THREAD of last_task_used_spe */ + SAVE_32EVRS(0,r10,r4) + evxor evr10, evr10, evr10 /* clear out evr10 */ + evmwumiaa evr10, evr10, evr10 /* evr10 <- ACC = 0 * 0 + ACC */ + li r5,THREAD_ACC + evstddx evr10, r4, r5 /* save off accumulator */ + lwz r5,PT_REGS(r4) + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r10,MSR_SPE@h + andc r4,r4,r10 /* disable SPE for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of SPE after return */ + oris r9,r9,MSR_SPE@h + mfspr r5,SPRN_SPRG3 /* current task's THREAD (phys) */ + li r4,1 + li r10,THREAD_ACC + stw r4,THREAD_USED_SPE(r5) + evlddx evr4,r10,r5 + evmra evr4,evr4 + REST_32EVRS(0,r10,r5) +#ifndef CONFIG_SMP + subi r4,r5,THREAD + stw r4,last_task_used_spe@l(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ +2: REST_4GPRS(3, r11) + lwz r10,_CCR(r11) + REST_GPR(1, r11) + mtcr r10 + lwz r10,_LINK(r11) + mtlr r10 + REST_GPR(10, r11) + mtspr SPRN_SRR1,r9 + mtspr SPRN_SRR0,r12 + REST_GPR(9, r11) + REST_GPR(12, r11) + lwz r11,GPR11(r11) + SYNC + rfi + +/* + * SPE unavailable trap from kernel - print a message, but let + * the task use SPE in the kernel until it returns to user mode. + */ +KernelSPE: + lwz r3,_MSR(r1) + oris r3,r3,MSR_SPE@h + stw r3,_MSR(r1) /* enable use of SPE after return */ + lis r3,87f@h + ori r3,r3,87f@l + mr r4,r2 /* current */ + lwz r5,_NIP(r1) + bl printk + b ret_from_except +87: .string "SPE used in kernel (task=%p, pc=%x) \n" + .align 4,0 + +#endif /* CONFIG_SPE */ + +/* + * Global functions + */ + +/* + * extern void loadcam_entry(unsigned int index) + * + * Load TLBCAM[index] entry in to the L2 CAM MMU + */ +_GLOBAL(loadcam_entry) + lis r4,TLBCAM@ha + addi r4,r4,TLBCAM@l + mulli r5,r3,20 + add r3,r5,r4 + lwz r4,0(r3) + mtspr SPRN_MAS0,r4 + lwz r4,4(r3) + mtspr SPRN_MAS1,r4 + lwz r4,8(r3) + mtspr SPRN_MAS2,r4 + lwz r4,12(r3) + mtspr SPRN_MAS3,r4 + tlbwe + isync + blr + +/* + * extern void giveup_altivec(struct task_struct *prev) + * + * The e500 core does not have an AltiVec unit. + */ +_GLOBAL(giveup_altivec) + blr + +#ifdef CONFIG_SPE +/* + * extern void giveup_spe(struct task_struct *prev) + * + */ +_GLOBAL(giveup_spe) + mfmsr r5 + oris r5,r5,MSR_SPE@h + SYNC + mtmsr r5 /* enable use of SPE now */ + isync + cmpi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + lwz r5,PT_REGS(r3) + cmpi 0,r5,0 + SAVE_32EVRS(0, r4, r3) + evxor evr6, evr6, evr6 /* clear out evr6 */ + evmwumiaa evr6, evr6, evr6 /* evr6 <- ACC = 0 * 0 + ACC */ + li r4,THREAD_ACC + evstddx evr6, r4, r3 /* save off accumulator */ + mfspr r6,SPRN_SPEFSCR + stw r6,THREAD_SPEFSCR(r3) /* save spefscr register value */ + beq 1f + lwz r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_SPE@h + andc r4,r4,r3 /* disable SPE for previous task */ + stw r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + lis r4,last_task_used_spe@ha + stw r5,last_task_used_spe@l(r4) +#endif /* CONFIG_SMP */ + blr +#endif /* CONFIG_SPE */ + +/* + * extern void giveup_fpu(struct task_struct *prev) + * + * Not all FSL Book-E cores have an FPU + */ +#ifndef CONFIG_PPC_FPU +_GLOBAL(giveup_fpu) + blr +#endif + +/* + * extern void abort(void) + * + * At present, this routine just applies a system reset. + */ +_GLOBAL(abort) + li r13,0 + mtspr SPRN_DBCR0,r13 /* disable all debug events */ + mfmsr r13 + ori r13,r13,MSR_DE@l /* Enable Debug Events */ + mtmsr r13 + mfspr r13,SPRN_DBCR0 + lis r13,(DBCR0_IDM|DBCR0_RST_CHIP)@h + mtspr SPRN_DBCR0,r13 + +_GLOBAL(set_context) + +#ifdef CONFIG_BDI_SWITCH + /* Context switch the PTE pointer for the Abatron BDI2000. + * The PGDIR is the second parameter. + */ + lis r5, abatron_pteptrs@h + ori r5, r5, abatron_pteptrs@l + stw r4, 0x4(r5) +#endif + mtspr SPRN_PID,r3 + isync /* Force context change */ + blr + +/* + * We put a few things here that have to be page-aligned. This stuff + * goes at the beginning of the data segment, which is page-aligned. + */ + .data +_GLOBAL(sdata) +_GLOBAL(empty_zero_page) + .space 4096 +_GLOBAL(swapper_pg_dir) + .space 4096 + +/* Reserved 4k for the critical exception stack & 4k for the machine + * check stack per CPU for kernel mode exceptions */ + .section .bss + .align 12 +exception_stack_bottom: + .space BOOKE_EXCEPTION_STACK_SIZE * NR_CPUS +_GLOBAL(exception_stack_top) + +/* + * This space gets a copy of optional info passed to us by the bootstrap + * which is used to pass parameters into the kernel like root=/dev/sda1, etc. + */ +_GLOBAL(cmd_line) + .space 512 + +/* + * Room for two PTE pointers, usually the kernel and current user pointers + * to their respective root page table. + */ +abatron_pteptrs: + .space 8 + diff --git a/arch/powerpc/kernel/idle_6xx.S b/arch/powerpc/kernel/idle_6xx.S new file mode 100644 index 00000000000..1a2194cf682 --- /dev/null +++ b/arch/powerpc/kernel/idle_6xx.S @@ -0,0 +1,233 @@ +/* + * This file contains the power_save function for 6xx & 7xxx CPUs + * rewritten in assembler + * + * Warning ! This code assumes that if your machine has a 750fx + * it will have PLL 1 set to low speed mode (used during NAP/DOZE). + * if this is not the case some additional changes will have to + * be done to check a runtime var (a bit like powersave-nap) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/threads.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +#undef DEBUG + + .text + +/* + * Init idle, called at early CPU setup time from head.S for each CPU + * Make sure no rest of NAP mode remains in HID0, save default + * values for some CPU specific registers. Called with r24 + * containing CPU number and r3 reloc offset + */ +_GLOBAL(init_idle_6xx) +BEGIN_FTR_SECTION + mfspr r4,SPRN_HID0 + rlwinm r4,r4,0,10,8 /* Clear NAP */ + mtspr SPRN_HID0, r4 + b 1f +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) + blr +1: + slwi r5,r24,2 + add r5,r5,r3 +BEGIN_FTR_SECTION + mfspr r4,SPRN_MSSCR0 + addis r6,r5, nap_save_msscr0@ha + stw r4,nap_save_msscr0@l(r6) +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +BEGIN_FTR_SECTION + mfspr r4,SPRN_HID1 + addis r6,r5,nap_save_hid1@ha + stw r4,nap_save_hid1@l(r6) +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + blr + +/* + * Here is the power_save_6xx function. This could eventually be + * split into several functions & changing the function pointer + * depending on the various features. + */ +_GLOBAL(ppc6xx_idle) + /* Check if we can nap or doze, put HID0 mask in r3 + */ + lis r3, 0 +BEGIN_FTR_SECTION + lis r3,HID0_DOZE@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) +BEGIN_FTR_SECTION + /* We must dynamically check for the NAP feature as it + * can be cleared by CPU init after the fixups are done + */ + lis r4,cur_cpu_spec@ha + lwz r4,cur_cpu_spec@l(r4) + lwz r4,CPU_SPEC_FEATURES(r4) + andi. r0,r4,CPU_FTR_CAN_NAP + beq 1f + /* Now check if user or arch enabled NAP mode */ + lis r4,powersave_nap@ha + lwz r4,powersave_nap@l(r4) + cmpwi 0,r4,0 + beq 1f + lis r3,HID0_NAP@h +1: +END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) + cmpwi 0,r3,0 + beqlr + + /* Clear MSR:EE */ + mfmsr r7 + rlwinm r0,r7,0,17,15 + mtmsr r0 + + /* Check current_thread_info()->flags */ + rlwinm r4,r1,0,0,18 + lwz r4,TI_FLAGS(r4) + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f + mtmsr r7 /* out of line this ? */ + blr +1: + /* Some pre-nap cleanups needed on some CPUs */ + andis. r0,r3,HID0_NAP@h + beq 2f +BEGIN_FTR_SECTION + /* Disable L2 prefetch on some 745x and try to ensure + * L2 prefetch engines are idle. As explained by errata + * text, we can't be sure they are, we just hope very hard + * that well be enough (sic !). At least I noticed Apple + * doesn't even bother doing the dcbf's here... + */ + mfspr r4,SPRN_MSSCR0 + rlwinm r4,r4,0,0,29 + sync + mtspr SPRN_MSSCR0,r4 + sync + isync + lis r4,KERNELBASE@h + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +#ifdef DEBUG + lis r6,nap_enter_count@ha + lwz r4,nap_enter_count@l(r6) + addi r4,r4,1 + stw r4,nap_enter_count@l(r6) +#endif +2: +BEGIN_FTR_SECTION + /* Go to low speed mode on some 750FX */ + lis r4,powersave_lowspeed@ha + lwz r4,powersave_lowspeed@l(r4) + cmpwi 0,r4,0 + beq 1f + mfspr r4,SPRN_HID1 + oris r4,r4,0x0001 + mtspr SPRN_HID1,r4 +1: +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + + /* Go to NAP or DOZE now */ + mfspr r4,SPRN_HID0 + lis r5,(HID0_NAP|HID0_SLEEP)@h +BEGIN_FTR_SECTION + oris r5,r5,HID0_DOZE@h +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) + andc r4,r4,r5 + or r4,r4,r3 +BEGIN_FTR_SECTION + oris r4,r4,HID0_DPM@h /* that should be done once for all */ +END_FTR_SECTION_IFCLR(CPU_FTR_NO_DPM) + mtspr SPRN_HID0,r4 +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + ori r7,r7,MSR_EE /* Could be ommited (already set) */ + oris r7,r7,MSR_POW@h + sync + isync + mtmsr r7 + isync + sync + blr + +/* + * Return from NAP/DOZE mode, restore some CPU specific registers, + * we are called with DR/IR still off and r2 containing physical + * address of current. + */ +_GLOBAL(power_save_6xx_restore) + mfspr r11,SPRN_HID0 + rlwinm. r11,r11,0,10,8 /* Clear NAP & copy NAP bit !state to cr1 EQ */ + cror 4*cr1+eq,4*cr0+eq,4*cr0+eq +BEGIN_FTR_SECTION + rlwinm r11,r11,0,9,7 /* Clear DOZE */ +END_FTR_SECTION_IFSET(CPU_FTR_CAN_DOZE) + mtspr SPRN_HID0, r11 + +#ifdef DEBUG + beq cr1,1f + lis r11,(nap_return_count-KERNELBASE)@ha + lwz r9,nap_return_count@l(r11) + addi r9,r9,1 + stw r9,nap_return_count@l(r11) +1: +#endif + + rlwinm r9,r1,0,0,18 + tophys(r9,r9) + lwz r11,TI_CPU(r9) + slwi r11,r11,2 + /* Todo make sure all these are in the same page + * and load r22 (@ha part + CPU offset) only once + */ +BEGIN_FTR_SECTION + beq cr1,1f + addis r9,r11,(nap_save_msscr0-KERNELBASE)@ha + lwz r9,nap_save_msscr0@l(r9) + mtspr SPRN_MSSCR0, r9 + sync + isync +1: +END_FTR_SECTION_IFSET(CPU_FTR_NAP_DISABLE_L2_PR) +BEGIN_FTR_SECTION + addis r9,r11,(nap_save_hid1-KERNELBASE)@ha + lwz r9,nap_save_hid1@l(r9) + mtspr SPRN_HID1, r9 +END_FTR_SECTION_IFSET(CPU_FTR_DUAL_PLL_750FX) + b transfer_to_handler_cont + + .data + +_GLOBAL(nap_save_msscr0) + .space 4*NR_CPUS + +_GLOBAL(nap_save_hid1) + .space 4*NR_CPUS + +_GLOBAL(powersave_nap) + .long 0 +_GLOBAL(powersave_lowspeed) + .long 0 + +#ifdef DEBUG +_GLOBAL(nap_enter_count) + .space 4 +_GLOBAL(nap_return_count) + .space 4 +#endif diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c new file mode 100644 index 00000000000..f5a9d2a84fa --- /dev/null +++ b/arch/powerpc/kernel/process.c @@ -0,0 +1,724 @@ +/* + * arch/ppc/kernel/process.c + * + * Derived from "arch/i386/kernel/process.c" + * Copyright (C) 1995 Linus Torvalds + * + * Updated and modified by Cort Dougan (cort@cs.nmt.edu) and + * Paul Mackerras (paulus@cs.anu.edu.au) + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/elf.h> +#include <linux/init.h> +#include <linux/prctl.h> +#include <linux/init_task.h> +#include <linux/module.h> +#include <linux/kallsyms.h> +#include <linux/mqueue.h> +#include <linux/hardirq.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/processor.h> +#include <asm/mmu.h> +#include <asm/prom.h> + +extern unsigned long _get_SP(void); + +#ifndef CONFIG_SMP +struct task_struct *last_task_used_math = NULL; +struct task_struct *last_task_used_altivec = NULL; +struct task_struct *last_task_used_spe = NULL; +#endif + +static struct fs_struct init_fs = INIT_FS; +static struct files_struct init_files = INIT_FILES; +static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); +struct mm_struct init_mm = INIT_MM(init_mm); +EXPORT_SYMBOL(init_mm); + +/* this is 8kB-aligned so we can get to the thread_info struct + at the base of it from the stack pointer with 1 integer instruction. */ +union thread_union init_thread_union + __attribute__((__section__(".data.init_task"))) = +{ INIT_THREAD_INFO(init_task) }; + +/* initial task structure */ +struct task_struct init_task = INIT_TASK(init_task); +EXPORT_SYMBOL(init_task); + +/* only used to get secondary processor up */ +struct task_struct *current_set[NR_CPUS] = {&init_task, }; + +/* + * Make sure the floating-point register state in the + * the thread_struct is up to date for task tsk. + */ +void flush_fp_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + /* + * We need to disable preemption here because if we didn't, + * another process could get scheduled after the regs->msr + * test but before we have finished saving the FP registers + * to the thread_struct. That process could take over the + * FPU, and then when we get scheduled again we would store + * bogus values for the remaining FP registers. + */ + preempt_disable(); + if (tsk->thread.regs->msr & MSR_FP) { +#ifdef CONFIG_SMP + /* + * This should only ever be called for current or + * for a stopped child process. Since we save away + * the FP register state on context switch on SMP, + * there is something wrong if a stopped child appears + * to still have its FP state in the CPU registers. + */ + BUG_ON(tsk != current); +#endif + giveup_fpu(current); + } + preempt_enable(); + } +} + +void enable_kernel_fp(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_FP)) + giveup_fpu(current); + else + giveup_fpu(NULL); /* just enables FP for kernel */ +#else + giveup_fpu(last_task_used_math); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_fp); + +int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs) +{ + if (!tsk->thread.regs) + return 0; + flush_fp_to_thread(current); + + memcpy(fpregs, &tsk->thread.fpr[0], sizeof(*fpregs)); + + return 1; +} + +#ifdef CONFIG_ALTIVEC +void enable_kernel_altivec(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_VEC)) + giveup_altivec(current); + else + giveup_altivec(NULL); /* just enable AltiVec for kernel - force */ +#else + giveup_altivec(last_task_used_altivec); +#endif /* CONFIG_SMP */ +} +EXPORT_SYMBOL(enable_kernel_altivec); + +/* + * Make sure the VMX/Altivec register state in the + * the thread_struct is up to date for task tsk. + */ +void flush_altivec_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_VEC) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + giveup_altivec(current); + } + preempt_enable(); + } +} + +int dump_task_altivec(struct pt_regs *regs, elf_vrregset_t *vrregs) +{ + flush_altivec_to_thread(current); + memcpy(vrregs, ¤t->thread.vr[0], sizeof(*vrregs)); + return 1; +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_SPE + +void enable_kernel_spe(void) +{ + WARN_ON(preemptible()); + +#ifdef CONFIG_SMP + if (current->thread.regs && (current->thread.regs->msr & MSR_SPE)) + giveup_spe(current); + else + giveup_spe(NULL); /* just enable SPE for kernel - force */ +#else + giveup_spe(last_task_used_spe); +#endif /* __SMP __ */ +} +EXPORT_SYMBOL(enable_kernel_spe); + +void flush_spe_to_thread(struct task_struct *tsk) +{ + if (tsk->thread.regs) { + preempt_disable(); + if (tsk->thread.regs->msr & MSR_SPE) { +#ifdef CONFIG_SMP + BUG_ON(tsk != current); +#endif + giveup_spe(current); + } + preempt_enable(); + } +} + +int dump_spe(struct pt_regs *regs, elf_vrregset_t *evrregs) +{ + flush_spe_to_thread(current); + /* We copy u32 evr[32] + u64 acc + u32 spefscr -> 35 */ + memcpy(evrregs, ¤t->thread.evr[0], sizeof(u32) * 35); + return 1; +} +#endif /* CONFIG_SPE */ + +static void set_dabr_spr(unsigned long val) +{ + mtspr(SPRN_DABR, val); +} + +int set_dabr(unsigned long dabr) +{ + int ret = 0; + +#ifdef CONFIG_PPC64 + if (firmware_has_feature(FW_FEATURE_XDABR)) { + /* We want to catch accesses from kernel and userspace */ + unsigned long flags = H_DABRX_KERNEL|H_DABRX_USER; + ret = plpar_set_xdabr(dabr, flags); + } else if (firmware_has_feature(FW_FEATURE_DABR)) { + ret = plpar_set_dabr(dabr); + } else +#endif + set_dabr_spr(dabr); + + return ret; +} + +static DEFINE_PER_CPU(unsigned long, current_dabr); + +struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *new) +{ + struct thread_struct *new_thread, *old_thread; + unsigned long flags; + struct task_struct *last; + +#ifdef CONFIG_SMP + /* avoid complexity of lazy save/restore of fpu + * by just saving it every time we switch out if + * this task used the fpu during the last quantum. + * + * If it tries to use the fpu again, it'll trap and + * reload its fp regs. So we don't have to do a restore + * every switch, just a save. + * -- Cort + */ + if (prev->thread.regs && (prev->thread.regs->msr & MSR_FP)) + giveup_fpu(prev); +#ifdef CONFIG_ALTIVEC + /* + * If the previous thread used altivec in the last quantum + * (thus changing altivec regs) then save them. + * We used to check the VRSAVE register but not all apps + * set it, so we don't rely on it now (and in fact we need + * to save & restore VSCR even if VRSAVE == 0). -- paulus + * + * On SMP we always save/restore altivec regs just to avoid the + * complexity of changing processors. + * -- Cort + */ + if (prev->thread.regs && (prev->thread.regs->msr & MSR_VEC)) + giveup_altivec(prev); + /* Avoid the trap. On smp this this never happens since + * we don't set last_task_used_altivec -- Cort + */ + if (new->thread.regs && last_task_used_altivec == new) + new->thread.regs->msr |= MSR_VEC; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + /* + * If the previous thread used spe in the last quantum + * (thus changing spe regs) then save them. + * + * On SMP we always save/restore spe regs just to avoid the + * complexity of changing processors. + */ + if ((prev->thread.regs && (prev->thread.regs->msr & MSR_SPE))) + giveup_spe(prev); + /* Avoid the trap. On smp this this never happens since + * we don't set last_task_used_spe + */ + if (new->thread.regs && last_task_used_spe == new) + new->thread.regs->msr |= MSR_SPE; +#endif /* CONFIG_SPE */ +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC64 /* for now */ + if (unlikely(__get_cpu_var(current_dabr) != new->thread.dabr)) { + set_dabr(new->thread.dabr); + __get_cpu_var(current_dabr) = new->thread.dabr; + } +#endif + + new_thread = &new->thread; + old_thread = ¤t->thread; + local_irq_save(flags); + last = _switch(old_thread, new_thread); + + local_irq_restore(flags); + + return last; +} + +void show_regs(struct pt_regs * regs) +{ + int i, trap; + + printk("NIP: %08lX LR: %08lX SP: %08lX REGS: %p TRAP: %04lx %s\n", + regs->nip, regs->link, regs->gpr[1], regs, regs->trap, + print_tainted()); + printk("MSR: %08lx EE: %01x PR: %01x FP: %01x ME: %01x IR/DR: %01x%01x\n", + regs->msr, regs->msr&MSR_EE ? 1 : 0, regs->msr&MSR_PR ? 1 : 0, + regs->msr & MSR_FP ? 1 : 0,regs->msr&MSR_ME ? 1 : 0, + regs->msr&MSR_IR ? 1 : 0, + regs->msr&MSR_DR ? 1 : 0); + trap = TRAP(regs); + if (trap == 0x300 || trap == 0x600) + printk("DAR: %08lX, DSISR: %08lX\n", regs->dar, regs->dsisr); + printk("TASK = %p[%d] '%s' THREAD: %p\n", + current, current->pid, current->comm, current->thread_info); + printk("Last syscall: %ld ", current->thread.last_syscall); + +#ifdef CONFIG_SMP + printk(" CPU: %d", smp_processor_id()); +#endif /* CONFIG_SMP */ + + for (i = 0; i < 32; i++) { + long r; + if ((i % 8) == 0) + printk("\n" KERN_INFO "GPR%02d: ", i); + if (__get_user(r, ®s->gpr[i])) + break; + printk("%08lX ", r); + if (i == 12 && !FULL_REGS(regs)) + break; + } + printk("\n"); +#ifdef CONFIG_KALLSYMS + /* + * Lookup NIP late so we have the best change of getting the + * above info out without failing + */ + printk("NIP [%08lx] ", regs->nip); + print_symbol("%s\n", regs->nip); + printk("LR [%08lx] ", regs->link); + print_symbol("%s\n", regs->link); +#endif + show_stack(current, (unsigned long *) regs->gpr[1]); +} + +void exit_thread(void) +{ +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + if (last_task_used_spe == current) + last_task_used_spe = NULL; +#endif +#endif /* CONFIG_SMP */ +} + +void flush_thread(void) +{ +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + if (last_task_used_spe == current) + last_task_used_spe = NULL; +#endif +#endif /* CONFIG_SMP */ + +#ifdef CONFIG_PPC64 /* for now */ + if (current->thread.dabr) { + current->thread.dabr = 0; + set_dabr(0); + } +#endif +} + +void +release_thread(struct task_struct *t) +{ +} + +/* + * This gets called before we allocate a new thread and copy + * the current task into it. + */ +void prepare_to_copy(struct task_struct *tsk) +{ + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_spe_to_thread(current); +} + +/* + * Copy a thread.. + */ +int +copy_thread(int nr, unsigned long clone_flags, unsigned long usp, + unsigned long unused, + struct task_struct *p, struct pt_regs *regs) +{ + struct pt_regs *childregs, *kregs; + extern void ret_from_fork(void); + unsigned long sp = (unsigned long)p->thread_info + THREAD_SIZE; + unsigned long childframe; + + CHECK_FULL_REGS(regs); + /* Copy registers */ + sp -= sizeof(struct pt_regs); + childregs = (struct pt_regs *) sp; + *childregs = *regs; + if ((childregs->msr & MSR_PR) == 0) { + /* for kernel thread, set `current' and stackptr in new task */ + childregs->gpr[1] = sp + sizeof(struct pt_regs); + childregs->gpr[2] = (unsigned long) p; + p->thread.regs = NULL; /* no user register state */ + } else { + childregs->gpr[1] = usp; + p->thread.regs = childregs; + if (clone_flags & CLONE_SETTLS) + childregs->gpr[2] = childregs->gpr[6]; + } + childregs->gpr[3] = 0; /* Result from fork() */ + sp -= STACK_FRAME_OVERHEAD; + childframe = sp; + + /* + * The way this works is that at some point in the future + * some task will call _switch to switch to the new task. + * That will pop off the stack frame created below and start + * the new task running at ret_from_fork. The new task will + * do some house keeping and then return from the fork or clone + * system call, using the stack frame created above. + */ + sp -= sizeof(struct pt_regs); + kregs = (struct pt_regs *) sp; + sp -= STACK_FRAME_OVERHEAD; + p->thread.ksp = sp; + kregs->nip = (unsigned long)ret_from_fork; + + p->thread.last_syscall = -1; + + return 0; +} + +/* + * Set up a thread for executing a new program + */ +void start_thread(struct pt_regs *regs, unsigned long nip, unsigned long sp) +{ + set_fs(USER_DS); + memset(regs->gpr, 0, sizeof(regs->gpr)); + regs->ctr = 0; + regs->link = 0; + regs->xer = 0; + regs->ccr = 0; + regs->mq = 0; + regs->nip = nip; + regs->gpr[1] = sp; + regs->msr = MSR_USER; +#ifndef CONFIG_SMP + if (last_task_used_math == current) + last_task_used_math = NULL; +#ifdef CONFIG_ALTIVEC + if (last_task_used_altivec == current) + last_task_used_altivec = NULL; +#endif +#ifdef CONFIG_SPE + if (last_task_used_spe == current) + last_task_used_spe = NULL; +#endif +#endif /* CONFIG_SMP */ + memset(current->thread.fpr, 0, sizeof(current->thread.fpr)); + current->thread.fpscr = 0; +#ifdef CONFIG_ALTIVEC + memset(current->thread.vr, 0, sizeof(current->thread.vr)); + memset(¤t->thread.vscr, 0, sizeof(current->thread.vscr)); + current->thread.vrsave = 0; + current->thread.used_vr = 0; +#endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_SPE + memset(current->thread.evr, 0, sizeof(current->thread.evr)); + current->thread.acc = 0; + current->thread.spefscr = 0; + current->thread.used_spe = 0; +#endif /* CONFIG_SPE */ +} + +#define PR_FP_ALL_EXCEPT (PR_FP_EXC_DIV | PR_FP_EXC_OVF | PR_FP_EXC_UND \ + | PR_FP_EXC_RES | PR_FP_EXC_INV) + +int set_fpexc_mode(struct task_struct *tsk, unsigned int val) +{ + struct pt_regs *regs = tsk->thread.regs; + + /* This is a bit hairy. If we are an SPE enabled processor + * (have embedded fp) we store the IEEE exception enable flags in + * fpexc_mode. fpexc_mode is also used for setting FP exception + * mode (asyn, precise, disabled) for 'Classic' FP. */ + if (val & PR_FP_EXC_SW_ENABLE) { +#ifdef CONFIG_SPE + tsk->thread.fpexc_mode = val & + (PR_FP_EXC_SW_ENABLE | PR_FP_ALL_EXCEPT); +#else + return -EINVAL; +#endif + } else { + /* on a CONFIG_SPE this does not hurt us. The bits that + * __pack_fe01 use do not overlap with bits used for + * PR_FP_EXC_SW_ENABLE. Additionally, the MSR[FE0,FE1] bits + * on CONFIG_SPE implementations are reserved so writing to + * them does not change anything */ + if (val > PR_FP_EXC_PRECISE) + return -EINVAL; + tsk->thread.fpexc_mode = __pack_fe01(val); + if (regs != NULL && (regs->msr & MSR_FP) != 0) + regs->msr = (regs->msr & ~(MSR_FE0|MSR_FE1)) + | tsk->thread.fpexc_mode; + } + return 0; +} + +int get_fpexc_mode(struct task_struct *tsk, unsigned long adr) +{ + unsigned int val; + + if (tsk->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) +#ifdef CONFIG_SPE + val = tsk->thread.fpexc_mode; +#else + return -EINVAL; +#endif + else + val = __unpack_fe01(tsk->thread.fpexc_mode); + return put_user(val, (unsigned int __user *) adr); +} + +int sys_clone(unsigned long clone_flags, unsigned long usp, + int __user *parent_tidp, void __user *child_threadptr, + int __user *child_tidp, int p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + if (usp == 0) + usp = regs->gpr[1]; /* stack pointer for child */ + return do_fork(clone_flags, usp, regs, 0, parent_tidp, child_tidp); +} + +int sys_fork(unsigned long p1, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + return do_fork(SIGCHLD, regs->gpr[1], regs, 0, NULL, NULL); +} + +int sys_vfork(unsigned long p1, unsigned long p2, unsigned long p3, + unsigned long p4, unsigned long p5, unsigned long p6, + struct pt_regs *regs) +{ + CHECK_FULL_REGS(regs); + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->gpr[1], + regs, 0, NULL, NULL); +} + +int sys_execve(unsigned long a0, unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, unsigned long a5, + struct pt_regs *regs) +{ + int error; + char * filename; + + filename = getname((char __user *) a0); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + flush_fp_to_thread(current); + flush_altivec_to_thread(current); + flush_spe_to_thread(current); + if (error == 0) { + task_lock(current); + current->ptrace &= ~PT_DTRACE; + task_unlock(current); + } + putname(filename); +out: + return error; +} + +static int validate_sp(unsigned long sp, struct task_struct *p, + unsigned long nbytes) +{ + unsigned long stack_page = (unsigned long)p->thread_info; + + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + +#ifdef CONFIG_IRQSTACKS + stack_page = (unsigned long) hardirq_ctx[task_cpu(p)]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; + + stack_page = (unsigned long) softirq_ctx[task_cpu(p)]; + if (sp >= stack_page + sizeof(struct thread_struct) + && sp <= stack_page + THREAD_SIZE - nbytes) + return 1; +#endif + + return 0; +} + +void dump_stack(void) +{ + show_stack(current, NULL); +} + +EXPORT_SYMBOL(dump_stack); + +void show_stack(struct task_struct *tsk, unsigned long *stack) +{ + unsigned long sp, stack_top, prev_sp, ret; + int count = 0; + unsigned long next_exc = 0; + struct pt_regs *regs; + extern char ret_from_except, ret_from_except_full, ret_from_syscall; + + sp = (unsigned long) stack; + if (tsk == NULL) + tsk = current; + if (sp == 0) { + if (tsk == current) + asm("mr %0,1" : "=r" (sp)); + else + sp = tsk->thread.ksp; + } + + prev_sp = (unsigned long) (tsk->thread_info + 1); + stack_top = (unsigned long) tsk->thread_info + THREAD_SIZE; + while (count < 16 && sp > prev_sp && sp < stack_top && (sp & 3) == 0) { + if (count == 0) { + printk("Call trace:"); +#ifdef CONFIG_KALLSYMS + printk("\n"); +#endif + } else { + if (next_exc) { + ret = next_exc; + next_exc = 0; + } else + ret = *(unsigned long *)(sp + 4); + printk(" [%08lx] ", ret); +#ifdef CONFIG_KALLSYMS + print_symbol("%s", ret); + printk("\n"); +#endif + if (ret == (unsigned long) &ret_from_except + || ret == (unsigned long) &ret_from_except_full + || ret == (unsigned long) &ret_from_syscall) { + /* sp + 16 points to an exception frame */ + regs = (struct pt_regs *) (sp + 16); + if (sp + 16 + sizeof(*regs) <= stack_top) + next_exc = regs->nip; + } + } + ++count; + sp = *(unsigned long *)sp; + } +#ifndef CONFIG_KALLSYMS + if (count > 0) + printk("\n"); +#endif +} + +unsigned long get_wchan(struct task_struct *p) +{ + unsigned long ip, sp; + int count = 0; + + if (!p || p == current || p->state == TASK_RUNNING) + return 0; + + sp = p->thread.ksp; + if (!validate_sp(sp, p, 16)) + return 0; + + do { + sp = *(unsigned long *)sp; + if (!validate_sp(sp, p, 16)) + return 0; + if (count > 0) { + ip = *(unsigned long *)(sp + 4); + if (!in_sched_functions(ip)) + return ip; + } + } while (count++ < 16); + return 0; +} +EXPORT_SYMBOL(get_wchan); diff --git a/arch/powerpc/kernel/semaphore.c b/arch/powerpc/kernel/semaphore.c new file mode 100644 index 00000000000..2f8c3c95139 --- /dev/null +++ b/arch/powerpc/kernel/semaphore.c @@ -0,0 +1,135 @@ +/* + * PowerPC-specific semaphore code. + * + * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * April 2001 - Reworked by Paul Mackerras <paulus@samba.org> + * to eliminate the SMP races in the old version between the updates + * of `count' and `waking'. Now we use negative `count' values to + * indicate that some process(es) are waiting for the semaphore. + */ + +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/module.h> + +#include <asm/atomic.h> +#include <asm/semaphore.h> +#include <asm/errno.h> + +/* + * Atomically update sem->count. + * This does the equivalent of the following: + * + * old_count = sem->count; + * tmp = MAX(old_count, 0) + incr; + * sem->count = tmp; + * return old_count; + */ +static inline int __sem_update_count(struct semaphore *sem, int incr) +{ + int old_count, tmp; + + __asm__ __volatile__("\n" +"1: lwarx %0,0,%3\n" +" srawi %1,%0,31\n" +" andc %1,%0,%1\n" +" add %1,%1,%4\n" + PPC405_ERR77(0,%3) +" stwcx. %1,0,%3\n" +" bne 1b" + : "=&r" (old_count), "=&r" (tmp), "=m" (sem->count) + : "r" (&sem->count), "r" (incr), "m" (sem->count) + : "cc"); + + return old_count; +} + +void __up(struct semaphore *sem) +{ + /* + * Note that we incremented count in up() before we came here, + * but that was ineffective since the result was <= 0, and + * any negative value of count is equivalent to 0. + * This ends up setting count to 1, unless count is now > 0 + * (i.e. because some other cpu has called up() in the meantime), + * in which case we just increment count. + */ + __sem_update_count(sem, 1); + wake_up(&sem->wait); +} +EXPORT_SYMBOL(__up); + +/* + * Note that when we come in to __down or __down_interruptible, + * we have already decremented count, but that decrement was + * ineffective since the result was < 0, and any negative value + * of count is equivalent to 0. + * Thus it is only when we decrement count from some value > 0 + * that we have actually got the semaphore. + */ +void __sched __down(struct semaphore *sem) +{ + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __set_task_state(tsk, TASK_UNINTERRUPTIBLE); + add_wait_queue_exclusive(&sem->wait, &wait); + + /* + * Try to get the semaphore. If the count is > 0, then we've + * got the semaphore; we decrement count and exit the loop. + * If the count is 0 or negative, we set it to -1, indicating + * that we are asleep, and then sleep. + */ + while (__sem_update_count(sem, -1) <= 0) { + schedule(); + set_task_state(tsk, TASK_UNINTERRUPTIBLE); + } + remove_wait_queue(&sem->wait, &wait); + __set_task_state(tsk, TASK_RUNNING); + + /* + * If there are any more sleepers, wake one of them up so + * that it can either get the semaphore, or set count to -1 + * indicating that there are still processes sleeping. + */ + wake_up(&sem->wait); +} +EXPORT_SYMBOL(__down); + +int __sched __down_interruptible(struct semaphore * sem) +{ + int retval = 0; + struct task_struct *tsk = current; + DECLARE_WAITQUEUE(wait, tsk); + + __set_task_state(tsk, TASK_INTERRUPTIBLE); + add_wait_queue_exclusive(&sem->wait, &wait); + + while (__sem_update_count(sem, -1) <= 0) { + if (signal_pending(current)) { + /* + * A signal is pending - give up trying. + * Set sem->count to 0 if it is negative, + * since we are no longer sleeping. + */ + __sem_update_count(sem, 0); + retval = -EINTR; + break; + } + schedule(); + set_task_state(tsk, TASK_INTERRUPTIBLE); + } + remove_wait_queue(&sem->wait, &wait); + __set_task_state(tsk, TASK_RUNNING); + + wake_up(&sem->wait); + return retval; +} +EXPORT_SYMBOL(__down_interruptible); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c new file mode 100644 index 00000000000..c7afbbba0f3 --- /dev/null +++ b/arch/powerpc/kernel/traps.c @@ -0,0 +1,1047 @@ +/* + * arch/powerpc/kernel/traps.c + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Modified by Cort Dougan (cort@cs.nmt.edu) + * and Paul Mackerras (paulus@samba.org) + */ + +/* + * This file handles the architecture-dependent parts of hardware exceptions + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/interrupt.h> +#include <linux/config.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/prctl.h> +#include <linux/delay.h> +#include <linux/kprobes.h> +#include <asm/kdebug.h> + +#include <asm/pgtable.h> +#include <asm/uaccess.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/reg.h> +#include <asm/xmon.h> +#ifdef CONFIG_PMAC_BACKLIGHT +#include <asm/backlight.h> +#endif +#include <asm/perfmon.h> + +#ifdef CONFIG_DEBUGGER +int (*__debugger)(struct pt_regs *regs); +int (*__debugger_ipi)(struct pt_regs *regs); +int (*__debugger_bpt)(struct pt_regs *regs); +int (*__debugger_sstep)(struct pt_regs *regs); +int (*__debugger_iabr_match)(struct pt_regs *regs); +int (*__debugger_dabr_match)(struct pt_regs *regs); +int (*__debugger_fault_handler)(struct pt_regs *regs); + +EXPORT_SYMBOL(__debugger); +EXPORT_SYMBOL(__debugger_ipi); +EXPORT_SYMBOL(__debugger_bpt); +EXPORT_SYMBOL(__debugger_sstep); +EXPORT_SYMBOL(__debugger_iabr_match); +EXPORT_SYMBOL(__debugger_dabr_match); +EXPORT_SYMBOL(__debugger_fault_handler); +#endif + +struct notifier_block *powerpc_die_chain; +static DEFINE_SPINLOCK(die_notifier_lock); + +int register_die_notifier(struct notifier_block *nb) +{ + int err = 0; + unsigned long flags; + + spin_lock_irqsave(&die_notifier_lock, flags); + err = notifier_chain_register(&powerpc_die_chain, nb); + spin_unlock_irqrestore(&die_notifier_lock, flags); + return err; +} + +/* + * Trap & Exception support + */ + +static DEFINE_SPINLOCK(die_lock); + +int die(const char *str, struct pt_regs *regs, long err) +{ + static int die_counter; + int nl = 0; + + if (debugger(regs)) + return 1; + + console_verbose(); + spin_lock_irq(&die_lock); + bust_spinlocks(1); +#ifdef CONFIG_PMAC_BACKLIGHT + if (_machine == _MACH_Pmac) { + set_backlight_enable(1); + set_backlight_level(BACKLIGHT_MAX); + } +#endif + printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter); +#ifdef CONFIG_PREEMPT + printk("PREEMPT "); + nl = 1; +#endif +#ifdef CONFIG_SMP + printk("SMP NR_CPUS=%d ", NR_CPUS); + nl = 1; +#endif +#ifdef CONFIG_DEBUG_PAGEALLOC + printk("DEBUG_PAGEALLOC "); + nl = 1; +#endif +#ifdef CONFIG_NUMA + printk("NUMA "); + nl = 1; +#endif +#ifdef CONFIG_PPC64 + switch (systemcfg->platform) { + case PLATFORM_PSERIES: + printk("PSERIES "); + nl = 1; + break; + case PLATFORM_PSERIES_LPAR: + printk("PSERIES LPAR "); + nl = 1; + break; + case PLATFORM_ISERIES_LPAR: + printk("ISERIES LPAR "); + nl = 1; + break; + case PLATFORM_POWERMAC: + printk("POWERMAC "); + nl = 1; + break; + case PLATFORM_BPA: + printk("BPA "); + nl = 1; + break; + } +#endif + if (nl) + printk("\n"); + print_modules(); + show_regs(regs); + bust_spinlocks(0); + spin_unlock_irq(&die_lock); + + if (in_interrupt()) + panic("Fatal exception in interrupt"); + + if (panic_on_oops) { + panic("Fatal exception"); + } + do_exit(err); + + return 0; +} + +void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) +{ + siginfo_t info; + + if (!user_mode(regs)) { + if (die("Exception in kernel mode", regs, signr)) + return; + } + + memset(&info, 0, sizeof(info)); + info.si_signo = signr; + info.si_code = code; + info.si_addr = (void __user *) addr; + force_sig_info(signr, &info, current); + + /* + * Init gets no signals that it doesn't have a handler for. + * That's all very well, but if it has caused a synchronous + * exception and we ignore the resulting signal, it will just + * generate the same exception over and over again and we get + * nowhere. Better to kill it and let the kernel panic. + */ + if (current->pid == 1) { + __sighandler_t handler; + + spin_lock_irq(¤t->sighand->siglock); + handler = current->sighand->action[signr-1].sa.sa_handler; + spin_unlock_irq(¤t->sighand->siglock); + if (handler == SIG_DFL) { + /* init has generated a synchronous exception + and it doesn't have a handler for the signal */ + printk(KERN_CRIT "init has generated signal %d " + "but has no handler for it\n", signr); + do_exit(signr); + } + } +} + +#ifdef CONFIG_PPC64 +void system_reset_exception(struct pt_regs *regs) +{ + /* See if any machine dependent calls */ + if (ppc_md.system_reset_exception) + ppc_md.system_reset_exception(regs); + + die("System Reset", regs, SIGABRT); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable System Reset"); + + /* What should we do here? We could issue a shutdown or hard reset. */ +} +#endif + +/* + * I/O accesses can cause machine checks on powermacs. + * Check if the NIP corresponds to the address of a sync + * instruction for which there is an entry in the exception + * table. + * Note that the 601 only takes a machine check on TEA + * (transfer error ack) signal assertion, and does not + * set any of the top 16 bits of SRR1. + * -- paulus. + */ +static inline int check_io_access(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC_PMAC + unsigned long msr = regs->msr; + const struct exception_table_entry *entry; + unsigned int *nip = (unsigned int *)regs->nip; + + if (((msr & 0xffff0000) == 0 || (msr & (0x80000 | 0x40000))) + && (entry = search_exception_tables(regs->nip)) != NULL) { + /* + * Check that it's a sync instruction, or somewhere + * in the twi; isync; nop sequence that inb/inw/inl uses. + * As the address is in the exception table + * we should be able to read the instr there. + * For the debug message, we look at the preceding + * load or store. + */ + if (*nip == 0x60000000) /* nop */ + nip -= 2; + else if (*nip == 0x4c00012c) /* isync */ + --nip; + if (*nip == 0x7c0004ac || (*nip >> 26) == 3) { + /* sync or twi */ + unsigned int rb; + + --nip; + rb = (*nip >> 11) & 0x1f; + printk(KERN_DEBUG "%s bad port %lx at %p\n", + (*nip & 0x100)? "OUT to": "IN from", + regs->gpr[rb] - _IO_BASE, nip); + regs->msr |= MSR_RI; + regs->nip = entry->fixup; + return 1; + } + } +#endif /* CONFIG_PPC_PMAC */ + return 0; +} + +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) +/* On 4xx, the reason for the machine check or program exception + is in the ESR. */ +#define get_reason(regs) ((regs)->dsisr) +#ifndef CONFIG_FSL_BOOKE +#define get_mc_reason(regs) ((regs)->dsisr) +#else +#define get_mc_reason(regs) (mfspr(SPRN_MCSR)) +#endif +#define REASON_FP ESR_FP +#define REASON_ILLEGAL (ESR_PIL | ESR_PUO) +#define REASON_PRIVILEGED ESR_PPR +#define REASON_TRAP ESR_PTR + +/* single-step stuff */ +#define single_stepping(regs) (current->thread.dbcr0 & DBCR0_IC) +#define clear_single_step(regs) (current->thread.dbcr0 &= ~DBCR0_IC) + +#else +/* On non-4xx, the reason for the machine check or program + exception is in the MSR. */ +#define get_reason(regs) ((regs)->msr) +#define get_mc_reason(regs) ((regs)->msr) +#define REASON_FP 0x100000 +#define REASON_ILLEGAL 0x80000 +#define REASON_PRIVILEGED 0x40000 +#define REASON_TRAP 0x20000 + +#define single_stepping(regs) ((regs)->msr & MSR_SE) +#define clear_single_step(regs) ((regs)->msr &= ~MSR_SE) +#endif + +/* + * This is "fall-back" implementation for configurations + * which don't provide platform-specific machine check info + */ +void __attribute__ ((weak)) +platform_machine_check(struct pt_regs *regs) +{ +} + +void MachineCheckException(struct pt_regs *regs) +{ +#ifdef CONFIG_PPC64 + int recover = 0; + + /* See if any machine dependent calls */ + if (ppc_md.machine_check_exception) + recover = ppc_md.machine_check_exception(regs); + + if (recover) + return; +#else + unsigned long reason = get_mc_reason(regs); + + if (user_mode(regs)) { + regs->msr |= MSR_RI; + _exception(SIGBUS, regs, BUS_ADRERR, regs->nip); + return; + } + +#if defined(CONFIG_8xx) && defined(CONFIG_PCI) + /* the qspan pci read routines can cause machine checks -- Cort */ + bad_page_fault(regs, regs->dar, SIGBUS); + return; +#endif + + if (debugger_fault_handler(regs)) { + regs->msr |= MSR_RI; + return; + } + + if (check_io_access(regs)) + return; + +#if defined(CONFIG_4xx) && !defined(CONFIG_440A) + if (reason & ESR_IMCP) { + printk("Instruction"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } else + printk("Data"); + printk(" machine check in kernel mode.\n"); +#elif defined(CONFIG_440A) + printk("Machine check in kernel mode.\n"); + if (reason & ESR_IMCP){ + printk("Instruction Synchronous Machine Check exception\n"); + mtspr(SPRN_ESR, reason & ~ESR_IMCP); + } + else { + u32 mcsr = mfspr(SPRN_MCSR); + if (mcsr & MCSR_IB) + printk("Instruction Read PLB Error\n"); + if (mcsr & MCSR_DRB) + printk("Data Read PLB Error\n"); + if (mcsr & MCSR_DWB) + printk("Data Write PLB Error\n"); + if (mcsr & MCSR_TLBP) + printk("TLB Parity Error\n"); + if (mcsr & MCSR_ICP){ + flush_instruction_cache(); + printk("I-Cache Parity Error\n"); + } + if (mcsr & MCSR_DCSP) + printk("D-Cache Search Parity Error\n"); + if (mcsr & MCSR_DCFP) + printk("D-Cache Flush Parity Error\n"); + if (mcsr & MCSR_IMPE) + printk("Machine Check exception is imprecise\n"); + + /* Clear MCSR */ + mtspr(SPRN_MCSR, mcsr); + } +#elif defined (CONFIG_E500) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_ICPERR) + printk("Instruction Cache Parity Error\n"); + if (reason & MCSR_DCP_PERR) + printk("Data Cache Push Parity Error\n"); + if (reason & MCSR_DCPERR) + printk("Data Cache Parity Error\n"); + if (reason & MCSR_GL_CI) + printk("Guarded Load or Cache-Inhibited stwcx.\n"); + if (reason & MCSR_BUS_IAERR) + printk("Bus - Instruction Address Error\n"); + if (reason & MCSR_BUS_RAERR) + printk("Bus - Read Address Error\n"); + if (reason & MCSR_BUS_WAERR) + printk("Bus - Write Address Error\n"); + if (reason & MCSR_BUS_IBERR) + printk("Bus - Instruction Data Error\n"); + if (reason & MCSR_BUS_RBERR) + printk("Bus - Read Data Bus Error\n"); + if (reason & MCSR_BUS_WBERR) + printk("Bus - Read Data Bus Error\n"); + if (reason & MCSR_BUS_IPERR) + printk("Bus - Instruction Parity Error\n"); + if (reason & MCSR_BUS_RPERR) + printk("Bus - Read Parity Error\n"); +#elif defined (CONFIG_E200) + printk("Machine check in kernel mode.\n"); + printk("Caused by (from MCSR=%lx): ", reason); + + if (reason & MCSR_MCP) + printk("Machine Check Signal\n"); + if (reason & MCSR_CP_PERR) + printk("Cache Push Parity Error\n"); + if (reason & MCSR_CPERR) + printk("Cache Parity Error\n"); + if (reason & MCSR_EXCP_ERR) + printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n"); + if (reason & MCSR_BUS_IRERR) + printk("Bus - Read Bus Error on instruction fetch\n"); + if (reason & MCSR_BUS_DRERR) + printk("Bus - Read Bus Error on data load\n"); + if (reason & MCSR_BUS_WRERR) + printk("Bus - Write Bus Error on buffered store or cache line push\n"); +#else /* !CONFIG_4xx && !CONFIG_E500 && !CONFIG_E200 */ + printk("Machine check in kernel mode.\n"); + printk("Caused by (from SRR1=%lx): ", reason); + switch (reason & 0x601F0000) { + case 0x80000: + printk("Machine check signal\n"); + break; + case 0: /* for 601 */ + case 0x40000: + case 0x140000: /* 7450 MSS error and TEA */ + printk("Transfer error ack signal\n"); + break; + case 0x20000: + printk("Data parity error signal\n"); + break; + case 0x10000: + printk("Address parity error signal\n"); + break; + case 0x20000000: + printk("L1 Data Cache error\n"); + break; + case 0x40000000: + printk("L1 Instruction Cache error\n"); + break; + case 0x00100000: + printk("L2 data cache parity error\n"); + break; + default: + printk("Unknown values in msr\n"); + } +#endif /* CONFIG_4xx */ + + /* + * Optional platform-provided routine to print out + * additional info, e.g. bus error registers. + */ + platform_machine_check(regs); +#endif /* CONFIG_PPC64 */ + + if (debugger_fault_handler(regs)) + return; + die("Machine check", regs, SIGBUS); + + /* Must die if the interrupt is not recoverable */ + if (!(regs->msr & MSR_RI)) + panic("Unrecoverable Machine check"); +} + +void SMIException(struct pt_regs *regs) +{ + die("System Management Interrupt", regs, SIGABRT); +} + +void UnknownException(struct pt_regs *regs) +{ + printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", + regs->nip, regs->msr, regs->trap); + + _exception(SIGTRAP, regs, 0, 0); +} + +void InstructionBreakpoint(struct pt_regs *regs) +{ + if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_iabr_match(regs)) + return; + _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); +} + +void RunModeException(struct pt_regs *regs) +{ + _exception(SIGTRAP, regs, 0, 0); +} + +void SingleStepException(struct pt_regs *regs) +{ + regs->msr &= ~(MSR_SE | MSR_BE); /* Turn off 'trace' bits */ + + if (notify_die(DIE_SSTEP, "single_step", regs, 5, + 5, SIGTRAP) == NOTIFY_STOP) + return; + if (debugger_sstep(regs)) + return; + + _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); +} + +/* + * After we have successfully emulated an instruction, we have to + * check if the instruction was being single-stepped, and if so, + * pretend we got a single-step exception. This was pointed out + * by Kumar Gala. -- paulus + */ +static void emulate_single_step(struct pt_regs *regs) +{ + if (single_stepping(regs)) { + clear_single_step(regs); + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } +} + +/* Illegal instruction emulation support. Originally written to + * provide the PVR to user applications using the mfspr rd, PVR. + * Return non-zero if we can't emulate, or -EFAULT if the associated + * memory access caused an access fault. Return zero on success. + * + * There are a couple of ways to do this, either "decode" the instruction + * or directly match lots of bits. In this case, matching lots of + * bits is faster and easier. + * + */ +#define INST_MFSPR_PVR 0x7c1f42a6 +#define INST_MFSPR_PVR_MASK 0xfc1fffff + +#define INST_DCBA 0x7c0005ec +#define INST_DCBA_MASK 0x7c0007fe + +#define INST_MCRXR 0x7c000400 +#define INST_MCRXR_MASK 0x7c0007fe + +#define INST_STRING 0x7c00042a +#define INST_STRING_MASK 0x7c0007fe +#define INST_STRING_GEN_MASK 0x7c00067e +#define INST_LSWI 0x7c0004aa +#define INST_LSWX 0x7c00042a +#define INST_STSWI 0x7c0005aa +#define INST_STSWX 0x7c00052a + +static int emulate_string_inst(struct pt_regs *regs, u32 instword) +{ + u8 rT = (instword >> 21) & 0x1f; + u8 rA = (instword >> 16) & 0x1f; + u8 NB_RB = (instword >> 11) & 0x1f; + u32 num_bytes; + unsigned long EA; + int pos = 0; + + /* Early out if we are an invalid form of lswx */ + if ((instword & INST_STRING_MASK) == INST_LSWX) + if ((rT == rA) || (rT == NB_RB)) + return -EINVAL; + + EA = (rA == 0) ? 0 : regs->gpr[rA]; + + switch (instword & INST_STRING_MASK) { + case INST_LSWX: + case INST_STSWX: + EA += NB_RB; + num_bytes = regs->xer & 0x7f; + break; + case INST_LSWI: + case INST_STSWI: + num_bytes = (NB_RB == 0) ? 32 : NB_RB; + break; + default: + return -EINVAL; + } + + while (num_bytes != 0) + { + u8 val; + u32 shift = 8 * (3 - (pos & 0x3)); + + switch ((instword & INST_STRING_MASK)) { + case INST_LSWX: + case INST_LSWI: + if (get_user(val, (u8 __user *)EA)) + return -EFAULT; + /* first time updating this reg, + * zero it out */ + if (pos == 0) + regs->gpr[rT] = 0; + regs->gpr[rT] |= val << shift; + break; + case INST_STSWI: + case INST_STSWX: + val = regs->gpr[rT] >> shift; + if (put_user(val, (u8 __user *)EA)) + return -EFAULT; + break; + } + /* move EA to next address */ + EA += 1; + num_bytes--; + + /* manage our position within the register */ + if (++pos == 4) { + pos = 0; + if (++rT == 32) + rT = 0; + } + } + + return 0; +} + +static int emulate_instruction(struct pt_regs *regs) +{ + u32 instword; + u32 rd; + + if (!user_mode(regs)) + return -EINVAL; + CHECK_FULL_REGS(regs); + + if (get_user(instword, (u32 __user *)(regs->nip))) + return -EFAULT; + + /* Emulate the mfspr rD, PVR. */ + if ((instword & INST_MFSPR_PVR_MASK) == INST_MFSPR_PVR) { + rd = (instword >> 21) & 0x1f; + regs->gpr[rd] = mfspr(SPRN_PVR); + return 0; + } + + /* Emulating the dcba insn is just a no-op. */ + if ((instword & INST_DCBA_MASK) == INST_DCBA) + return 0; + + /* Emulate the mcrxr insn. */ + if ((instword & INST_MCRXR_MASK) == INST_MCRXR) { + int shift = (instword >> 21) & 0x1c; + unsigned long msk = 0xf0000000UL >> shift; + + regs->ccr = (regs->ccr & ~msk) | ((regs->xer >> shift) & msk); + regs->xer &= ~0xf0000000UL; + return 0; + } + + /* Emulate load/store string insn. */ + if ((instword & INST_STRING_GEN_MASK) == INST_STRING) + return emulate_string_inst(regs, instword); + + return -EINVAL; +} + +/* + * Look through the list of trap instructions that are used for BUG(), + * BUG_ON() and WARN_ON() and see if we hit one. At this point we know + * that the exception was caused by a trap instruction of some kind. + * Returns 1 if we should continue (i.e. it was a WARN_ON) or 0 + * otherwise. + */ +extern struct bug_entry __start___bug_table[], __stop___bug_table[]; + +#ifndef CONFIG_MODULES +#define module_find_bug(x) NULL +#endif + +struct bug_entry *find_bug(unsigned long bugaddr) +{ + struct bug_entry *bug; + + for (bug = __start___bug_table; bug < __stop___bug_table; ++bug) + if (bugaddr == bug->bug_addr) + return bug; + return module_find_bug(bugaddr); +} + +int check_bug_trap(struct pt_regs *regs) +{ + struct bug_entry *bug; + unsigned long addr; + + if (regs->msr & MSR_PR) + return 0; /* not in kernel */ + addr = regs->nip; /* address of trap instruction */ + if (addr < PAGE_OFFSET) + return 0; + bug = find_bug(regs->nip); + if (bug == NULL) + return 0; + if (bug->line & BUG_WARNING_TRAP) { + /* this is a WARN_ON rather than BUG/BUG_ON */ +#ifdef CONFIG_XMON + xmon_printf(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); +#endif /* CONFIG_XMON */ + printk(KERN_ERR "Badness in %s at %s:%d\n", + bug->function, bug->file, + bug->line & ~BUG_WARNING_TRAP); + dump_stack(); + return 1; + } +#ifdef CONFIG_XMON + xmon_printf(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + xmon(regs); +#endif /* CONFIG_XMON */ + printk(KERN_CRIT "kernel BUG in %s at %s:%d!\n", + bug->function, bug->file, bug->line); + + return 0; +} + +void ProgramCheckException(struct pt_regs *regs) +{ + unsigned int reason = get_reason(regs); + extern int do_mathemu(struct pt_regs *regs); + +#ifdef CONFIG_MATH_EMULATION + /* (reason & REASON_ILLEGAL) would be the obvious thing here, + * but there seems to be a hardware bug on the 405GP (RevD) + * that means ESR is sometimes set incorrectly - either to + * ESR_DST (!?) or 0. In the process of chasing this with the + * hardware people - not sure if it can happen on any illegal + * instruction or only on FP instructions, whether there is a + * pattern to occurences etc. -dgibson 31/Mar/2003 */ + if (!(reason & REASON_TRAP) && do_mathemu(regs) == 0) { + emulate_single_step(regs); + return; + } +#endif /* CONFIG_MATH_EMULATION */ + + if (reason & REASON_FP) { + /* IEEE FP exception */ + int code = 0; + u32 fpscr; + + /* We must make sure the FP state is consistent with + * our MSR_FP in regs + */ + preempt_disable(); + if (regs->msr & MSR_FP) + giveup_fpu(current); + preempt_enable(); + + fpscr = current->thread.fpscr; + fpscr &= fpscr << 22; /* mask summary bits with enables */ + if (fpscr & FPSCR_VX) + code = FPE_FLTINV; + else if (fpscr & FPSCR_OX) + code = FPE_FLTOVF; + else if (fpscr & FPSCR_UX) + code = FPE_FLTUND; + else if (fpscr & FPSCR_ZX) + code = FPE_FLTDIV; + else if (fpscr & FPSCR_XX) + code = FPE_FLTRES; + _exception(SIGFPE, regs, code, regs->nip); + return; + } + + if (reason & REASON_TRAP) { + /* trap exception */ + if (debugger_bpt(regs)) + return; + if (check_bug_trap(regs)) { + regs->nip += 4; + return; + } + _exception(SIGTRAP, regs, TRAP_BRKPT, 0); + return; + } + + /* Try to emulate it if we should. */ + if (reason & (REASON_ILLEGAL | REASON_PRIVILEGED)) { + switch (emulate_instruction(regs)) { + case 0: + regs->nip += 4; + emulate_single_step(regs); + return; + case -EFAULT: + _exception(SIGSEGV, regs, SEGV_MAPERR, regs->nip); + return; + } + } + + if (reason & REASON_PRIVILEGED) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); +} + +void AlignmentException(struct pt_regs *regs) +{ + int fixed; + + fixed = fix_alignment(regs); + + if (fixed == 1) { + regs->nip += 4; /* skip over emulated instruction */ + emulate_single_step(regs); + return; + } + + /* Operand address was bad */ + if (fixed == -EFAULT) { + if (user_mode(regs)) + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->dar); + else + /* Search exception table */ + bad_page_fault(regs, regs->dar, SIGSEGV); + return; + } + _exception(SIGBUS, regs, BUS_ADRALN, regs->dar); +} + +void StackOverflow(struct pt_regs *regs) +{ + printk(KERN_CRIT "Kernel stack overflow in process %p, r1=%lx\n", + current, regs->gpr[1]); + debugger(regs); + show_regs(regs); + panic("kernel stack overflow"); +} + +void nonrecoverable_exception(struct pt_regs *regs) +{ + printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n", + regs->nip, regs->msr); + debugger(regs); + die("nonrecoverable exception", regs, SIGKILL); +} + +void trace_syscall(struct pt_regs *regs) +{ + printk("Task: %p(%d), PC: %08lX/%08lX, Syscall: %3ld, Result: %s%ld %s\n", + current, current->pid, regs->nip, regs->link, regs->gpr[0], + regs->ccr&0x10000000?"Error=":"", regs->gpr[3], print_tainted()); +} + +#ifdef CONFIG_8xx +void SoftwareEmulation(struct pt_regs *regs) +{ + extern int do_mathemu(struct pt_regs *); + extern int Soft_emulate_8xx(struct pt_regs *); + int errcode; + + CHECK_FULL_REGS(regs); + + if (!user_mode(regs)) { + debugger(regs); + die("Kernel Mode Software FPU Emulation", regs, SIGFPE); + } + +#ifdef CONFIG_MATH_EMULATION + errcode = do_mathemu(regs); +#else + errcode = Soft_emulate_8xx(regs); +#endif + if (errcode) { + if (errcode > 0) + _exception(SIGFPE, regs, 0, 0); + else if (errcode == -EFAULT) + _exception(SIGSEGV, regs, 0, 0); + else + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + } else + emulate_single_step(regs); +} +#endif /* CONFIG_8xx */ + +#if defined(CONFIG_40x) || defined(CONFIG_BOOKE) + +void DebugException(struct pt_regs *regs, unsigned long debug_status) +{ + if (debug_status & DBSR_IC) { /* instruction completion */ + regs->msr &= ~MSR_DE; + if (user_mode(regs)) { + current->thread.dbcr0 &= ~DBCR0_IC; + } else { + /* Disable instruction completion */ + mtspr(SPRN_DBCR0, mfspr(SPRN_DBCR0) & ~DBCR0_IC); + /* Clear the instruction completion event */ + mtspr(SPRN_DBSR, DBSR_IC); + if (debugger_sstep(regs)) + return; + } + _exception(SIGTRAP, regs, TRAP_TRACE, 0); + } +} +#endif /* CONFIG_4xx || CONFIG_BOOKE */ + +#if !defined(CONFIG_TAU_INT) +void TAUException(struct pt_regs *regs) +{ + printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx %s\n", + regs->nip, regs->msr, regs->trap, print_tainted()); +} +#endif /* CONFIG_INT_TAU */ + +void AltivecUnavailException(struct pt_regs *regs) +{ + static int kernel_altivec_count; + +#ifndef CONFIG_ALTIVEC + if (user_mode(regs)) { + /* A user program has executed an altivec instruction, + but this kernel doesn't support altivec. */ + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); + return; + } +#endif + /* The kernel has executed an altivec instruction without + first enabling altivec. Whinge but let it do it. */ + if (++kernel_altivec_count < 10) + printk(KERN_ERR "AltiVec used in kernel (task=%p, pc=%lx)\n", + current, regs->nip); + regs->msr |= MSR_VEC; +} + +#ifdef CONFIG_ALTIVEC +void AltivecAssistException(struct pt_regs *regs) +{ + int err; + + preempt_disable(); + if (regs->msr & MSR_VEC) + giveup_altivec(current); + preempt_enable(); + if (!user_mode(regs)) { + printk(KERN_EMERG "VMX/Altivec assist exception in kernel mode" + " at %lx\n", regs->nip); + die("Kernel Altivec assist exception", regs, SIGILL); + } + + err = emulate_altivec(regs); + if (err == 0) { + regs->nip += 4; /* skip emulated instruction */ + emulate_single_step(regs); + return; + } + + if (err == -EFAULT) { + /* got an error reading the instruction */ + _exception(SIGSEGV, regs, SEGV_ACCERR, regs->nip); + } else { + /* didn't recognize the instruction */ + /* XXX quick hack for now: set the non-Java bit in the VSCR */ + if (printk_ratelimit()) + printk(KERN_ERR "Unrecognized altivec instruction " + "in %s at %lx\n", current->comm, regs->nip); + current->thread.vscr.u[3] |= 0x10000; + } +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_E500 +void PerformanceMonitorException(struct pt_regs *regs) +{ + perf_irq(regs); +} +#endif + +#ifdef CONFIG_FSL_BOOKE +void CacheLockingException(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + /* We treat cache locking instructions from the user + * as priv ops, in the future we could try to do + * something smarter + */ + if (error_code & (ESR_DLK|ESR_ILK)) + _exception(SIGILL, regs, ILL_PRVOPC, regs->nip); + return; +} +#endif /* CONFIG_FSL_BOOKE */ + +#ifdef CONFIG_SPE +void SPEFloatingPointException(struct pt_regs *regs) +{ + unsigned long spefscr; + int fpexc_mode; + int code = 0; + + spefscr = current->thread.spefscr; + fpexc_mode = current->thread.fpexc_mode; + + /* Hardware does not neccessarily set sticky + * underflow/overflow/invalid flags */ + if ((spefscr & SPEFSCR_FOVF) && (fpexc_mode & PR_FP_EXC_OVF)) { + code = FPE_FLTOVF; + spefscr |= SPEFSCR_FOVFS; + } + else if ((spefscr & SPEFSCR_FUNF) && (fpexc_mode & PR_FP_EXC_UND)) { + code = FPE_FLTUND; + spefscr |= SPEFSCR_FUNFS; + } + else if ((spefscr & SPEFSCR_FDBZ) && (fpexc_mode & PR_FP_EXC_DIV)) + code = FPE_FLTDIV; + else if ((spefscr & SPEFSCR_FINV) && (fpexc_mode & PR_FP_EXC_INV)) { + code = FPE_FLTINV; + spefscr |= SPEFSCR_FINVS; + } + else if ((spefscr & (SPEFSCR_FG | SPEFSCR_FX)) && (fpexc_mode & PR_FP_EXC_RES)) + code = FPE_FLTRES; + + current->thread.spefscr = spefscr; + + _exception(SIGFPE, regs, code, regs->nip); + return; +} +#endif + +#ifdef CONFIG_BOOKE_WDT +/* + * Default handler for a Watchdog exception, + * spins until a reboot occurs + */ +void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) +{ + /* Generic WatchdogHandler, implement your own */ + mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); + return; +} + +void WatchdogException(struct pt_regs *regs) +{ + printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); + WatchdogHandler(regs); +} +#endif + +void __init trap_init(void) +{ +} diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S new file mode 100644 index 00000000000..12cb90bc209 --- /dev/null +++ b/arch/powerpc/kernel/vector.S @@ -0,0 +1,197 @@ +#include <linux/config.h> +#include <asm/ppc_asm.h> +#include <asm/processor.h> + +/* + * The routines below are in assembler so we can closely control the + * usage of floating-point registers. These routines must be called + * with preempt disabled. + */ +#ifdef CONFIG_PPC32 + .data +fpzero: + .long 0 +fpone: + .long 0x3f800000 /* 1.0 in single-precision FP */ +fphalf: + .long 0x3f000000 /* 0.5 in single-precision FP */ + +#define LDCONST(fr, name) \ + lis r11,name@ha; \ + lfs fr,name@l(r11) +#else + + .section ".toc","aw" +fpzero: + .tc FD_0_0[TC],0 +fpone: + .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */ +fphalf: + .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */ + +#define LDCONST(fr, name) \ + lfd fr,name@toc(r2) +#endif + + .text +/* + * Internal routine to enable floating point and set FPSCR to 0. + * Don't call it from C; it doesn't use the normal calling convention. + */ +fpenable: +#ifdef CONFIG_PPC32 + stwu r1,-64(r1) +#else + stdu r1,-64(r1) +#endif + mfmsr r10 + ori r11,r10,MSR_FP + mtmsr r11 + isync + stfd fr0,24(r1) + stfd fr1,16(r1) + stfd fr31,8(r1) + LDCONST(fr1, fpzero) + mffs fr31 + mtfsf 0xff,fr1 + blr + +fpdisable: + mtlr r12 + mtfsf 0xff,fr31 + lfd fr31,8(r1) + lfd fr1,16(r1) + lfd fr0,24(r1) + mtmsr r10 + isync + addi r1,r1,64 + blr + +/* + * Vector add, floating point. + */ +_GLOBAL(vaddfp) + mflr r12 + bl fpenable + li r0,4 + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + lfsx fr1,r5,r6 + fadds fr0,fr0,fr1 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector subtract, floating point. + */ +_GLOBAL(vsubfp) + mflr r12 + bl fpenable + li r0,4 + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + lfsx fr1,r5,r6 + fsubs fr0,fr0,fr1 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector multiply and add, floating point. + */ +_GLOBAL(vmaddfp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + li r0,4 + mtctr r0 + li r7,0 +1: lfsx fr0,r4,r7 + lfsx fr1,r5,r7 + lfsx fr2,r6,r7 + fmadds fr0,fr0,fr2,fr1 + stfsx fr0,r3,r7 + addi r7,r7,4 + bdnz 1b + lfd fr2,32(r1) + b fpdisable + +/* + * Vector negative multiply and subtract, floating point. + */ +_GLOBAL(vnmsubfp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + li r0,4 + mtctr r0 + li r7,0 +1: lfsx fr0,r4,r7 + lfsx fr1,r5,r7 + lfsx fr2,r6,r7 + fnmsubs fr0,fr0,fr2,fr1 + stfsx fr0,r3,r7 + addi r7,r7,4 + bdnz 1b + lfd fr2,32(r1) + b fpdisable + +/* + * Vector reciprocal estimate. We just compute 1.0/x. + * r3 -> destination, r4 -> source. + */ +_GLOBAL(vrefp) + mflr r12 + bl fpenable + li r0,4 + LDCONST(fr1, fpone) + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + fdivs fr0,fr1,fr0 + stfsx fr0,r3,r6 + addi r6,r6,4 + bdnz 1b + b fpdisable + +/* + * Vector reciprocal square-root estimate, floating point. + * We use the frsqrte instruction for the initial estimate followed + * by 2 iterations of Newton-Raphson to get sufficient accuracy. + * r3 -> destination, r4 -> source. + */ +_GLOBAL(vrsqrtefp) + mflr r12 + bl fpenable + stfd fr2,32(r1) + stfd fr3,40(r1) + stfd fr4,48(r1) + stfd fr5,56(r1) + li r0,4 + LDCONST(fr4, fpone) + LDCONST(fr5, fphalf) + mtctr r0 + li r6,0 +1: lfsx fr0,r4,r6 + frsqrte fr1,fr0 /* r = frsqrte(s) */ + fmuls fr3,fr1,fr0 /* r * s */ + fmuls fr2,fr1,fr5 /* r * 0.5 */ + fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */ + fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */ + fmuls fr3,fr1,fr0 /* r * s */ + fmuls fr2,fr1,fr5 /* r * 0.5 */ + fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */ + fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */ + stfsx fr1,r3,r6 + addi r6,r6,4 + bdnz 1b + lfd fr5,56(r1) + lfd fr4,48(r1) + lfd fr3,40(r1) + lfd fr2,32(r1) + b fpdisable diff --git a/arch/powerpc/kernel/vmlinux.lds b/arch/powerpc/kernel/vmlinux.lds new file mode 100644 index 00000000000..d62c288a81d --- /dev/null +++ b/arch/powerpc/kernel/vmlinux.lds @@ -0,0 +1,174 @@ +/* Align . to a 8 byte boundary equals to maximum function alignment. */ +/* sched.text is aling to function alignment to secure we have same + * address even at second ld pass when generating System.map */ +/* spinlock.text is aling to function alignment to secure we have same + * address even at second ld pass when generating System.map */ + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to + the beginning of the section so we begin them at 0. */ + /* Stabs debugging sections. */ +OUTPUT_ARCH(powerpc:common) +jiffies = jiffies_64 + 4; +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . = + SIZEOF_HEADERS; + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .rel.text : { *(.rel.text) } + .rela.text : { *(.rela.text) } + .rel.data : { *(.rel.data) } + .rela.data : { *(.rela.data) } + .rel.rodata : { *(.rel.rodata) } + .rela.rodata : { *(.rela.rodata) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.bss : { *(.rel.bss) } + .rela.bss : { *(.rela.bss) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } +/* .init : { *(.init) } =0*/ + .plt : { *(.plt) } + .text : + { + *(.text) + . = ALIGN(8); __sched_text_start = .; *(.sched.text) __sched_text_end = .; + . = ALIGN(8); __lock_text_start = .; *(.spinlock.text) __lock_text_end = .; + *(.fixup) + *(.got1) + __got2_start = .; + *(.got2) + __got2_end = .; + } + _etext = .; + PROVIDE (etext = .); + .rodata : AT(ADDR(.rodata) - 0) { *(.rodata) *(.rodata.*) *(__vermagic) } .rodata1 : AT(ADDR(.rodata1) - 0) { *(.rodata1) } .pci_fixup : AT(ADDR(.pci_fixup) - 0) { __start_pci_fixups_early = .; *(.pci_fixup_early) __end_pci_fixups_early = .; __start_pci_fixups_header = .; *(.pci_fixup_header) __end_pci_fixups_header = .; __start_pci_fixups_final = .; *(.pci_fixup_final) __end_pci_fixups_final = .; __start_pci_fixups_enable = .; *(.pci_fixup_enable) __end_pci_fixups_enable = .; } __ksymtab : AT(ADDR(__ksymtab) - 0) { __start___ksymtab = .; *(__ksymtab) __stop___ksymtab = .; } __ksymtab_gpl : AT(ADDR(__ksymtab_gpl) - 0) { __start___ksymtab_gpl = .; *(__ksymtab_gpl) __stop___ksymtab_gpl = .; } __kcrctab : AT(ADDR(__kcrctab) - 0) { __start___kcrctab = .; *(__kcrctab) __stop___kcrctab = .; } __kcrctab_gpl : AT(ADDR(__kcrctab_gpl) - 0) { __start___kcrctab_gpl = .; *(__kcrctab_gpl) __stop___kcrctab_gpl = .; } __ksymtab_strings : AT(ADDR(__ksymtab_strings) - 0) { *(__ksymtab_strings) } __param : AT(ADDR(__param) - 0) { __start___param = .; *(__param) __stop___param = .; } + .fini : { *(.fini) } =0 + .ctors : { *(.ctors) } + .dtors : { *(.dtors) } + .fixup : { *(.fixup) } + __ex_table : { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + __bug_table : { + __start___bug_table = .; + *(__bug_table) + __stop___bug_table = .; + } + /* Read-write section, merged into data segment: */ + . = ALIGN(4096); + .data : + { + *(.data) + *(.data1) + *(.sdata) + *(.sdata2) + *(.got.plt) *(.got) + *(.dynamic) + CONSTRUCTORS + } + + . = ALIGN(4096); + __nosave_begin = .; + .data_nosave : { *(.data.nosave) } + . = ALIGN(4096); + __nosave_end = .; + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + _edata = .; + PROVIDE (edata = .); + + . = ALIGN(8192); + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); + __init_begin = .; + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } + /* .exit.text is discarded at runtime, not link time, + to deal with references from __bug_table */ + .exit.text : { *(.exit.text) } + .init.data : { + *(.init.data); + __vtop_table_begin = .; + *(.vtop_fixup); + __vtop_table_end = .; + __ptov_table_begin = .; + *(.ptov_fixup); + __ptov_table_end = .; + } + . = ALIGN(16); + __setup_start = .; + .init.setup : { *(.init.setup) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + } + __initcall_end = .; + + __con_initcall_start = .; + .con_initcall.init : { *(.con_initcall.init) } + __con_initcall_end = .; + + .security_initcall.init : AT(ADDR(.security_initcall.init) - 0) { __security_initcall_start = .; *(.security_initcall.init) __security_initcall_end = .; } + + __start___ftr_fixup = .; + __ftr_fixup : { *(__ftr_fixup) } + __stop___ftr_fixup = .; + + . = ALIGN(32); + __per_cpu_start = .; + .data.percpu : { *(.data.percpu) } + __per_cpu_end = .; + + . = ALIGN(4096); + __initramfs_start = .; + .init.ramfs : { *(.init.ramfs) } + __initramfs_end = .; + + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + _sextratext = .; + _eextratext = .; + + __bss_start = .; + .bss : + { + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + } + __bss_stop = .; + + _end = . ; + PROVIDE (end = .); + + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) + *(.exit.data) + } +} diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S new file mode 100644 index 00000000000..09c6525cfa6 --- /dev/null +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -0,0 +1,172 @@ +#include <asm-generic/vmlinux.lds.h> + +OUTPUT_ARCH(powerpc:common) +jiffies = jiffies_64 + 4; +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + . = + SIZEOF_HEADERS; + .interp : { *(.interp) } + .hash : { *(.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .rel.text : { *(.rel.text) } + .rela.text : { *(.rela.text) } + .rel.data : { *(.rel.data) } + .rela.data : { *(.rela.data) } + .rel.rodata : { *(.rel.rodata) } + .rela.rodata : { *(.rela.rodata) } + .rel.got : { *(.rel.got) } + .rela.got : { *(.rela.got) } + .rel.ctors : { *(.rel.ctors) } + .rela.ctors : { *(.rela.ctors) } + .rel.dtors : { *(.rel.dtors) } + .rela.dtors : { *(.rela.dtors) } + .rel.bss : { *(.rel.bss) } + .rela.bss : { *(.rela.bss) } + .rel.plt : { *(.rel.plt) } + .rela.plt : { *(.rela.plt) } +/* .init : { *(.init) } =0*/ + .plt : { *(.plt) } + .text : + { + *(.text) + SCHED_TEXT + LOCK_TEXT + *(.fixup) + *(.got1) + __got2_start = .; + *(.got2) + __got2_end = .; + } + _etext = .; + PROVIDE (etext = .); + + RODATA + .fini : { *(.fini) } =0 + .ctors : { *(.ctors) } + .dtors : { *(.dtors) } + + .fixup : { *(.fixup) } + + __ex_table : { + __start___ex_table = .; + *(__ex_table) + __stop___ex_table = .; + } + + __bug_table : { + __start___bug_table = .; + *(__bug_table) + __stop___bug_table = .; + } + + /* Read-write section, merged into data segment: */ + . = ALIGN(4096); + .data : + { + *(.data) + *(.data1) + *(.sdata) + *(.sdata2) + *(.got.plt) *(.got) + *(.dynamic) + CONSTRUCTORS + } + + . = ALIGN(4096); + __nosave_begin = .; + .data_nosave : { *(.data.nosave) } + . = ALIGN(4096); + __nosave_end = .; + + . = ALIGN(32); + .data.cacheline_aligned : { *(.data.cacheline_aligned) } + + _edata = .; + PROVIDE (edata = .); + + . = ALIGN(8192); + .data.init_task : { *(.data.init_task) } + + . = ALIGN(4096); + __init_begin = .; + .init.text : { + _sinittext = .; + *(.init.text) + _einittext = .; + } + /* .exit.text is discarded at runtime, not link time, + to deal with references from __bug_table */ + .exit.text : { *(.exit.text) } + .init.data : { + *(.init.data); + __vtop_table_begin = .; + *(.vtop_fixup); + __vtop_table_end = .; + __ptov_table_begin = .; + *(.ptov_fixup); + __ptov_table_end = .; + } + . = ALIGN(16); + __setup_start = .; + .init.setup : { *(.init.setup) } + __setup_end = .; + __initcall_start = .; + .initcall.init : { + *(.initcall1.init) + *(.initcall2.init) + *(.initcall3.init) + *(.initcall4.init) + *(.initcall5.init) + *(.initcall6.init) + *(.initcall7.init) + } + __initcall_end = .; + + __con_initcall_start = .; + .con_initcall.init : { *(.con_initcall.init) } + __con_initcall_end = .; + + SECURITY_INIT + + __start___ftr_fixup = .; + __ftr_fixup : { *(__ftr_fixup) } + __stop___ftr_fixup = .; + + . = ALIGN(32); + __per_cpu_start = .; + .data.percpu : { *(.data.percpu) } + __per_cpu_end = .; + + . = ALIGN(4096); + __initramfs_start = .; + .init.ramfs : { *(.init.ramfs) } + __initramfs_end = .; + + . = ALIGN(4096); + __init_end = .; + + . = ALIGN(4096); + _sextratext = .; + _eextratext = .; + + __bss_start = .; + .bss : + { + *(.sbss) *(.scommon) + *(.dynbss) + *(.bss) + *(COMMON) + } + __bss_stop = .; + + _end = . ; + PROVIDE (end = .); + + /* Sections to be discarded. */ + /DISCARD/ : { + *(.exitcall.exit) + *(.exit.data) + } +} diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile new file mode 100644 index 00000000000..347f9798e43 --- /dev/null +++ b/arch/powerpc/lib/Makefile @@ -0,0 +1,9 @@ +# +# Makefile for ppc-specific library files.. +# + +obj-y := strcase.o string.o +obj-$(CONFIG_PPC32) += div64.o copy32.o checksum.o +obj-$(CONFIG_PPC64) += copypage.o copyuser.o memcpy.o usercopy.o \ + sstep.o checksum64.o +obj-$(CONFIG_PPC_ISERIES) += e2a.o diff --git a/arch/powerpc/lib/checksum.S b/arch/powerpc/lib/checksum.S new file mode 100644 index 00000000000..7874e8a8045 --- /dev/null +++ b/arch/powerpc/lib/checksum.S @@ -0,0 +1,225 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include <linux/sys.h> +#include <asm/processor.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + + .text + +/* + * ip_fast_csum(buf, len) -- Optimized for IP header + * len is in words and is always >= 5. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(saddr, daddr, len, proto, sum) + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + addze r0,r0 /* add in final carry */ + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * csum_partial(buff, len, sum) + */ +_GLOBAL(csum_partial) + addic r0,r5,0 + subi r3,r3,4 + srwi. r6,r4,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r5,r3,2 /* Align buffer to longword boundary */ + beq+ 1f + lhz r5,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r0,r0,r5 + srwi. r6,r4,2 /* # words to do */ + beq 3f +1: mtctr r6 +2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */ + adde r0,r0,r5 /* be unnecessary to unroll this loop */ + bdnz 2b + andi. r4,r4,3 +3: cmpwi 0,r4,2 + blt+ 4f + lhz r5,4(r3) + addi r3,r3,2 + subi r4,r4,2 + adde r0,r0,r5 +4: cmpwi 0,r4,1 + bne+ 5f + lbz r5,4(r3) + slwi r5,r5,8 /* Upper byte of word */ + adde r0,r0,r5 +5: addze r3,r0 /* add in final carry */ + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: srwi. r6,r5,4 /* # groups of 4 words to do */ + beq 10f + mtctr r6 +71: lwz r6,4(r3) +72: lwz r9,8(r3) +73: lwz r10,12(r3) +74: lwzu r11,16(r3) + adde r0,r0,r6 +75: stw r6,4(r4) + adde r0,r0,r9 +76: stw r9,8(r4) + adde r0,r0,r10 +77: stw r10,12(r4) + adde r0,r0,r11 +78: stwu r11,16(r4) + bdnz 71b +10: rlwinm. r6,r5,30,30,31 /* # words left to do */ + beq 13f + mtctr r6 +82: lwzu r9,4(r3) +92: stwu r9,4(r4) + adde r0,r0,r9 + bdnz 82b +13: andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry */ + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + +src_error_4: + mfctr r6 /* update # bytes remaining from ctr */ + rlwimi r5,r6,4,0,27 + b 79f +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 +79: srwi. r6,r5,2 + beq 3f + mtctr r6 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. r5,r5,3 + beq src_error +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b +src_error: + cmpwi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + +dst_error: + cmpwi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .long 81b,src_error_1 + .long 91b,dst_error + .long 71b,src_error_4 + .long 72b,src_error_4 + .long 73b,src_error_4 + .long 74b,src_error_4 + .long 75b,dst_error + .long 76b,dst_error + .long 77b,dst_error + .long 78b,dst_error + .long 82b,src_error_2 + .long 92b,dst_error + .long 83b,src_error_3 + .long 93b,dst_error + .long 84b,src_error_3 + .long 94b,dst_error + .long 95b,dst_error + .long 96b,dst_error + .long 97b,dst_error diff --git a/arch/powerpc/lib/checksum64.S b/arch/powerpc/lib/checksum64.S new file mode 100644 index 00000000000..ef96c6c58ef --- /dev/null +++ b/arch/powerpc/lib/checksum64.S @@ -0,0 +1,229 @@ +/* + * This file contains assembly-language implementations + * of IP-style 1's complement checksum routines. + * + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). + */ + +#include <linux/sys.h> +#include <asm/processor.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + +/* + * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header + * len is in words and is always >= 5. + * + * In practice len == 5, but this is not guaranteed. So this code does not + * attempt to use doubleword instructions. + */ +_GLOBAL(ip_fast_csum) + lwz r0,0(r3) + lwzu r5,4(r3) + addic. r4,r4,-2 + addc r0,r0,r5 + mtctr r4 + blelr- +1: lwzu r4,4(r3) + adde r0,r0,r4 + bdnz 1b + addze r0,r0 /* add in final carry */ + rldicl r4,r0,32,0 /* fold two 32-bit halves together */ + add r0,r0,r4 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Compute checksum of TCP or UDP pseudo-header: + * csum_tcpudp_magic(r3=saddr, r4=daddr, r5=len, r6=proto, r7=sum) + * No real gain trying to do this specially for 64 bit, but + * the 32 bit addition may spill into the upper bits of + * the doubleword so we still must fold it down from 64. + */ +_GLOBAL(csum_tcpudp_magic) + rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ + addc r0,r3,r4 /* add 4 32-bit words together */ + adde r0,r0,r5 + adde r0,r0,r7 + rldicl r4,r0,32,0 /* fold 64 bit value */ + add r0,r4,r0 + srdi r0,r0,32 + rlwinm r3,r0,16,0,31 /* fold two halves together */ + add r3,r0,r3 + not r3,r3 + srwi r3,r3,16 + blr + +/* + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit). + * + * This code assumes at least halfword alignment, though the length + * can be any number of bytes. The sum is accumulated in r5. + * + * csum_partial(r3=buff, r4=len, r5=sum) + */ +_GLOBAL(csum_partial) + subi r3,r3,8 /* we'll offset by 8 for the loads */ + srdi. r6,r4,3 /* divide by 8 for doubleword count */ + addic r5,r5,0 /* clear carry */ + beq 3f /* if we're doing < 8 bytes */ + andi. r0,r3,2 /* aligned on a word boundary already? */ + beq+ 1f + lhz r6,8(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r4,r4,2 + addc r5,r5,r6 + srdi. r6,r4,3 /* recompute number of doublewords */ + beq 3f /* any left? */ +1: mtctr r6 +2: ldu r6,8(r3) /* main sum loop */ + adde r5,r5,r6 + bdnz 2b + andi. r4,r4,7 /* compute bytes left to sum after doublewords */ +3: cmpwi 0,r4,4 /* is at least a full word left? */ + blt 4f + lwz r6,8(r3) /* sum this word */ + addi r3,r3,4 + subi r4,r4,4 + adde r5,r5,r6 +4: cmpwi 0,r4,2 /* is at least a halfword left? */ + blt+ 5f + lhz r6,8(r3) /* sum this halfword */ + addi r3,r3,2 + subi r4,r4,2 + adde r5,r5,r6 +5: cmpwi 0,r4,1 /* is at least a byte left? */ + bne+ 6f + lbz r6,8(r3) /* sum this byte */ + slwi r6,r6,8 /* this byte is assumed to be the upper byte of a halfword */ + adde r5,r5,r6 +6: addze r5,r5 /* add in final carry */ + rldicl r4,r5,32,0 /* fold two 32-bit halves together */ + add r3,r4,r5 + srdi r3,r3,32 + blr + +/* + * Computes the checksum of a memory block at src, length len, + * and adds in "sum" (32-bit), while copying the block to dst. + * If an access exception occurs on src or dst, it stores -EFAULT + * to *src_err or *dst_err respectively, and (for an error on + * src) zeroes the rest of dst. + * + * This code needs to be reworked to take advantage of 64 bit sum+copy. + * However, due to tokenring halfword alignment problems this will be very + * tricky. For now we'll leave it until we instrument it somehow. + * + * csum_partial_copy_generic(r3=src, r4=dst, r5=len, r6=sum, r7=src_err, r8=dst_err) + */ +_GLOBAL(csum_partial_copy_generic) + addic r0,r6,0 + subi r3,r3,4 + subi r4,r4,4 + srwi. r6,r5,2 + beq 3f /* if we're doing < 4 bytes */ + andi. r9,r4,2 /* Align dst to longword boundary */ + beq+ 1f +81: lhz r6,4(r3) /* do 2 bytes to get aligned */ + addi r3,r3,2 + subi r5,r5,2 +91: sth r6,4(r4) + addi r4,r4,2 + addc r0,r0,r6 + srwi. r6,r5,2 /* # words to do */ + beq 3f +1: mtctr r6 +82: lwzu r6,4(r3) /* the bdnz has zero overhead, so it should */ +92: stwu r6,4(r4) /* be unnecessary to unroll this loop */ + adde r0,r0,r6 + bdnz 82b + andi. r5,r5,3 +3: cmpwi 0,r5,2 + blt+ 4f +83: lhz r6,4(r3) + addi r3,r3,2 + subi r5,r5,2 +93: sth r6,4(r4) + addi r4,r4,2 + adde r0,r0,r6 +4: cmpwi 0,r5,1 + bne+ 5f +84: lbz r6,4(r3) +94: stb r6,4(r4) + slwi r6,r6,8 /* Upper byte of word */ + adde r0,r0,r6 +5: addze r3,r0 /* add in final carry (unlikely with 64-bit regs) */ + rldicl r4,r3,32,0 /* fold 64 bit value */ + add r3,r4,r3 + srdi r3,r3,32 + blr + +/* These shouldn't go in the fixup section, since that would + cause the ex_table addresses to get out of order. */ + + .globl src_error_1 +src_error_1: + li r6,0 + subi r5,r5,2 +95: sth r6,4(r4) + addi r4,r4,2 + srwi. r6,r5,2 + beq 3f + mtctr r6 + .globl src_error_2 +src_error_2: + li r6,0 +96: stwu r6,4(r4) + bdnz 96b +3: andi. r5,r5,3 + beq src_error + .globl src_error_3 +src_error_3: + li r6,0 + mtctr r5 + addi r4,r4,3 +97: stbu r6,1(r4) + bdnz 97b + .globl src_error +src_error: + cmpdi 0,r7,0 + beq 1f + li r6,-EFAULT + stw r6,0(r7) +1: addze r3,r0 + blr + + .globl dst_error +dst_error: + cmpdi 0,r8,0 + beq 1f + li r6,-EFAULT + stw r6,0(r8) +1: addze r3,r0 + blr + +.section __ex_table,"a" + .align 3 + .llong 81b,src_error_1 + .llong 91b,dst_error + .llong 82b,src_error_2 + .llong 92b,dst_error + .llong 83b,src_error_3 + .llong 93b,dst_error + .llong 84b,src_error_3 + .llong 94b,dst_error + .llong 95b,dst_error + .llong 96b,dst_error + .llong 97b,dst_error diff --git a/arch/powerpc/lib/copy32.S b/arch/powerpc/lib/copy32.S new file mode 100644 index 00000000000..420a912198a --- /dev/null +++ b/arch/powerpc/lib/copy32.S @@ -0,0 +1,543 @@ +/* + * Memory copy functions for 32-bit PowerPC. + * + * Copyright (C) 1996-2005 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/cache.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + +#define COPY_16_BYTES \ + lwz r7,4(r4); \ + lwz r8,8(r4); \ + lwz r9,12(r4); \ + lwzu r10,16(r4); \ + stw r7,4(r6); \ + stw r8,8(r6); \ + stw r9,12(r6); \ + stwu r10,16(r6) + +#define COPY_16_BYTES_WITHEX(n) \ +8 ## n ## 0: \ + lwz r7,4(r4); \ +8 ## n ## 1: \ + lwz r8,8(r4); \ +8 ## n ## 2: \ + lwz r9,12(r4); \ +8 ## n ## 3: \ + lwzu r10,16(r4); \ +8 ## n ## 4: \ + stw r7,4(r6); \ +8 ## n ## 5: \ + stw r8,8(r6); \ +8 ## n ## 6: \ + stw r9,12(r6); \ +8 ## n ## 7: \ + stwu r10,16(r6) + +#define COPY_16_BYTES_EXCODE(n) \ +9 ## n ## 0: \ + addi r5,r5,-(16 * n); \ + b 104f; \ +9 ## n ## 1: \ + addi r5,r5,-(16 * n); \ + b 105f; \ +.section __ex_table,"a"; \ + .align 2; \ + .long 8 ## n ## 0b,9 ## n ## 0b; \ + .long 8 ## n ## 1b,9 ## n ## 0b; \ + .long 8 ## n ## 2b,9 ## n ## 0b; \ + .long 8 ## n ## 3b,9 ## n ## 0b; \ + .long 8 ## n ## 4b,9 ## n ## 1b; \ + .long 8 ## n ## 5b,9 ## n ## 1b; \ + .long 8 ## n ## 6b,9 ## n ## 1b; \ + .long 8 ## n ## 7b,9 ## n ## 1b; \ + .text + + .text + .stabs "arch/powerpc/lib/",N_SO,0,0,0f + .stabs "copy32.S",N_SO,0,0,0f +0: + +CACHELINE_BYTES = L1_CACHE_LINE_SIZE +LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE +CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1) + +/* + * Use dcbz on the complete cache lines in the destination + * to set them to zero. This requires that the destination + * area is cacheable. -- paulus + */ +_GLOBAL(cacheable_memzero) + mr r5,r4 + li r4,0 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + clrlwi r7,r6,32-LG_CACHELINE_BYTES + add r8,r7,r5 + srwi r9,r8,LG_CACHELINE_BYTES + addic. r9,r9,-1 /* total number of complete cachelines */ + ble 2f + xori r0,r7,CACHELINE_MASK & ~3 + srwi. r0,r0,2 + beq 3f + mtctr r0 +4: stwu r4,4(r6) + bdnz 4b +3: mtctr r9 + li r7,4 +#if !defined(CONFIG_8xx) +10: dcbz r7,r6 +#else +10: stw r4, 4(r6) + stw r4, 8(r6) + stw r4, 12(r6) + stw r4, 16(r6) +#if CACHE_LINE_SIZE >= 32 + stw r4, 20(r6) + stw r4, 24(r6) + stw r4, 28(r6) + stw r4, 32(r6) +#endif /* CACHE_LINE_SIZE */ +#endif + addi r6,r6,CACHELINE_BYTES + bdnz 10b + clrlwi r5,r8,32-LG_CACHELINE_BYTES + addi r5,r5,4 +2: srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +_GLOBAL(memset) + rlwimi r4,r4,8,16,23 + rlwimi r4,r4,16,0,15 + addi r6,r3,-4 + cmplwi 0,r5,4 + blt 7f + stwu r4,4(r6) + beqlr + andi. r0,r6,3 + add r5,r0,r5 + subf r6,r0,r6 + srwi r0,r5,2 + mtctr r0 + bdz 6f +1: stwu r4,4(r6) + bdnz 1b +6: andi. r5,r5,3 +7: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r6,3 +8: stbu r4,1(r6) + bdnz 8b + blr + +/* + * This version uses dcbz on the complete cache lines in the + * destination area to reduce memory traffic. This requires that + * the destination area is cacheable. + * We only use this version if the source and dest don't overlap. + * -- paulus. + */ +_GLOBAL(cacheable_memcpy) + add r7,r3,r5 /* test if the src & dst overlap */ + add r8,r4,r5 + cmplw 0,r4,r7 + cmplw 1,r3,r8 + crand 0,0,4 /* cr0.lt &= cr1.lt */ + blt memcpy /* if regions overlap */ + + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + subf r5,r0,r5 + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ + stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ + stwu r9,4(r6) + bdnz 72b + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + mtctr r0 + beq 63f +53: +#if !defined(CONFIG_8xx) + dcbz r11,r6 +#endif + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 53b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) + stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) + stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: blr + +_GLOBAL(memmove) + cmplw 0,r3,r4 + bgt backwards_memcpy + /* fall through */ + +_GLOBAL(memcpy) + srwi. r7,r5,3 + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(backwards_memcpy) + rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ + add r6,r3,r5 + add r4,r4,r5 + beq 2f + andi. r0,r6,3 + mtctr r7 + bne 5f +1: lwz r7,-4(r4) + lwzu r8,-8(r4) + stw r7,-4(r6) + stwu r8,-8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,-4(r4) + subi r5,r5,4 + stwu r0,-4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 +4: lbzu r0,-1(r4) + stbu r0,-1(r6) + bdnz 4b + blr +5: mtctr r0 +6: lbzu r7,-1(r4) + stbu r7,-1(r6) + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +_GLOBAL(__copy_tofrom_user) + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ +71: stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: subf r5,r0,r5 + srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ +73: stwu r9,4(r6) + bdnz 72b + + .section __ex_table,"a" + .align 2 + .long 70b,100f + .long 71b,101f + .long 72b,102f + .long 73b,103f + .text + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + beq 63f + +#ifdef CONFIG_8xx + /* Don't use prefetch on 8xx */ + mtctr r0 + li r0,0 +53: COPY_16_BYTES_WITHEX(0) + bdnz 53b + +#else /* not CONFIG_8xx */ + /* Here we decide how far ahead to prefetch the source */ + li r3,4 + cmpwi r0,1 + li r7,0 + ble 114f + li r7,1 +#if MAX_COPY_PREFETCH > 1 + /* Heuristically, for large transfers we prefetch + MAX_COPY_PREFETCH cachelines ahead. For small transfers + we prefetch 1 cacheline ahead. */ + cmpwi r0,MAX_COPY_PREFETCH + ble 112f + li r7,MAX_COPY_PREFETCH +112: mtctr r7 +111: dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES + bdnz 111b +#else + dcbt r3,r4 + addi r3,r3,CACHELINE_BYTES +#endif /* MAX_COPY_PREFETCH > 1 */ + +114: subf r8,r7,r0 + mr r0,r7 + mtctr r8 + +53: dcbt r3,r4 +54: dcbz r11,r6 + .section __ex_table,"a" + .align 2 + .long 54b,105f + .text +/* the main body of the cacheline loop */ + COPY_16_BYTES_WITHEX(0) +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_WITHEX(1) +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_WITHEX(2) + COPY_16_BYTES_WITHEX(3) +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_WITHEX(4) + COPY_16_BYTES_WITHEX(5) + COPY_16_BYTES_WITHEX(6) + COPY_16_BYTES_WITHEX(7) +#endif +#endif +#endif + bdnz 53b + cmpwi r0,0 + li r3,4 + li r7,0 + bne 114b +#endif /* CONFIG_8xx */ + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) +31: stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) +41: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: li r3,0 + blr + +/* read fault, initial single-byte copy */ +100: li r9,0 + b 90f +/* write fault, initial single-byte copy */ +101: li r9,1 +90: subf r5,r8,r5 + li r3,0 + b 99f +/* read fault, initial word copy */ +102: li r9,0 + b 91f +/* write fault, initial word copy */ +103: li r9,1 +91: li r3,2 + b 99f + +/* + * this stuff handles faults in the cacheline loop and branches to either + * 104f (if in read part) or 105f (if in write part), after updating r5 + */ + COPY_16_BYTES_EXCODE(0) +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES_EXCODE(1) +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES_EXCODE(2) + COPY_16_BYTES_EXCODE(3) +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES_EXCODE(4) + COPY_16_BYTES_EXCODE(5) + COPY_16_BYTES_EXCODE(6) + COPY_16_BYTES_EXCODE(7) +#endif +#endif +#endif + +/* read fault in cacheline loop */ +104: li r9,0 + b 92f +/* fault on dcbz (effectively a write fault) */ +/* or write fault in cacheline loop */ +105: li r9,1 +92: li r3,LG_CACHELINE_BYTES + mfctr r8 + add r0,r0,r8 + b 106f +/* read fault in final word loop */ +108: li r9,0 + b 93f +/* write fault in final word loop */ +109: li r9,1 +93: andi. r5,r5,3 + li r3,2 + b 99f +/* read fault in final byte loop */ +110: li r9,0 + b 94f +/* write fault in final byte loop */ +111: li r9,1 +94: li r5,0 + li r3,0 +/* + * At this stage the number of bytes not copied is + * r5 + (ctr << r3), and r9 is 0 for read or 1 for write. + */ +99: mfctr r0 +106: slw r3,r0,r3 + add. r3,r3,r5 + beq 120f /* shouldn't happen */ + cmpwi 0,r9,0 + bne 120f +/* for a read fault, first try to continue the copy one byte at a time */ + mtctr r3 +130: lbz r0,4(r4) +131: stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 130b +/* then clear out the destination: r3 bytes starting at 4(r6) */ +132: mfctr r3 + srwi. r0,r3,2 + li r9,0 + mtctr r0 + beq 113f +112: stwu r9,4(r6) + bdnz 112b +113: andi. r0,r3,3 + mtctr r0 + beq 120f +114: stb r9,4(r6) + addi r6,r6,1 + bdnz 114b +120: blr + + .section __ex_table,"a" + .align 2 + .long 30b,108b + .long 31b,109b + .long 40b,110b + .long 41b,111b + .long 130b,132b + .long 131b,120b + .long 112b,120b + .long 114b,120b + .text diff --git a/arch/powerpc/lib/copypage.S b/arch/powerpc/lib/copypage.S new file mode 100644 index 00000000000..733d61618bb --- /dev/null +++ b/arch/powerpc/lib/copypage.S @@ -0,0 +1,121 @@ +/* + * arch/ppc64/lib/copypage.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> + +_GLOBAL(copy_page) + std r31,-8(1) + std r30,-16(1) + std r29,-24(1) + std r28,-32(1) + std r27,-40(1) + std r26,-48(1) + std r25,-56(1) + std r24,-64(1) + std r23,-72(1) + std r22,-80(1) + std r21,-88(1) + std r20,-96(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r12,5 +0: addi r5,r5,-24 + mtctr r12 + ld r22,640(4) + ld r21,512(4) + ld r20,384(4) + ld r11,256(4) + ld r9,128(4) + ld r7,0(4) + ld r25,648(4) + ld r24,520(4) + ld r23,392(4) + ld r10,264(4) + ld r8,136(4) + ldu r6,8(4) + cmpwi r5,24 +1: std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + ld r28,648(4) + ld r27,520(4) + ld r26,392(4) + ld r31,264(4) + ld r30,136(4) + ld r29,8(4) + std r25,656(3) + std r24,528(3) + std r23,400(3) + std r10,272(3) + std r8,144(3) + std r6,16(3) + ld r22,656(4) + ld r21,528(4) + ld r20,400(4) + ld r11,272(4) + ld r9,144(4) + ld r7,16(4) + std r28,664(3) + std r27,536(3) + std r26,408(3) + std r31,280(3) + std r30,152(3) + stdu r29,24(3) + ld r25,664(4) + ld r24,536(4) + ld r23,408(4) + ld r10,280(4) + ld r8,152(4) + ldu r6,24(4) + bdnz 1b + std r22,648(3) + std r21,520(3) + std r20,392(3) + std r11,264(3) + std r9,136(3) + std r7,8(3) + addi r4,r4,640 + addi r3,r3,648 + bge 0b + mtctr r5 + ld r7,0(4) + ld r8,8(4) + ldu r9,16(4) +3: ld r10,8(4) + std r7,8(3) + ld r7,16(4) + std r8,16(3) + ld r8,24(4) + std r9,24(3) + ldu r9,32(4) + stdu r10,32(3) + bdnz 3b +4: ld r10,8(4) + std r7,8(3) + std r8,16(3) + std r9,24(3) + std r10,32(3) +9: ld r20,-96(1) + ld r21,-88(1) + ld r22,-80(1) + ld r23,-72(1) + ld r24,-64(1) + ld r25,-56(1) + ld r26,-48(1) + ld r27,-40(1) + ld r28,-32(1) + ld r29,-24(1) + ld r30,-16(1) + ld r31,-8(1) + blr diff --git a/arch/powerpc/lib/copyuser.S b/arch/powerpc/lib/copyuser.S new file mode 100644 index 00000000000..a0b3fbbd6fb --- /dev/null +++ b/arch/powerpc/lib/copyuser.S @@ -0,0 +1,576 @@ +/* + * arch/ppc64/lib/copyuser.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> + + .align 7 +_GLOBAL(__copy_tofrom_user) + /* first check for a whole page copy on a page boundary */ + cmpldi cr1,r5,16 + cmpdi cr6,r5,4096 + or r0,r3,r4 + neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ + andi. r0,r0,4095 + std r3,-24(r1) + crand cr0*4+2,cr0*4+2,cr6*4+2 + std r4,-16(r1) + std r5,-8(r1) + dcbt 0,r4 + beq .Lcopy_page + andi. r6,r6,7 + mtcrf 0x01,r5 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 +20: ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,22f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,72f +21: ld r9,8(r4) +70: std r8,8(r3) +22: ldu r8,16(r4) +71: stdu r9,16(r3) + bdnz 21b +72: std r8,8(r3) + beq+ 3f + addi r3,r3,16 +23: ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 +73: stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 +74: sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 +75: stb r9,0(r3) +3: li r3,0 + blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpldi cr6,r6,3 + andi. r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + bt cr7*4+0,28f + +24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ +25: ld r0,8(r4) + sld r6,r9,r10 +26: ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,79f +27: ld r0,8(r4) + b 2f + +28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ +29: ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f +30: ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 +31: ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,78f + +1: or r7,r7,r6 +32: ld r0,8(r4) +76: std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 +33: ldu r9,16(r4) + or r12,r8,r12 +77: stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +78: std r12,8(r3) + or r7,r7,r6 +79: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 +80: std r12,24(r3) + bne 6f + li r3,0 + blr +6: cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail +34: ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 /* put #bytes to 8B bdry into cr7 */ + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f +35: lbz r0,0(r4) +81: stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f +36: lhzx r0,r7,r4 +82: sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f +37: lwzx r0,r7,r4 +83: stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f +38: lwz r0,0(r4) +39: lwz r9,4(r4) + addi r4,r4,8 +84: stw r0,0(r3) +85: stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f +40: lwz r0,0(r4) + addi r4,r4,4 +86: stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f +41: lhz r0,0(r4) + addi r4,r4,2 +87: sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f +42: lbz r0,0(r4) +88: stb r0,0(r3) +4: li r3,0 + blr + +/* + * exception handlers follow + * we have to return the number of bytes not copied + * for an exception on a load, we set the rest of the destination to 0 + */ + +136: +137: + add r3,r3,r7 + b 1f +130: +131: + addi r3,r3,8 +120: +122: +124: +125: +126: +127: +128: +129: +133: + addi r3,r3,8 +121: +132: + addi r3,r3,8 +123: +134: +135: +138: +139: +140: +141: +142: + +/* + * here we have had a fault on a load and r3 points to the first + * unmodified byte of the destination + */ +1: ld r6,-24(r1) + ld r4,-16(r1) + ld r5,-8(r1) + subf r6,r6,r3 + add r4,r4,r6 + subf r5,r6,r5 /* #bytes left to go */ + +/* + * first see if we can copy any more bytes before hitting another exception + */ + mtctr r5 +43: lbz r0,0(r4) + addi r4,r4,1 +89: stb r0,0(r3) + addi r3,r3,1 + bdnz 43b + li r3,0 /* huh? all copied successfully this time? */ + blr + +/* + * here we have trapped again, need to clear ctr bytes starting at r3 + */ +143: mfctr r5 + li r0,0 + mr r4,r3 + mr r3,r5 /* return the number of bytes not copied */ +1: andi. r9,r4,7 + beq 3f +90: stb r0,0(r4) + addic. r5,r5,-1 + addi r4,r4,1 + bne 1b + blr +3: cmpldi cr1,r5,8 + srdi r9,r5,3 + andi. r5,r5,7 + blt cr1,93f + mtctr r9 +91: std r0,0(r4) + addi r4,r4,8 + bdnz 91b +93: beqlr + mtctr r5 +92: stb r0,0(r4) + addi r4,r4,1 + bdnz 92b + blr + +/* + * exception handlers for stores: we just need to work + * out how many bytes weren't copied + */ +182: +183: + add r3,r3,r7 + b 1f +180: + addi r3,r3,8 +171: +177: + addi r3,r3,8 +170: +172: +176: +178: + addi r3,r3,4 +185: + addi r3,r3,4 +173: +174: +175: +179: +181: +184: +186: +187: +188: +189: +1: + ld r6,-24(r1) + ld r5,-8(r1) + add r6,r6,r5 + subf r3,r3,r6 /* #bytes not copied */ +190: +191: +192: + blr /* #bytes not copied in r3 */ + + .section __ex_table,"a" + .align 3 + .llong 20b,120b + .llong 21b,121b + .llong 70b,170b + .llong 22b,122b + .llong 71b,171b + .llong 72b,172b + .llong 23b,123b + .llong 73b,173b + .llong 74b,174b + .llong 75b,175b + .llong 24b,124b + .llong 25b,125b + .llong 26b,126b + .llong 27b,127b + .llong 28b,128b + .llong 29b,129b + .llong 30b,130b + .llong 31b,131b + .llong 32b,132b + .llong 76b,176b + .llong 33b,133b + .llong 77b,177b + .llong 78b,178b + .llong 79b,179b + .llong 80b,180b + .llong 34b,134b + .llong 35b,135b + .llong 81b,181b + .llong 36b,136b + .llong 82b,182b + .llong 37b,137b + .llong 83b,183b + .llong 38b,138b + .llong 39b,139b + .llong 84b,184b + .llong 85b,185b + .llong 40b,140b + .llong 86b,186b + .llong 41b,141b + .llong 87b,187b + .llong 42b,142b + .llong 88b,188b + .llong 43b,143b + .llong 89b,189b + .llong 90b,190b + .llong 91b,191b + .llong 92b,192b + + .text + +/* + * Routine to copy a whole page of data, optimized for POWER4. + * On POWER4 it is more than 50% faster than the simple loop + * above (following the .Ldst_aligned label) but it runs slightly + * slower on POWER3. + */ +.Lcopy_page: + std r31,-32(1) + std r30,-40(1) + std r29,-48(1) + std r28,-56(1) + std r27,-64(1) + std r26,-72(1) + std r25,-80(1) + std r24,-88(1) + std r23,-96(1) + std r22,-104(1) + std r21,-112(1) + std r20,-120(1) + li r5,4096/32 - 1 + addi r3,r3,-8 + li r0,5 +0: addi r5,r5,-24 + mtctr r0 +20: ld r22,640(4) +21: ld r21,512(4) +22: ld r20,384(4) +23: ld r11,256(4) +24: ld r9,128(4) +25: ld r7,0(4) +26: ld r25,648(4) +27: ld r24,520(4) +28: ld r23,392(4) +29: ld r10,264(4) +30: ld r8,136(4) +31: ldu r6,8(4) + cmpwi r5,24 +1: +32: std r22,648(3) +33: std r21,520(3) +34: std r20,392(3) +35: std r11,264(3) +36: std r9,136(3) +37: std r7,8(3) +38: ld r28,648(4) +39: ld r27,520(4) +40: ld r26,392(4) +41: ld r31,264(4) +42: ld r30,136(4) +43: ld r29,8(4) +44: std r25,656(3) +45: std r24,528(3) +46: std r23,400(3) +47: std r10,272(3) +48: std r8,144(3) +49: std r6,16(3) +50: ld r22,656(4) +51: ld r21,528(4) +52: ld r20,400(4) +53: ld r11,272(4) +54: ld r9,144(4) +55: ld r7,16(4) +56: std r28,664(3) +57: std r27,536(3) +58: std r26,408(3) +59: std r31,280(3) +60: std r30,152(3) +61: stdu r29,24(3) +62: ld r25,664(4) +63: ld r24,536(4) +64: ld r23,408(4) +65: ld r10,280(4) +66: ld r8,152(4) +67: ldu r6,24(4) + bdnz 1b +68: std r22,648(3) +69: std r21,520(3) +70: std r20,392(3) +71: std r11,264(3) +72: std r9,136(3) +73: std r7,8(3) +74: addi r4,r4,640 +75: addi r3,r3,648 + bge 0b + mtctr r5 +76: ld r7,0(4) +77: ld r8,8(4) +78: ldu r9,16(4) +3: +79: ld r10,8(4) +80: std r7,8(3) +81: ld r7,16(4) +82: std r8,16(3) +83: ld r8,24(4) +84: std r9,24(3) +85: ldu r9,32(4) +86: stdu r10,32(3) + bdnz 3b +4: +87: ld r10,8(4) +88: std r7,8(3) +89: std r8,16(3) +90: std r9,24(3) +91: std r10,32(3) +9: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + li r3,0 + blr + +/* + * on an exception, reset to the beginning and jump back into the + * standard __copy_tofrom_user + */ +100: ld r20,-120(1) + ld r21,-112(1) + ld r22,-104(1) + ld r23,-96(1) + ld r24,-88(1) + ld r25,-80(1) + ld r26,-72(1) + ld r27,-64(1) + ld r28,-56(1) + ld r29,-48(1) + ld r30,-40(1) + ld r31,-32(1) + ld r3,-24(r1) + ld r4,-16(r1) + li r5,4096 + b .Ldst_aligned + + .section __ex_table,"a" + .align 3 + .llong 20b,100b + .llong 21b,100b + .llong 22b,100b + .llong 23b,100b + .llong 24b,100b + .llong 25b,100b + .llong 26b,100b + .llong 27b,100b + .llong 28b,100b + .llong 29b,100b + .llong 30b,100b + .llong 31b,100b + .llong 32b,100b + .llong 33b,100b + .llong 34b,100b + .llong 35b,100b + .llong 36b,100b + .llong 37b,100b + .llong 38b,100b + .llong 39b,100b + .llong 40b,100b + .llong 41b,100b + .llong 42b,100b + .llong 43b,100b + .llong 44b,100b + .llong 45b,100b + .llong 46b,100b + .llong 47b,100b + .llong 48b,100b + .llong 49b,100b + .llong 50b,100b + .llong 51b,100b + .llong 52b,100b + .llong 53b,100b + .llong 54b,100b + .llong 55b,100b + .llong 56b,100b + .llong 57b,100b + .llong 58b,100b + .llong 59b,100b + .llong 60b,100b + .llong 61b,100b + .llong 62b,100b + .llong 63b,100b + .llong 64b,100b + .llong 65b,100b + .llong 66b,100b + .llong 67b,100b + .llong 68b,100b + .llong 69b,100b + .llong 70b,100b + .llong 71b,100b + .llong 72b,100b + .llong 73b,100b + .llong 74b,100b + .llong 75b,100b + .llong 76b,100b + .llong 77b,100b + .llong 78b,100b + .llong 79b,100b + .llong 80b,100b + .llong 81b,100b + .llong 82b,100b + .llong 83b,100b + .llong 84b,100b + .llong 85b,100b + .llong 86b,100b + .llong 87b,100b + .llong 88b,100b + .llong 89b,100b + .llong 90b,100b + .llong 91b,100b diff --git a/arch/powerpc/lib/div64.S b/arch/powerpc/lib/div64.S new file mode 100644 index 00000000000..3527569e992 --- /dev/null +++ b/arch/powerpc/lib/div64.S @@ -0,0 +1,58 @@ +/* + * Divide a 64-bit unsigned number by a 32-bit unsigned number. + * This routine assumes that the top 32 bits of the dividend are + * non-zero to start with. + * On entry, r3 points to the dividend, which get overwritten with + * the 64-bit quotient, and r4 contains the divisor. + * On exit, r3 contains the remainder. + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/ppc_asm.h> +#include <asm/processor.h> + +_GLOBAL(__div64_32) + lwz r5,0(r3) # get the dividend into r5/r6 + lwz r6,4(r3) + cmplw r5,r4 + li r7,0 + li r8,0 + blt 1f + divwu r7,r5,r4 # if dividend.hi >= divisor, + mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor + subf. r5,r0,r5 # dividend.hi %= divisor + beq 3f +1: mr r11,r5 # here dividend.hi != 0 + andis. r0,r5,0xc000 + bne 2f + cntlzw r0,r5 # we are shifting the dividend right + li r10,-1 # to make it < 2^32, and shifting + srw r10,r10,r0 # the divisor right the same amount, + add r9,r4,r10 # rounding up (so the estimate cannot + andc r11,r6,r10 # ever be too large, only too small) + andc r9,r9,r10 + or r11,r5,r11 + rotlw r9,r9,r0 + rotlw r11,r11,r0 + divwu r11,r11,r9 # then we divide the shifted quantities +2: mullw r10,r11,r4 # to get an estimate of the quotient, + mulhwu r9,r11,r4 # multiply the estimate by the divisor, + subfc r6,r10,r6 # take the product from the divisor, + add r8,r8,r11 # and add the estimate to the accumulated + subfe. r5,r9,r5 # quotient + bne 1b +3: cmplw r6,r4 + blt 4f + divwu r0,r6,r4 # perform the remaining 32-bit division + mullw r10,r0,r4 # and get the remainder + add r8,r8,r0 + subf r6,r10,r6 +4: stw r7,0(r3) # return the quotient in *r3 + stw r8,4(r3) + mr r3,r6 # return the remainder in r3 + blr diff --git a/arch/powerpc/lib/e2a.c b/arch/powerpc/lib/e2a.c new file mode 100644 index 00000000000..d2b83488792 --- /dev/null +++ b/arch/powerpc/lib/e2a.c @@ -0,0 +1,108 @@ +/* + * arch/ppc64/lib/e2a.c + * + * EBCDIC to ASCII conversion + * + * This function moved here from arch/ppc64/kernel/viopath.c + * + * (C) Copyright 2000-2004 IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of the + * License, or (at your option) anyu later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/module.h> + +unsigned char e2a(unsigned char x) +{ + switch (x) { + case 0xF0: + return '0'; + case 0xF1: + return '1'; + case 0xF2: + return '2'; + case 0xF3: + return '3'; + case 0xF4: + return '4'; + case 0xF5: + return '5'; + case 0xF6: + return '6'; + case 0xF7: + return '7'; + case 0xF8: + return '8'; + case 0xF9: + return '9'; + case 0xC1: + return 'A'; + case 0xC2: + return 'B'; + case 0xC3: + return 'C'; + case 0xC4: + return 'D'; + case 0xC5: + return 'E'; + case 0xC6: + return 'F'; + case 0xC7: + return 'G'; + case 0xC8: + return 'H'; + case 0xC9: + return 'I'; + case 0xD1: + return 'J'; + case 0xD2: + return 'K'; + case 0xD3: + return 'L'; + case 0xD4: + return 'M'; + case 0xD5: + return 'N'; + case 0xD6: + return 'O'; + case 0xD7: + return 'P'; + case 0xD8: + return 'Q'; + case 0xD9: + return 'R'; + case 0xE2: + return 'S'; + case 0xE3: + return 'T'; + case 0xE4: + return 'U'; + case 0xE5: + return 'V'; + case 0xE6: + return 'W'; + case 0xE7: + return 'X'; + case 0xE8: + return 'Y'; + case 0xE9: + return 'Z'; + } + return ' '; +} +EXPORT_SYMBOL(e2a); + + diff --git a/arch/powerpc/lib/memcpy.S b/arch/powerpc/lib/memcpy.S new file mode 100644 index 00000000000..9ccacdf5bcb --- /dev/null +++ b/arch/powerpc/lib/memcpy.S @@ -0,0 +1,172 @@ +/* + * arch/ppc64/lib/memcpy.S + * + * Copyright (C) 2002 Paul Mackerras, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <asm/processor.h> +#include <asm/ppc_asm.h> + + .align 7 +_GLOBAL(memcpy) + mtcrf 0x01,r5 + cmpldi cr1,r5,16 + neg r6,r3 # LS 3 bits = # bytes to 8-byte dest bdry + andi. r6,r6,7 + dcbt 0,r4 + blt cr1,.Lshort_copy + bne .Ldst_unaligned +.Ldst_aligned: + andi. r0,r4,7 + addi r3,r3,-16 + bne .Lsrc_unaligned + srdi r7,r5,4 + ld r9,0(r4) + addi r4,r4,-8 + mtctr r7 + andi. r5,r5,7 + bf cr7*4+0,2f + addi r3,r3,8 + addi r4,r4,8 + mr r8,r9 + blt cr1,3f +1: ld r9,8(r4) + std r8,8(r3) +2: ldu r8,16(r4) + stdu r9,16(r3) + bdnz 1b +3: std r8,8(r3) + beqlr + addi r3,r3,16 + ld r9,8(r4) +.Ldo_tail: + bf cr7*4+1,1f + rotldi r9,r9,32 + stw r9,0(r3) + addi r3,r3,4 +1: bf cr7*4+2,2f + rotldi r9,r9,16 + sth r9,0(r3) + addi r3,r3,2 +2: bf cr7*4+3,3f + rotldi r9,r9,8 + stb r9,0(r3) +3: blr + +.Lsrc_unaligned: + srdi r6,r5,3 + addi r5,r5,-16 + subf r4,r0,r4 + srdi r7,r5,4 + sldi r10,r0,3 + cmpdi cr6,r6,3 + andi. r5,r5,7 + mtctr r7 + subfic r11,r10,64 + add r5,r5,r0 + + bt cr7*4+0,0f + + ld r9,0(r4) # 3+2n loads, 2+2n stores + ld r0,8(r4) + sld r6,r9,r10 + ldu r9,16(r4) + srd r7,r0,r11 + sld r8,r0,r10 + or r7,r7,r6 + blt cr6,4f + ld r0,8(r4) + # s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12 + b 2f + +0: ld r0,0(r4) # 4+2n loads, 3+2n stores + ldu r9,8(r4) + sld r8,r0,r10 + addi r3,r3,-8 + blt cr6,5f + ld r0,8(r4) + srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + srd r7,r0,r11 + sld r8,r0,r10 + addi r3,r3,16 + beq cr6,3f + + # d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9 +1: or r7,r7,r6 + ld r0,8(r4) + std r12,8(r3) +2: srd r12,r9,r11 + sld r6,r9,r10 + ldu r9,16(r4) + or r12,r8,r12 + stdu r7,16(r3) + srd r7,r0,r11 + sld r8,r0,r10 + bdnz 1b + +3: std r12,8(r3) + or r7,r7,r6 +4: std r7,16(r3) +5: srd r12,r9,r11 + or r12,r8,r12 + std r12,24(r3) + beqlr + cmpwi cr1,r5,8 + addi r3,r3,32 + sld r9,r9,r10 + ble cr1,.Ldo_tail + ld r0,8(r4) + srd r7,r0,r11 + or r9,r7,r9 + b .Ldo_tail + +.Ldst_unaligned: + mtcrf 0x01,r6 # put #bytes to 8B bdry into cr7 + subf r5,r6,r5 + li r7,0 + cmpldi r1,r5,16 + bf cr7*4+3,1f + lbz r0,0(r4) + stb r0,0(r3) + addi r7,r7,1 +1: bf cr7*4+2,2f + lhzx r0,r7,r4 + sthx r0,r7,r3 + addi r7,r7,2 +2: bf cr7*4+1,3f + lwzx r0,r7,r4 + stwx r0,r7,r3 +3: mtcrf 0x01,r5 + add r4,r6,r4 + add r3,r6,r3 + b .Ldst_aligned + +.Lshort_copy: + bf cr7*4+0,1f + lwz r0,0(r4) + lwz r9,4(r4) + addi r4,r4,8 + stw r0,0(r3) + stw r9,4(r3) + addi r3,r3,8 +1: bf cr7*4+1,2f + lwz r0,0(r4) + addi r4,r4,4 + stw r0,0(r3) + addi r3,r3,4 +2: bf cr7*4+2,3f + lhz r0,0(r4) + addi r4,r4,2 + sth r0,0(r3) + addi r3,r3,2 +3: bf cr7*4+3,4f + lbz r0,0(r4) + stb r0,0(r3) +4: blr diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c new file mode 100644 index 00000000000..42c5de2c898 --- /dev/null +++ b/arch/powerpc/lib/rheap.c @@ -0,0 +1,693 @@ +/* + * arch/ppc/syslib/rheap.c + * + * A Remote Heap. Remote means that we don't touch the memory that the + * heap points to. Normal heap implementations use the memory they manage + * to place their list. We cannot do that because the memory we manage may + * have special properties, for example it is uncachable or of different + * endianess. + * + * Author: Pantelis Antoniou <panto@intracom.gr> + * + * 2004 (c) INTRACOM S.A. Greece. This file is licensed under + * the terms of the GNU General Public License version 2. This program + * is licensed "as is" without any warranty of any kind, whether express + * or implied. + */ +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/slab.h> + +#include <asm/rheap.h> + +/* + * Fixup a list_head, needed when copying lists. If the pointers fall + * between s and e, apply the delta. This assumes that + * sizeof(struct list_head *) == sizeof(unsigned long *). + */ +static inline void fixup(unsigned long s, unsigned long e, int d, + struct list_head *l) +{ + unsigned long *pp; + + pp = (unsigned long *)&l->next; + if (*pp >= s && *pp < e) + *pp += d; + + pp = (unsigned long *)&l->prev; + if (*pp >= s && *pp < e) + *pp += d; +} + +/* Grow the allocated blocks */ +static int grow(rh_info_t * info, int max_blocks) +{ + rh_block_t *block, *blk; + int i, new_blocks; + int delta; + unsigned long blks, blke; + + if (max_blocks <= info->max_blocks) + return -EINVAL; + + new_blocks = max_blocks - info->max_blocks; + + block = kmalloc(sizeof(rh_block_t) * max_blocks, GFP_KERNEL); + if (block == NULL) + return -ENOMEM; + + if (info->max_blocks > 0) { + + /* copy old block area */ + memcpy(block, info->block, + sizeof(rh_block_t) * info->max_blocks); + + delta = (char *)block - (char *)info->block; + + /* and fixup list pointers */ + blks = (unsigned long)info->block; + blke = (unsigned long)(info->block + info->max_blocks); + + for (i = 0, blk = block; i < info->max_blocks; i++, blk++) + fixup(blks, blke, delta, &blk->list); + + fixup(blks, blke, delta, &info->empty_list); + fixup(blks, blke, delta, &info->free_list); + fixup(blks, blke, delta, &info->taken_list); + + /* free the old allocated memory */ + if ((info->flags & RHIF_STATIC_BLOCK) == 0) + kfree(info->block); + } + + info->block = block; + info->empty_slots += new_blocks; + info->max_blocks = max_blocks; + info->flags &= ~RHIF_STATIC_BLOCK; + + /* add all new blocks to the free list */ + for (i = 0, blk = block + info->max_blocks; i < new_blocks; i++, blk++) + list_add(&blk->list, &info->empty_list); + + return 0; +} + +/* + * Assure at least the required amount of empty slots. If this function + * causes a grow in the block area then all pointers kept to the block + * area are invalid! + */ +static int assure_empty(rh_info_t * info, int slots) +{ + int max_blocks; + + /* This function is not meant to be used to grow uncontrollably */ + if (slots >= 4) + return -EINVAL; + + /* Enough space */ + if (info->empty_slots >= slots) + return 0; + + /* Next 16 sized block */ + max_blocks = ((info->max_blocks + slots) + 15) & ~15; + + return grow(info, max_blocks); +} + +static rh_block_t *get_slot(rh_info_t * info) +{ + rh_block_t *blk; + + /* If no more free slots, and failure to extend. */ + /* XXX: You should have called assure_empty before */ + if (info->empty_slots == 0) { + printk(KERN_ERR "rh: out of slots; crash is imminent.\n"); + return NULL; + } + + /* Get empty slot to use */ + blk = list_entry(info->empty_list.next, rh_block_t, list); + list_del_init(&blk->list); + info->empty_slots--; + + /* Initialize */ + blk->start = NULL; + blk->size = 0; + blk->owner = NULL; + + return blk; +} + +static inline void release_slot(rh_info_t * info, rh_block_t * blk) +{ + list_add(&blk->list, &info->empty_list); + info->empty_slots++; +} + +static void attach_free_block(rh_info_t * info, rh_block_t * blkn) +{ + rh_block_t *blk; + rh_block_t *before; + rh_block_t *after; + rh_block_t *next; + int size; + unsigned long s, e, bs, be; + struct list_head *l; + + /* We assume that they are aligned properly */ + size = blkn->size; + s = (unsigned long)blkn->start; + e = s + size; + + /* Find the blocks immediately before and after the given one + * (if any) */ + before = NULL; + after = NULL; + next = NULL; + + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + + bs = (unsigned long)blk->start; + be = bs + blk->size; + + if (next == NULL && s >= bs) + next = blk; + + if (be == s) + before = blk; + + if (e == bs) + after = blk; + + /* If both are not null, break now */ + if (before != NULL && after != NULL) + break; + } + + /* Now check if they are really adjacent */ + if (before != NULL && s != (unsigned long)before->start + before->size) + before = NULL; + + if (after != NULL && e != (unsigned long)after->start) + after = NULL; + + /* No coalescing; list insert and return */ + if (before == NULL && after == NULL) { + + if (next != NULL) + list_add(&blkn->list, &next->list); + else + list_add(&blkn->list, &info->free_list); + + return; + } + + /* We don't need it anymore */ + release_slot(info, blkn); + + /* Grow the before block */ + if (before != NULL && after == NULL) { + before->size += size; + return; + } + + /* Grow the after block backwards */ + if (before == NULL && after != NULL) { + after->start = (int8_t *)after->start - size; + after->size += size; + return; + } + + /* Grow the before block, and release the after block */ + before->size += size + after->size; + list_del(&after->list); + release_slot(info, after); +} + +static void attach_taken_block(rh_info_t * info, rh_block_t * blkn) +{ + rh_block_t *blk; + struct list_head *l; + + /* Find the block immediately before the given one (if any) */ + list_for_each(l, &info->taken_list) { + blk = list_entry(l, rh_block_t, list); + if (blk->start > blkn->start) { + list_add_tail(&blkn->list, &blk->list); + return; + } + } + + list_add_tail(&blkn->list, &info->taken_list); +} + +/* + * Create a remote heap dynamically. Note that no memory for the blocks + * are allocated. It will upon the first allocation + */ +rh_info_t *rh_create(unsigned int alignment) +{ + rh_info_t *info; + + /* Alignment must be a power of two */ + if ((alignment & (alignment - 1)) != 0) + return ERR_PTR(-EINVAL); + + info = kmalloc(sizeof(*info), GFP_KERNEL); + if (info == NULL) + return ERR_PTR(-ENOMEM); + + info->alignment = alignment; + + /* Initially everything as empty */ + info->block = NULL; + info->max_blocks = 0; + info->empty_slots = 0; + info->flags = 0; + + INIT_LIST_HEAD(&info->empty_list); + INIT_LIST_HEAD(&info->free_list); + INIT_LIST_HEAD(&info->taken_list); + + return info; +} + +/* + * Destroy a dynamically created remote heap. Deallocate only if the areas + * are not static + */ +void rh_destroy(rh_info_t * info) +{ + if ((info->flags & RHIF_STATIC_BLOCK) == 0 && info->block != NULL) + kfree(info->block); + + if ((info->flags & RHIF_STATIC_INFO) == 0) + kfree(info); +} + +/* + * Initialize in place a remote heap info block. This is needed to support + * operation very early in the startup of the kernel, when it is not yet safe + * to call kmalloc. + */ +void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks, + rh_block_t * block) +{ + int i; + rh_block_t *blk; + + /* Alignment must be a power of two */ + if ((alignment & (alignment - 1)) != 0) + return; + + info->alignment = alignment; + + /* Initially everything as empty */ + info->block = block; + info->max_blocks = max_blocks; + info->empty_slots = max_blocks; + info->flags = RHIF_STATIC_INFO | RHIF_STATIC_BLOCK; + + INIT_LIST_HEAD(&info->empty_list); + INIT_LIST_HEAD(&info->free_list); + INIT_LIST_HEAD(&info->taken_list); + + /* Add all new blocks to the free list */ + for (i = 0, blk = block; i < max_blocks; i++, blk++) + list_add(&blk->list, &info->empty_list); +} + +/* Attach a free memory region, coalesces regions if adjuscent */ +int rh_attach_region(rh_info_t * info, void *start, int size) +{ + rh_block_t *blk; + unsigned long s, e, m; + int r; + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + /* Take final values */ + start = (void *)s; + size = (int)(e - s); + + /* Grow the blocks, if needed */ + r = assure_empty(info, 1); + if (r < 0) + return r; + + blk = get_slot(info); + blk->start = start; + blk->size = size; + blk->owner = NULL; + + attach_free_block(info, blk); + + return 0; +} + +/* Detatch given address range, splits free block if needed. */ +void *rh_detach_region(rh_info_t * info, void *start, int size) +{ + struct list_head *l; + rh_block_t *blk, *newblk; + unsigned long s, e, m, bs, be; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + if (assure_empty(info, 1) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + /* The range must lie entirely inside one free block */ + bs = (unsigned long)blk->start; + be = (unsigned long)blk->start + blk->size; + if (s >= bs && e <= be) + break; + blk = NULL; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Perfect fit */ + if (bs == s && be == e) { + /* Delete from free list, release slot */ + list_del(&blk->list); + release_slot(info, blk); + return (void *)s; + } + + /* blk still in free list, with updated start and/or size */ + if (bs == s || be == e) { + if (bs == s) + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + } else { + /* The front free fragment */ + blk->size = s - bs; + + /* the back free fragment */ + newblk = get_slot(info); + newblk->start = (void *)e; + newblk->size = be - e; + + list_add(&newblk->list, &blk->list); + } + + return (void *)s; +} + +void *rh_alloc(rh_info_t * info, int size, const char *owner) +{ + struct list_head *l; + rh_block_t *blk; + rh_block_t *newblk; + void *start; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* Align to configured alignment */ + size = (size + (info->alignment - 1)) & ~(info->alignment - 1); + + if (assure_empty(info, 1) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + if (size <= blk->size) + break; + blk = NULL; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Just fits */ + if (blk->size == size) { + /* Move from free list to taken list */ + list_del(&blk->list); + blk->owner = owner; + start = blk->start; + + attach_taken_block(info, blk); + + return start; + } + + newblk = get_slot(info); + newblk->start = blk->start; + newblk->size = size; + newblk->owner = owner; + + /* blk still in free list, with updated start, size */ + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + start = newblk->start; + + attach_taken_block(info, newblk); + + return start; +} + +/* allocate at precisely the given address */ +void *rh_alloc_fixed(rh_info_t * info, void *start, int size, const char *owner) +{ + struct list_head *l; + rh_block_t *blk, *newblk1, *newblk2; + unsigned long s, e, m, bs, be; + + /* Validate size */ + if (size <= 0) + return ERR_PTR(-EINVAL); + + /* The region must be aligned */ + s = (unsigned long)start; + e = s + size; + m = info->alignment - 1; + + /* Round start up */ + s = (s + m) & ~m; + + /* Round end down */ + e = e & ~m; + + if (assure_empty(info, 2) < 0) + return ERR_PTR(-ENOMEM); + + blk = NULL; + list_for_each(l, &info->free_list) { + blk = list_entry(l, rh_block_t, list); + /* The range must lie entirely inside one free block */ + bs = (unsigned long)blk->start; + be = (unsigned long)blk->start + blk->size; + if (s >= bs && e <= be) + break; + } + + if (blk == NULL) + return ERR_PTR(-ENOMEM); + + /* Perfect fit */ + if (bs == s && be == e) { + /* Move from free list to taken list */ + list_del(&blk->list); + blk->owner = owner; + + start = blk->start; + attach_taken_block(info, blk); + + return start; + + } + + /* blk still in free list, with updated start and/or size */ + if (bs == s || be == e) { + if (bs == s) + blk->start = (int8_t *)blk->start + size; + blk->size -= size; + + } else { + /* The front free fragment */ + blk->size = s - bs; + + /* The back free fragment */ + newblk2 = get_slot(info); + newblk2->start = (void *)e; + newblk2->size = be - e; + + list_add(&newblk2->list, &blk->list); + } + + newblk1 = get_slot(info); + newblk1->start = (void *)s; + newblk1->size = e - s; + newblk1->owner = owner; + + start = newblk1->start; + attach_taken_block(info, newblk1); + + return start; +} + +int rh_free(rh_info_t * info, void *start) +{ + rh_block_t *blk, *blk2; + struct list_head *l; + int size; + + /* Linear search for block */ + blk = NULL; + list_for_each(l, &info->taken_list) { + blk2 = list_entry(l, rh_block_t, list); + if (start < blk2->start) + break; + blk = blk2; + } + + if (blk == NULL || start > (blk->start + blk->size)) + return -EINVAL; + + /* Remove from taken list */ + list_del(&blk->list); + + /* Get size of freed block */ + size = blk->size; + attach_free_block(info, blk); + + return size; +} + +int rh_get_stats(rh_info_t * info, int what, int max_stats, rh_stats_t * stats) +{ + rh_block_t *blk; + struct list_head *l; + struct list_head *h; + int nr; + + switch (what) { + + case RHGS_FREE: + h = &info->free_list; + break; + + case RHGS_TAKEN: + h = &info->taken_list; + break; + + default: + return -EINVAL; + } + + /* Linear search for block */ + nr = 0; + list_for_each(l, h) { + blk = list_entry(l, rh_block_t, list); + if (stats != NULL && nr < max_stats) { + stats->start = blk->start; + stats->size = blk->size; + stats->owner = blk->owner; + stats++; + } + nr++; + } + + return nr; +} + +int rh_set_owner(rh_info_t * info, void *start, const char *owner) +{ + rh_block_t *blk, *blk2; + struct list_head *l; + int size; + + /* Linear search for block */ + blk = NULL; + list_for_each(l, &info->taken_list) { + blk2 = list_entry(l, rh_block_t, list); + if (start < blk2->start) + break; + blk = blk2; + } + + if (blk == NULL || start > (blk->start + blk->size)) + return -EINVAL; + + blk->owner = owner; + size = blk->size; + + return size; +} + +void rh_dump(rh_info_t * info) +{ + static rh_stats_t st[32]; /* XXX maximum 32 blocks */ + int maxnr; + int i, nr; + + maxnr = sizeof(st) / sizeof(st[0]); + + printk(KERN_INFO + "info @0x%p (%d slots empty / %d max)\n", + info, info->empty_slots, info->max_blocks); + + printk(KERN_INFO " Free:\n"); + nr = rh_get_stats(info, RHGS_FREE, maxnr, st); + if (nr > maxnr) + nr = maxnr; + for (i = 0; i < nr; i++) + printk(KERN_INFO + " 0x%p-0x%p (%u)\n", + st[i].start, (int8_t *) st[i].start + st[i].size, + st[i].size); + printk(KERN_INFO "\n"); + + printk(KERN_INFO " Taken:\n"); + nr = rh_get_stats(info, RHGS_TAKEN, maxnr, st); + if (nr > maxnr) + nr = maxnr; + for (i = 0; i < nr; i++) + printk(KERN_INFO + " 0x%p-0x%p (%u) %s\n", + st[i].start, (int8_t *) st[i].start + st[i].size, + st[i].size, st[i].owner != NULL ? st[i].owner : ""); + printk(KERN_INFO "\n"); +} + +void rh_dump_blk(rh_info_t * info, rh_block_t * blk) +{ + printk(KERN_INFO + "blk @0x%p: 0x%p-0x%p (%u)\n", + blk, blk->start, (int8_t *) blk->start + blk->size, blk->size); +} diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c new file mode 100644 index 00000000000..e79123d1485 --- /dev/null +++ b/arch/powerpc/lib/sstep.c @@ -0,0 +1,141 @@ +/* + * Single-step support. + * + * Copyright (C) 2004 Paul Mackerras <paulus@au.ibm.com>, IBM + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <asm/sstep.h> +#include <asm/processor.h> + +extern char system_call_common[]; + +/* Bits in SRR1 that are copied from MSR */ +#define MSR_MASK 0xffffffff87c0ffff + +/* + * Determine whether a conditional branch instruction would branch. + */ +static int branch_taken(unsigned int instr, struct pt_regs *regs) +{ + unsigned int bo = (instr >> 21) & 0x1f; + unsigned int bi; + + if ((bo & 4) == 0) { + /* decrement counter */ + --regs->ctr; + if (((bo >> 1) & 1) ^ (regs->ctr == 0)) + return 0; + } + if ((bo & 0x10) == 0) { + /* check bit from CR */ + bi = (instr >> 16) & 0x1f; + if (((regs->ccr >> (31 - bi)) & 1) != ((bo >> 3) & 1)) + return 0; + } + return 1; +} + +/* + * Emulate instructions that cause a transfer of control. + * Returns 1 if the step was emulated, 0 if not, + * or -1 if the instruction is one that should not be stepped, + * such as an rfid, or a mtmsrd that would clear MSR_RI. + */ +int emulate_step(struct pt_regs *regs, unsigned int instr) +{ + unsigned int opcode, rd; + unsigned long int imm; + + opcode = instr >> 26; + switch (opcode) { + case 16: /* bc */ + imm = (signed short)(instr & 0xfffc); + if ((instr & 2) == 0) + imm += regs->nip; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + if (instr & 1) + regs->link = regs->nip; + if (branch_taken(instr, regs)) + regs->nip = imm; + return 1; + case 17: /* sc */ + /* + * N.B. this uses knowledge about how the syscall + * entry code works. If that is changed, this will + * need to be changed also. + */ + regs->gpr[9] = regs->gpr[13]; + regs->gpr[11] = regs->nip + 4; + regs->gpr[12] = regs->msr & MSR_MASK; + regs->gpr[13] = (unsigned long) get_paca(); + regs->nip = (unsigned long) &system_call_common; + regs->msr = MSR_KERNEL; + return 1; + case 18: /* b */ + imm = instr & 0x03fffffc; + if (imm & 0x02000000) + imm -= 0x04000000; + if ((instr & 2) == 0) + imm += regs->nip; + if (instr & 1) { + regs->link = regs->nip + 4; + if ((regs->msr & MSR_SF) == 0) + regs->link &= 0xffffffffUL; + } + if ((regs->msr & MSR_SF) == 0) + imm &= 0xffffffffUL; + regs->nip = imm; + return 1; + case 19: + switch (instr & 0x7fe) { + case 0x20: /* bclr */ + case 0x420: /* bcctr */ + imm = (instr & 0x400)? regs->ctr: regs->link; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) { + regs->nip &= 0xffffffffUL; + imm &= 0xffffffffUL; + } + if (instr & 1) + regs->link = regs->nip; + if (branch_taken(instr, regs)) + regs->nip = imm; + return 1; + case 0x24: /* rfid, scary */ + return -1; + } + case 31: + rd = (instr >> 21) & 0x1f; + switch (instr & 0x7fe) { + case 0xa6: /* mfmsr */ + regs->gpr[rd] = regs->msr & MSR_MASK; + regs->nip += 4; + if ((regs->msr & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + return 1; + case 0x164: /* mtmsrd */ + /* only MSR_EE and MSR_RI get changed if bit 15 set */ + /* mtmsrd doesn't change MSR_HV and MSR_ME */ + imm = (instr & 0x10000)? 0x8002: 0xefffffffffffefffUL; + imm = (regs->msr & MSR_MASK & ~imm) + | (regs->gpr[rd] & imm); + if ((imm & MSR_RI) == 0) + /* can't step mtmsrd that would clear MSR_RI */ + return -1; + regs->msr = imm; + regs->nip += 4; + if ((imm & MSR_SF) == 0) + regs->nip &= 0xffffffffUL; + return 1; + } + } + return 0; +} diff --git a/arch/powerpc/lib/strcase.c b/arch/powerpc/lib/strcase.c new file mode 100644 index 00000000000..36b521091bb --- /dev/null +++ b/arch/powerpc/lib/strcase.c @@ -0,0 +1,23 @@ +#include <linux/ctype.h> + +int strcasecmp(const char *s1, const char *s2) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while (c1 == c2 && c1 != 0); + return c1 - c2; +} + +int strncasecmp(const char *s1, const char *s2, int n) +{ + int c1, c2; + + do { + c1 = tolower(*s1++); + c2 = tolower(*s2++); + } while ((--n > 0) && c1 == c2 && c1 != 0); + return c1 - c2; +} diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S new file mode 100644 index 00000000000..15d40e9ef8b --- /dev/null +++ b/arch/powerpc/lib/string.S @@ -0,0 +1,203 @@ +/* + * String handling functions for PowerPC. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/errno.h> +#include <asm/ppc_asm.h> + + .text + .stabs "arch/powerpc/lib/",N_SO,0,0,0f + .stabs "string.S",N_SO,0,0,0f +0: + + .section __ex_table,"a" +#ifdef CONFIG_PPC64 + .align 3 +#define EXTBL .llong +#else + .align 2 +#define EXTBL .long +#endif + .text + +_GLOBAL(strcpy) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +/* This clears out any unused part of the destination buffer, + just as the libc version does. -- paulus */ +_GLOBAL(strncpy) + cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + bnelr /* if we didn't hit a null char, we're done */ + mfctr r5 + cmpwi 0,r5,0 /* any space left in destination buffer? */ + beqlr /* we know r0 == 0 here */ +2: stbu r0,1(r6) /* clear it out if so */ + bdnz 2b + blr + +_GLOBAL(strcat) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r0,1(r5) + cmpwi 0,r0,0 + bne 1b + addi r5,r5,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r5) + bne 1b + blr + +_GLOBAL(strcmp) + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + beq 1b + blr + +_GLOBAL(strlen) + addi r4,r3,-1 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + bne 1b + subf r3,r3,r4 + blr + +_GLOBAL(memcmp) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r6,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r6) + lbzu r0,1(r4) + subf. r3,r0,r3 + bdnzt 2,1b + blr +2: li r3,0 + blr + +_GLOBAL(memchr) + cmpwi 0,r5,0 + ble- 2f + mtctr r5 + addi r3,r3,-1 +1: lbzu r0,1(r3) + cmpw 0,r0,r4 + bdnzf 2,1b + beqlr +2: li r3,0 + blr + +_GLOBAL(__clear_user) + addi r6,r3,-4 + li r3,0 + li r5,0 + cmplwi 0,r4,4 + blt 7f + /* clear a single word */ +11: stwu r5,4(r6) + beqlr + /* clear word sized chunks */ + andi. r0,r6,3 + add r4,r0,r4 + subf r6,r0,r6 + srwi r0,r4,2 + andi. r4,r4,3 + mtctr r0 + bdz 7f +1: stwu r5,4(r6) + bdnz 1b + /* clear byte sized chunks */ +7: cmpwi 0,r4,0 + beqlr + mtctr r4 + addi r6,r6,3 +8: stbu r5,1(r6) + bdnz 8b + blr +90: mr r3,r4 + blr +91: mfctr r3 + slwi r3,r3,2 + add r3,r3,r4 + blr +92: mfctr r3 + blr + + .section __ex_table,"a" + EXTBL 11b,90b + EXTBL 1b,91b + EXTBL 8b,92b + .text + +_GLOBAL(__strncpy_from_user) + addi r6,r3,-1 + addi r4,r4,-1 + cmpwi 0,r5,0 + beq 2f + mtctr r5 +1: lbzu r0,1(r4) + cmpwi 0,r0,0 + stbu r0,1(r6) + bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ + beq 3f +2: addi r6,r6,1 +3: subf r3,r3,r6 + blr +99: li r3,-EFAULT + blr + + .section __ex_table,"a" + EXTBL 1b,99b + .text + +/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */ +_GLOBAL(__strnlen_user) + addi r7,r3,-1 + subf r6,r7,r5 /* top+1 - str */ + cmplw 0,r4,r6 + bge 0f + mr r6,r4 +0: mtctr r6 /* ctr = min(len, top - str) */ +1: lbzu r0,1(r7) /* get next byte */ + cmpwi 0,r0,0 + bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */ + addi r7,r7,1 + subf r3,r3,r7 /* number of bytes we have looked at */ + beqlr /* return if we found a 0 byte */ + cmpw 0,r3,r4 /* did we look at all len bytes? */ + blt 99f /* if not, must have hit top */ + addi r3,r4,1 /* return len + 1 to indicate no null found */ + blr +99: li r3,0 /* bad address, return 0 */ + blr + + .section __ex_table,"a" + EXTBL 1b,99b diff --git a/arch/powerpc/lib/usercopy.c b/arch/powerpc/lib/usercopy.c new file mode 100644 index 00000000000..5eea6f3c1e0 --- /dev/null +++ b/arch/powerpc/lib/usercopy.c @@ -0,0 +1,41 @@ +/* + * Functions which are too large to be inlined. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/module.h> +#include <asm/uaccess.h> + +unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_READ, from, n))) + n = __copy_from_user(to, from, n); + else + memset(to, 0, n); + return n; +} + +unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (likely(access_ok(VERIFY_WRITE, to, n))) + n = __copy_to_user(to, from, n); + return n; +} + +unsigned long copy_in_user(void __user *to, const void __user *from, + unsigned long n) +{ + might_sleep(); + if (likely(access_ok(VERIFY_READ, from, n) && + access_ok(VERIFY_WRITE, to, n))) + n =__copy_tofrom_user(to, from, n); + return n; +} + +EXPORT_SYMBOL(copy_from_user); +EXPORT_SYMBOL(copy_to_user); +EXPORT_SYMBOL(copy_in_user); + diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c new file mode 100644 index 00000000000..3d79ce281b6 --- /dev/null +++ b/arch/powerpc/mm/44x_mmu.c @@ -0,0 +1,120 @@ +/* + * Modifications by Matt Porter (mporter@mvista.com) to support + * PPC44x Book E processors. + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/highmem.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/bootx.h> +#include <asm/machdep.h> +#include <asm/setup.h> + +#include "mmu_decl.h" + +extern char etext[], _stext[]; + +/* Used by the 44x TLB replacement exception handler. + * Just needed it declared someplace. + */ +unsigned int tlb_44x_index = 0; +unsigned int tlb_44x_hwater = 62; + +/* + * "Pins" a 256MB TLB entry in AS0 for kernel lowmem + */ +static void __init +ppc44x_pin_tlb(int slot, unsigned int virt, unsigned int phys) +{ + unsigned long attrib = 0; + + __asm__ __volatile__("\ + clrrwi %2,%2,10\n\ + ori %2,%2,%4\n\ + clrrwi %1,%1,10\n\ + li %0,0\n\ + ori %0,%0,%5\n\ + tlbwe %2,%3,%6\n\ + tlbwe %1,%3,%7\n\ + tlbwe %0,%3,%8" + : + : "r" (attrib), "r" (phys), "r" (virt), "r" (slot), + "i" (PPC44x_TLB_VALID | PPC44x_TLB_256M), + "i" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G), + "i" (PPC44x_TLB_PAGEID), + "i" (PPC44x_TLB_XLAT), + "i" (PPC44x_TLB_ATTRIB)); +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + flush_instruction_cache(); +} + +unsigned long __init mmu_mapin_ram(void) +{ + unsigned int pinned_tlbs = 1; + int i; + + /* Determine number of entries necessary to cover lowmem */ + pinned_tlbs = (unsigned int) + (_ALIGN(total_lowmem, PPC44x_PIN_SIZE) >> PPC44x_PIN_SHIFT); + + /* Write upper watermark to save location */ + tlb_44x_hwater = PPC44x_LOW_SLOT - pinned_tlbs; + + /* If necessary, set additional pinned TLBs */ + if (pinned_tlbs > 1) + for (i = (PPC44x_LOW_SLOT-(pinned_tlbs-1)); i < PPC44x_LOW_SLOT; i++) { + unsigned int phys_addr = (PPC44x_LOW_SLOT-i) * PPC44x_PIN_SIZE; + ppc44x_pin_tlb(i, phys_addr+PAGE_OFFSET, phys_addr); + } + + return total_lowmem; +} diff --git a/arch/powerpc/mm/4xx_mmu.c b/arch/powerpc/mm/4xx_mmu.c new file mode 100644 index 00000000000..b7bcbc232f3 --- /dev/null +++ b/arch/powerpc/mm/4xx_mmu.c @@ -0,0 +1,141 @@ +/* + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/highmem.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/bootx.h> +#include <asm/machdep.h> +#include <asm/setup.h> +#include "mmu_decl.h" + +extern int __map_without_ltlbs; +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + /* + * The Zone Protection Register (ZPR) defines how protection will + * be applied to every page which is a member of a given zone. At + * present, we utilize only two of the 4xx's zones. + * The zone index bits (of ZSEL) in the PTE are used for software + * indicators, except the LSB. For user access, zone 1 is used, + * for kernel access, zone 0 is used. We set all but zone 1 + * to zero, allowing only kernel access as indicated in the PTE. + * For zone 1, we set a 01 binary (a value of 10 will not work) + * to allow user access as indicated in the PTE. This also allows + * kernel access as indicated in the PTE. + */ + + mtspr(SPRN_ZPR, 0x10000000); + + flush_instruction_cache(); + + /* + * Set up the real-mode cache parameters for the exception vector + * handlers (which are run in real-mode). + */ + + mtspr(SPRN_DCWR, 0x00000000); /* All caching is write-back */ + + /* + * Cache instruction and data space where the exception + * vectors and the kernel live in real-mode. + */ + + mtspr(SPRN_DCCR, 0xF0000000); /* 512 MB of data space at 0x0. */ + mtspr(SPRN_ICCR, 0xF0000000); /* 512 MB of instr. space at 0x0. */ +} + +#define LARGE_PAGE_SIZE_16M (1<<24) +#define LARGE_PAGE_SIZE_4M (1<<22) + +unsigned long __init mmu_mapin_ram(void) +{ + unsigned long v, s; + phys_addr_t p; + + v = KERNELBASE; + p = PPC_MEMSTART; + s = 0; + + if (__map_without_ltlbs) { + return s; + } + + while (s <= (total_lowmem - LARGE_PAGE_SIZE_16M)) { + pmd_t *pmdp; + unsigned long val = p | _PMD_SIZE_16M | _PAGE_HWEXEC | _PAGE_HWWRITE; + + spin_lock(&init_mm.page_table_lock); + pmdp = pmd_offset(pgd_offset_k(v), v); + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + pmd_val(*pmdp++) = val; + spin_unlock(&init_mm.page_table_lock); + + v += LARGE_PAGE_SIZE_16M; + p += LARGE_PAGE_SIZE_16M; + s += LARGE_PAGE_SIZE_16M; + } + + while (s <= (total_lowmem - LARGE_PAGE_SIZE_4M)) { + pmd_t *pmdp; + unsigned long val = p | _PMD_SIZE_4M | _PAGE_HWEXEC | _PAGE_HWWRITE; + + spin_lock(&init_mm.page_table_lock); + pmdp = pmd_offset(pgd_offset_k(v), v); + pmd_val(*pmdp) = val; + spin_unlock(&init_mm.page_table_lock); + + v += LARGE_PAGE_SIZE_4M; + p += LARGE_PAGE_SIZE_4M; + s += LARGE_PAGE_SIZE_4M; + } + + return s; +} diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile new file mode 100644 index 00000000000..9f52c26acd8 --- /dev/null +++ b/arch/powerpc/mm/Makefile @@ -0,0 +1,12 @@ +# +# Makefile for the linux ppc-specific parts of the memory manager. +# + +obj-y := fault.o mem.o +obj-$(CONFIG_PPC32) += init.o pgtable.o mmu_context.o \ + mem_pieces.o tlb.o +obj-$(CONFIG_PPC64) += init64.o pgtable64.o mmu_context64.o +obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu.o hash_32.o +obj-$(CONFIG_40x) += 4xx_mmu.o +obj-$(CONFIG_44x) += 44x_mmu.o +obj-$(CONFIG_FSL_BOOKE) += fsl_booke_mmu.o diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c new file mode 100644 index 00000000000..3df641fa789 --- /dev/null +++ b/arch/powerpc/mm/fault.c @@ -0,0 +1,391 @@ +/* + * arch/ppc/mm/fault.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Derived from "arch/i386/mm/fault.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Modified by Cort Dougan and Paul Mackerras. + * + * Modified for PPC64 by Dave Engebretsen (engebret@ibm.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/interrupt.h> +#include <linux/highmem.h> +#include <linux/module.h> +#include <linux/kprobes.h> + +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/mmu_context.h> +#include <asm/system.h> +#include <asm/uaccess.h> +#include <asm/tlbflush.h> +#include <asm/kdebug.h> +#include <asm/siginfo.h> + +/* + * Check whether the instruction at regs->nip is a store using + * an update addressing form which will update r1. + */ +static int store_updates_sp(struct pt_regs *regs) +{ + unsigned int inst; + + if (get_user(inst, (unsigned int __user *)regs->nip)) + return 0; + /* check for 1 in the rA field */ + if (((inst >> 16) & 0x1f) != 1) + return 0; + /* check major opcode */ + switch (inst >> 26) { + case 37: /* stwu */ + case 39: /* stbu */ + case 45: /* sthu */ + case 53: /* stfsu */ + case 55: /* stfdu */ + return 1; + case 62: /* std or stdu */ + return (inst & 3) == 1; + case 31: + /* check minor opcode */ + switch ((inst >> 1) & 0x3ff) { + case 181: /* stdux */ + case 183: /* stwux */ + case 247: /* stbux */ + case 439: /* sthux */ + case 695: /* stfsux */ + case 759: /* stfdux */ + return 1; + } + } + return 0; +} + +static void do_dabr(struct pt_regs *regs, unsigned long error_code) +{ + siginfo_t info; + + if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return; + + if (debugger_dabr_match(regs)) + return; + + /* Clear the DABR */ + set_dabr(0); + + /* Deliver the signal to userspace */ + info.si_signo = SIGTRAP; + info.si_errno = 0; + info.si_code = TRAP_HWBKPT; + info.si_addr = (void __user *)regs->nip; + force_sig_info(SIGTRAP, &info, current); +} + +/* + * For 600- and 800-family processors, the error_code parameter is DSISR + * for a data fault, SRR1 for an instruction fault. For 400-family processors + * the error_code parameter is ESR for a data fault, 0 for an instruction + * fault. + * For 64-bit processors, the error_code parameter is + * - DSISR for a non-SLB data access fault, + * - SRR1 & 0x08000000 for a non-SLB instruction access fault + * - 0 any SLB fault. + * + * The return value is 0 if the fault was handled, or the signal + * number if this is a kernel fault that can't be handled here. + */ +int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, + unsigned long error_code) +{ + struct vm_area_struct * vma; + struct mm_struct *mm = current->mm; + siginfo_t info; + int code = SEGV_MAPERR; + int is_write = 0; + int trap = TRAP(regs); + int is_exec = trap == 0x400; + +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + /* + * Fortunately the bit assignments in SRR1 for an instruction + * fault and DSISR for a data fault are mostly the same for the + * bits we are interested in. But there are some bits which + * indicate errors in DSISR but can validly be set in SRR1. + */ + if (trap == 0x400) + error_code &= 0x48200000; + else + is_write = error_code & DSISR_ISSTORE; +#else + is_write = error_code & ESR_DST; +#endif /* CONFIG_4xx || CONFIG_BOOKE */ + + if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code, + 11, SIGSEGV) == NOTIFY_STOP) + return 0; + + if (trap == 0x300) { + if (debugger_fault_handler(regs)) + return 0; + } + + /* On a kernel SLB miss we can only check for a valid exception entry */ + if (!user_mode(regs) && (address >= TASK_SIZE)) + return SIGSEGV; + +#if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE)) + if (error_code & DSISR_DABRMATCH) { + /* DABR match */ + do_dabr(regs, error_code); + return 0; + } +#endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ + + if (in_atomic() || mm == NULL) { + if (!user_mode(regs)) + return SIGSEGV; + /* in_atomic() in user mode is really bad, + as is current->mm == NULL. */ + printk(KERN_EMERG "Page fault in user mode with" + "in_atomic() = %d mm = %p\n", in_atomic(), mm); + printk(KERN_EMERG "NIP = %lx MSR = %lx\n", + regs->nip, regs->msr); + die("Weird page fault", regs, SIGSEGV); + } + + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunatly, in the case of an + * erroneous fault occuring in a code path which already holds mmap_sem + * we will deadlock attempting to validate the fault against the + * address space. Luckily the kernel only validly references user + * space from well defined areas of code, which are listed in the + * exceptions table. + * + * As the vast majority of faults will be valid we will only perform + * the source reference check when there is a possibilty of a deadlock. + * Attempt to lock the address space, if we cannot we then validate the + * source. If this is invalid we can skip the address space check, + * thus avoiding the deadlock. + */ + if (!down_read_trylock(&mm->mmap_sem)) { + if (!user_mode(regs) && !search_exception_tables(regs->nip)) + goto bad_area_nosemaphore; + + down_read(&mm->mmap_sem); + } + + vma = find_vma(mm, address); + if (!vma) + goto bad_area; + if (vma->vm_start <= address) + goto good_area; + if (!(vma->vm_flags & VM_GROWSDOWN)) + goto bad_area; + + /* + * N.B. The POWER/Open ABI allows programs to access up to + * 288 bytes below the stack pointer. + * The kernel signal delivery code writes up to about 1.5kB + * below the stack pointer (r1) before decrementing it. + * The exec code can write slightly over 640kB to the stack + * before setting the user r1. Thus we allow the stack to + * expand to 1MB without further checks. + */ + if (address + 0x100000 < vma->vm_end) { + /* get user regs even if this fault is in kernel mode */ + struct pt_regs *uregs = current->thread.regs; + if (uregs == NULL) + goto bad_area; + + /* + * A user-mode access to an address a long way below + * the stack pointer is only valid if the instruction + * is one which would update the stack pointer to the + * address accessed if the instruction completed, + * i.e. either stwu rs,n(r1) or stwux rs,r1,rb + * (or the byte, halfword, float or double forms). + * + * If we don't check this then any write to the area + * between the last mapped region and the stack will + * expand the stack rather than segfaulting. + */ + if (address + 2048 < uregs->gpr[1] + && (!user_mode(regs) || !store_updates_sp(regs))) + goto bad_area; + } + if (expand_stack(vma, address)) + goto bad_area; + +good_area: + code = SEGV_ACCERR; +#if defined(CONFIG_6xx) + if (error_code & 0x95700000) + /* an error such as lwarx to I/O controller space, + address matching DABR, eciwx, etc. */ + goto bad_area; +#endif /* CONFIG_6xx */ +#if defined(CONFIG_8xx) + /* The MPC8xx seems to always set 0x80000000, which is + * "undefined". Of those that can be set, this is the only + * one which seems bad. + */ + if (error_code & 0x10000000) + /* Guarded storage error. */ + goto bad_area; +#endif /* CONFIG_8xx */ + + if (is_exec) { +#ifdef CONFIG_PPC64 + /* protection fault */ + if (error_code & DSISR_PROTFAULT) + goto bad_area; + if (!(vma->vm_flags & VM_EXEC)) + goto bad_area; +#endif +#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE) + pte_t *ptep; + + /* Since 4xx/Book-E supports per-page execute permission, + * we lazily flush dcache to icache. */ + ptep = NULL; + if (get_pteptr(mm, address, &ptep) && pte_present(*ptep)) { + struct page *page = pte_page(*ptep); + + if (! test_bit(PG_arch_1, &page->flags)) { + flush_dcache_icache_page(page); + set_bit(PG_arch_1, &page->flags); + } + pte_update(ptep, 0, _PAGE_HWEXEC); + _tlbie(address); + pte_unmap(ptep); + up_read(&mm->mmap_sem); + return 0; + } + if (ptep != NULL) + pte_unmap(ptep); +#endif + /* a write */ + } else if (is_write) { + if (!(vma->vm_flags & VM_WRITE)) + goto bad_area; + /* a read */ + } else { + /* protection fault */ + if (error_code & 0x08000000) + goto bad_area; + if (!(vma->vm_flags & (VM_READ | VM_EXEC))) + goto bad_area; + } + + /* + * If for any reason at all we couldn't handle the fault, + * make sure we exit gracefully rather than endlessly redo + * the fault. + */ + survive: + switch (handle_mm_fault(mm, vma, address, is_write)) { + + case VM_FAULT_MINOR: + current->min_flt++; + break; + case VM_FAULT_MAJOR: + current->maj_flt++; + break; + case VM_FAULT_SIGBUS: + goto do_sigbus; + case VM_FAULT_OOM: + goto out_of_memory; + default: + BUG(); + } + + up_read(&mm->mmap_sem); + return 0; + +bad_area: + up_read(&mm->mmap_sem); + +bad_area_nosemaphore: + /* User mode accesses cause a SIGSEGV */ + if (user_mode(regs)) { + _exception(SIGSEGV, regs, code, address); + return 0; + } + + if (is_exec && (error_code & DSISR_PROTFAULT) + && printk_ratelimit()) + printk(KERN_CRIT "kernel tried to execute NX-protected" + " page (%lx) - exploit attempt? (uid: %d)\n", + address, current->uid); + + return SIGSEGV; + +/* + * We ran out of memory, or some other thing happened to us that made + * us unable to handle the page fault gracefully. + */ +out_of_memory: + up_read(&mm->mmap_sem); + if (current->pid == 1) { + yield(); + down_read(&mm->mmap_sem); + goto survive; + } + printk("VM: killing process %s\n", current->comm); + if (user_mode(regs)) + do_exit(SIGKILL); + return SIGKILL; + +do_sigbus: + up_read(&mm->mmap_sem); + if (user_mode(regs)) { + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_ADRERR; + info.si_addr = (void __user *)address; + force_sig_info(SIGBUS, &info, current); + return 0; + } + return SIGBUS; +} + +/* + * bad_page_fault is called when we have a bad access from the kernel. + * It is called from the DSI and ISI handlers in head.S and from some + * of the procedures in traps.c. + */ +void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +{ + const struct exception_table_entry *entry; + + /* Are we prepared to handle this fault? */ + if ((entry = search_exception_tables(regs->nip)) != NULL) { + regs->nip = entry->fixup; + return; + } + + /* kernel has accessed a bad area */ + die("Kernel access of bad area", regs, sig); +} diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c new file mode 100644 index 00000000000..af9ca0eb6d5 --- /dev/null +++ b/arch/powerpc/mm/fsl_booke_mmu.c @@ -0,0 +1,237 @@ +/* + * Modifications by Kumar Gala (kumar.gala@freescale.com) to support + * E500 Book E processors. + * + * Copyright 2004 Freescale Semiconductor, Inc + * + * This file contains the routines for initializing the MMU + * on the 4xx series of chips. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/ptrace.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/highmem.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/bootx.h> +#include <asm/machdep.h> +#include <asm/setup.h> + +extern void loadcam_entry(unsigned int index); +unsigned int tlbcam_index; +unsigned int num_tlbcam_entries; +static unsigned long __cam0, __cam1, __cam2; +extern unsigned long total_lowmem; +extern unsigned long __max_low_memory; +#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE + +#define NUM_TLBCAMS (16) + +struct tlbcam { + u32 MAS0; + u32 MAS1; + u32 MAS2; + u32 MAS3; + u32 MAS7; +} TLBCAM[NUM_TLBCAMS]; + +struct tlbcamrange { + unsigned long start; + unsigned long limit; + phys_addr_t phys; +} tlbcam_addrs[NUM_TLBCAMS]; + +extern unsigned int tlbcam_index; + +/* + * Return PA for this VA if it is mapped by a CAM, or 0 + */ +unsigned long v_mapped_by_tlbcam(unsigned long va) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (va >= tlbcam_addrs[b].start && va < tlbcam_addrs[b].limit) + return tlbcam_addrs[b].phys + (va - tlbcam_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_mapped_by_tlbcam(unsigned long pa) +{ + int b; + for (b = 0; b < tlbcam_index; ++b) + if (pa >= tlbcam_addrs[b].phys + && pa < (tlbcam_addrs[b].limit-tlbcam_addrs[b].start) + +tlbcam_addrs[b].phys) + return tlbcam_addrs[b].start+(pa-tlbcam_addrs[b].phys); + return 0; +} + +/* + * Set up one of the I/D BAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 4 between 4k and 256M. + */ +void settlbcam(int index, unsigned long virt, phys_addr_t phys, + unsigned int size, int flags, unsigned int pid) +{ + unsigned int tsize, lz; + + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); + tsize = (21 - lz) / 2; + +#ifdef CONFIG_SMP + if ((flags & _PAGE_NO_CACHE) == 0) + flags |= _PAGE_COHERENT; +#endif + + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index) | MAS0_NV(index+1); + TLBCAM[index].MAS1 = MAS1_VALID | MAS1_IPROT | MAS1_TSIZE(tsize) | MAS1_TID(pid); + TLBCAM[index].MAS2 = virt & PAGE_MASK; + + TLBCAM[index].MAS2 |= (flags & _PAGE_WRITETHRU) ? MAS2_W : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_NO_CACHE) ? MAS2_I : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_COHERENT) ? MAS2_M : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_GUARDED) ? MAS2_G : 0; + TLBCAM[index].MAS2 |= (flags & _PAGE_ENDIAN) ? MAS2_E : 0; + + TLBCAM[index].MAS3 = (phys & PAGE_MASK) | MAS3_SX | MAS3_SR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_SW : 0); + +#ifndef CONFIG_KGDB /* want user access for breakpoints */ + if (flags & _PAGE_USER) { + TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); + } +#else + TLBCAM[index].MAS3 |= MAS3_UX | MAS3_UR; + TLBCAM[index].MAS3 |= ((flags & _PAGE_RW) ? MAS3_UW : 0); +#endif + + tlbcam_addrs[index].start = virt; + tlbcam_addrs[index].limit = virt + size - 1; + tlbcam_addrs[index].phys = phys; + + loadcam_entry(index); +} + +void invalidate_tlbcam_entry(int index) +{ + TLBCAM[index].MAS0 = MAS0_TLBSEL(1) | MAS0_ESEL(index); + TLBCAM[index].MAS1 = ~MAS1_VALID; + + loadcam_entry(index); +} + +void __init cam_mapin_ram(unsigned long cam0, unsigned long cam1, + unsigned long cam2) +{ + settlbcam(0, KERNELBASE, PPC_MEMSTART, cam0, _PAGE_KERNEL, 0); + tlbcam_index++; + if (cam1) { + tlbcam_index++; + settlbcam(1, KERNELBASE+cam0, PPC_MEMSTART+cam0, cam1, _PAGE_KERNEL, 0); + } + if (cam2) { + tlbcam_index++; + settlbcam(2, KERNELBASE+cam0+cam1, PPC_MEMSTART+cam0+cam1, cam2, _PAGE_KERNEL, 0); + } +} + +/* + * MMU_init_hw does the chip-specific initialization of the MMU hardware. + */ +void __init MMU_init_hw(void) +{ + flush_instruction_cache(); +} + +unsigned long __init mmu_mapin_ram(void) +{ + cam_mapin_ram(__cam0, __cam1, __cam2); + + return __cam0 + __cam1 + __cam2; +} + + +void __init +adjust_total_lowmem(void) +{ + unsigned long max_low_mem = MAX_LOW_MEM; + unsigned long cam_max = 0x10000000; + unsigned long ram; + + /* adjust CAM size to max_low_mem */ + if (max_low_mem < cam_max) + cam_max = max_low_mem; + + /* adjust lowmem size to max_low_mem */ + if (max_low_mem < total_lowmem) + ram = max_low_mem; + else + ram = total_lowmem; + + /* Calculate CAM values */ + __cam0 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam0 > cam_max) + __cam0 = cam_max; + ram -= __cam0; + if (ram) { + __cam1 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam1 > cam_max) + __cam1 = cam_max; + ram -= __cam1; + } + if (ram) { + __cam2 = 1UL << 2 * (__ilog2(ram) / 2); + if (__cam2 > cam_max) + __cam2 = cam_max; + ram -= __cam2; + } + + printk(KERN_INFO "Memory CAM mapping: CAM0=%ldMb, CAM1=%ldMb," + " CAM2=%ldMb residual: %ldMb\n", + __cam0 >> 20, __cam1 >> 20, __cam2 >> 20, + (total_lowmem - __cam0 - __cam1 - __cam2) >> 20); + __max_low_memory = max_low_mem = __cam0 + __cam1 + __cam2; +} diff --git a/arch/powerpc/mm/hash_32.S b/arch/powerpc/mm/hash_32.S new file mode 100644 index 00000000000..57278a8dd13 --- /dev/null +++ b/arch/powerpc/mm/hash_32.S @@ -0,0 +1,618 @@ +/* + * arch/ppc/kernel/hashtable.S + * + * $Id: hashtable.S,v 1.6 1999/10/08 01:56:15 paulus Exp $ + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * Rewritten by Cort Dougan (cort@cs.nmt.edu) for PReP + * Copyright (C) 1996 Cort Dougan <cort@cs.nmt.edu> + * Adapted for Power Macintosh by Paul Mackerras. + * Low-level exception handlers and MMU support + * rewritten by Paul Mackerras. + * Copyright (C) 1996 Paul Mackerras. + * + * This file contains low-level assembler routines for managing + * the PowerPC MMU hash table. (PPC 8xx processors don't use a + * hash table, so this file is not used on them.) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/cputable.h> +#include <asm/ppc_asm.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> + +#ifdef CONFIG_SMP + .comm mmu_hash_lock,4 +#endif /* CONFIG_SMP */ + +/* + * Sync CPUs with hash_page taking & releasing the hash + * table lock + */ +#ifdef CONFIG_SMP + .text +_GLOBAL(hash_page_sync) + lis r8,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync + eieio + li r0,0 + stw r0,0(r8) + blr +#endif + +/* + * Load a PTE into the hash table, if possible. + * The address is in r4, and r3 contains an access flag: + * _PAGE_RW (0x400) if a write. + * r9 contains the SRR1 value, from which we use the MSR_PR bit. + * SPRG3 contains the physical address of the current task's thread. + * + * Returns to the caller if the access is illegal or there is no + * mapping for the address. Otherwise it places an appropriate PTE + * in the hash table and returns from the exception. + * Uses r0, r3 - r8, ctr, lr. + */ + .text +_GLOBAL(hash_page) +#ifdef CONFIG_PPC64BRIDGE + mfmsr r0 + clrldi r0,r0,1 /* make sure it's in 32-bit mode */ + MTMSRD(r0) + isync +#endif + tophys(r7,0) /* gets -KERNELBASE into r7 */ +#ifdef CONFIG_SMP + addis r8,r7,mmu_hash_lock@h + ori r8,r8,mmu_hash_lock@l + lis r0,0x0fff + b 10f +11: lwz r6,0(r8) + cmpwi 0,r6,0 + bne 11b +10: lwarx r6,0,r8 + cmpwi 0,r6,0 + bne- 11b + stwcx. r0,0,r8 + bne- 10b + isync +#endif + /* Get PTE (linux-style) and check access */ + lis r0,KERNELBASE@h /* check if kernel address */ + cmplw 0,r4,r0 + mfspr r8,SPRN_SPRG3 /* current task's THREAD (phys) */ + ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ + lwz r5,PGDIR(r8) /* virt page-table root */ + blt+ 112f /* assume user more likely */ + lis r5,swapper_pg_dir@ha /* if kernel address, use */ + addi r5,r5,swapper_pg_dir@l /* kernel page table */ + rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ +112: add r5,r5,r7 /* convert to phys addr */ + rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ + lwz r8,0(r5) /* get pmd entry */ + rlwinm. r8,r8,0,0,19 /* extract address of pte page */ +#ifdef CONFIG_SMP + beq- hash_page_out /* return if no mapping */ +#else + /* XXX it seems like the 601 will give a machine fault on the + rfi if its alignment is wrong (bottom 4 bits of address are + 8 or 0xc) and we have had a not-taken conditional branch + to the address following the rfi. */ + beqlr- +#endif + rlwimi r8,r4,22,20,29 /* insert next 10 bits of address */ + rlwinm r0,r3,32-3,24,24 /* _PAGE_RW access -> _PAGE_DIRTY */ + ori r0,r0,_PAGE_ACCESSED|_PAGE_HASHPTE + + /* + * Update the linux PTE atomically. We do the lwarx up-front + * because almost always, there won't be a permission violation + * and there won't already be an HPTE, and thus we will have + * to update the PTE to set _PAGE_HASHPTE. -- paulus. + */ +retry: + lwarx r6,0,r8 /* get linux-style pte */ + andc. r5,r3,r6 /* check access & ~permission */ +#ifdef CONFIG_SMP + bne- hash_page_out /* return if access not permitted */ +#else + bnelr- +#endif + or r5,r0,r6 /* set accessed/dirty bits */ + stwcx. r5,0,r8 /* attempt to update PTE */ + bne- retry /* retry if someone got there first */ + + mfsrin r3,r4 /* get segment reg for segment */ + mfctr r0 + stw r0,_CTR(r11) + bl create_hpte /* add the hash table entry */ + +#ifdef CONFIG_SMP + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) +#endif + + /* Return from the exception */ + lwz r5,_CTR(r11) + mtctr r5 + lwz r0,GPR0(r11) + lwz r7,GPR7(r11) + lwz r8,GPR8(r11) + b fast_exception_return + +#ifdef CONFIG_SMP +hash_page_out: + eieio + addis r8,r7,mmu_hash_lock@ha + li r0,0 + stw r0,mmu_hash_lock@l(r8) + blr +#endif /* CONFIG_SMP */ + +/* + * Add an entry for a particular page to the hash table. + * + * add_hash_page(unsigned context, unsigned long va, unsigned long pmdval) + * + * We assume any necessary modifications to the pte (e.g. setting + * the accessed bit) have already been done and that there is actually + * a hash table in use (i.e. we're not on a 603). + */ +_GLOBAL(add_hash_page) + mflr r0 + stw r0,4(r1) + + /* Convert context and va to VSID */ + mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note create_hpte trims to 24 bits */ + +#ifdef CONFIG_SMP + rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ + lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ + oris r8,r8,12 +#endif /* CONFIG_SMP */ + + /* + * We disable interrupts here, even on UP, because we don't + * want to race with hash_page, and because we want the + * _PAGE_HASHPTE bit to be a reliable indication of whether + * the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + tophys(r7,0) + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l +10: lwarx r0,0,r9 /* take the mmu_hash_lock */ + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Fetch the linux pte and test and set _PAGE_HASHPTE atomically. + * If _PAGE_HASHPTE was already set, we don't replace the existing + * HPTE, so we just unlock and return. + */ + mr r8,r5 + rlwimi r8,r4,22,20,29 +1: lwarx r6,0,r8 + andi. r0,r6,_PAGE_HASHPTE + bne 9f /* if HASHPTE already set, done */ + ori r5,r6,_PAGE_HASHPTE + stwcx. r5,0,r8 + bne- 1b + + bl create_hpte + +9: +#ifdef CONFIG_SMP + eieio + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + + /* reenable interrupts and DR */ + mtmsr r10 + SYNC_601 + isync + + lwz r0,4(r1) + mtlr r0 + blr + +/* + * This routine adds a hardware PTE to the hash table. + * It is designed to be called with the MMU either on or off. + * r3 contains the VSID, r4 contains the virtual address, + * r5 contains the linux PTE, r6 contains the old value of the + * linux PTE (before setting _PAGE_HASHPTE) and r7 contains the + * offset to be added to addresses (0 if the MMU is on, + * -KERNELBASE if it is off). + * On SMP, the caller should have the mmu_hash_lock held. + * We assume that the caller has (or will) set the _PAGE_HASHPTE + * bit in the linux PTE in memory. The value passed in r6 should + * be the old linux PTE value; if it doesn't have _PAGE_HASHPTE set + * this routine will skip the search for an existing HPTE. + * This procedure modifies r0, r3 - r6, r8, cr0. + * -- paulus. + * + * For speed, 4 of the instructions get patched once the size and + * physical address of the hash table are known. These definitions + * of Hash_base and Hash_bits below are just an example. + */ +Hash_base = 0xc0180000 +Hash_bits = 12 /* e.g. 256kB hash table */ +Hash_msk = (((1 << Hash_bits) - 1) * 64) + +#ifndef CONFIG_PPC64BRIDGE +/* defines for the PTE format for 32-bit PPCs */ +#define PTE_SIZE 8 +#define PTEG_SIZE 64 +#define LG_PTEG_SIZE 6 +#define LDPTEu lwzu +#define STPTE stw +#define CMPPTE cmpw +#define PTE_H 0x40 +#define PTE_V 0x80000000 +#define TST_V(r) rlwinm. r,r,0,0,0 +#define SET_V(r) oris r,r,PTE_V@h +#define CLR_V(r,t) rlwinm r,r,0,1,31 + +#else +/* defines for the PTE format for 64-bit PPCs */ +#define PTE_SIZE 16 +#define PTEG_SIZE 128 +#define LG_PTEG_SIZE 7 +#define LDPTEu ldu +#define STPTE std +#define CMPPTE cmpd +#define PTE_H 2 +#define PTE_V 1 +#define TST_V(r) andi. r,r,PTE_V +#define SET_V(r) ori r,r,PTE_V +#define CLR_V(r,t) li t,PTE_V; andc r,r,t +#endif /* CONFIG_PPC64BRIDGE */ + +#define HASH_LEFT 31-(LG_PTEG_SIZE+Hash_bits-1) +#define HASH_RIGHT 31-LG_PTEG_SIZE + +_GLOBAL(create_hpte) + /* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */ + rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */ + rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */ + and r8,r8,r0 /* writable if _RW & _DIRTY */ + rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */ + rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */ + ori r8,r8,0xe14 /* clear out reserved bits and M */ + andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */ +BEGIN_FTR_SECTION + ori r8,r8,_PAGE_COHERENT /* set M (coherence required) */ +END_FTR_SECTION_IFSET(CPU_FTR_NEED_COHERENT) + + /* Construct the high word of the PPC-style PTE (r5) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r5,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r5,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r5,r3,12 /* shift vsid into position */ + rlwimi r5,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r5) /* set V (valid) bit */ + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(hash_page_patch_A) + addis r0,r7,Hash_base@h /* base address of hash table */ + rlwimi r0,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r3,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r3,r3,r0 /* make primary hash */ + li r0,8 /* PTEs/group */ + + /* + * Test the _PAGE_HASHPTE bit in the old linux PTE, and skip the search + * if it is clear, meaning that the HPTE isn't there already... + */ + andi. r6,r6,_PAGE_HASHPTE + beq+ 10f /* no PTE: go look for an empty slot */ + tlbie r4 + + addis r4,r7,htab_hash_searches@ha + lwz r6,htab_hash_searches@l(r4) + addi r6,r6,1 /* count how many searches we do */ + stw r6,htab_hash_searches@l(r4) + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + mtctr r0 + addi r4,r3,-PTE_SIZE +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + CMPPTE 0,r6,r5 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_slot + + /* Search the secondary PTEG for a matching PTE */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_B) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + CMPPTE 0,r6,r5 + bdnzf 2,2b + beq+ found_slot + xori r5,r5,PTE_H /* clear H bit again */ + + /* Search the primary PTEG for an empty slot */ +10: mtctr r0 + addi r4,r3,-PTE_SIZE /* search primary PTEG */ +1: LDPTEu r6,PTE_SIZE(r4) /* get next PTE */ + TST_V(r6) /* test valid bit */ + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ found_empty + + /* update counter of times that the primary PTEG is full */ + addis r4,r7,primary_pteg_full@ha + lwz r6,primary_pteg_full@l(r4) + addi r6,r6,1 + stw r6,primary_pteg_full@l(r4) + + /* Search the secondary PTEG for an empty slot */ + ori r5,r5,PTE_H /* set H (secondary hash) bit */ +_GLOBAL(hash_page_patch_C) + xoris r4,r3,Hash_msk>>16 /* compute secondary hash */ + xori r4,r4,(-PTEG_SIZE & 0xffff) + addi r4,r4,-PTE_SIZE + mtctr r0 +2: LDPTEu r6,PTE_SIZE(r4) + TST_V(r6) + bdnzf 2,2b + beq+ found_empty + xori r5,r5,PTE_H /* clear H bit again */ + + /* + * Choose an arbitrary slot in the primary PTEG to overwrite. + * Since both the primary and secondary PTEGs are full, and we + * have no information that the PTEs in the primary PTEG are + * more important or useful than those in the secondary PTEG, + * and we know there is a definite (although small) speed + * advantage to putting the PTE in the primary PTEG, we always + * put the PTE in the primary PTEG. + */ + addis r4,r7,next_slot@ha + lwz r6,next_slot@l(r4) + addi r6,r6,PTE_SIZE + andi. r6,r6,7*PTE_SIZE + stw r6,next_slot@l(r4) + add r4,r3,r6 + +#ifndef CONFIG_SMP + /* Store PTE in PTEG */ +found_empty: + STPTE r5,0(r4) +found_slot: + STPTE r8,PTE_SIZE/2(r4) + +#else /* CONFIG_SMP */ +/* + * Between the tlbie above and updating the hash table entry below, + * another CPU could read the hash table entry and put it in its TLB. + * There are 3 cases: + * 1. using an empty slot + * 2. updating an earlier entry to change permissions (i.e. enable write) + * 3. taking over the PTE for an unrelated address + * + * In each case it doesn't really matter if the other CPUs have the old + * PTE in their TLB. So we don't need to bother with another tlbie here, + * which is convenient as we've overwritten the register that had the + * address. :-) The tlbie above is mainly to make sure that this CPU comes + * and gets the new PTE from the hash table. + * + * We do however have to make sure that the PTE is never in an invalid + * state with the V bit set. + */ +found_empty: +found_slot: + CLR_V(r5,r0) /* clear V (valid) bit in PTE */ + STPTE r5,0(r4) + sync + TLBSYNC + STPTE r8,PTE_SIZE/2(r4) /* put in correct RPN, WIMG, PP bits */ + sync + SET_V(r5) + STPTE r5,0(r4) /* finally set V bit in PTE */ +#endif /* CONFIG_SMP */ + + sync /* make sure pte updates get to memory */ + blr + + .comm next_slot,4 + .comm primary_pteg_full,4 + .comm htab_hash_searches,4 + +/* + * Flush the entry for a particular page from the hash table. + * + * flush_hash_pages(unsigned context, unsigned long va, unsigned long pmdval, + * int count) + * + * We assume that there is a hash table in use (Hash != 0). + */ +_GLOBAL(flush_hash_pages) + tophys(r7,0) + + /* + * We disable interrupts here, even on UP, because we want + * the _PAGE_HASHPTE bit to be a reliable indication of + * whether the HPTE exists (or at least whether one did once). + * We also turn off the MMU for data accesses so that we + * we can't take a hash table miss (assuming the code is + * covered by a BAT). -- paulus + */ + mfmsr r10 + SYNC + rlwinm r0,r10,0,17,15 /* clear bit 16 (MSR_EE) */ + rlwinm r0,r0,0,28,26 /* clear MSR_DR */ + mtmsr r0 + SYNC_601 + isync + + /* First find a PTE in the range that has _PAGE_HASHPTE set */ + rlwimi r5,r4,22,20,29 +1: lwz r0,0(r5) + cmpwi cr1,r6,1 + andi. r0,r0,_PAGE_HASHPTE + bne 2f + ble cr1,19f + addi r4,r4,0x1000 + addi r5,r5,4 + addi r6,r6,-1 + b 1b + + /* Convert context and va to VSID */ +2: mulli r3,r3,897*16 /* multiply context by context skew */ + rlwinm r0,r4,4,28,31 /* get ESID (top 4 bits of va) */ + mulli r0,r0,0x111 /* multiply by ESID skew */ + add r3,r3,r0 /* note code below trims to 24 bits */ + + /* Construct the high word of the PPC-style PTE (r11) */ +#ifndef CONFIG_PPC64BRIDGE + rlwinm r11,r3,7,1,24 /* put VSID in 0x7fffff80 bits */ + rlwimi r11,r4,10,26,31 /* put in API (abbrev page index) */ +#else /* CONFIG_PPC64BRIDGE */ + clrlwi r3,r3,8 /* reduce vsid to 24 bits */ + sldi r11,r3,12 /* shift vsid into position */ + rlwimi r11,r4,16,20,24 /* put in API (abbrev page index) */ +#endif /* CONFIG_PPC64BRIDGE */ + SET_V(r11) /* set V (valid) bit */ + +#ifdef CONFIG_SMP + addis r9,r7,mmu_hash_lock@ha + addi r9,r9,mmu_hash_lock@l + rlwinm r8,r1,0,0,18 + add r8,r8,r7 + lwz r8,TI_CPU(r8) + oris r8,r8,9 +10: lwarx r0,0,r9 + cmpi 0,r0,0 + bne- 11f + stwcx. r8,0,r9 + beq+ 12f +11: lwz r0,0(r9) + cmpi 0,r0,0 + beq 10b + b 11b +12: isync +#endif + + /* + * Check the _PAGE_HASHPTE bit in the linux PTE. If it is + * already clear, we're done (for this pte). If not, + * clear it (atomically) and proceed. -- paulus. + */ +33: lwarx r8,0,r5 /* fetch the pte */ + andi. r0,r8,_PAGE_HASHPTE + beq 8f /* done if HASHPTE is already clear */ + rlwinm r8,r8,0,31,29 /* clear HASHPTE bit */ + stwcx. r8,0,r5 /* update the pte */ + bne- 33b + + /* Get the address of the primary PTE group in the hash table (r3) */ +_GLOBAL(flush_hash_patch_A) + addis r8,r7,Hash_base@h /* base address of hash table */ + rlwimi r8,r3,LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* VSID -> hash */ + rlwinm r0,r4,20+LG_PTEG_SIZE,HASH_LEFT,HASH_RIGHT /* PI -> hash */ + xor r8,r0,r8 /* make primary hash */ + + /* Search the primary PTEG for a PTE whose 1st (d)word matches r5 */ + li r0,8 /* PTEs/group */ + mtctr r0 + addi r12,r8,-PTE_SIZE +1: LDPTEu r0,PTE_SIZE(r12) /* get next PTE */ + CMPPTE 0,r0,r11 + bdnzf 2,1b /* loop while ctr != 0 && !cr0.eq */ + beq+ 3f + + /* Search the secondary PTEG for a matching PTE */ + ori r11,r11,PTE_H /* set H (secondary hash) bit */ + li r0,8 /* PTEs/group */ +_GLOBAL(flush_hash_patch_B) + xoris r12,r8,Hash_msk>>16 /* compute secondary hash */ + xori r12,r12,(-PTEG_SIZE & 0xffff) + addi r12,r12,-PTE_SIZE + mtctr r0 +2: LDPTEu r0,PTE_SIZE(r12) + CMPPTE 0,r0,r11 + bdnzf 2,2b + xori r11,r11,PTE_H /* clear H again */ + bne- 4f /* should rarely fail to find it */ + +3: li r0,0 + STPTE r0,0(r12) /* invalidate entry */ +4: sync + tlbie r4 /* in hw tlb too */ + sync + +8: ble cr1,9f /* if all ptes checked */ +81: addi r6,r6,-1 + addi r5,r5,4 /* advance to next pte */ + addi r4,r4,0x1000 + lwz r0,0(r5) /* check next pte */ + cmpwi cr1,r6,1 + andi. r0,r0,_PAGE_HASHPTE + bne 33b + bgt cr1,81b + +9: +#ifdef CONFIG_SMP + TLBSYNC + li r0,0 + stw r0,0(r9) /* clear mmu_hash_lock */ +#endif + +19: mtmsr r10 + SYNC_601 + isync + blr diff --git a/arch/powerpc/mm/init.c b/arch/powerpc/mm/init.c new file mode 100644 index 00000000000..f4d983a6e52 --- /dev/null +++ b/arch/powerpc/mm/init.c @@ -0,0 +1,581 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/initrd.h> +#include <linux/pagemap.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/btext.h> +#include <asm/tlb.h> +#include <asm/bootinfo.h> +#include <asm/prom.h> + +#include "mem_pieces.h" +#include "mmu_decl.h" + +#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL) +/* The ammount of lowmem must be within 0xF0000000 - KERNELBASE. */ +#if (CONFIG_LOWMEM_SIZE > (0xF0000000 - KERNELBASE)) +#error "You must adjust CONFIG_LOWMEM_SIZE or CONFIG_START_KERNEL" +#endif +#endif +#define MAX_LOW_MEM CONFIG_LOWMEM_SIZE + +DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + +unsigned long total_memory; +unsigned long total_lowmem; + +unsigned long ppc_memstart; +unsigned long ppc_memoffset = PAGE_OFFSET; + +int mem_init_done; +int init_bootmem_done; +int boot_mapsize; +#ifdef CONFIG_PPC_PMAC +unsigned long agp_special_page; +#endif + +extern char _end[]; +extern char etext[], _stext[]; +extern char __init_begin, __init_end; + +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); +#endif + +void MMU_init(void); +void set_phys_avail(unsigned long total_ram); + +/* XXX should be in current.h -- paulus */ +extern struct task_struct *current_set[NR_CPUS]; + +char *klimit = _end; +struct mem_pieces phys_avail; +struct device_node *memory_node; + +/* + * this tells the system to map all of ram with the segregs + * (i.e. page tables) instead of the bats. + * -- Cort + */ +int __map_without_bats; +int __map_without_ltlbs; + +/* max amount of RAM to use */ +unsigned long __max_memory; +/* max amount of low RAM to map in */ +unsigned long __max_low_memory = MAX_LOW_MEM; + +/* + * Read in a property describing some pieces of memory. + */ +static int __init get_mem_prop(char *name, struct mem_pieces *mp) +{ + struct reg_property *rp; + int i, s; + unsigned int *ip; + int nac = prom_n_addr_cells(memory_node); + int nsc = prom_n_size_cells(memory_node); + + ip = (unsigned int *) get_property(memory_node, name, &s); + if (ip == NULL) { + printk(KERN_ERR "error: couldn't get %s property on /memory\n", + name); + return 0; + } + s /= (nsc + nac) * 4; + rp = mp->regions; + for (i = 0; i < s; ++i, ip += nac+nsc) { + if (nac >= 2 && ip[nac-2] != 0) + continue; + rp->address = ip[nac-1]; + if (nsc >= 2 && ip[nac+nsc-2] != 0) + rp->size = ~0U; + else + rp->size = ip[nac+nsc-1]; + ++rp; + } + mp->n_regions = rp - mp->regions; + + /* Make sure the pieces are sorted. */ + mem_pieces_sort(mp); + mem_pieces_coalesce(mp); + return 1; +} + +/* + * Collect information about physical RAM and which pieces are + * already in use from the device tree. + */ +unsigned long __init find_end_of_memory(void) +{ + unsigned long a, total; + struct mem_pieces phys_mem; + + /* + * Find out where physical memory is, and check that it + * starts at 0 and is contiguous. It seems that RAM is + * always physically contiguous on Power Macintoshes. + * + * Supporting discontiguous physical memory isn't hard, + * it just makes the virtual <-> physical mapping functions + * more complicated (or else you end up wasting space + * in mem_map). + */ + memory_node = find_devices("memory"); + if (memory_node == NULL || !get_mem_prop("reg", &phys_mem) + || phys_mem.n_regions == 0) + panic("No RAM??"); + a = phys_mem.regions[0].address; + if (a != 0) + panic("RAM doesn't start at physical address 0"); + total = phys_mem.regions[0].size; + + if (phys_mem.n_regions > 1) { + printk("RAM starting at 0x%x is not contiguous\n", + phys_mem.regions[1].address); + printk("Using RAM from 0 to 0x%lx\n", total-1); + } + + return total; +} + +/* + * Check for command-line options that affect what MMU_init will do. + */ +void MMU_setup(void) +{ + /* Check for nobats option (used in mapin_ram). */ + if (strstr(cmd_line, "nobats")) { + __map_without_bats = 1; + } + + if (strstr(cmd_line, "noltlbs")) { + __map_without_ltlbs = 1; + } + + /* Look for mem= option on command line */ + if (strstr(cmd_line, "mem=")) { + char *p, *q; + unsigned long maxmem = 0; + + for (q = cmd_line; (p = strstr(q, "mem=")) != 0; ) { + q = p + 4; + if (p > cmd_line && p[-1] != ' ') + continue; + maxmem = simple_strtoul(q, &q, 0); + if (*q == 'k' || *q == 'K') { + maxmem <<= 10; + ++q; + } else if (*q == 'm' || *q == 'M') { + maxmem <<= 20; + ++q; + } + } + __max_memory = maxmem; + } +} + +/* + * MMU_init sets up the basic memory mappings for the kernel, + * including both RAM and possibly some I/O regions, + * and sets up the page tables and the MMU hardware ready to go. + */ +void __init MMU_init(void) +{ + if (ppc_md.progress) + ppc_md.progress("MMU:enter", 0x111); + + /* parse args from command line */ + MMU_setup(); + + /* + * Figure out how much memory we have, how much + * is lowmem, and how much is highmem. If we were + * passed the total memory size from the bootloader, + * just use it. + */ + if (boot_mem_size) + total_memory = boot_mem_size; + else + total_memory = ppc_md.find_end_of_memory(); + + if (__max_memory && total_memory > __max_memory) + total_memory = __max_memory; + total_lowmem = total_memory; +#ifdef CONFIG_FSL_BOOKE + /* Freescale Book-E parts expect lowmem to be mapped by fixed TLB + * entries, so we need to adjust lowmem to match the amount we can map + * in the fixed entries */ + adjust_total_lowmem(); +#endif /* CONFIG_FSL_BOOKE */ + if (total_lowmem > __max_low_memory) { + total_lowmem = __max_low_memory; +#ifndef CONFIG_HIGHMEM + total_memory = total_lowmem; +#endif /* CONFIG_HIGHMEM */ + } + set_phys_avail(total_lowmem); + + /* Initialize the MMU hardware */ + if (ppc_md.progress) + ppc_md.progress("MMU:hw init", 0x300); + MMU_init_hw(); + + /* Map in all of RAM starting at KERNELBASE */ + if (ppc_md.progress) + ppc_md.progress("MMU:mapin", 0x301); + mapin_ram(); + +#ifdef CONFIG_HIGHMEM + ioremap_base = PKMAP_BASE; +#else + ioremap_base = 0xfe000000UL; /* for now, could be 0xfffff000 */ +#endif /* CONFIG_HIGHMEM */ + ioremap_bot = ioremap_base; + + /* Map in I/O resources */ + if (ppc_md.progress) + ppc_md.progress("MMU:setio", 0x302); + if (ppc_md.setup_io_mappings) + ppc_md.setup_io_mappings(); + + /* Initialize the context management stuff */ + mmu_context_init(); + + if (ppc_md.progress) + ppc_md.progress("MMU:exit", 0x211); + +#ifdef CONFIG_BOOTX_TEXT + /* By default, we are no longer mapped */ + boot_text_mapped = 0; + /* Must be done last, or ppc_md.progress will die. */ + map_boot_text(); +#endif +} + +/* This is only called until mem_init is done. */ +void __init *early_get_page(void) +{ + void *p; + + if (init_bootmem_done) { + p = alloc_bootmem_pages(PAGE_SIZE); + } else { + p = mem_pieces_find(PAGE_SIZE, PAGE_SIZE); + } + return p; +} + +/* Free up now-unused memory */ +static void free_sec(unsigned long start, unsigned long end, const char *name) +{ + unsigned long cnt = 0; + + while (start < end) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + cnt++; + start += PAGE_SIZE; + } + if (cnt) { + printk(" %ldk %s", cnt << (PAGE_SHIFT - 10), name); + totalram_pages += cnt; + } +} + +void free_initmem(void) +{ +#define FREESEC(TYPE) \ + free_sec((unsigned long)(&__ ## TYPE ## _begin), \ + (unsigned long)(&__ ## TYPE ## _end), \ + #TYPE); + + printk ("Freeing unused kernel memory:"); + FREESEC(init); + printk("\n"); + ppc_md.progress = NULL; +#undef FREESEC +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +/* + * Initialize the bootmem system and give it all the memory we + * have available. + */ +void __init do_init_bootmem(void) +{ + unsigned long start, size; + int i; + + /* + * Find an area to use for the bootmem bitmap. + * We look for the first area which is at least + * 128kB in length (128kB is enough for a bitmap + * for 4GB of memory, using 4kB pages), plus 1 page + * (in case the address isn't page-aligned). + */ + start = 0; + size = 0; + for (i = 0; i < phys_avail.n_regions; ++i) { + unsigned long a = phys_avail.regions[i].address; + unsigned long s = phys_avail.regions[i].size; + if (s <= size) + continue; + start = a; + size = s; + if (s >= 33 * PAGE_SIZE) + break; + } + start = PAGE_ALIGN(start); + + min_low_pfn = start >> PAGE_SHIFT; + max_low_pfn = (PPC_MEMSTART + total_lowmem) >> PAGE_SHIFT; + max_pfn = (PPC_MEMSTART + total_memory) >> PAGE_SHIFT; + boot_mapsize = init_bootmem_node(&contig_page_data, min_low_pfn, + PPC_MEMSTART >> PAGE_SHIFT, + max_low_pfn); + + /* remove the bootmem bitmap from the available memory */ + mem_pieces_remove(&phys_avail, start, boot_mapsize, 1); + + /* add everything in phys_avail into the bootmem map */ + for (i = 0; i < phys_avail.n_regions; ++i) + free_bootmem(phys_avail.regions[i].address, + phys_avail.regions[i].size); + + init_bootmem_done = 1; +} + +/* + * paging_init() sets up the page tables - in fact we've already done this. + */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES], i; + +#ifdef CONFIG_HIGHMEM + map_page(PKMAP_BASE, 0, 0); /* XXX gross */ + pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k + (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); + map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ + kmap_pte = pte_offset_kernel(pmd_offset(pgd_offset_k + (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN); + kmap_prot = PAGE_KERNEL; +#endif /* CONFIG_HIGHMEM */ + + /* + * All pages are DMA-able so we put them all in the DMA zone. + */ + zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT; + for (i = 1; i < MAX_NR_ZONES; i++) + zones_size[i] = 0; + +#ifdef CONFIG_HIGHMEM + zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT; +#endif /* CONFIG_HIGHMEM */ + + free_area_init(zones_size); +} + +void __init mem_init(void) +{ + unsigned long addr; + int codepages = 0; + int datapages = 0; + int initpages = 0; +#ifdef CONFIG_HIGHMEM + unsigned long highmem_mapnr; + + highmem_mapnr = total_lowmem >> PAGE_SHIFT; +#endif /* CONFIG_HIGHMEM */ + max_mapnr = total_memory >> PAGE_SHIFT; + + high_memory = (void *) __va(PPC_MEMSTART + total_lowmem); + num_physpages = max_mapnr; /* RAM is assumed contiguous */ + + totalram_pages += free_all_bootmem(); + +#ifdef CONFIG_BLK_DEV_INITRD + /* if we are booted from BootX with an initial ramdisk, + make sure the ramdisk pages aren't reserved. */ + if (initrd_start) { + for (addr = initrd_start; addr < initrd_end; addr += PAGE_SIZE) + ClearPageReserved(virt_to_page(addr)); + } +#endif /* CONFIG_BLK_DEV_INITRD */ + +#ifdef CONFIG_PPC_OF + /* mark the RTAS pages as reserved */ + if ( rtas_data ) + for (addr = (ulong)__va(rtas_data); + addr < PAGE_ALIGN((ulong)__va(rtas_data)+rtas_size) ; + addr += PAGE_SIZE) + SetPageReserved(virt_to_page(addr)); +#endif +#ifdef CONFIG_PPC_PMAC + if (agp_special_page) + SetPageReserved(virt_to_page(agp_special_page)); +#endif + for (addr = PAGE_OFFSET; addr < (unsigned long)high_memory; + addr += PAGE_SIZE) { + if (!PageReserved(virt_to_page(addr))) + continue; + if (addr < (ulong) etext) + codepages++; + else if (addr >= (unsigned long)&__init_begin + && addr < (unsigned long)&__init_end) + initpages++; + else if (addr < (ulong) klimit) + datapages++; + } + +#ifdef CONFIG_HIGHMEM + { + unsigned long pfn; + + for (pfn = highmem_mapnr; pfn < max_mapnr; ++pfn) { + struct page *page = mem_map + pfn; + + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; + } + totalram_pages += totalhigh_pages; + } +#endif /* CONFIG_HIGHMEM */ + + printk("Memory: %luk available (%dk kernel code, %dk data, %dk init, %ldk highmem)\n", + (unsigned long)nr_free_pages()<< (PAGE_SHIFT-10), + codepages<< (PAGE_SHIFT-10), datapages<< (PAGE_SHIFT-10), + initpages<< (PAGE_SHIFT-10), + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + +#ifdef CONFIG_PPC_PMAC + if (agp_special_page) + printk(KERN_INFO "AGP special page: 0x%08lx\n", agp_special_page); +#endif + + mem_init_done = 1; +} + +/* + * Set phys_avail to the amount of physical memory, + * less the kernel text/data/bss. + */ +void __init +set_phys_avail(unsigned long total_memory) +{ + unsigned long kstart, ksize; + + /* + * Initially, available physical memory is equivalent to all + * physical memory. + */ + + phys_avail.regions[0].address = PPC_MEMSTART; + phys_avail.regions[0].size = total_memory; + phys_avail.n_regions = 1; + + /* + * Map out the kernel text/data/bss from the available physical + * memory. + */ + + kstart = __pa(_stext); /* should be 0 */ + ksize = PAGE_ALIGN(klimit - _stext); + + mem_pieces_remove(&phys_avail, kstart, ksize, 0); + mem_pieces_remove(&phys_avail, 0, 0x4000, 0); + +#if defined(CONFIG_BLK_DEV_INITRD) + /* Remove the init RAM disk from the available memory. */ + if (initrd_start) { + mem_pieces_remove(&phys_avail, __pa(initrd_start), + initrd_end - initrd_start, 1); + } +#endif /* CONFIG_BLK_DEV_INITRD */ +#ifdef CONFIG_PPC_OF + /* remove the RTAS pages from the available memory */ + if (rtas_data) + mem_pieces_remove(&phys_avail, rtas_data, rtas_size, 1); +#endif +#ifdef CONFIG_PPC_PMAC + /* Because of some uninorth weirdness, we need a page of + * memory as high as possible (it must be outside of the + * bus address seen as the AGP aperture). It will be used + * by the r128 DRM driver + * + * FIXME: We need to make sure that page doesn't overlap any of the\ + * above. This could be done by improving mem_pieces_find to be able + * to do a backward search from the end of the list. + */ + if (_machine == _MACH_Pmac && find_devices("uni-north-agp")) { + agp_special_page = (total_memory - PAGE_SIZE); + mem_pieces_remove(&phys_avail, agp_special_page, PAGE_SIZE, 0); + agp_special_page = (unsigned long)__va(agp_special_page); + } +#endif /* CONFIG_PPC_PMAC */ +} + +/* Mark some memory as reserved by removing it from phys_avail. */ +void __init reserve_phys_mem(unsigned long start, unsigned long size) +{ + mem_pieces_remove(&phys_avail, start, size, 1); +} diff --git a/arch/powerpc/mm/init64.c b/arch/powerpc/mm/init64.c new file mode 100644 index 00000000000..81f6745b31e --- /dev/null +++ b/arch/powerpc/mm/init64.c @@ -0,0 +1,385 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen <engebret@us.ibm.com> + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/idr.h> +#include <linux/nodemask.h> +#include <linux/module.h> + +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/prom.h> +#include <asm/lmb.h> +#include <asm/rtas.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/tlb.h> +#include <asm/eeh.h> +#include <asm/processor.h> +#include <asm/mmzone.h> +#include <asm/cputable.h> +#include <asm/ppcdebug.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/iommu.h> +#include <asm/abs_addr.h> +#include <asm/vdso.h> +#include <asm/imalloc.h> + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +unsigned long klimit = (unsigned long)_end; + +unsigned long _SDR1=0; +unsigned long _ASR=0; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +/* + * Do very early mm setup. + */ +void __init mm_init_ppc64(void) +{ +#ifndef CONFIG_PPC_ISERIES + unsigned long i; +#endif + + ppc64_boot_msg(0x100, "MM Init"); + + /* This is the story of the IO hole... please, keep seated, + * unfortunately, we are out of oxygen masks at the moment. + * So we need some rough way to tell where your big IO hole + * is. On pmac, it's between 2G and 4G, on POWER3, it's around + * that area as well, on POWER4 we don't have one, etc... + * We need that as a "hint" when sizing the TCE table on POWER3 + * So far, the simplest way that seem work well enough for us it + * to just assume that the first discontinuity in our physical + * RAM layout is the IO hole. That may not be correct in the future + * (and isn't on iSeries but then we don't care ;) + */ + +#ifndef CONFIG_PPC_ISERIES + for (i = 1; i < lmb.memory.cnt; i++) { + unsigned long base, prevbase, prevsize; + + prevbase = lmb.memory.region[i-1].base; + prevsize = lmb.memory.region[i-1].size; + base = lmb.memory.region[i].base; + if (base > (prevbase + prevsize)) { + io_hole_start = prevbase + prevsize; + io_hole_size = base - (prevbase + prevsize); + break; + } + } +#endif /* CONFIG_PPC_ISERIES */ + if (io_hole_start) + printk("IO Hole assumed to be %lx -> %lx\n", + io_hole_start, io_hole_start + io_hole_size - 1); + + ppc64_boot_msg(0x100, "MM Init Done"); +} + +void free_initmem(void) +{ + unsigned long addr; + + addr = (unsigned long)__init_begin; + for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) { + memset((void *)addr, 0xcc, PAGE_SIZE); + ClearPageReserved(virt_to_page(addr)); + set_page_count(virt_to_page(addr), 1); + free_page(addr); + totalram_pages++; + } + printk ("Freeing unused kernel memory: %luk freed\n", + ((unsigned long)__init_end - (unsigned long)__init_begin) >> 10); +} + +#ifdef CONFIG_BLK_DEV_INITRD +void free_initrd_mem(unsigned long start, unsigned long end) +{ + if (start < end) + printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); + for (; start < end; start += PAGE_SIZE) { + ClearPageReserved(virt_to_page(start)); + set_page_count(virt_to_page(start), 1); + free_page(start); + totalram_pages++; + } +} +#endif + +/* + * Initialize the bootmem system and give it all the memory we + * have available. + */ +#ifndef CONFIG_NEED_MULTIPLE_NODES +void __init do_init_bootmem(void) +{ + unsigned long i; + unsigned long start, bootmap_pages; + unsigned long total_pages = lmb_end_of_DRAM() >> PAGE_SHIFT; + int boot_mapsize; + + /* + * Find an area to use for the bootmem bitmap. Calculate the size of + * bitmap required as (Total Memory) / PAGE_SIZE / BITS_PER_BYTE. + * Add 1 additional page in case the address isn't page-aligned. + */ + bootmap_pages = bootmem_bootmap_pages(total_pages); + + start = lmb_alloc(bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); + BUG_ON(!start); + + boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); + + max_pfn = max_low_pfn; + + /* Add all physical memory to the bootmem map, mark each area + * present. + */ + for (i=0; i < lmb.memory.cnt; i++) + free_bootmem(lmb.memory.region[i].base, + lmb_size_bytes(&lmb.memory, i)); + + /* reserve the sections we're already using */ + for (i=0; i < lmb.reserved.cnt; i++) + reserve_bootmem(lmb.reserved.region[i].base, + lmb_size_bytes(&lmb.reserved, i)); + + for (i=0; i < lmb.memory.cnt; i++) + memory_present(0, lmb_start_pfn(&lmb.memory, i), + lmb_end_pfn(&lmb.memory, i)); +} + +/* + * paging_init() sets up the page tables - in fact we've already done this. + */ +void __init paging_init(void) +{ + unsigned long zones_size[MAX_NR_ZONES]; + unsigned long zholes_size[MAX_NR_ZONES]; + unsigned long total_ram = lmb_phys_mem_size(); + unsigned long top_of_ram = lmb_end_of_DRAM(); + + printk(KERN_INFO "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", + top_of_ram, total_ram); + printk(KERN_INFO "Memory hole size: %ldMB\n", + (top_of_ram - total_ram) >> 20); + /* + * All pages are DMA-able so we put them all in the DMA zone. + */ + memset(zones_size, 0, sizeof(zones_size)); + memset(zholes_size, 0, sizeof(zholes_size)); + + zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT; + zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT; + + free_area_init_node(0, NODE_DATA(0), zones_size, + __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size); +} +#endif /* ! CONFIG_NEED_MULTIPLE_NODES */ + +static struct kcore_list kcore_vmem; + +static int __init setup_kcore(void) +{ + int i; + + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base, size; + struct kcore_list *kcore_mem; + + base = lmb.memory.region[i].base; + size = lmb.memory.region[i].size; + + /* GFP_ATOMIC to avoid might_sleep warnings during boot */ + kcore_mem = kmalloc(sizeof(struct kcore_list), GFP_ATOMIC); + if (!kcore_mem) + panic("mem_init: kmalloc failed\n"); + + kclist_add(kcore_mem, __va(base), size); + } + + kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); + + return 0; +} +module_init(setup_kcore); + +void __init mem_init(void) +{ +#ifdef CONFIG_NEED_MULTIPLE_NODES + int nid; +#endif + pg_data_t *pgdat; + unsigned long i; + struct page *page; + unsigned long reservedpages = 0, codesize, initsize, datasize, bsssize; + + num_physpages = max_low_pfn; /* RAM is assumed contiguous */ + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); + +#ifdef CONFIG_NEED_MULTIPLE_NODES + for_each_online_node(nid) { + if (NODE_DATA(nid)->node_spanned_pages != 0) { + printk("freeing bootmem node %x\n", nid); + totalram_pages += + free_all_bootmem_node(NODE_DATA(nid)); + } + } +#else + max_mapnr = num_physpages; + totalram_pages += free_all_bootmem(); +#endif + + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + if (PageReserved(page)) + reservedpages++; + } + } + + codesize = (unsigned long)&_etext - (unsigned long)&_stext; + initsize = (unsigned long)&__init_end - (unsigned long)&__init_begin; + datasize = (unsigned long)&_edata - (unsigned long)&__init_end; + bsssize = (unsigned long)&__bss_stop - (unsigned long)&__bss_start; + + printk(KERN_INFO "Memory: %luk/%luk available (%luk kernel code, " + "%luk reserved, %luk data, %luk bss, %luk init)\n", + (unsigned long)nr_free_pages() << (PAGE_SHIFT-10), + num_physpages << (PAGE_SHIFT-10), + codesize >> 10, + reservedpages << (PAGE_SHIFT-10), + datasize >> 10, + bsssize >> 10, + initsize >> 10); + + mem_init_done = 1; + + /* Initialize the vDSO */ + vdso_init(); +} + +void __iomem * reserve_phb_iospace(unsigned long size) +{ + void __iomem *virt_addr; + + if (phbs_io_bot >= IMALLOC_BASE) + panic("reserve_phb_iospace(): phb io space overflow\n"); + + virt_addr = (void __iomem *) phbs_io_bot; + phbs_io_bot += size; + + return virt_addr; +} + +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +{ + memset(addr, 0, kmem_cache_size(cache)); +} + +static const int pgtable_cache_size[2] = { + PTE_TABLE_SIZE, PMD_TABLE_SIZE +}; +static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { + "pgd_pte_cache", "pud_pmd_cache", +}; + +kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; + +void pgtable_cache_init(void) +{ + int i; + + BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); + BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); + BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); + BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); + + for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { + int size = pgtable_cache_size[i]; + const char *name = pgtable_cache_name[i]; + + pgtable_cache[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN + | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (! pgtable_cache[i]) + panic("pgtable_cache_init(): could not create %s!\n", + name); + } +} + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c new file mode 100644 index 00000000000..345db08e5d2 --- /dev/null +++ b/arch/powerpc/mm/mem.c @@ -0,0 +1,299 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * PPC44x/36-bit changes by Matt Porter (mporter@mvista.com) + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/initrd.h> +#include <linux/pagemap.h> + +#include <asm/pgalloc.h> +#include <asm/prom.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/btext.h> +#include <asm/tlb.h> +#include <asm/bootinfo.h> +#include <asm/prom.h> + +#include "mem_pieces.h" +#include "mmu_decl.h" + +#ifndef CPU_FTR_COHERENT_ICACHE +#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */ +#define CPU_FTR_NOEXECUTE 0 +#endif + +/* + * This is called by /dev/mem to know if a given address has to + * be mapped non-cacheable or not + */ +int page_is_ram(unsigned long pfn) +{ + unsigned long paddr = (pfn << PAGE_SHIFT); + +#ifndef CONFIG_PPC64 /* XXX for now */ + return paddr < __pa(high_memory); +#else + int i; + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base; + + base = lmb.memory.region[i].base; + + if ((paddr >= base) && + (paddr < (base + lmb.memory.region[i].size))) { + return 1; + } + } + + return 0; +#endif +} +EXPORT_SYMBOL(page_is_ram); + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); + +void show_mem(void) +{ + unsigned long total = 0, reserved = 0; + unsigned long shared = 0, cached = 0; + unsigned long highmem = 0; + struct page *page; + pg_data_t *pgdat; + unsigned long i; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + total++; + if (PageHighMem(page)) + highmem++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; + } + } + printk("%ld pages of RAM\n", total); +#ifdef CONFIG_HIGHMEM + printk("%ld pages of HIGHMEM\n", highmem); +#endif + printk("%ld reserved pages\n", reserved); + printk("%ld pages shared\n", shared); + printk("%ld pages swap cached\n", cached); +} + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. + */ +void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &page->flags)) + clear_bit(PG_arch_1, &page->flags); +} +EXPORT_SYMBOL(flush_dcache_page); + +void flush_dcache_icache_page(struct page *page) +{ +#ifdef CONFIG_BOOKE + void *start = kmap_atomic(page, KM_PPC_SYNC_ICACHE); + __flush_dcache_icache(start); + kunmap_atomic(start, KM_PPC_SYNC_ICACHE); +#elif defined(CONFIG_8xx) + /* On 8xx there is no need to kmap since highmem is not supported */ + __flush_dcache_icache(page_address(page)); +#else + __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT); +#endif + +} +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* + * We shouldnt have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. + * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} + +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + unsigned long maddr; + + maddr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); + flush_icache_range(maddr, maddr + len); + kunmap(page); +} +EXPORT_SYMBOL(flush_icache_user_range); + +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux PTE. + * + * This must always be called with the mm->page_table_lock held + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, + pte_t pte) +{ + /* handle i-cache coherency */ + unsigned long pfn = pte_pfn(pte); +#ifdef CONFIG_PPC32 + pmd_t *pmd; +#else + unsigned long vsid; + void *pgdir; + pte_t *ptep; + int local = 0; + cpumask_t tmp; + unsigned long flags; +#endif + + /* handle i-cache coherency */ + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && + !cpu_has_feature(CPU_FTR_NOEXECUTE) && + pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page) + && !test_bit(PG_arch_1, &page->flags)) { + if (vma->vm_mm == current->active_mm) { +#ifdef CONFIG_8xx + /* On 8xx, cache control instructions (particularly + * "dcbst" from flush_dcache_icache) fault as write + * operation if there is an unpopulated TLB entry + * for the address in question. To workaround that, + * we invalidate the TLB here, thus avoiding dcbst + * misbehaviour. + */ + _tlbie(address); +#endif + __flush_dcache_icache((void *) address); + } else + flush_dcache_icache_page(page); + set_bit(PG_arch_1, &page->flags); + } + } + +#ifdef CONFIG_PPC_STD_MMU + /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ + if (!pte_young(pte) || address >= TASK_SIZE) + return; +#ifdef CONFIG_PPC32 + if (Hash == 0) + return; + pmd = pmd_offset(pgd_offset(vma->vm_mm, address), address); + if (!pmd_none(*pmd)) + add_hash_page(vma->vm_mm->context, address, pmd_val(*pmd)); +#else + pgdir = vma->vm_mm->pgd; + if (pgdir == NULL) + return; + + ptep = find_linux_pte(pgdir, ea); + if (!ptep) + return; + + vsid = get_vsid(vma->vm_mm->context.id, ea); + + local_irq_save(flags); + tmp = cpumask_of_cpu(smp_processor_id()); + if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) + local = 1; + + __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, + 0x300, local); + local_irq_restore(flags); +#endif +#endif +} diff --git a/arch/powerpc/mm/mem64.c b/arch/powerpc/mm/mem64.c new file mode 100644 index 00000000000..ef765a84433 --- /dev/null +++ b/arch/powerpc/mm/mem64.c @@ -0,0 +1,259 @@ +/* + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen <engebret@us.ibm.com> + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/idr.h> +#include <linux/nodemask.h> +#include <linux/module.h> + +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/prom.h> +#include <asm/lmb.h> +#include <asm/rtas.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/tlb.h> +#include <asm/eeh.h> +#include <asm/processor.h> +#include <asm/mmzone.h> +#include <asm/cputable.h> +#include <asm/ppcdebug.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/iommu.h> +#include <asm/abs_addr.h> +#include <asm/vdso.h> +#include <asm/imalloc.h> + +/* + * This is called by /dev/mem to know if a given address has to + * be mapped non-cacheable or not + */ +int page_is_ram(unsigned long pfn) +{ + int i; + unsigned long paddr = (pfn << PAGE_SHIFT); + + for (i=0; i < lmb.memory.cnt; i++) { + unsigned long base; + + base = lmb.memory.region[i].base; + + if ((paddr >= base) && + (paddr < (base + lmb.memory.region[i].size))) { + return 1; + } + } + + return 0; +} +EXPORT_SYMBOL(page_is_ram); + +pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, + unsigned long size, pgprot_t vma_prot) +{ + if (ppc_md.phys_mem_access_prot) + return ppc_md.phys_mem_access_prot(file, addr, size, vma_prot); + + if (!page_is_ram(addr >> PAGE_SHIFT)) + vma_prot = __pgprot(pgprot_val(vma_prot) + | _PAGE_GUARDED | _PAGE_NO_CACHE); + return vma_prot; +} +EXPORT_SYMBOL(phys_mem_access_prot); + +void show_mem(void) +{ + unsigned long total = 0, reserved = 0; + unsigned long shared = 0, cached = 0; + struct page *page; + pg_data_t *pgdat; + unsigned long i; + + printk("Mem-info:\n"); + show_free_areas(); + printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); + for_each_pgdat(pgdat) { + for (i = 0; i < pgdat->node_spanned_pages; i++) { + page = pgdat_page_nr(pgdat, i); + total++; + if (PageReserved(page)) + reserved++; + else if (PageSwapCache(page)) + cached++; + else if (page_count(page)) + shared += page_count(page) - 1; + } + } + printk("%ld pages of RAM\n", total); + printk("%ld reserved pages\n", reserved); + printk("%ld pages shared\n", shared); + printk("%ld pages swap cached\n", cached); +} + +/* + * This is called when a page has been modified by the kernel. + * It just marks the page as not i-cache clean. We do the i-cache + * flush later when the page is given to a user process, if necessary. + */ +void flush_dcache_page(struct page *page) +{ + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &page->flags)) + clear_bit(PG_arch_1, &page->flags); +} +EXPORT_SYMBOL(flush_dcache_page); + +void clear_user_page(void *page, unsigned long vaddr, struct page *pg) +{ + clear_page(page); + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + /* + * We shouldnt have to do this, but some versions of glibc + * require it (ld.so assumes zero filled pages are icache clean) + * - Anton + */ + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} +EXPORT_SYMBOL(clear_user_page); + +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *pg) +{ + copy_page(vto, vfrom); + + /* + * We should be able to use the following optimisation, however + * there are two problems. + * Firstly a bug in some versions of binutils meant PLT sections + * were not marked executable. + * Secondly the first word in the GOT section is blrl, used + * to establish the GOT address. Until recently the GOT was + * not marked executable. + * - Anton + */ +#if 0 + if (!vma->vm_file && ((vma->vm_flags & VM_EXEC) == 0)) + return; +#endif + + if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) + return; + + /* avoid an atomic op if possible */ + if (test_bit(PG_arch_1, &pg->flags)) + clear_bit(PG_arch_1, &pg->flags); +} + +void flush_icache_user_range(struct vm_area_struct *vma, struct page *page, + unsigned long addr, int len) +{ + unsigned long maddr; + + maddr = (unsigned long)page_address(page) + (addr & ~PAGE_MASK); + flush_icache_range(maddr, maddr + len); +} +EXPORT_SYMBOL(flush_icache_user_range); + +/* + * This is called at the end of handling a user page fault, when the + * fault has been handled by updating a PTE in the linux page tables. + * We use it to preload an HPTE into the hash table corresponding to + * the updated linux PTE. + * + * This must always be called with the mm->page_table_lock held + */ +void update_mmu_cache(struct vm_area_struct *vma, unsigned long ea, + pte_t pte) +{ + unsigned long vsid; + void *pgdir; + pte_t *ptep; + int local = 0; + cpumask_t tmp; + unsigned long flags; + + /* handle i-cache coherency */ + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && + !cpu_has_feature(CPU_FTR_NOEXECUTE)) { + unsigned long pfn = pte_pfn(pte); + if (pfn_valid(pfn)) { + struct page *page = pfn_to_page(pfn); + if (!PageReserved(page) + && !test_bit(PG_arch_1, &page->flags)) { + __flush_dcache_icache(page_address(page)); + set_bit(PG_arch_1, &page->flags); + } + } + } + + /* We only want HPTEs for linux PTEs that have _PAGE_ACCESSED set */ + if (!pte_young(pte)) + return; + + pgdir = vma->vm_mm->pgd; + if (pgdir == NULL) + return; + + ptep = find_linux_pte(pgdir, ea); + if (!ptep) + return; + + vsid = get_vsid(vma->vm_mm->context.id, ea); + + local_irq_save(flags); + tmp = cpumask_of_cpu(smp_processor_id()); + if (cpus_equal(vma->vm_mm->cpu_vm_mask, tmp)) + local = 1; + + __hash_page(ea, pte_val(pte) & (_PAGE_USER|_PAGE_RW), vsid, ptep, + 0x300, local); + local_irq_restore(flags); +} diff --git a/arch/powerpc/mm/mem_pieces.c b/arch/powerpc/mm/mem_pieces.c new file mode 100644 index 00000000000..3d639052017 --- /dev/null +++ b/arch/powerpc/mm/mem_pieces.c @@ -0,0 +1,163 @@ +/* + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Changes to accommodate Power Macintoshes. + * Cort Dougan <cort@cs.nmt.edu> + * Rewrites. + * Grant Erickson <grant@lcse.umn.edu> + * General rework and split from mm/init.c. + * + * Module name: mem_pieces.c + * + * Description: + * Routines and data structures for manipulating and representing + * phyiscal memory extents (i.e. address/length pairs). + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <asm/page.h> + +#include "mem_pieces.h" + +extern struct mem_pieces phys_avail; + +static void mem_pieces_print(struct mem_pieces *); + +/* + * Scan a region for a piece of a given size with the required alignment. + */ +void __init * +mem_pieces_find(unsigned int size, unsigned int align) +{ + int i; + unsigned a, e; + struct mem_pieces *mp = &phys_avail; + + for (i = 0; i < mp->n_regions; ++i) { + a = mp->regions[i].address; + e = a + mp->regions[i].size; + a = (a + align - 1) & -align; + if (a + size <= e) { + mem_pieces_remove(mp, a, size, 1); + return (void *) __va(a); + } + } + panic("Couldn't find %u bytes at %u alignment\n", size, align); + + return NULL; +} + +/* + * Remove some memory from an array of pieces + */ +void __init +mem_pieces_remove(struct mem_pieces *mp, unsigned int start, unsigned int size, + int must_exist) +{ + int i, j; + unsigned int end, rs, re; + struct reg_property *rp; + + end = start + size; + for (i = 0, rp = mp->regions; i < mp->n_regions; ++i, ++rp) { + if (end > rp->address && start < rp->address + rp->size) + break; + } + if (i >= mp->n_regions) { + if (must_exist) + printk("mem_pieces_remove: [%x,%x) not in any region\n", + start, end); + return; + } + for (; i < mp->n_regions && end > rp->address; ++i, ++rp) { + rs = rp->address; + re = rs + rp->size; + if (must_exist && (start < rs || end > re)) { + printk("mem_pieces_remove: bad overlap [%x,%x) with", + start, end); + mem_pieces_print(mp); + must_exist = 0; + } + if (start > rs) { + rp->size = start - rs; + if (end < re) { + /* need to split this entry */ + if (mp->n_regions >= MEM_PIECES_MAX) + panic("eek... mem_pieces overflow"); + for (j = mp->n_regions; j > i + 1; --j) + mp->regions[j] = mp->regions[j-1]; + ++mp->n_regions; + rp[1].address = end; + rp[1].size = re - end; + } + } else { + if (end < re) { + rp->address = end; + rp->size = re - end; + } else { + /* need to delete this entry */ + for (j = i; j < mp->n_regions - 1; ++j) + mp->regions[j] = mp->regions[j+1]; + --mp->n_regions; + --i; + --rp; + } + } + } +} + +static void __init +mem_pieces_print(struct mem_pieces *mp) +{ + int i; + + for (i = 0; i < mp->n_regions; ++i) + printk(" [%x, %x)", mp->regions[i].address, + mp->regions[i].address + mp->regions[i].size); + printk("\n"); +} + +void __init +mem_pieces_sort(struct mem_pieces *mp) +{ + unsigned long a, s; + int i, j; + + for (i = 1; i < mp->n_regions; ++i) { + a = mp->regions[i].address; + s = mp->regions[i].size; + for (j = i - 1; j >= 0; --j) { + if (a >= mp->regions[j].address) + break; + mp->regions[j+1] = mp->regions[j]; + } + mp->regions[j+1].address = a; + mp->regions[j+1].size = s; + } +} + +void __init +mem_pieces_coalesce(struct mem_pieces *mp) +{ + unsigned long a, s, ns; + int i, j, d; + + d = 0; + for (i = 0; i < mp->n_regions; i = j) { + a = mp->regions[i].address; + s = mp->regions[i].size; + for (j = i + 1; j < mp->n_regions + && mp->regions[j].address - a <= s; ++j) { + ns = mp->regions[j].address + mp->regions[j].size - a; + if (ns > s) + s = ns; + } + mp->regions[d].address = a; + mp->regions[d].size = s; + ++d; + } + mp->n_regions = d; +} diff --git a/arch/powerpc/mm/mem_pieces.h b/arch/powerpc/mm/mem_pieces.h new file mode 100644 index 00000000000..e2b700dc7f1 --- /dev/null +++ b/arch/powerpc/mm/mem_pieces.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 1996 Paul Mackerras <paulus@cs.anu.edu.au> + * Changes to accommodate Power Macintoshes. + * Cort Dougan <cort@cs.nmt.edu> + * Rewrites. + * Grant Erickson <grant@lcse.umn.edu> + * General rework and split from mm/init.c. + * + * Module name: mem_pieces.h + * + * Description: + * Routines and data structures for manipulating and representing + * phyiscal memory extents (i.e. address/length pairs). + * + */ + +#ifndef __MEM_PIECES_H__ +#define __MEM_PIECES_H__ + +#include <asm/prom.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Type Definitions */ + +#define MEM_PIECES_MAX 32 + +struct mem_pieces { + int n_regions; + struct reg_property regions[MEM_PIECES_MAX]; +}; + +/* Function Prototypes */ + +extern void *mem_pieces_find(unsigned int size, unsigned int align); +extern void mem_pieces_remove(struct mem_pieces *mp, unsigned int start, + unsigned int size, int must_exist); +extern void mem_pieces_coalesce(struct mem_pieces *mp); +extern void mem_pieces_sort(struct mem_pieces *mp); + +#ifdef __cplusplus +} +#endif + +#endif /* __MEM_PIECES_H__ */ diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c new file mode 100644 index 00000000000..a8816e0f6a8 --- /dev/null +++ b/arch/powerpc/mm/mmu_context.c @@ -0,0 +1,86 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/mm.h> +#include <linux/init.h> + +#include <asm/mmu_context.h> +#include <asm/tlbflush.h> + +mm_context_t next_mmu_context; +unsigned long context_map[LAST_CONTEXT / BITS_PER_LONG + 1]; +#ifdef FEW_CONTEXTS +atomic_t nr_free_contexts; +struct mm_struct *context_mm[LAST_CONTEXT+1]; +void steal_context(void); +#endif /* FEW_CONTEXTS */ + +/* + * Initialize the context management stuff. + */ +void __init +mmu_context_init(void) +{ + /* + * Some processors have too few contexts to reserve one for + * init_mm, and require using context 0 for a normal task. + * Other processors reserve the use of context zero for the kernel. + * This code assumes FIRST_CONTEXT < 32. + */ + context_map[0] = (1 << FIRST_CONTEXT) - 1; + next_mmu_context = FIRST_CONTEXT; +#ifdef FEW_CONTEXTS + atomic_set(&nr_free_contexts, LAST_CONTEXT - FIRST_CONTEXT + 1); +#endif /* FEW_CONTEXTS */ +} + +#ifdef FEW_CONTEXTS +/* + * Steal a context from a task that has one at the moment. + * This is only used on 8xx and 4xx and we presently assume that + * they don't do SMP. If they do then this will have to check + * whether the MM we steal is in use. + * We also assume that this is only used on systems that don't + * use an MMU hash table - this is true for 8xx and 4xx. + * This isn't an LRU system, it just frees up each context in + * turn (sort-of pseudo-random replacement :). This would be the + * place to implement an LRU scheme if anyone was motivated to do it. + * -- paulus + */ +void +steal_context(void) +{ + struct mm_struct *mm; + + /* free up context `next_mmu_context' */ + /* if we shouldn't free context 0, don't... */ + if (next_mmu_context < FIRST_CONTEXT) + next_mmu_context = FIRST_CONTEXT; + mm = context_mm[next_mmu_context]; + flush_tlb_mm(mm); + destroy_context(mm); +} +#endif /* FEW_CONTEXTS */ diff --git a/arch/powerpc/mm/mmu_context64.c b/arch/powerpc/mm/mmu_context64.c new file mode 100644 index 00000000000..714a84dd8d5 --- /dev/null +++ b/arch/powerpc/mm/mmu_context64.c @@ -0,0 +1,63 @@ +/* + * MMU context allocation for 64-bit kernels. + * + * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/spinlock.h> +#include <linux/idr.h> + +#include <asm/mmu_context.h> + +static DEFINE_SPINLOCK(mmu_context_lock); +static DEFINE_IDR(mmu_context_idr); + +int init_new_context(struct task_struct *tsk, struct mm_struct *mm) +{ + int index; + int err; + +again: + if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) + return -ENOMEM; + + spin_lock(&mmu_context_lock); + err = idr_get_new_above(&mmu_context_idr, NULL, 1, &index); + spin_unlock(&mmu_context_lock); + + if (err == -EAGAIN) + goto again; + else if (err) + return err; + + if (index > MAX_CONTEXT) { + idr_remove(&mmu_context_idr, index); + return -ENOMEM; + } + + mm->context.id = index; + + return 0; +} + +void destroy_context(struct mm_struct *mm) +{ + spin_lock(&mmu_context_lock); + idr_remove(&mmu_context_idr, mm->context.id); + spin_unlock(&mmu_context_lock); + + mm->context.id = NO_CONTEXT; +} diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h new file mode 100644 index 00000000000..540f3292b22 --- /dev/null +++ b/arch/powerpc/mm/mmu_decl.h @@ -0,0 +1,85 @@ +/* + * Declarations of procedures and variables shared between files + * in arch/ppc/mm/. + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ +#include <asm/tlbflush.h> +#include <asm/mmu.h> + +extern void mapin_ram(void); +extern int map_page(unsigned long va, phys_addr_t pa, int flags); +extern void setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags); +extern void reserve_phys_mem(unsigned long start, unsigned long size); +extern void settlbcam(int index, unsigned long virt, phys_addr_t phys, + unsigned int size, int flags, unsigned int pid); +extern void invalidate_tlbcam_entry(int index); + +extern int __map_without_bats; +extern unsigned long ioremap_base; +extern unsigned long ioremap_bot; +extern unsigned int rtas_data, rtas_size; + +extern unsigned long total_memory; +extern unsigned long total_lowmem; +extern int mem_init_done; + +extern PTE *Hash, *Hash_end; +extern unsigned long Hash_size, Hash_mask; + +extern unsigned int num_tlbcam_entries; + +/* ...and now those things that may be slightly different between processor + * architectures. -- Dan + */ +#if defined(CONFIG_8xx) +#define flush_HPTE(X, va, pg) _tlbie(va) +#define MMU_init_hw() do { } while(0) +#define mmu_mapin_ram() (0UL) + +#elif defined(CONFIG_4xx) +#define flush_HPTE(X, va, pg) _tlbie(va) +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); + +#elif defined(CONFIG_FSL_BOOKE) +#define flush_HPTE(X, va, pg) _tlbie(va) +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); +extern void adjust_total_lowmem(void); + +#else +/* anything except 4xx or 8xx */ +extern void MMU_init_hw(void); +extern unsigned long mmu_mapin_ram(void); + +/* Be careful....this needs to be updated if we ever encounter 603 SMPs, + * which includes all new 82xx processors. We need tlbie/tlbsync here + * in that case (I think). -- Dan. + */ +static inline void flush_HPTE(unsigned context, unsigned long va, + unsigned long pdval) +{ + if ((Hash != 0) && + cpu_has_feature(CPU_FTR_HPTE_TABLE)) + flush_hash_pages(0, va, pdval, 1); + else + _tlbie(va); +} +#endif diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c new file mode 100644 index 00000000000..81a3d7446d3 --- /dev/null +++ b/arch/powerpc/mm/pgtable.c @@ -0,0 +1,470 @@ +/* + * This file contains the routines setting up the linux page tables. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/mm.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/highmem.h> + +#include <asm/pgtable.h> +#include <asm/pgalloc.h> +#include <asm/io.h> + +#include "mmu_decl.h" + +unsigned long ioremap_base; +unsigned long ioremap_bot; +int io_bat_index; + +#if defined(CONFIG_6xx) || defined(CONFIG_POWER3) +#define HAVE_BATS 1 +#endif + +#if defined(CONFIG_FSL_BOOKE) +#define HAVE_TLBCAM 1 +#endif + +extern char etext[], _stext[]; + +#ifdef CONFIG_SMP +extern void hash_page_sync(void); +#endif + +#ifdef HAVE_BATS +extern unsigned long v_mapped_by_bats(unsigned long va); +extern unsigned long p_mapped_by_bats(unsigned long pa); +void setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags); + +#else /* !HAVE_BATS */ +#define v_mapped_by_bats(x) (0UL) +#define p_mapped_by_bats(x) (0UL) +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM +extern unsigned int tlbcam_index; +extern unsigned long v_mapped_by_tlbcam(unsigned long va); +extern unsigned long p_mapped_by_tlbcam(unsigned long pa); +#else /* !HAVE_TLBCAM */ +#define v_mapped_by_tlbcam(x) (0UL) +#define p_mapped_by_tlbcam(x) (0UL) +#endif /* HAVE_TLBCAM */ + +#ifdef CONFIG_PTE_64BIT +/* 44x uses an 8kB pgdir because it has 8-byte Linux PTEs. */ +#define PGDIR_ORDER 1 +#else +#define PGDIR_ORDER 0 +#endif + +pgd_t *pgd_alloc(struct mm_struct *mm) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, PGDIR_ORDER); + return ret; +} + +void pgd_free(pgd_t *pgd) +{ + free_pages((unsigned long)pgd, PGDIR_ORDER); +} + +pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) +{ + pte_t *pte; + extern int mem_init_done; + extern void *early_get_page(void); + + if (mem_init_done) { + pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); + } else { + pte = (pte_t *)early_get_page(); + if (pte) + clear_page(pte); + } + return pte; +} + +struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) +{ + struct page *ptepage; + +#ifdef CONFIG_HIGHPTE + int flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_REPEAT; +#else + int flags = GFP_KERNEL | __GFP_REPEAT; +#endif + + ptepage = alloc_pages(flags, 0); + if (ptepage) + clear_highpage(ptepage); + return ptepage; +} + +void pte_free_kernel(pte_t *pte) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + free_page((unsigned long)pte); +} + +void pte_free(struct page *ptepage) +{ +#ifdef CONFIG_SMP + hash_page_sync(); +#endif + __free_page(ptepage); +} + +#ifndef CONFIG_PHYS_64BIT +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} +#else /* CONFIG_PHYS_64BIT */ +void __iomem * +ioremap64(unsigned long long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE); +} + +void __iomem * +ioremap(phys_addr_t addr, unsigned long size) +{ + phys_addr_t addr64 = fixup_bigphys_addr(addr, size); + + return ioremap64(addr64, size); +} +#endif /* CONFIG_PHYS_64BIT */ + +void __iomem * +__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags) +{ + unsigned long v, i; + phys_addr_t p; + int err; + + /* + * Choose an address to map it to. + * Once the vmalloc system is running, we use it. + * Before then, we use space going down from ioremap_base + * (ioremap_bot records where we're up to). + */ + p = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - p; + + /* + * If the address lies within the first 16 MB, assume it's in ISA + * memory space + */ + if (p < 16*1024*1024) + p += _ISA_MEM_BASE; + + /* + * Don't allow anybody to remap normal RAM that we're using. + * mem_init() sets high_memory so only do the check after that. + */ + if ( mem_init_done && (p < virt_to_phys(high_memory)) ) + { + printk("__ioremap(): phys addr "PHYS_FMT" is RAM lr %p\n", p, + __builtin_return_address(0)); + return NULL; + } + + if (size == 0) + return NULL; + + /* + * Is it already mapped? Perhaps overlapped by a previous + * BAT mapping. If the whole area is mapped then we're done, + * otherwise remap it since we want to keep the virt addrs for + * each request contiguous. + * + * We make the assumption here that if the bottom and top + * of the range we want are mapped then it's mapped to the + * same virt address (and this is contiguous). + * -- Cort + */ + if ((v = p_mapped_by_bats(p)) /*&& p_mapped_by_bats(p+size-1)*/ ) + goto out; + + if ((v = p_mapped_by_tlbcam(p))) + goto out; + + if (mem_init_done) { + struct vm_struct *area; + area = get_vm_area(size, VM_IOREMAP); + if (area == 0) + return NULL; + v = (unsigned long) area->addr; + } else { + v = (ioremap_bot -= size); + } + + if ((flags & _PAGE_PRESENT) == 0) + flags |= _PAGE_KERNEL; + if (flags & _PAGE_NO_CACHE) + flags |= _PAGE_GUARDED; + + /* + * Should check if it is a candidate for a BAT mapping + */ + + err = 0; + for (i = 0; i < size && err == 0; i += PAGE_SIZE) + err = map_page(v+i, p+i, flags); + if (err) { + if (mem_init_done) + vunmap((void *)v); + return NULL; + } + +out: + return (void __iomem *) (v + ((unsigned long)addr & ~PAGE_MASK)); +} + +void iounmap(volatile void __iomem *addr) +{ + /* + * If mapped by BATs then there is nothing to do. + * Calling vfree() generates a benign warning. + */ + if (v_mapped_by_bats((unsigned long)addr)) return; + + if (addr > high_memory && (unsigned long) addr < ioremap_bot) + vunmap((void *) (PAGE_MASK & (unsigned long)addr)); +} + +void __iomem *ioport_map(unsigned long port, unsigned int len) +{ + return (void __iomem *) (port + _IO_BASE); +} + +void ioport_unmap(void __iomem *addr) +{ + /* Nothing to do */ +} +EXPORT_SYMBOL(ioport_map); +EXPORT_SYMBOL(ioport_unmap); + +int +map_page(unsigned long va, phys_addr_t pa, int flags) +{ + pmd_t *pd; + pte_t *pg; + int err = -ENOMEM; + + spin_lock(&init_mm.page_table_lock); + /* Use upper 10 bits of VA to index the first level map */ + pd = pmd_offset(pgd_offset_k(va), va); + /* Use middle 10 bits of VA to index the second-level map */ + pg = pte_alloc_kernel(&init_mm, pd, va); + if (pg != 0) { + err = 0; + set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); + if (mem_init_done) + flush_HPTE(0, va, pmd_val(*pd)); + } + spin_unlock(&init_mm.page_table_lock); + return err; +} + +/* + * Map in all of physical memory starting at KERNELBASE. + */ +void __init mapin_ram(void) +{ + unsigned long v, p, s, f; + + s = mmu_mapin_ram(); + v = KERNELBASE + s; + p = PPC_MEMSTART + s; + for (; s < total_lowmem; s += PAGE_SIZE) { + if ((char *) v >= _stext && (char *) v < etext) + f = _PAGE_RAM_TEXT; + else + f = _PAGE_RAM; + map_page(v, p, f); + v += PAGE_SIZE; + p += PAGE_SIZE; + } +} + +/* is x a power of 2? */ +#define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) + +/* is x a power of 4? */ +#define is_power_of_4(x) ((x) != 0 && (((x) & (x-1)) == 0) && (ffs(x) & 1)) + +/* + * Set up a mapping for a block of I/O. + * virt, phys, size must all be page-aligned. + * This should only be called before ioremap is called. + */ +void __init io_block_mapping(unsigned long virt, phys_addr_t phys, + unsigned int size, int flags) +{ + int i; + + if (virt > KERNELBASE && virt < ioremap_bot) + ioremap_bot = ioremap_base = virt; + +#ifdef HAVE_BATS + /* + * Use a BAT for this if possible... + */ + if (io_bat_index < 2 && is_power_of_2(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + setbat(io_bat_index, virt, phys, size, flags); + ++io_bat_index; + return; + } +#endif /* HAVE_BATS */ + +#ifdef HAVE_TLBCAM + /* + * Use a CAM for this if possible... + */ + if (tlbcam_index < num_tlbcam_entries && is_power_of_4(size) + && (virt & (size - 1)) == 0 && (phys & (size - 1)) == 0) { + settlbcam(tlbcam_index, virt, phys, size, flags, 0); + ++tlbcam_index; + return; + } +#endif /* HAVE_TLBCAM */ + + /* No BATs available, put it in the page tables. */ + for (i = 0; i < size; i += PAGE_SIZE) + map_page(virt + i, phys + i, flags); +} + +/* Scan the real Linux page tables and return a PTE pointer for + * a virtual address in a context. + * Returns true (1) if PTE was found, zero otherwise. The pointer to + * the PTE pointer is unmodified if PTE is not found. + */ +int +get_pteptr(struct mm_struct *mm, unsigned long addr, pte_t **ptep) +{ + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + int retval = 0; + + pgd = pgd_offset(mm, addr & PAGE_MASK); + if (pgd) { + pmd = pmd_offset(pgd, addr & PAGE_MASK); + if (pmd_present(*pmd)) { + pte = pte_offset_map(pmd, addr & PAGE_MASK); + if (pte) { + retval = 1; + *ptep = pte; + /* XXX caller needs to do pte_unmap, yuck */ + } + } + } + return(retval); +} + +/* Find physical address for this virtual address. Normally used by + * I/O functions, but anyone can call it. + */ +unsigned long iopa(unsigned long addr) +{ + unsigned long pa; + + /* I don't know why this won't work on PMacs or CHRP. It + * appears there is some bug, or there is some implicit + * mapping done not properly represented by BATs or in page + * tables.......I am actively working on resolving this, but + * can't hold up other stuff. -- Dan + */ + pte_t *pte; + struct mm_struct *mm; + + /* Check the BATs */ + pa = v_mapped_by_bats(addr); + if (pa) + return pa; + + /* Allow mapping of user addresses (within the thread) + * for DMA if necessary. + */ + if (addr < TASK_SIZE) + mm = current->mm; + else + mm = &init_mm; + + pa = 0; + if (get_pteptr(mm, addr, &pte)) { + pa = (pte_val(*pte) & PAGE_MASK) | (addr & ~PAGE_MASK); + pte_unmap(pte); + } + + return(pa); +} + +/* This is will find the virtual address for a physical one.... + * Swiped from APUS, could be dangerous :-). + * This is only a placeholder until I really find a way to make this + * work. -- Dan + */ +unsigned long +mm_ptov (unsigned long paddr) +{ + unsigned long ret; +#if 0 + if (paddr < 16*1024*1024) + ret = ZTWO_VADDR(paddr); + else { + int i; + + for (i = 0; i < kmap_chunk_count;){ + unsigned long phys = kmap_chunks[i++]; + unsigned long size = kmap_chunks[i++]; + unsigned long virt = kmap_chunks[i++]; + if (paddr >= phys + && paddr < (phys + size)){ + ret = virt + paddr - phys; + goto exit; + } + } + + ret = (unsigned long) __va(paddr); + } +exit: +#ifdef DEBUGPV + printk ("PTOV(%lx)=%lx\n", paddr, ret); +#endif +#else + ret = (unsigned long)paddr + KERNELBASE; +#endif + return ret; +} + diff --git a/arch/powerpc/mm/pgtable64.c b/arch/powerpc/mm/pgtable64.c new file mode 100644 index 00000000000..724f97e5dee --- /dev/null +++ b/arch/powerpc/mm/pgtable64.c @@ -0,0 +1,357 @@ +/* + * This file contains ioremap and related functions for 64-bit machines. + * + * Derived from arch/ppc64/mm/init.c + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@samba.org) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * Dave Engebretsen <engebret@us.ibm.com> + * Rework for PPC64 port. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> +#include <linux/mman.h> +#include <linux/mm.h> +#include <linux/swap.h> +#include <linux/stddef.h> +#include <linux/vmalloc.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/highmem.h> +#include <linux/idr.h> +#include <linux/nodemask.h> +#include <linux/module.h> + +#include <asm/pgalloc.h> +#include <asm/page.h> +#include <asm/prom.h> +#include <asm/lmb.h> +#include <asm/rtas.h> +#include <asm/io.h> +#include <asm/mmu_context.h> +#include <asm/pgtable.h> +#include <asm/mmu.h> +#include <asm/uaccess.h> +#include <asm/smp.h> +#include <asm/machdep.h> +#include <asm/tlb.h> +#include <asm/eeh.h> +#include <asm/processor.h> +#include <asm/mmzone.h> +#include <asm/cputable.h> +#include <asm/ppcdebug.h> +#include <asm/sections.h> +#include <asm/system.h> +#include <asm/iommu.h> +#include <asm/abs_addr.h> +#include <asm/vdso.h> +#include <asm/imalloc.h> + +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + +int mem_init_done; +unsigned long ioremap_bot = IMALLOC_BASE; +static unsigned long phbs_io_bot = PHBS_IO_BASE; + +extern pgd_t swapper_pg_dir[]; +extern struct task_struct *current_set[NR_CPUS]; + +unsigned long klimit = (unsigned long)_end; + +/* max amount of RAM to use */ +unsigned long __max_memory; + +/* info on what we think the IO hole is */ +unsigned long io_hole_start; +unsigned long io_hole_size; + +#ifdef CONFIG_PPC_ISERIES + +void __iomem *ioremap(unsigned long addr, unsigned long size) +{ + return (void __iomem *)addr; +} + +extern void __iomem *__ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + return (void __iomem *)addr; +} + +void iounmap(volatile void __iomem *addr) +{ + return; +} + +#else + +/* + * map_io_page currently only called by __ioremap + * map_io_page adds an entry to the ioremap page table + * and adds an entry to the HPT, possibly bolting it + */ +static int map_io_page(unsigned long ea, unsigned long pa, int flags) +{ + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + unsigned long vsid; + + if (mem_init_done) { + spin_lock(&init_mm.page_table_lock); + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + ptep = pte_alloc_kernel(&init_mm, pmdp, ea); + if (!ptep) + return -ENOMEM; + set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, + __pgprot(flags))); + spin_unlock(&init_mm.page_table_lock); + } else { + unsigned long va, vpn, hash, hpteg; + + /* + * If the mm subsystem is not fully up, we cannot create a + * linux page table entry for this mapping. Simply bolt an + * entry in the hardware page table. + */ + vsid = get_kernel_vsid(ea); + va = (vsid << 28) | (ea & 0xFFFFFFF); + vpn = va >> PAGE_SHIFT; + + hash = hpt_hash(vpn, 0); + + hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP); + + /* Panic if a pte grpup is full */ + if (ppc_md.hpte_insert(hpteg, va, pa >> PAGE_SHIFT, + HPTE_V_BOLTED, + _PAGE_NO_CACHE|_PAGE_GUARDED|PP_RWXX) + == -1) { + panic("map_io_page: could not insert mapping"); + } + } + return 0; +} + + +static void __iomem * __ioremap_com(unsigned long addr, unsigned long pa, + unsigned long ea, unsigned long size, + unsigned long flags) +{ + unsigned long i; + + if ((flags & _PAGE_PRESENT) == 0) + flags |= pgprot_val(PAGE_KERNEL); + + for (i = 0; i < size; i += PAGE_SIZE) + if (map_io_page(ea+i, pa+i, flags)) + return NULL; + + return (void __iomem *) (ea + (addr & ~PAGE_MASK)); +} + + +void __iomem * +ioremap(unsigned long addr, unsigned long size) +{ + return __ioremap(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED); +} + +void __iomem * __ioremap(unsigned long addr, unsigned long size, + unsigned long flags) +{ + unsigned long pa, ea; + void __iomem *ret; + + /* + * Choose an address to map it to. + * Once the imalloc system is running, we use it. + * Before that, we map using addresses going + * up from ioremap_bot. imalloc will use + * the addresses from ioremap_bot through + * IMALLOC_END + * + */ + pa = addr & PAGE_MASK; + size = PAGE_ALIGN(addr + size) - pa; + + if (size == 0) + return NULL; + + if (mem_init_done) { + struct vm_struct *area; + area = im_get_free_area(size); + if (area == NULL) + return NULL; + ea = (unsigned long)(area->addr); + ret = __ioremap_com(addr, pa, ea, size, flags); + if (!ret) + im_free(area->addr); + } else { + ea = ioremap_bot; + ret = __ioremap_com(addr, pa, ea, size, flags); + if (ret) + ioremap_bot += size; + } + return ret; +} + +#define IS_PAGE_ALIGNED(_val) ((_val) == ((_val) & PAGE_MASK)) + +int __ioremap_explicit(unsigned long pa, unsigned long ea, + unsigned long size, unsigned long flags) +{ + struct vm_struct *area; + void __iomem *ret; + + /* For now, require page-aligned values for pa, ea, and size */ + if (!IS_PAGE_ALIGNED(pa) || !IS_PAGE_ALIGNED(ea) || + !IS_PAGE_ALIGNED(size)) { + printk(KERN_ERR "unaligned value in %s\n", __FUNCTION__); + return 1; + } + + if (!mem_init_done) { + /* Two things to consider in this case: + * 1) No records will be kept (imalloc, etc) that the region + * has been remapped + * 2) It won't be easy to iounmap() the region later (because + * of 1) + */ + ; + } else { + area = im_get_area(ea, size, + IM_REGION_UNUSED|IM_REGION_SUBSET|IM_REGION_EXISTS); + if (area == NULL) { + /* Expected when PHB-dlpar is in play */ + return 1; + } + if (ea != (unsigned long) area->addr) { + printk(KERN_ERR "unexpected addr return from " + "im_get_area\n"); + return 1; + } + } + + ret = __ioremap_com(pa, pa, ea, size, flags); + if (ret == NULL) { + printk(KERN_ERR "ioremap_explicit() allocation failure !\n"); + return 1; + } + if (ret != (void *) ea) { + printk(KERN_ERR "__ioremap_com() returned unexpected addr\n"); + return 1; + } + + return 0; +} + +/* + * Unmap an IO region and remove it from imalloc'd list. + * Access to IO memory should be serialized by driver. + * This code is modeled after vmalloc code - unmap_vm_area() + * + * XXX what about calls before mem_init_done (ie python_countermeasures()) + */ +void iounmap(volatile void __iomem *token) +{ + void *addr; + + if (!mem_init_done) + return; + + addr = (void *) ((unsigned long __force) token & PAGE_MASK); + + im_free(addr); +} + +static int iounmap_subset_regions(unsigned long addr, unsigned long size) +{ + struct vm_struct *area; + + /* Check whether subsets of this region exist */ + area = im_get_area(addr, size, IM_REGION_SUPERSET); + if (area == NULL) + return 1; + + while (area) { + iounmap((void __iomem *) area->addr); + area = im_get_area(addr, size, + IM_REGION_SUPERSET); + } + + return 0; +} + +int iounmap_explicit(volatile void __iomem *start, unsigned long size) +{ + struct vm_struct *area; + unsigned long addr; + int rc; + + addr = (unsigned long __force) start & PAGE_MASK; + + /* Verify that the region either exists or is a subset of an existing + * region. In the latter case, split the parent region to create + * the exact region + */ + area = im_get_area(addr, size, + IM_REGION_EXISTS | IM_REGION_SUBSET); + if (area == NULL) { + /* Determine whether subset regions exist. If so, unmap */ + rc = iounmap_subset_regions(addr, size); + if (rc) { + printk(KERN_ERR + "%s() cannot unmap nonexistent range 0x%lx\n", + __FUNCTION__, addr); + return 1; + } + } else { + iounmap((void __iomem *) area->addr); + } + /* + * FIXME! This can't be right: + iounmap(area->addr); + * Maybe it should be "iounmap(area);" + */ + return 0; +} + +#endif + +EXPORT_SYMBOL(ioremap); +EXPORT_SYMBOL(__ioremap); +EXPORT_SYMBOL(iounmap); diff --git a/arch/powerpc/mm/ppc_mmu.c b/arch/powerpc/mm/ppc_mmu.c new file mode 100644 index 00000000000..9a381ed5eb2 --- /dev/null +++ b/arch/powerpc/mm/ppc_mmu.c @@ -0,0 +1,296 @@ +/* + * This file contains the routines for handling the MMU on those + * PowerPC implementations where the MMU substantially follows the + * architecture specification. This includes the 6xx, 7xx, 7xxx, + * 8260, and POWER3 implementations but excludes the 8xx and 4xx. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/highmem.h> + +#include <asm/prom.h> +#include <asm/mmu.h> +#include <asm/machdep.h> + +#include "mmu_decl.h" +#include "mem_pieces.h" + +PTE *Hash, *Hash_end; +unsigned long Hash_size, Hash_mask; +unsigned long _SDR1; + +union ubat { /* BAT register values to be loaded */ + BAT bat; +#ifdef CONFIG_PPC64BRIDGE + u64 word[2]; +#else + u32 word[2]; +#endif +} BATS[4][2]; /* 4 pairs of IBAT, DBAT */ + +struct batrange { /* stores address ranges mapped by BATs */ + unsigned long start; + unsigned long limit; + unsigned long phys; +} bat_addrs[4]; + +/* + * Return PA for this VA if it is mapped by a BAT, or 0 + */ +unsigned long v_mapped_by_bats(unsigned long va) +{ + int b; + for (b = 0; b < 4; ++b) + if (va >= bat_addrs[b].start && va < bat_addrs[b].limit) + return bat_addrs[b].phys + (va - bat_addrs[b].start); + return 0; +} + +/* + * Return VA for a given PA or 0 if not mapped + */ +unsigned long p_mapped_by_bats(unsigned long pa) +{ + int b; + for (b = 0; b < 4; ++b) + if (pa >= bat_addrs[b].phys + && pa < (bat_addrs[b].limit-bat_addrs[b].start) + +bat_addrs[b].phys) + return bat_addrs[b].start+(pa-bat_addrs[b].phys); + return 0; +} + +unsigned long __init mmu_mapin_ram(void) +{ +#ifdef CONFIG_POWER4 + return 0; +#else + unsigned long tot, bl, done; + unsigned long max_size = (256<<20); + unsigned long align; + + if (__map_without_bats) + return 0; + + /* Set up BAT2 and if necessary BAT3 to cover RAM. */ + + /* Make sure we don't map a block larger than the + smallest alignment of the physical address. */ + /* alignment of PPC_MEMSTART */ + align = ~(PPC_MEMSTART-1) & PPC_MEMSTART; + /* set BAT block size to MIN(max_size, align) */ + if (align && align < max_size) + max_size = align; + + tot = total_lowmem; + for (bl = 128<<10; bl < max_size; bl <<= 1) { + if (bl * 2 > tot) + break; + } + + setbat(2, KERNELBASE, PPC_MEMSTART, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[2].limit - KERNELBASE + 1; + if ((done < tot) && !bat_addrs[3].limit) { + /* use BAT3 to cover a bit more */ + tot -= done; + for (bl = 128<<10; bl < max_size; bl <<= 1) + if (bl * 2 > tot) + break; + setbat(3, KERNELBASE+done, PPC_MEMSTART+done, bl, _PAGE_RAM); + done = (unsigned long)bat_addrs[3].limit - KERNELBASE + 1; + } + + return done; +#endif +} + +/* + * Set up one of the I/D BAT (block address translation) register pairs. + * The parameters are not checked; in particular size must be a power + * of 2 between 128k and 256M. + */ +void __init setbat(int index, unsigned long virt, unsigned long phys, + unsigned int size, int flags) +{ + unsigned int bl; + int wimgxpp; + union ubat *bat = BATS[index]; + + if (((flags & _PAGE_NO_CACHE) == 0) && + cpu_has_feature(CPU_FTR_NEED_COHERENT)) + flags |= _PAGE_COHERENT; + + bl = (size >> 17) - 1; + if (PVR_VER(mfspr(SPRN_PVR)) != 1) { + /* 603, 604, etc. */ + /* Do DBAT first */ + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT | _PAGE_GUARDED); + wimgxpp |= (flags & _PAGE_RW)? BPP_RW: BPP_RX; + bat[1].word[0] = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat[1].word[1] = phys | wimgxpp; +#ifndef CONFIG_KGDB /* want user access for breakpoints */ + if (flags & _PAGE_USER) +#endif + bat[1].bat.batu.vp = 1; + if (flags & _PAGE_GUARDED) { + /* G bit must be zero in IBATs */ + bat[0].word[0] = bat[0].word[1] = 0; + } else { + /* make IBAT same as DBAT */ + bat[0] = bat[1]; + } + } else { + /* 601 cpu */ + if (bl > BL_8M) + bl = BL_8M; + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT); + wimgxpp |= (flags & _PAGE_RW)? + ((flags & _PAGE_USER)? PP_RWRW: PP_RWXX): PP_RXRX; + bat->word[0] = virt | wimgxpp | 4; /* Ks=0, Ku=1 */ + bat->word[1] = phys | bl | 0x40; /* V=1 */ + } + + bat_addrs[index].start = virt; + bat_addrs[index].limit = virt + ((bl + 1) << 17) - 1; + bat_addrs[index].phys = phys; +} + +/* + * Initialize the hash table and patch the instructions in hashtable.S. + */ +void __init MMU_init_hw(void) +{ + unsigned int hmask, mb, mb2; + unsigned int n_hpteg, lg_n_hpteg; + + extern unsigned int hash_page_patch_A[]; + extern unsigned int hash_page_patch_B[], hash_page_patch_C[]; + extern unsigned int hash_page[]; + extern unsigned int flush_hash_patch_A[], flush_hash_patch_B[]; + + if (!cpu_has_feature(CPU_FTR_HPTE_TABLE)) { + /* + * Put a blr (procedure return) instruction at the + * start of hash_page, since we can still get DSI + * exceptions on a 603. + */ + hash_page[0] = 0x4e800020; + flush_icache_range((unsigned long) &hash_page[0], + (unsigned long) &hash_page[1]); + return; + } + + if ( ppc_md.progress ) ppc_md.progress("hash:enter", 0x105); + +#ifdef CONFIG_PPC64BRIDGE +#define LG_HPTEG_SIZE 7 /* 128 bytes per HPTEG */ +#define SDR1_LOW_BITS (lg_n_hpteg - 11) +#define MIN_N_HPTEG 2048 /* min 256kB hash table */ +#else +#define LG_HPTEG_SIZE 6 /* 64 bytes per HPTEG */ +#define SDR1_LOW_BITS ((n_hpteg - 1) >> 10) +#define MIN_N_HPTEG 1024 /* min 64kB hash table */ +#endif + +#ifdef CONFIG_POWER4 + /* The hash table has already been allocated and initialized + in prom.c */ + n_hpteg = Hash_size >> LG_HPTEG_SIZE; + lg_n_hpteg = __ilog2(n_hpteg); + + /* Remove the hash table from the available memory */ + if (Hash) + reserve_phys_mem(__pa(Hash), Hash_size); + +#else /* CONFIG_POWER4 */ + /* + * Allow 1 HPTE (1/8 HPTEG) for each page of memory. + * This is less than the recommended amount, but then + * Linux ain't AIX. + */ + n_hpteg = total_memory / (PAGE_SIZE * 8); + if (n_hpteg < MIN_N_HPTEG) + n_hpteg = MIN_N_HPTEG; + lg_n_hpteg = __ilog2(n_hpteg); + if (n_hpteg & (n_hpteg - 1)) { + ++lg_n_hpteg; /* round up if not power of 2 */ + n_hpteg = 1 << lg_n_hpteg; + } + Hash_size = n_hpteg << LG_HPTEG_SIZE; + + /* + * Find some memory for the hash table. + */ + if ( ppc_md.progress ) ppc_md.progress("hash:find piece", 0x322); + Hash = mem_pieces_find(Hash_size, Hash_size); + cacheable_memzero(Hash, Hash_size); + _SDR1 = __pa(Hash) | SDR1_LOW_BITS; +#endif /* CONFIG_POWER4 */ + + Hash_end = (PTE *) ((unsigned long)Hash + Hash_size); + + printk("Total memory = %ldMB; using %ldkB for hash table (at %p)\n", + total_memory >> 20, Hash_size >> 10, Hash); + + + /* + * Patch up the instructions in hashtable.S:create_hpte + */ + if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345); + Hash_mask = n_hpteg - 1; + hmask = Hash_mask >> (16 - LG_HPTEG_SIZE); + mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg; + if (lg_n_hpteg > 16) + mb2 = 16 - LG_HPTEG_SIZE; + + hash_page_patch_A[0] = (hash_page_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + hash_page_patch_A[1] = (hash_page_patch_A[1] & ~0x7c0) | (mb << 6); + hash_page_patch_A[2] = (hash_page_patch_A[2] & ~0x7c0) | (mb2 << 6); + hash_page_patch_B[0] = (hash_page_patch_B[0] & ~0xffff) | hmask; + hash_page_patch_C[0] = (hash_page_patch_C[0] & ~0xffff) | hmask; + + /* + * Ensure that the locations we've patched have been written + * out from the data cache and invalidated in the instruction + * cache, on those machines with split caches. + */ + flush_icache_range((unsigned long) &hash_page_patch_A[0], + (unsigned long) &hash_page_patch_C[1]); + + /* + * Patch up the instructions in hashtable.S:flush_hash_page + */ + flush_hash_patch_A[0] = (flush_hash_patch_A[0] & ~0xffff) + | ((unsigned int)(Hash) >> 16); + flush_hash_patch_A[1] = (flush_hash_patch_A[1] & ~0x7c0) | (mb << 6); + flush_hash_patch_A[2] = (flush_hash_patch_A[2] & ~0x7c0) | (mb2 << 6); + flush_hash_patch_B[0] = (flush_hash_patch_B[0] & ~0xffff) | hmask; + flush_icache_range((unsigned long) &flush_hash_patch_A[0], + (unsigned long) &flush_hash_patch_B[1]); + + if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205); +} diff --git a/arch/powerpc/mm/tlb.c b/arch/powerpc/mm/tlb.c new file mode 100644 index 00000000000..6c3dc3c44c8 --- /dev/null +++ b/arch/powerpc/mm/tlb.c @@ -0,0 +1,183 @@ +/* + * This file contains the routines for TLB flushing. + * On machines where the MMU uses a hash table to store virtual to + * physical translations, these routines flush entries from the + * hash table also. + * -- paulus + * + * Derived from arch/ppc/mm/init.c: + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au) + * and Cort Dougan (PReP) (cort@cs.nmt.edu) + * Copyright (C) 1996 Paul Mackerras + * Amiga/APUS changes by Jesper Skov (jskov@cygnus.co.uk). + * + * Derived from "arch/i386/mm/init.c" + * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/highmem.h> +#include <asm/tlbflush.h> +#include <asm/tlb.h> + +#include "mmu_decl.h" + +/* + * Called when unmapping pages to flush entries from the TLB/hash table. + */ +void flush_hash_entry(struct mm_struct *mm, pte_t *ptep, unsigned long addr) +{ + unsigned long ptephys; + + if (Hash != 0) { + ptephys = __pa(ptep) & PAGE_MASK; + flush_hash_pages(mm->context, addr, ptephys, 1); + } +} + +/* + * Called by ptep_set_access_flags, must flush on CPUs for which the + * DSI handler can't just "fixup" the TLB on a write fault + */ +void flush_tlb_page_nohash(struct vm_area_struct *vma, unsigned long addr) +{ + if (Hash != 0) + return; + _tlbie(addr); +} + +/* + * Called at the end of a mmu_gather operation to make sure the + * TLB flush is completely done. + */ +void tlb_flush(struct mmu_gather *tlb) +{ + if (Hash == 0) { + /* + * 603 needs to flush the whole TLB here since + * it doesn't use a hash table. + */ + _tlbia(); + } +} + +/* + * TLB flushing: + * + * - flush_tlb_mm(mm) flushes the specified mm context TLB's + * - flush_tlb_page(vma, vmaddr) flushes one page + * - flush_tlb_range(vma, start, end) flushes a range of pages + * - flush_tlb_kernel_range(start, end) flushes kernel pages + * + * since the hardware hash table functions as an extension of the + * tlb as far as the linux tables are concerned, flush it too. + * -- Cort + */ + +/* + * 750 SMP is a Bad Idea because the 750 doesn't broadcast all + * the cache operations on the bus. Hence we need to use an IPI + * to get the other CPU(s) to invalidate their TLBs. + */ +#ifdef CONFIG_SMP_750 +#define FINISH_FLUSH smp_send_tlb_invalidate(0) +#else +#define FINISH_FLUSH do { } while (0) +#endif + +static void flush_range(struct mm_struct *mm, unsigned long start, + unsigned long end) +{ + pmd_t *pmd; + unsigned long pmd_end; + int count; + unsigned int ctx = mm->context; + + if (Hash == 0) { + _tlbia(); + return; + } + start &= PAGE_MASK; + if (start >= end) + return; + end = (end - 1) | ~PAGE_MASK; + pmd = pmd_offset(pgd_offset(mm, start), start); + for (;;) { + pmd_end = ((start + PGDIR_SIZE) & PGDIR_MASK) - 1; + if (pmd_end > end) + pmd_end = end; + if (!pmd_none(*pmd)) { + count = ((pmd_end - start) >> PAGE_SHIFT) + 1; + flush_hash_pages(ctx, start, pmd_val(*pmd), count); + } + if (pmd_end == end) + break; + start = pmd_end + 1; + ++pmd; + } +} + +/* + * Flush kernel TLB entries in the given range + */ +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + flush_range(&init_mm, start, end); + FINISH_FLUSH; +} + +/* + * Flush all the (user) entries for the address space described by mm. + */ +void flush_tlb_mm(struct mm_struct *mm) +{ + struct vm_area_struct *mp; + + if (Hash == 0) { + _tlbia(); + return; + } + + for (mp = mm->mmap; mp != NULL; mp = mp->vm_next) + flush_range(mp->vm_mm, mp->vm_start, mp->vm_end); + FINISH_FLUSH; +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) +{ + struct mm_struct *mm; + pmd_t *pmd; + + if (Hash == 0) { + _tlbie(vmaddr); + return; + } + mm = (vmaddr < TASK_SIZE)? vma->vm_mm: &init_mm; + pmd = pmd_offset(pgd_offset(mm, vmaddr), vmaddr); + if (!pmd_none(*pmd)) + flush_hash_pages(mm->context, vmaddr, pmd_val(*pmd), 1); + FINISH_FLUSH; +} + +/* + * For each address in the range, find the pte for the address + * and check _PAGE_HASHPTE bit; if it is set, find and destroy + * the corresponding HPTE. + */ +void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + flush_range(vma->vm_mm, start, end); + FINISH_FLUSH; +} diff --git a/arch/powerpc/platforms/4xx/Kconfig b/arch/powerpc/platforms/4xx/Kconfig new file mode 100644 index 00000000000..ed39d6a3d22 --- /dev/null +++ b/arch/powerpc/platforms/4xx/Kconfig @@ -0,0 +1,280 @@ +config 4xx + bool + depends on 40x || 44x + default y + +config WANT_EARLY_SERIAL + bool + select SERIAL_8250 + default n + +menu "AMCC 4xx options" + depends on 4xx + +choice + prompt "Machine Type" + depends on 40x + default WALNUT + +config BUBINGA + bool "Bubinga" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM 405EP evaluation board. + +config CPCI405 + bool "CPCI405" + help + This option enables support for the CPCI405 board. + +config EP405 + bool "EP405/EP405PC" + help + This option enables support for the EP405/EP405PC boards. + +config REDWOOD_5 + bool "Redwood-5" + help + This option enables support for the IBM STB04 evaluation board. + +config REDWOOD_6 + bool "Redwood-6" + help + This option enables support for the IBM STBx25xx evaluation board. + +config SYCAMORE + bool "Sycamore" + help + This option enables support for the IBM PPC405GPr evaluation board. + +config WALNUT + bool "Walnut" + help + This option enables support for the IBM PPC405GP evaluation board. + +config XILINX_ML300 + bool "Xilinx-ML300" + help + This option enables support for the Xilinx ML300 evaluation board. + +endchoice + +choice + prompt "Machine Type" + depends on 44x + default EBONY + +config BAMBOO + bool "Bamboo" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM PPC440EP evaluation board. + +config EBONY + bool "Ebony" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM PPC440GP evaluation board. + +config LUAN + bool "Luan" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM PPC440SP evaluation board. + +config OCOTEA + bool "Ocotea" + select WANT_EARLY_SERIAL + help + This option enables support for the IBM PPC440GX evaluation board. + +endchoice + +config EP405PC + bool "EP405PC Support" + depends on EP405 + + +# It's often necessary to know the specific 4xx processor type. +# Fortunately, it is impled (so far) from the board type, so we +# don't need to ask more redundant questions. +config NP405H + bool + depends on ASH + default y + +config 440EP + bool + depends on BAMBOO + select PPC_FPU + default y + +config 440GP + bool + depends on EBONY + default y + +config 440GX + bool + depends on OCOTEA + default y + +config 440SP + bool + depends on LUAN + default y + +config 440 + bool + depends on 440GP || 440SP || 440EP + default y + +config 440A + bool + depends on 440GX + default y + +config IBM440EP_ERR42 + bool + depends on 440EP + default y + +# All 405-based cores up until the 405GPR and 405EP have this errata. +config IBM405_ERR77 + bool + depends on 40x && !403GCX && !405GPR && !405EP + default y + +# All 40x-based cores, up until the 405GPR and 405EP have this errata. +config IBM405_ERR51 + bool + depends on 40x && !405GPR && !405EP + default y + +config BOOKE + bool + depends on 44x + default y + +config IBM_OCP + bool + depends on ASH || BAMBOO || BUBINGA || CPCI405 || EBONY || EP405 || LUAN || OCOTEA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT + default y + +config XILINX_OCP + bool + depends on XILINX_ML300 + default y + +config IBM_EMAC4 + bool + depends on 440GX || 440SP + default y + +config BIOS_FIXUP + bool + depends on BUBINGA || EP405 || SYCAMORE || WALNUT + default y + +# OAK doesn't exist but wanted to keep this around for any future 403GCX boards +config 403GCX + bool + depends OAK + default y + +config 405EP + bool + depends on BUBINGA + default y + +config 405GP + bool + depends on CPCI405 || EP405 || WALNUT + default y + +config 405GPR + bool + depends on SYCAMORE + default y + +config VIRTEX_II_PRO + bool + depends on XILINX_ML300 + default y + +config STB03xxx + bool + depends on REDWOOD_5 || REDWOOD_6 + default y + +config EMBEDDEDBOOT + bool + depends on EP405 || XILINX_ML300 + default y + +config IBM_OPENBIOS + bool + depends on ASH || BUBINGA || REDWOOD_5 || REDWOOD_6 || SYCAMORE || WALNUT + default y + +config PPC4xx_DMA + bool "PPC4xx DMA controller support" + depends on 4xx + +config PPC4xx_EDMA + bool + depends on !STB03xxx && PPC4xx_DMA + default y + +config PPC_GEN550 + bool + depends on 4xx + default y + +choice + prompt "TTYS0 device and default console" + depends on 40x + default UART0_TTYS0 + +config UART0_TTYS0 + bool "UART0" + +config UART0_TTYS1 + bool "UART1" + +endchoice + +config SERIAL_SICC + bool "SICC Serial port support" + depends on STB03xxx + +config UART1_DFLT_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y + +config SERIAL_SICC_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y +endmenu + + +menu "IBM 40x options" + depends on 40x + +config SERIAL_SICC + bool "SICC Serial port" + depends on STB03xxx + +config UART1_DFLT_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y + +config SERIAL_SICC_CONSOLE + bool + depends on SERIAL_SICC && UART0_TTYS1 + default y + +endmenu diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig new file mode 100644 index 00000000000..c5bc2821d99 --- /dev/null +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -0,0 +1,86 @@ +config 85xx + bool + depends on E500 + default y + +config PPC_INDIRECT_PCI_BE + bool + depends on 85xx + default y + +menu "Freescale 85xx options" + depends on E500 + +choice + prompt "Machine Type" + depends on 85xx + default MPC8540_ADS + +config MPC8540_ADS + bool "Freescale MPC8540 ADS" + help + This option enables support for the MPC 8540 ADS evaluation board. + +config MPC8548_CDS + bool "Freescale MPC8548 CDS" + help + This option enablese support for the MPC8548 CDS evaluation board. + +config MPC8555_CDS + bool "Freescale MPC8555 CDS" + help + This option enablese support for the MPC8555 CDS evaluation board. + +config MPC8560_ADS + bool "Freescale MPC8560 ADS" + help + This option enables support for the MPC 8560 ADS evaluation board. + +config SBC8560 + bool "WindRiver PowerQUICC III SBC8560" + help + This option enables support for the WindRiver PowerQUICC III + SBC8560 board. + +config STX_GP3 + bool "Silicon Turnkey Express GP3" + help + This option enables support for the Silicon Turnkey Express GP3 + board. + +endchoice + +# It's often necessary to know the specific 85xx processor type. +# Fortunately, it is implied (so far) from the board type, so we +# don't need to ask more redundant questions. +config MPC8540 + bool + depends on MPC8540_ADS + default y + +config MPC8548 + bool + depends on MPC8548_CDS + default y + +config MPC8555 + bool + depends on MPC8555_CDS + default y + +config MPC8560 + bool + depends on SBC8560 || MPC8560_ADS || STX_GP3 + default y + +config 85xx_PCI2 + bool "Supprt for 2nd PCI host controller" + depends on MPC8555_CDS + default y + +config PPC_GEN550 + bool + depends on MPC8540 || SBC8560 || MPC8555 + default y + +endmenu diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig new file mode 100644 index 00000000000..c8c0ba3cf8e --- /dev/null +++ b/arch/powerpc/platforms/8xx/Kconfig @@ -0,0 +1,352 @@ +config FADS + bool + +choice + prompt "8xx Machine Type" + depends on 8xx + default RPXLITE + +config RPXLITE + bool "RPX-Lite" + ---help--- + Single-board computers based around the PowerPC MPC8xx chips and + intended for embedded applications. The following types are + supported: + + RPX-Lite: + Embedded Planet RPX Lite. PC104 form-factor SBC based on the MPC823. + + RPX-Classic: + Embedded Planet RPX Classic Low-fat. Credit-card-size SBC based on + the MPC 860 + + BSE-IP: + Bright Star Engineering ip-Engine. + + TQM823L: + TQM850L: + TQM855L: + TQM860L: + MPC8xx based family of mini modules, half credit card size, + up to 64 MB of RAM, 8 MB Flash, (Fast) Ethernet, 2 x serial ports, + 2 x CAN bus interface, ... + Manufacturer: TQ Components, www.tq-group.de + Date of Release: October (?) 1999 + End of Life: not yet :-) + URL: + - module: <http://www.denx.de/PDF/TQM8xxLHWM201.pdf> + - starter kit: <http://www.denx.de/PDF/STK8xxLHWM201.pdf> + - images: <http://www.denx.de/embedded-ppc-en.html> + + FPS850L: + FingerPrint Sensor System (based on TQM850L) + Manufacturer: IKENDI AG, <http://www.ikendi.com/> + Date of Release: November 1999 + End of life: end 2000 ? + URL: see TQM850L + + IVMS8: + MPC860 based board used in the "Integrated Voice Mail System", + Small Version (8 voice channels) + Manufacturer: Speech Design, <http://www.speech-design.de/> + Date of Release: December 2000 (?) + End of life: - + URL: <http://www.speech-design.de/> + + IVML24: + MPC860 based board used in the "Integrated Voice Mail System", + Large Version (24 voice channels) + Manufacturer: Speech Design, <http://www.speech-design.de/> + Date of Release: March 2001 (?) + End of life: - + URL: <http://www.speech-design.de/> + + HERMES: + Hermes-Pro ISDN/LAN router with integrated 8 x hub + Manufacturer: Multidata Gesellschaft fur Datentechnik und Informatik + <http://www.multidata.de/> + Date of Release: 2000 (?) + End of life: - + URL: <http://www.multidata.de/english/products/hpro.htm> + + IP860: + VMEBus IP (Industry Pack) carrier board with MPC860 + Manufacturer: MicroSys GmbH, <http://www.microsys.de/> + Date of Release: ? + End of life: - + URL: <http://www.microsys.de/html/ip860.html> + + PCU_E: + PCU = Peripheral Controller Unit, Extended + Manufacturer: Siemens AG, ICN (Information and Communication Networks) + <http://www.siemens.de/page/1,3771,224315-1-999_2_226207-0,00.html> + Date of Release: April 2001 + End of life: August 2001 + URL: n. a. + +config RPXCLASSIC + bool "RPX-Classic" + help + The RPX-Classic is a single-board computer based on the Motorola + MPC860. It features 16MB of DRAM and a variable amount of flash, + I2C EEPROM, thermal monitoring, a PCMCIA slot, a DIP switch and two + LEDs. Variants with Ethernet ports exist. Say Y here to support it + directly. + +config BSEIP + bool "BSE-IP" + help + Say Y here to support the Bright Star Engineering ipEngine SBC. + This is a credit-card-sized device featuring a MPC823 processor, + 26MB DRAM, 4MB flash, Ethernet, a 16K-gate FPGA, USB, an LCD/video + controller, and two RS232 ports. + +config MPC8XXFADS + bool "FADS" + select FADS + +config MPC86XADS + bool "MPC86XADS" + help + MPC86x Application Development System by Freescale Semiconductor. + The MPC86xADS is meant to serve as a platform for s/w and h/w + development around the MPC86X processor families. + select FADS + +config MPC885ADS + bool "MPC885ADS" + help + Freescale Semiconductor MPC885 Application Development System (ADS). + Also known as DUET. + The MPC885ADS is meant to serve as a platform for s/w and h/w + development around the MPC885 processor family. + +config TQM823L + bool "TQM823L" + help + Say Y here to support the TQM823L, one of an MPC8xx-based family of + mini SBCs (half credit-card size) from TQ Components first released + in late 1999. Technical references are at + <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and + <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at + <http://www.denx.de/embedded-ppc-en.html>. + +config TQM850L + bool "TQM850L" + help + Say Y here to support the TQM850L, one of an MPC8xx-based family of + mini SBCs (half credit-card size) from TQ Components first released + in late 1999. Technical references are at + <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and + <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at + <http://www.denx.de/embedded-ppc-en.html>. + +config TQM855L + bool "TQM855L" + help + Say Y here to support the TQM855L, one of an MPC8xx-based family of + mini SBCs (half credit-card size) from TQ Components first released + in late 1999. Technical references are at + <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and + <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at + <http://www.denx.de/embedded-ppc-en.html>. + +config TQM860L + bool "TQM860L" + help + Say Y here to support the TQM860L, one of an MPC8xx-based family of + mini SBCs (half credit-card size) from TQ Components first released + in late 1999. Technical references are at + <http://www.denx.de/PDF/TQM8xxLHWM201.pdf>, and + <http://www.denx.de/PDF/STK8xxLHWM201.pdf>, and an image at + <http://www.denx.de/embedded-ppc-en.html>. + +config FPS850L + bool "FPS850L" + +config IVMS8 + bool "IVMS8" + help + Say Y here to support the Integrated Voice-Mail Small 8-channel SBC + from Speech Design, released March 2001. The manufacturer's website + is at <http://www.speech-design.de/>. + +config IVML24 + bool "IVML24" + help + Say Y here to support the Integrated Voice-Mail Large 24-channel SBC + from Speech Design, released March 2001. The manufacturer's website + is at <http://www.speech-design.de/>. + +config HERMES_PRO + bool "HERMES" + +config IP860 + bool "IP860" + +config LWMON + bool "LWMON" + +config PCU_E + bool "PCU_E" + +config CCM + bool "CCM" + +config LANTEC + bool "LANTEC" + +config MBX + bool "MBX" + help + MBX is a line of Motorola single-board computer based around the + MPC821 and MPC860 processors, and intended for embedded-controller + applications. Say Y here to support these boards directly. + +config WINCEPT + bool "WinCept" + help + The Wincept 100/110 is a Motorola single-board computer based on the + MPC821 PowerPC, introduced in 1998 and designed to be used in + thin-client machines. Say Y to support it directly. + +endchoice + +# +# MPC8xx Communication options +# + +menu "MPC8xx CPM Options" + depends on 8xx + +config SCC_ENET + bool "CPM SCC Ethernet" + depends on NET_ETHERNET + help + Enable Ethernet support via the Motorola MPC8xx serial + communications controller. + +choice + prompt "SCC used for Ethernet" + depends on SCC_ENET + default SCC1_ENET + +config SCC1_ENET + bool "SCC1" + help + Use MPC8xx serial communications controller 1 to drive Ethernet + (default). + +config SCC2_ENET + bool "SCC2" + help + Use MPC8xx serial communications controller 2 to drive Ethernet. + +config SCC3_ENET + bool "SCC3" + help + Use MPC8xx serial communications controller 3 to drive Ethernet. + +endchoice + +config FEC_ENET + bool "860T FEC Ethernet" + depends on NET_ETHERNET + help + Enable Ethernet support via the Fast Ethernet Controller (FCC) on + the Motorola MPC8260. + +config USE_MDIO + bool "Use MDIO for PHY configuration" + depends on FEC_ENET + help + On some boards the hardware configuration of the ethernet PHY can be + used without any software interaction over the MDIO interface, so + all MII code can be omitted. Say N here if unsure or if you don't + need link status reports. + +config FEC_AM79C874 + bool "Support AMD79C874 PHY" + depends on USE_MDIO + +config FEC_LXT970 + bool "Support LXT970 PHY" + depends on USE_MDIO + +config FEC_LXT971 + bool "Support LXT971 PHY" + depends on USE_MDIO + +config FEC_QS6612 + bool "Support QS6612 PHY" + depends on USE_MDIO + +config ENET_BIG_BUFFERS + bool "Use Big CPM Ethernet Buffers" + depends on SCC_ENET || FEC_ENET + help + Allocate large buffers for MPC8xx Ethernet. Increases throughput + and decreases the likelihood of dropped packets, but costs memory. + +config HTDMSOUND + bool "Embedded Planet HIOX Audio" + depends on SOUND=y + +# This doesn't really belong here, but it is convenient to ask +# 8xx specific questions. +comment "Generic MPC8xx Options" + +config 8xx_COPYBACK + bool "Copy-Back Data Cache (else Writethrough)" + help + Saying Y here will cause the cache on an MPC8xx processor to be used + in Copy-Back mode. If you say N here, it is used in Writethrough + mode. + + If in doubt, say Y here. + +config 8xx_CPU6 + bool "CPU6 Silicon Errata (860 Pre Rev. C)" + help + MPC860 CPUs, prior to Rev C have some bugs in the silicon, which + require workarounds for Linux (and most other OSes to work). If you + get a BUG() very early in boot, this might fix the problem. For + more details read the document entitled "MPC860 Family Device Errata + Reference" on Motorola's website. This option also incurs a + performance hit. + + If in doubt, say N here. + +choice + prompt "Microcode patch selection" + default NO_UCODE_PATCH + help + Help not implemented yet, coming soon. + +config NO_UCODE_PATCH + bool "None" + +config USB_SOF_UCODE_PATCH + bool "USB SOF patch" + help + Help not implemented yet, coming soon. + +config I2C_SPI_UCODE_PATCH + bool "I2C/SPI relocation patch" + help + Help not implemented yet, coming soon. + +config I2C_SPI_SMC1_UCODE_PATCH + bool "I2C/SPI/SMC1 relocation patch" + help + Help not implemented yet, coming soon. + +endchoice + +config UCODE_PATCH + bool + default y + depends on !NO_UCODE_PATCH + +endmenu + diff --git a/arch/powerpc/platforms/apus/Kconfig b/arch/powerpc/platforms/apus/Kconfig new file mode 100644 index 00000000000..6bde3bffed8 --- /dev/null +++ b/arch/powerpc/platforms/apus/Kconfig @@ -0,0 +1,130 @@ + +config AMIGA + bool + depends on APUS + default y + help + This option enables support for the Amiga series of computers. + +config ZORRO + bool + depends on APUS + default y + help + This enables support for the Zorro bus in the Amiga. If you have + expansion cards in your Amiga that conform to the Amiga + AutoConfig(tm) specification, say Y, otherwise N. Note that even + expansion cards that do not fit in the Zorro slots but fit in e.g. + the CPU slot may fall in this category, so you have to say Y to let + Linux use these. + +config ABSTRACT_CONSOLE + bool + depends on APUS + default y + +config APUS_FAST_EXCEPT + bool + depends on APUS + default y + +config AMIGA_PCMCIA + bool "Amiga 1200/600 PCMCIA support" + depends on APUS && EXPERIMENTAL + help + Include support in the kernel for pcmcia on Amiga 1200 and Amiga + 600. If you intend to use pcmcia cards say Y; otherwise say N. + +config AMIGA_BUILTIN_SERIAL + tristate "Amiga builtin serial support" + depends on APUS + help + If you want to use your Amiga's built-in serial port in Linux, + answer Y. + + To compile this driver as a module, choose M here. + +config GVPIOEXT + tristate "GVP IO-Extender support" + depends on APUS + help + If you want to use a GVP IO-Extender serial card in Linux, say Y. + Otherwise, say N. + +config GVPIOEXT_LP + tristate "GVP IO-Extender parallel printer support" + depends on GVPIOEXT + help + Say Y to enable driving a printer from the parallel port on your + GVP IO-Extender card, N otherwise. + +config GVPIOEXT_PLIP + tristate "GVP IO-Extender PLIP support" + depends on GVPIOEXT + help + Say Y to enable doing IP over the parallel port on your GVP + IO-Extender card, N otherwise. + +config MULTIFACE_III_TTY + tristate "Multiface Card III serial support" + depends on APUS + help + If you want to use a Multiface III card's serial port in Linux, + answer Y. + + To compile this driver as a module, choose M here. + +config A2232 + tristate "Commodore A2232 serial support (EXPERIMENTAL)" + depends on EXPERIMENTAL && APUS + ---help--- + This option supports the 2232 7-port serial card shipped with the + Amiga 2000 and other Zorro-bus machines, dating from 1989. At + a max of 19,200 bps, the ports are served by a 6551 ACIA UART chip + each, plus a 8520 CIA, and a master 6502 CPU and buffer as well. The + ports were connected with 8 pin DIN connectors on the card bracket, + for which 8 pin to DB25 adapters were supplied. The card also had + jumpers internally to toggle various pinning configurations. + + This driver can be built as a module; but then "generic_serial" + will also be built as a module. This has to be loaded before + "ser_a2232". If you want to do this, answer M here. + +config WHIPPET_SERIAL + tristate "Hisoft Whippet PCMCIA serial support" + depends on AMIGA_PCMCIA + help + HiSoft has a web page at <http://www.hisoft.co.uk/>, but there + is no listing for the Whippet in their Amiga section. + +config APNE + tristate "PCMCIA NE2000 support" + depends on AMIGA_PCMCIA + help + If you have a PCMCIA NE2000 compatible adapter, say Y. Otherwise, + say N. + + To compile this driver as a module, choose M here: the + module will be called apne. + +config SERIAL_CONSOLE + bool "Support for serial port console" + depends on APUS && (AMIGA_BUILTIN_SERIAL=y || GVPIOEXT=y || MULTIFACE_III_TTY=y) + +config HEARTBEAT + bool "Use power LED as a heartbeat" + depends on APUS + help + Use the power-on LED on your machine as a load meter. The exact + behavior is platform-dependent, but normally the flash frequency is + a hyperbolic function of the 5-minute load average. + +config PROC_HARDWARE + bool "/proc/hardware support" + depends on APUS + +source "drivers/zorro/Kconfig" + +config PCI_PERMEDIA + bool "PCI for Permedia2" + depends on !4xx && !8xx && APUS diff --git a/arch/powerpc/platforms/embedded6xx/Kconfig b/arch/powerpc/platforms/embedded6xx/Kconfig new file mode 100644 index 00000000000..4f355143059 --- /dev/null +++ b/arch/powerpc/platforms/embedded6xx/Kconfig @@ -0,0 +1,313 @@ +choice + prompt "Machine Type" + depends on EMBEDDED6xx + +config APUS + bool "Amiga-APUS" + depends on BROKEN + help + Select APUS if configuring for a PowerUP Amiga. + More information is available at: + <http://linux-apus.sourceforge.net/>. + +config KATANA + bool "Artesyn-Katana" + help + Select KATANA if configuring an Artesyn KATANA 750i or 3750 + cPCI board. + +config WILLOW + bool "Cogent-Willow" + +config CPCI690 + bool "Force-CPCI690" + help + Select CPCI690 if configuring a Force CPCI690 cPCI board. + +config POWERPMC250 + bool "Force-PowerPMC250" + +config CHESTNUT + bool "IBM 750FX Eval board or 750GX Eval board" + help + Select CHESTNUT if configuring an IBM 750FX Eval Board or a + IBM 750GX Eval board. + +config SPRUCE + bool "IBM-Spruce" + +config HDPU + bool "Sky-HDPU" + help + Select HDPU if configuring a Sky Computers Compute Blade. + +config HDPU_FEATURES + depends HDPU + tristate "HDPU-Features" + help + Select to enable HDPU enhanced features. + +config EV64260 + bool "Marvell-EV64260BP" + help + Select EV64260 if configuring a Marvell (formerly Galileo) + EV64260BP Evaluation platform. + +config LOPEC + bool "Motorola-LoPEC" + +config MVME5100 + bool "Motorola-MVME5100" + +config PPLUS + bool "Motorola-PowerPlus" + +config PRPMC750 + bool "Motorola-PrPMC750" + +config PRPMC800 + bool "Motorola-PrPMC800" + +config SANDPOINT + bool "Motorola-Sandpoint" + help + Select SANDPOINT if configuring for a Motorola Sandpoint X3 + (any flavor). + +config RADSTONE_PPC7D + bool "Radstone Technology PPC7D board" + +config PAL4 + bool "SBS-Palomar4" + +config GEMINI + bool "Synergy-Gemini" + depends on BROKEN + help + Select Gemini if configuring for a Synergy Microsystems' Gemini + series Single Board Computer. More information is available at: + <http://www.synergymicro.com/PressRel/97_10_15.html>. + +config EST8260 + bool "EST8260" + ---help--- + The EST8260 is a single-board computer manufactured by Wind River + Systems, Inc. (formerly Embedded Support Tools Corp.) and based on + the MPC8260. Wind River Systems has a website at + <http://www.windriver.com/>, but the EST8260 cannot be found on it + and has probably been discontinued or rebadged. + +config SBC82xx + bool "SBC82xx" + ---help--- + SBC PowerQUICC II, single-board computer with MPC82xx CPU + Manufacturer: Wind River Systems, Inc. + Date of Release: May 2003 + End of Life: - + URL: <http://www.windriver.com/> + +config SBS8260 + bool "SBS8260" + +config RPX8260 + bool "RPXSUPER" + +config TQM8260 + bool "TQM8260" + ---help--- + MPC8260 based module, little larger than credit card, + up to 128 MB global + 64 MB local RAM, 32 MB Flash, + 32 kB EEPROM, 256 kB L@ Cache, 10baseT + 100baseT Ethernet, + 2 x serial ports, ... + Manufacturer: TQ Components, www.tq-group.de + Date of Release: June 2001 + End of Life: not yet :-) + URL: <http://www.denx.de/PDF/TQM82xx_SPEC_Rev005.pdf> + +config ADS8272 + bool "ADS8272" + +config PQ2FADS + bool "Freescale-PQ2FADS" + help + Select PQ2FADS if you wish to configure for a Freescale + PQ2FADS board (-VR or -ZU). + +config LITE5200 + bool "Freescale LITE5200 / (IceCube)" + select PPC_MPC52xx + help + Support for the LITE5200 dev board for the MPC5200 from Freescale. + This is for the LITE5200 version 2.0 board. Don't know if it changes + much but it's only been tested on this board version. I think this + board is also known as IceCube. + +config MPC834x_SYS + bool "Freescale MPC834x SYS" + help + This option enables support for the MPC 834x SYS evaluation board. + + Be aware that PCI buses can only function when SYS board is plugged + into the PIB (Platform IO Board) board from Freescale which provide + 3 PCI slots. The PIBs PCI initialization is the bootloader's + responsiblilty. + +config EV64360 + bool "Marvell-EV64360BP" + help + Select EV64360 if configuring a Marvell EV64360BP Evaluation + platform. +endchoice + +config PQ2ADS + bool + depends on ADS8272 + default y + +config TQM8xxL + bool + depends on 8xx && (TQM823L || TQM850L || FPS850L || TQM855L || TQM860L) + default y + +config PPC_MPC52xx + bool + +config 8260 + bool "CPM2 Support" if WILLOW + depends on 6xx + default y if TQM8260 || RPX8260 || EST8260 || SBS8260 || SBC82xx || PQ2FADS + help + The MPC8260 is a typical embedded CPU made by Motorola. Selecting + this option means that you wish to build a kernel for a machine with + an 8260 class CPU. + +config 8272 + bool + depends on 6xx + default y if ADS8272 + select 8260 + help + The MPC8272 CPM has a different internal dpram setup than other CPM2 + devices + +config 83xx + bool + default y if MPC834x_SYS + +config MPC834x + bool + default y if MPC834x_SYS + +config CPM2 + bool + depends on 8260 || MPC8560 || MPC8555 + default y + help + The CPM2 (Communications Processor Module) is a coprocessor on + embedded CPUs made by Motorola. Selecting this option means that + you wish to build a kernel for a machine with a CPM2 coprocessor + on it (826x, 827x, 8560). + +config PPC_GEN550 + bool + depends on SANDPOINT || SPRUCE || PPLUS || \ + PRPMC750 || PRPMC800 || LOPEC || \ + (EV64260 && !SERIAL_MPSC) || CHESTNUT || RADSTONE_PPC7D || \ + 83xx + default y + +config FORCE + bool + depends on 6xx && POWERPMC250 + default y + +config GT64260 + bool + depends on EV64260 || CPCI690 + default y + +config MV64360 # Really MV64360 & MV64460 + bool + depends on CHESTNUT || KATANA || RADSTONE_PPC7D || HDPU || EV64360 + default y + +config MV64X60 + bool + depends on (GT64260 || MV64360) + default y + +menu "Set bridge options" + depends on MV64X60 + +config NOT_COHERENT_CACHE + bool "Turn off Cache Coherency" + default n + help + Some 64x60 bridges lock up when trying to enforce cache coherency. + When this option is selected, cache coherency will be turned off. + Note that this can cause other problems (e.g., stale data being + speculatively loaded via a cached mapping). Use at your own risk. + +config MV64X60_BASE + hex "Set bridge base used by firmware" + default "0xf1000000" + help + A firmware can leave the base address of the bridge's registers at + a non-standard location. If so, set this value to reflect the + address of that non-standard location. + +config MV64X60_NEW_BASE + hex "Set bridge base used by kernel" + default "0xf1000000" + help + If the current base address of the bridge's registers is not where + you want it, set this value to the address that you want it moved to. + +endmenu + +config NONMONARCH_SUPPORT + bool "Enable Non-Monarch Support" + depends on PRPMC800 + +config HARRIER + bool + depends on PRPMC800 + default y + +config EPIC_SERIAL_MODE + bool + depends on 6xx && (LOPEC || SANDPOINT) + default y + +config MPC10X_BRIDGE + bool + depends on POWERPMC250 || LOPEC || SANDPOINT + default y + +config MPC10X_OPENPIC + bool + depends on POWERPMC250 || LOPEC || SANDPOINT + default y + +config MPC10X_STORE_GATHERING + bool "Enable MPC10x store gathering" + depends on MPC10X_BRIDGE + +config SANDPOINT_ENABLE_UART1 + bool "Enable DUART mode on Sandpoint" + depends on SANDPOINT + help + If this option is enabled then the MPC824x processor will run + in DUART mode instead of UART mode. + +config HARRIER_STORE_GATHERING + bool "Enable Harrier store gathering" + depends on HARRIER + +config MVME5100_IPMC761_PRESENT + bool "MVME5100 configured with an IPMC761" + depends on MVME5100 + +config SPRUCE_BAUD_33M + bool "Spruce baud clock support" + depends on SPRUCE diff --git a/arch/powerpc/platforms/iseries/Kconfig b/arch/powerpc/platforms/iseries/Kconfig new file mode 100644 index 00000000000..3d957a30c8c --- /dev/null +++ b/arch/powerpc/platforms/iseries/Kconfig @@ -0,0 +1,31 @@ + +menu "iSeries device drivers" + depends on PPC_ISERIES + +config VIOCONS + tristate "iSeries Virtual Console Support" + +config VIODASD + tristate "iSeries Virtual I/O disk support" + help + If you are running on an iSeries system and you want to use + virtual disks created and managed by OS/400, say Y. + +config VIOCD + tristate "iSeries Virtual I/O CD support" + help + If you are running Linux on an IBM iSeries system and you want to + read a CD drive owned by OS/400, say Y here. + +config VIOTAPE + tristate "iSeries Virtual Tape Support" + help + If you are running Linux on an iSeries system and you want Linux + to read and/or write a tape drive owned by OS/400, say Y here. + +endmenu + +config VIOPATH + bool + depends on VIOCONS || VIODASD || VIOCD || VIOTAPE || VETH + default y diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile new file mode 100644 index 00000000000..37b7341396e --- /dev/null +++ b/arch/powerpc/platforms/powermac/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_PPC_PMAC) += pmac_pic.o pmac_setup.o pmac_time.o \ + pmac_feature.o pmac_pci.o pmac_sleep.o \ + pmac_low_i2c.o pmac_cache.o +obj-$(CONFIG_PMAC_BACKLIGHT) += pmac_backlight.o +obj-$(CONFIG_CPU_FREQ_PMAC) += pmac_cpufreq.o +ifeq ($(CONFIG_PPC_PMAC),y) +obj-$(CONFIG_NVRAM) += pmac_nvram.o +obj-$(CONFIG_SMP) += pmac_smp.o +endif diff --git a/arch/powerpc/platforms/powermac/pmac.h b/arch/powerpc/platforms/powermac/pmac.h new file mode 100644 index 00000000000..40e1c5030f7 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac.h @@ -0,0 +1,31 @@ +#ifndef __PMAC_H__ +#define __PMAC_H__ + +#include <linux/pci.h> +#include <linux/ide.h> + +/* + * Declaration for the various functions exported by the + * pmac_* files. Mostly for use by pmac_setup + */ + +extern void pmac_get_boot_time(struct rtc_time *tm); +extern void pmac_get_rtc_time(struct rtc_time *tm); +extern int pmac_set_rtc_time(struct rtc_time *tm); +extern void pmac_read_rtc_time(void); +extern void pmac_calibrate_decr(void); + +extern void pmac_pcibios_fixup(void); +extern void pmac_pci_init(void); +extern void pmac_setup_pci_dma(void); +extern void pmac_check_ht_link(void); + +extern void pmac_setup_smp(void); + +extern unsigned long pmac_ide_get_base(int index); +extern void pmac_ide_init_hwif_ports(hw_regs_t *hw, + unsigned long data_port, unsigned long ctrl_port, int *irq); + +extern void pmac_nvram_init(void); + +#endif /* __PMAC_H__ */ diff --git a/arch/powerpc/platforms/powermac/pmac_backlight.c b/arch/powerpc/platforms/powermac/pmac_backlight.c new file mode 100644 index 00000000000..8be2f7d071f --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_backlight.c @@ -0,0 +1,202 @@ +/* + * Miscellaneous procedures for dealing with the PowerMac hardware. + * Contains support for the backlight. + * + * Copyright (C) 2000 Benjamin Herrenschmidt + * + */ + +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/stddef.h> +#include <linux/reboot.h> +#include <linux/nvram.h> +#include <linux/console.h> +#include <asm/sections.h> +#include <asm/ptrace.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/system.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/nvram.h> +#include <asm/backlight.h> + +#include <linux/adb.h> +#include <linux/pmu.h> + +static struct backlight_controller *backlighter; +static void* backlighter_data; +static int backlight_autosave; +static int backlight_level = BACKLIGHT_MAX; +static int backlight_enabled = 1; +static int backlight_req_level = -1; +static int backlight_req_enable = -1; + +static void backlight_callback(void *); +static DECLARE_WORK(backlight_work, backlight_callback, NULL); + +void register_backlight_controller(struct backlight_controller *ctrler, + void *data, char *type) +{ + struct device_node* bk_node; + char *prop; + int valid = 0; + + /* There's already a matching controller, bail out */ + if (backlighter != NULL) + return; + + bk_node = find_devices("backlight"); + +#ifdef CONFIG_ADB_PMU + /* Special case for the old PowerBook since I can't test on it */ + backlight_autosave = machine_is_compatible("AAPL,3400/2400") + || machine_is_compatible("AAPL,3500"); + if ((backlight_autosave + || machine_is_compatible("AAPL,PowerBook1998") + || machine_is_compatible("PowerBook1,1")) + && !strcmp(type, "pmu")) + valid = 1; +#endif + if (bk_node) { + prop = get_property(bk_node, "backlight-control", NULL); + if (prop && !strncmp(prop, type, strlen(type))) + valid = 1; + } + if (!valid) + return; + backlighter = ctrler; + backlighter_data = data; + + if (bk_node && !backlight_autosave) + prop = get_property(bk_node, "bklt", NULL); + else + prop = NULL; + if (prop) { + backlight_level = ((*prop)+1) >> 1; + if (backlight_level > BACKLIGHT_MAX) + backlight_level = BACKLIGHT_MAX; + } + +#ifdef CONFIG_ADB_PMU + if (backlight_autosave) { + struct adb_request req; + pmu_request(&req, NULL, 2, 0xd9, 0); + while (!req.complete) + pmu_poll(); + backlight_level = req.reply[0] >> 4; + } +#endif + acquire_console_sem(); + if (!backlighter->set_enable(1, backlight_level, data)) + backlight_enabled = 1; + release_console_sem(); + + printk(KERN_INFO "Registered \"%s\" backlight controller," + "level: %d/15\n", type, backlight_level); +} +EXPORT_SYMBOL(register_backlight_controller); + +void unregister_backlight_controller(struct backlight_controller + *ctrler, void *data) +{ + /* We keep the current backlight level (for now) */ + if (ctrler == backlighter && data == backlighter_data) + backlighter = NULL; +} +EXPORT_SYMBOL(unregister_backlight_controller); + +static int __set_backlight_enable(int enable) +{ + int rc; + + if (!backlighter) + return -ENODEV; + acquire_console_sem(); + rc = backlighter->set_enable(enable, backlight_level, + backlighter_data); + if (!rc) + backlight_enabled = enable; + release_console_sem(); + return rc; +} +int set_backlight_enable(int enable) +{ + if (!backlighter) + return -ENODEV; + backlight_req_enable = enable; + schedule_work(&backlight_work); + return 0; +} + +EXPORT_SYMBOL(set_backlight_enable); + +int get_backlight_enable(void) +{ + if (!backlighter) + return -ENODEV; + return backlight_enabled; +} +EXPORT_SYMBOL(get_backlight_enable); + +static int __set_backlight_level(int level) +{ + int rc = 0; + + if (!backlighter) + return -ENODEV; + if (level < BACKLIGHT_MIN) + level = BACKLIGHT_OFF; + if (level > BACKLIGHT_MAX) + level = BACKLIGHT_MAX; + acquire_console_sem(); + if (backlight_enabled) + rc = backlighter->set_level(level, backlighter_data); + if (!rc) + backlight_level = level; + release_console_sem(); + if (!rc && !backlight_autosave) { + level <<=1; + if (level & 0x10) + level |= 0x01; + // -- todo: save to property "bklt" + } + return rc; +} +int set_backlight_level(int level) +{ + if (!backlighter) + return -ENODEV; + backlight_req_level = level; + schedule_work(&backlight_work); + return 0; +} + +EXPORT_SYMBOL(set_backlight_level); + +int get_backlight_level(void) +{ + if (!backlighter) + return -ENODEV; + return backlight_level; +} +EXPORT_SYMBOL(get_backlight_level); + +static void backlight_callback(void *dummy) +{ + int level, enable; + + do { + level = backlight_req_level; + enable = backlight_req_enable; + mb(); + + if (level >= 0) + __set_backlight_level(level); + if (enable >= 0) + __set_backlight_enable(enable); + } while(cmpxchg(&backlight_req_level, level, -1) != level || + cmpxchg(&backlight_req_enable, enable, -1) != enable); +} diff --git a/arch/powerpc/platforms/powermac/pmac_cache.S b/arch/powerpc/platforms/powermac/pmac_cache.S new file mode 100644 index 00000000000..fb977de6b70 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_cache.S @@ -0,0 +1,359 @@ +/* + * This file contains low-level cache management functions + * used for sleep and CPU speed changes on Apple machines. + * (In fact the only thing that is Apple-specific is that we assume + * that we can read from ROM at physical address 0xfff00000.) + * + * Copyright (C) 2004 Paul Mackerras (paulus@samba.org) and + * Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/ppc_asm.h> +#include <asm/cputable.h> + +/* + * Flush and disable all data caches (dL1, L2, L3). This is used + * when going to sleep, when doing a PMU based cpufreq transition, + * or when "offlining" a CPU on SMP machines. This code is over + * paranoid, but I've had enough issues with various CPU revs and + * bugs that I decided it was worth beeing over cautious + */ + +_GLOBAL(flush_disable_caches) +#ifndef CONFIG_6xx + blr +#else +BEGIN_FTR_SECTION + b flush_disable_745x +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) +BEGIN_FTR_SECTION + b flush_disable_75x +END_FTR_SECTION_IFSET(CPU_FTR_L2CR) + b __flush_disable_L1 + +/* This is the code for G3 and 74[01]0 */ +flush_disable_75x: + mflr r10 + + /* Turn off EE and DR in MSR */ + mfmsr r11 + rlwinm r0,r11,0,~MSR_EE + rlwinm r0,r0,0,~MSR_DR + sync + mtmsr r0 + isync + + /* Stop DST streams */ +BEGIN_FTR_SECTION + DSSALL + sync +END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) + + /* Stop DPM */ + mfspr r8,SPRN_HID0 /* Save SPRN_HID0 in r8 */ + rlwinm r4,r8,0,12,10 /* Turn off HID0[DPM] */ + sync + mtspr SPRN_HID0,r4 /* Disable DPM */ + sync + + /* Disp-flush L1. We have a weird problem here that I never + * totally figured out. On 750FX, using the ROM for the flush + * results in a non-working flush. We use that workaround for + * now until I finally understand what's going on. --BenH + */ + + /* ROM base by default */ + lis r4,0xfff0 + mfpvr r3 + srwi r3,r3,16 + cmplwi cr0,r3,0x7000 + bne+ 1f + /* RAM base on 750FX */ + li r4,0 +1: li r4,0x4000 + mtctr r4 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + + /* Disable / invalidate / enable L1 data */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,~(HID0_DCE | HID0_ICE) + mtspr SPRN_HID0,r3 + sync + isync + ori r3,r3,(HID0_DCE|HID0_DCI|HID0_ICE|HID0_ICFI) + sync + isync + mtspr SPRN_HID0,r3 + xori r3,r3,(HID0_DCI|HID0_ICFI) + mtspr SPRN_HID0,r3 + sync + + /* Get the current enable bit of the L2CR into r4 */ + mfspr r5,SPRN_L2CR + /* Set to data-only (pre-745x bit) */ + oris r3,r5,L2CR_L2DO@h + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r3 +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: /* disp-flush L2. The interesting thing here is that the L2 can be + * up to 2Mb ... so using the ROM, we'll end up wrapping back to memory + * but that is probbaly fine. We disp-flush over 4Mb to be safe + */ + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: lwz r0,0(r4) + addi r4,r4,32 + bdnz 1b + sync + isync + lis r4,2 + mtctr r4 + lis r4,0xfff0 +1: dcbf 0,r4 + addi r4,r4,32 + bdnz 1b + sync + isync + + /* now disable L2 */ + rlwinm r5,r5,0,~L2CR_L2E + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r5 +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + /* Invalidate L2. This is pre-745x, we clear the L2I bit ourselves */ + oris r4,r5,L2CR_L2I@h + mtspr SPRN_L2CR,r4 + sync + isync + + /* Wait for the invalidation to complete */ +1: mfspr r3,SPRN_L2CR + rlwinm. r0,r3,0,31,31 + bne 1b + + /* Clear L2I */ + xoris r4,r4,L2CR_L2I@h + sync + mtspr SPRN_L2CR,r4 + sync + + /* now disable the L1 data cache */ + mfspr r0,SPRN_HID0 + rlwinm r0,r0,0,~(HID0_DCE|HID0_ICE) + mtspr SPRN_HID0,r0 + sync + isync + + /* Restore HID0[DPM] to whatever it was before */ + sync + mfspr r0,SPRN_HID0 + rlwimi r0,r8,0,11,11 /* Turn back HID0[DPM] */ + mtspr SPRN_HID0,r0 + sync + + /* restore DR and EE */ + sync + mtmsr r11 + isync + + mtlr r10 + blr + +/* This code is for 745x processors */ +flush_disable_745x: + /* Turn off EE and DR in MSR */ + mfmsr r11 + rlwinm r0,r11,0,~MSR_EE + rlwinm r0,r0,0,~MSR_DR + sync + mtmsr r0 + isync + + /* Stop prefetch streams */ + DSSALL + sync + + /* Disable L2 prefetching */ + mfspr r0,SPRN_MSSCR0 + rlwinm r0,r0,0,0,29 + mtspr SPRN_MSSCR0,r0 + sync + isync + lis r4,0 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + dcbf 0,r4 + + /* Due to a bug with the HW flush on some CPU revs, we occasionally + * experience data corruption. I'm adding a displacement flush along + * with a dcbf loop over a few Mb to "help". The problem isn't totally + * fixed by this in theory, but at least, in practice, I couldn't reproduce + * it even with a big hammer... + */ + + lis r4,0x0002 + mtctr r4 + li r4,0 +1: + lwz r0,0(r4) + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + isync + + /* Now, flush the first 4MB of memory */ + lis r4,0x0002 + mtctr r4 + li r4,0 + sync +1: + dcbf 0,r4 + addi r4,r4,32 /* Go to start of next cache line */ + bdnz 1b + + /* Flush and disable the L1 data cache */ + mfspr r6,SPRN_LDSTCR + lis r3,0xfff0 /* read from ROM for displacement flush */ + li r4,0xfe /* start with only way 0 unlocked */ + li r5,128 /* 128 lines in each way */ +1: mtctr r5 + rlwimi r6,r4,0,24,31 + mtspr SPRN_LDSTCR,r6 + sync + isync +2: lwz r0,0(r3) /* touch each cache line */ + addi r3,r3,32 + bdnz 2b + rlwinm r4,r4,1,24,30 /* move on to the next way */ + ori r4,r4,1 + cmpwi r4,0xff /* all done? */ + bne 1b + /* now unlock the L1 data cache */ + li r4,0 + rlwimi r6,r4,0,24,31 + sync + mtspr SPRN_LDSTCR,r6 + sync + isync + + /* Flush the L2 cache using the hardware assist */ + mfspr r3,SPRN_L2CR + cmpwi r3,0 /* check if it is enabled first */ + bge 4f + oris r0,r3,(L2CR_L2IO_745x|L2CR_L2DO_745x)@h + b 2f + /* When disabling/locking L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r0 /* lock the L2 cache */ +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + ori r0,r3,L2CR_L2HWF_745x + sync + mtspr SPRN_L2CR,r0 /* set the hardware flush bit */ +3: mfspr r0,SPRN_L2CR /* wait for it to go to 0 */ + andi. r0,r0,L2CR_L2HWF_745x + bne 3b + sync + rlwinm r3,r3,0,~L2CR_L2E + b 2f + /* When disabling L2, code must be in L1 */ + .balign 32 +1: mtspr SPRN_L2CR,r3 /* disable the L2 cache */ +3: sync + isync + b 1f +2: b 3f +3: sync + isync + b 1b +1: sync + isync + oris r4,r3,L2CR_L2I@h + mtspr SPRN_L2CR,r4 + sync + isync +1: mfspr r4,SPRN_L2CR + andis. r0,r4,L2CR_L2I@h + bne 1b + sync + +BEGIN_FTR_SECTION + /* Flush the L3 cache using the hardware assist */ +4: mfspr r3,SPRN_L3CR + cmpwi r3,0 /* check if it is enabled */ + bge 6f + oris r0,r3,L3CR_L3IO@h + ori r0,r0,L3CR_L3DO + sync + mtspr SPRN_L3CR,r0 /* lock the L3 cache */ + sync + isync + ori r0,r0,L3CR_L3HWF + sync + mtspr SPRN_L3CR,r0 /* set the hardware flush bit */ +5: mfspr r0,SPRN_L3CR /* wait for it to go to zero */ + andi. r0,r0,L3CR_L3HWF + bne 5b + rlwinm r3,r3,0,~L3CR_L3E + sync + mtspr SPRN_L3CR,r3 /* disable the L3 cache */ + sync + ori r4,r3,L3CR_L3I + mtspr SPRN_L3CR,r4 +1: mfspr r4,SPRN_L3CR + andi. r0,r4,L3CR_L3I + bne 1b + sync +END_FTR_SECTION_IFSET(CPU_FTR_L3CR) + +6: mfspr r0,SPRN_HID0 /* now disable the L1 data cache */ + rlwinm r0,r0,0,~HID0_DCE + mtspr SPRN_HID0,r0 + sync + isync + mtmsr r11 /* restore DR and EE */ + isync + blr +#endif /* CONFIG_6xx */ diff --git a/arch/powerpc/platforms/powermac/pmac_cpufreq.c b/arch/powerpc/platforms/powermac/pmac_cpufreq.c new file mode 100644 index 00000000000..6d32d99402b --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_cpufreq.c @@ -0,0 +1,728 @@ +/* + * arch/ppc/platforms/pmac_cpufreq.c + * + * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org> + * Copyright (C) 2004 John Steele Scott <toojays@toojays.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * TODO: Need a big cleanup here. Basically, we need to have different + * cpufreq_driver structures for the different type of HW instead of the + * current mess. We also need to better deal with the detection of the + * type of machine. + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/delay.h> +#include <linux/sched.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/slab.h> +#include <linux/cpufreq.h> +#include <linux/init.h> +#include <linux/sysdev.h> +#include <linux/i2c.h> +#include <linux/hardirq.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/irq.h> +#include <asm/pmac_feature.h> +#include <asm/mmu_context.h> +#include <asm/sections.h> +#include <asm/cputable.h> +#include <asm/time.h> +#include <asm/system.h> +#include <asm/mpic.h> +#include <asm/keylargo.h> + +/* WARNING !!! This will cause calibrate_delay() to be called, + * but this is an __init function ! So you MUST go edit + * init/main.c to make it non-init before enabling DEBUG_FREQ + */ +#undef DEBUG_FREQ + +/* + * There is a problem with the core cpufreq code on SMP kernels, + * it won't recalculate the Bogomips properly + */ +#ifdef CONFIG_SMP +#warning "WARNING, CPUFREQ not recommended on SMP kernels" +#endif + +extern void low_choose_7447a_dfs(int dfs); +extern void low_choose_750fx_pll(int pll); +extern void low_sleep_handler(void); + +/* + * Currently, PowerMac cpufreq supports only high & low frequencies + * that are set by the firmware + */ +static unsigned int low_freq; +static unsigned int hi_freq; +static unsigned int cur_freq; +static unsigned int sleep_freq; + +/* + * Different models uses different mecanisms to switch the frequency + */ +static int (*set_speed_proc)(int low_speed); +static unsigned int (*get_speed_proc)(void); + +/* + * Some definitions used by the various speedprocs + */ +static u32 voltage_gpio; +static u32 frequency_gpio; +static u32 slew_done_gpio; +static int no_schedule; +static int has_cpu_l2lve; +static int is_pmu_based; + +/* There are only two frequency states for each processor. Values + * are in kHz for the time being. + */ +#define CPUFREQ_HIGH 0 +#define CPUFREQ_LOW 1 + +static struct cpufreq_frequency_table pmac_cpu_freqs[] = { + {CPUFREQ_HIGH, 0}, + {CPUFREQ_LOW, 0}, + {0, CPUFREQ_TABLE_END}, +}; + +static struct freq_attr* pmac_cpu_freqs_attr[] = { + &cpufreq_freq_attr_scaling_available_freqs, + NULL, +}; + +static inline void local_delay(unsigned long ms) +{ + if (no_schedule) + mdelay(ms); + else + msleep(ms); +} + +static inline void wakeup_decrementer(void) +{ + set_dec(tb_ticks_per_jiffy); + /* No currently-supported powerbook has a 601, + * so use get_tbl, not native + */ + last_jiffy_stamp(0) = tb_last_stamp = get_tbl(); +} + +#ifdef DEBUG_FREQ +static inline void debug_calc_bogomips(void) +{ + /* This will cause a recalc of bogomips and display the + * result. We backup/restore the value to avoid affecting the + * core cpufreq framework's own calculation. + */ + extern void calibrate_delay(void); + + unsigned long save_lpj = loops_per_jiffy; + calibrate_delay(); + loops_per_jiffy = save_lpj; +} +#endif /* DEBUG_FREQ */ + +/* Switch CPU speed under 750FX CPU control + */ +static int cpu_750fx_cpu_speed(int low_speed) +{ + u32 hid2; + + if (low_speed == 0) { + /* ramping up, set voltage first */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); + /* Make sure we sleep for at least 1ms */ + local_delay(10); + + /* tweak L2 for high voltage */ + if (has_cpu_l2lve) { + hid2 = mfspr(SPRN_HID2); + hid2 &= ~0x2000; + mtspr(SPRN_HID2, hid2); + } + } +#ifdef CONFIG_6xx + low_choose_750fx_pll(low_speed); +#endif + if (low_speed == 1) { + /* tweak L2 for low voltage */ + if (has_cpu_l2lve) { + hid2 = mfspr(SPRN_HID2); + hid2 |= 0x2000; + mtspr(SPRN_HID2, hid2); + } + + /* ramping down, set voltage last */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); + local_delay(10); + } + + return 0; +} + +static unsigned int cpu_750fx_get_cpu_speed(void) +{ + if (mfspr(SPRN_HID1) & HID1_PS) + return low_freq; + else + return hi_freq; +} + +/* Switch CPU speed using DFS */ +static int dfs_set_cpu_speed(int low_speed) +{ + if (low_speed == 0) { + /* ramping up, set voltage first */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); + /* Make sure we sleep for at least 1ms */ + local_delay(1); + } + + /* set frequency */ +#ifdef CONFIG_6xx + low_choose_7447a_dfs(low_speed); +#endif + udelay(100); + + if (low_speed == 1) { + /* ramping down, set voltage last */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); + local_delay(1); + } + + return 0; +} + +static unsigned int dfs_get_cpu_speed(void) +{ + if (mfspr(SPRN_HID1) & HID1_DFS) + return low_freq; + else + return hi_freq; +} + + +/* Switch CPU speed using slewing GPIOs + */ +static int gpios_set_cpu_speed(int low_speed) +{ + int gpio, timeout = 0; + + /* If ramping up, set voltage first */ + if (low_speed == 0) { + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x05); + /* Delay is way too big but it's ok, we schedule */ + local_delay(10); + } + + /* Set frequency */ + gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); + if (low_speed == ((gpio & 0x01) == 0)) + goto skip; + + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, frequency_gpio, + low_speed ? 0x04 : 0x05); + udelay(200); + do { + if (++timeout > 100) + break; + local_delay(1); + gpio = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, slew_done_gpio, 0); + } while((gpio & 0x02) == 0); + skip: + /* If ramping down, set voltage last */ + if (low_speed == 1) { + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, voltage_gpio, 0x04); + /* Delay is way too big but it's ok, we schedule */ + local_delay(10); + } + +#ifdef DEBUG_FREQ + debug_calc_bogomips(); +#endif + + return 0; +} + +/* Switch CPU speed under PMU control + */ +static int pmu_set_cpu_speed(int low_speed) +{ + struct adb_request req; + unsigned long save_l2cr; + unsigned long save_l3cr; + unsigned int pic_prio; + unsigned long flags; + + preempt_disable(); + +#ifdef DEBUG_FREQ + printk(KERN_DEBUG "HID1, before: %x\n", mfspr(SPRN_HID1)); +#endif + pmu_suspend(); + + /* Disable all interrupt sources on openpic */ + pic_prio = mpic_cpu_get_priority(); + mpic_cpu_set_priority(0xf); + + /* Make sure the decrementer won't interrupt us */ + asm volatile("mtdec %0" : : "r" (0x7fffffff)); + /* Make sure any pending DEC interrupt occuring while we did + * the above didn't re-enable the DEC */ + mb(); + asm volatile("mtdec %0" : : "r" (0x7fffffff)); + + /* We can now disable MSR_EE */ + local_irq_save(flags); + + /* Giveup the FPU & vec */ + enable_kernel_fp(); + +#ifdef CONFIG_ALTIVEC + if (cpu_has_feature(CPU_FTR_ALTIVEC)) + enable_kernel_altivec(); +#endif /* CONFIG_ALTIVEC */ + + /* Save & disable L2 and L3 caches */ + save_l3cr = _get_L3CR(); /* (returns -1 if not available) */ + save_l2cr = _get_L2CR(); /* (returns -1 if not available) */ + + /* Send the new speed command. My assumption is that this command + * will cause PLL_CFG[0..3] to be changed next time CPU goes to sleep + */ + pmu_request(&req, NULL, 6, PMU_CPU_SPEED, 'W', 'O', 'O', 'F', low_speed); + while (!req.complete) + pmu_poll(); + + /* Prepare the northbridge for the speed transition */ + pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,1); + + /* Call low level code to backup CPU state and recover from + * hardware reset + */ + low_sleep_handler(); + + /* Restore the northbridge */ + pmac_call_feature(PMAC_FTR_SLEEP_STATE,NULL,1,0); + + /* Restore L2 cache */ + if (save_l2cr != 0xffffffff && (save_l2cr & L2CR_L2E) != 0) + _set_L2CR(save_l2cr); + /* Restore L3 cache */ + if (save_l3cr != 0xffffffff && (save_l3cr & L3CR_L3E) != 0) + _set_L3CR(save_l3cr); + + /* Restore userland MMU context */ + set_context(current->active_mm->context, current->active_mm->pgd); + +#ifdef DEBUG_FREQ + printk(KERN_DEBUG "HID1, after: %x\n", mfspr(SPRN_HID1)); +#endif + + /* Restore low level PMU operations */ + pmu_unlock(); + + /* Restore decrementer */ + wakeup_decrementer(); + + /* Restore interrupts */ + mpic_cpu_set_priority(pic_prio); + + /* Let interrupts flow again ... */ + local_irq_restore(flags); + +#ifdef DEBUG_FREQ + debug_calc_bogomips(); +#endif + + pmu_resume(); + + preempt_enable(); + + return 0; +} + +static int do_set_cpu_speed(int speed_mode, int notify) +{ + struct cpufreq_freqs freqs; + unsigned long l3cr; + static unsigned long prev_l3cr; + + freqs.old = cur_freq; + freqs.new = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; + freqs.cpu = smp_processor_id(); + + if (freqs.old == freqs.new) + return 0; + + if (notify) + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + if (speed_mode == CPUFREQ_LOW && + cpu_has_feature(CPU_FTR_L3CR)) { + l3cr = _get_L3CR(); + if (l3cr & L3CR_L3E) { + prev_l3cr = l3cr; + _set_L3CR(0); + } + } + set_speed_proc(speed_mode == CPUFREQ_LOW); + if (speed_mode == CPUFREQ_HIGH && + cpu_has_feature(CPU_FTR_L3CR)) { + l3cr = _get_L3CR(); + if ((prev_l3cr & L3CR_L3E) && l3cr != prev_l3cr) + _set_L3CR(prev_l3cr); + } + if (notify) + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); + cur_freq = (speed_mode == CPUFREQ_HIGH) ? hi_freq : low_freq; + + return 0; +} + +static unsigned int pmac_cpufreq_get_speed(unsigned int cpu) +{ + return cur_freq; +} + +static int pmac_cpufreq_verify(struct cpufreq_policy *policy) +{ + return cpufreq_frequency_table_verify(policy, pmac_cpu_freqs); +} + +static int pmac_cpufreq_target( struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int newstate = 0; + + if (cpufreq_frequency_table_target(policy, pmac_cpu_freqs, + target_freq, relation, &newstate)) + return -EINVAL; + + return do_set_cpu_speed(newstate, 1); +} + +unsigned int pmac_get_one_cpufreq(int i) +{ + /* Supports only one CPU for now */ + return (i == 0) ? cur_freq : 0; +} + +static int pmac_cpufreq_cpu_init(struct cpufreq_policy *policy) +{ + if (policy->cpu != 0) + return -ENODEV; + + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL; + policy->cur = cur_freq; + + cpufreq_frequency_table_get_attr(pmac_cpu_freqs, policy->cpu); + return cpufreq_frequency_table_cpuinfo(policy, pmac_cpu_freqs); +} + +static u32 read_gpio(struct device_node *np) +{ + u32 *reg = (u32 *)get_property(np, "reg", NULL); + u32 offset; + + if (reg == NULL) + return 0; + /* That works for all keylargos but shall be fixed properly + * some day... The problem is that it seems we can't rely + * on the "reg" property of the GPIO nodes, they are either + * relative to the base of KeyLargo or to the base of the + * GPIO space, and the device-tree doesn't help. + */ + offset = *reg; + if (offset < KEYLARGO_GPIO_LEVELS0) + offset += KEYLARGO_GPIO_LEVELS0; + return offset; +} + +static int pmac_cpufreq_suspend(struct cpufreq_policy *policy, pm_message_t pmsg) +{ + /* Ok, this could be made a bit smarter, but let's be robust for now. We + * always force a speed change to high speed before sleep, to make sure + * we have appropriate voltage and/or bus speed for the wakeup process, + * and to make sure our loops_per_jiffies are "good enough", that is will + * not cause too short delays if we sleep in low speed and wake in high + * speed.. + */ + no_schedule = 1; + sleep_freq = cur_freq; + if (cur_freq == low_freq && !is_pmu_based) + do_set_cpu_speed(CPUFREQ_HIGH, 0); + return 0; +} + +static int pmac_cpufreq_resume(struct cpufreq_policy *policy) +{ + /* If we resume, first check if we have a get() function */ + if (get_speed_proc) + cur_freq = get_speed_proc(); + else + cur_freq = 0; + + /* We don't, hrm... we don't really know our speed here, best + * is that we force a switch to whatever it was, which is + * probably high speed due to our suspend() routine + */ + do_set_cpu_speed(sleep_freq == low_freq ? + CPUFREQ_LOW : CPUFREQ_HIGH, 0); + + no_schedule = 0; + return 0; +} + +static struct cpufreq_driver pmac_cpufreq_driver = { + .verify = pmac_cpufreq_verify, + .target = pmac_cpufreq_target, + .get = pmac_cpufreq_get_speed, + .init = pmac_cpufreq_cpu_init, + .suspend = pmac_cpufreq_suspend, + .resume = pmac_cpufreq_resume, + .flags = CPUFREQ_PM_NO_WARN, + .attr = pmac_cpu_freqs_attr, + .name = "powermac", + .owner = THIS_MODULE, +}; + + +static int pmac_cpufreq_init_MacRISC3(struct device_node *cpunode) +{ + struct device_node *volt_gpio_np = of_find_node_by_name(NULL, + "voltage-gpio"); + struct device_node *freq_gpio_np = of_find_node_by_name(NULL, + "frequency-gpio"); + struct device_node *slew_done_gpio_np = of_find_node_by_name(NULL, + "slewing-done"); + u32 *value; + + /* + * Check to see if it's GPIO driven or PMU only + * + * The way we extract the GPIO address is slightly hackish, but it + * works well enough for now. We need to abstract the whole GPIO + * stuff sooner or later anyway + */ + + if (volt_gpio_np) + voltage_gpio = read_gpio(volt_gpio_np); + if (freq_gpio_np) + frequency_gpio = read_gpio(freq_gpio_np); + if (slew_done_gpio_np) + slew_done_gpio = read_gpio(slew_done_gpio_np); + + /* If we use the frequency GPIOs, calculate the min/max speeds based + * on the bus frequencies + */ + if (frequency_gpio && slew_done_gpio) { + int lenp, rc; + u32 *freqs, *ratio; + + freqs = (u32 *)get_property(cpunode, "bus-frequencies", &lenp); + lenp /= sizeof(u32); + if (freqs == NULL || lenp != 2) { + printk(KERN_ERR "cpufreq: bus-frequencies incorrect or missing\n"); + return 1; + } + ratio = (u32 *)get_property(cpunode, "processor-to-bus-ratio*2", NULL); + if (ratio == NULL) { + printk(KERN_ERR "cpufreq: processor-to-bus-ratio*2 missing\n"); + return 1; + } + + /* Get the min/max bus frequencies */ + low_freq = min(freqs[0], freqs[1]); + hi_freq = max(freqs[0], freqs[1]); + + /* Grrrr.. It _seems_ that the device-tree is lying on the low bus + * frequency, it claims it to be around 84Mhz on some models while + * it appears to be approx. 101Mhz on all. Let's hack around here... + * fortunately, we don't need to be too precise + */ + if (low_freq < 98000000) + low_freq = 101000000; + + /* Convert those to CPU core clocks */ + low_freq = (low_freq * (*ratio)) / 2000; + hi_freq = (hi_freq * (*ratio)) / 2000; + + /* Now we get the frequencies, we read the GPIO to see what is out current + * speed + */ + rc = pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, frequency_gpio, 0); + cur_freq = (rc & 0x01) ? hi_freq : low_freq; + + set_speed_proc = gpios_set_cpu_speed; + return 1; + } + + /* If we use the PMU, look for the min & max frequencies in the + * device-tree + */ + value = (u32 *)get_property(cpunode, "min-clock-frequency", NULL); + if (!value) + return 1; + low_freq = (*value) / 1000; + /* The PowerBook G4 12" (PowerBook6,1) has an error in the device-tree + * here */ + if (low_freq < 100000) + low_freq *= 10; + + value = (u32 *)get_property(cpunode, "max-clock-frequency", NULL); + if (!value) + return 1; + hi_freq = (*value) / 1000; + set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; + + return 0; +} + +static int pmac_cpufreq_init_7447A(struct device_node *cpunode) +{ + struct device_node *volt_gpio_np; + + if (get_property(cpunode, "dynamic-power-step", NULL) == NULL) + return 1; + + volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); + if (volt_gpio_np) + voltage_gpio = read_gpio(volt_gpio_np); + if (!voltage_gpio){ + printk(KERN_ERR "cpufreq: missing cpu-vcore-select gpio\n"); + return 1; + } + + /* OF only reports the high frequency */ + hi_freq = cur_freq; + low_freq = cur_freq/2; + + /* Read actual frequency from CPU */ + cur_freq = dfs_get_cpu_speed(); + set_speed_proc = dfs_set_cpu_speed; + get_speed_proc = dfs_get_cpu_speed; + + return 0; +} + +static int pmac_cpufreq_init_750FX(struct device_node *cpunode) +{ + struct device_node *volt_gpio_np; + u32 pvr, *value; + + if (get_property(cpunode, "dynamic-power-step", NULL) == NULL) + return 1; + + hi_freq = cur_freq; + value = (u32 *)get_property(cpunode, "reduced-clock-frequency", NULL); + if (!value) + return 1; + low_freq = (*value) / 1000; + + volt_gpio_np = of_find_node_by_name(NULL, "cpu-vcore-select"); + if (volt_gpio_np) + voltage_gpio = read_gpio(volt_gpio_np); + + pvr = mfspr(SPRN_PVR); + has_cpu_l2lve = !((pvr & 0xf00) == 0x100); + + set_speed_proc = cpu_750fx_cpu_speed; + get_speed_proc = cpu_750fx_get_cpu_speed; + cur_freq = cpu_750fx_get_cpu_speed(); + + return 0; +} + +/* Currently, we support the following machines: + * + * - Titanium PowerBook 1Ghz (PMU based, 667Mhz & 1Ghz) + * - Titanium PowerBook 800 (PMU based, 667Mhz & 800Mhz) + * - Titanium PowerBook 400 (PMU based, 300Mhz & 400Mhz) + * - Titanium PowerBook 500 (PMU based, 300Mhz & 500Mhz) + * - iBook2 500/600 (PMU based, 400Mhz & 500/600Mhz) + * - iBook2 700 (CPU based, 400Mhz & 700Mhz, support low voltage) + * - Recent MacRISC3 laptops + * - All new machines with 7447A CPUs + */ +static int __init pmac_cpufreq_setup(void) +{ + struct device_node *cpunode; + u32 *value; + + if (strstr(cmd_line, "nocpufreq")) + return 0; + + /* Assume only one CPU */ + cpunode = find_type_devices("cpu"); + if (!cpunode) + goto out; + + /* Get current cpu clock freq */ + value = (u32 *)get_property(cpunode, "clock-frequency", NULL); + if (!value) + goto out; + cur_freq = (*value) / 1000; + + /* Check for 7447A based MacRISC3 */ + if (machine_is_compatible("MacRISC3") && + get_property(cpunode, "dynamic-power-step", NULL) && + PVR_VER(mfspr(SPRN_PVR)) == 0x8003) { + pmac_cpufreq_init_7447A(cpunode); + /* Check for other MacRISC3 machines */ + } else if (machine_is_compatible("PowerBook3,4") || + machine_is_compatible("PowerBook3,5") || + machine_is_compatible("MacRISC3")) { + pmac_cpufreq_init_MacRISC3(cpunode); + /* Else check for iBook2 500/600 */ + } else if (machine_is_compatible("PowerBook4,1")) { + hi_freq = cur_freq; + low_freq = 400000; + set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; + } + /* Else check for TiPb 400 & 500 */ + else if (machine_is_compatible("PowerBook3,2")) { + /* We only know about the 400 MHz and the 500Mhz model + * they both have 300 MHz as low frequency + */ + if (cur_freq < 350000 || cur_freq > 550000) + goto out; + hi_freq = cur_freq; + low_freq = 300000; + set_speed_proc = pmu_set_cpu_speed; + is_pmu_based = 1; + } + /* Else check for 750FX */ + else if (PVR_VER(mfspr(SPRN_PVR)) == 0x7000) + pmac_cpufreq_init_750FX(cpunode); +out: + if (set_speed_proc == NULL) + return -ENODEV; + + pmac_cpu_freqs[CPUFREQ_LOW].frequency = low_freq; + pmac_cpu_freqs[CPUFREQ_HIGH].frequency = hi_freq; + + printk(KERN_INFO "Registering PowerMac CPU frequency driver\n"); + printk(KERN_INFO "Low: %d Mhz, High: %d Mhz, Boot: %d Mhz\n", + low_freq/1000, hi_freq/1000, cur_freq/1000); + + return cpufreq_register_driver(&pmac_cpufreq_driver); +} + +module_init(pmac_cpufreq_setup); + diff --git a/arch/powerpc/platforms/powermac/pmac_feature.c b/arch/powerpc/platforms/powermac/pmac_feature.c new file mode 100644 index 00000000000..2cba670c71b --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_feature.c @@ -0,0 +1,3062 @@ +/* + * arch/ppc/platforms/pmac_feature.c + * + * Copyright (C) 1996-2001 Paul Mackerras (paulus@cs.anu.edu.au) + * Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * TODO: + * + * - Replace mdelay with some schedule loop if possible + * - Shorten some obfuscated delays on some routines (like modem + * power) + * - Refcount some clocks (see darwin) + * - Split split split... + * + */ +#include <linux/config.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/delay.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/ioport.h> +#include <linux/pci.h> +#include <asm/sections.h> +#include <asm/errno.h> +#include <asm/ohare.h> +#include <asm/heathrow.h> +#include <asm/keylargo.h> +#include <asm/uninorth.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/dbdma.h> +#include <asm/pci-bridge.h> +#include <asm/pmac_low_i2c.h> + +#undef DEBUG_FEATURE + +#ifdef DEBUG_FEATURE +#define DBG(fmt...) printk(KERN_DEBUG fmt) +#else +#define DBG(fmt...) +#endif + +#ifdef CONFIG_6xx +extern int powersave_lowspeed; +#endif + +extern int powersave_nap; +extern struct device_node *k2_skiplist[2]; + + +/* + * We use a single global lock to protect accesses. Each driver has + * to take care of its own locking + */ +static DEFINE_SPINLOCK(feature_lock); + +#define LOCK(flags) spin_lock_irqsave(&feature_lock, flags); +#define UNLOCK(flags) spin_unlock_irqrestore(&feature_lock, flags); + + +/* + * Instance of some macio stuffs + */ +struct macio_chip macio_chips[MAX_MACIO_CHIPS]; + +struct macio_chip *macio_find(struct device_node *child, int type) +{ + while(child) { + int i; + + for (i=0; i < MAX_MACIO_CHIPS && macio_chips[i].of_node; i++) + if (child == macio_chips[i].of_node && + (!type || macio_chips[i].type == type)) + return &macio_chips[i]; + child = child->parent; + } + return NULL; +} +EXPORT_SYMBOL_GPL(macio_find); + +static const char *macio_names[] = +{ + "Unknown", + "Grand Central", + "OHare", + "OHareII", + "Heathrow", + "Gatwick", + "Paddington", + "Keylargo", + "Pangea", + "Intrepid", + "K2" +}; + + + +/* + * Uninorth reg. access. Note that Uni-N regs are big endian + */ + +#define UN_REG(r) (uninorth_base + ((r) >> 2)) +#define UN_IN(r) (in_be32(UN_REG(r))) +#define UN_OUT(r,v) (out_be32(UN_REG(r), (v))) +#define UN_BIS(r,v) (UN_OUT((r), UN_IN(r) | (v))) +#define UN_BIC(r,v) (UN_OUT((r), UN_IN(r) & ~(v))) + +static struct device_node *uninorth_node; +static u32 __iomem *uninorth_base; +static u32 uninorth_rev; +static int uninorth_u3; +static void __iomem *u3_ht; + +/* + * For each motherboard family, we have a table of functions pointers + * that handle the various features. + */ + +typedef long (*feature_call)(struct device_node *node, long param, long value); + +struct feature_table_entry { + unsigned int selector; + feature_call function; +}; + +struct pmac_mb_def +{ + const char* model_string; + const char* model_name; + int model_id; + struct feature_table_entry* features; + unsigned long board_flags; +}; +static struct pmac_mb_def pmac_mb; + +/* + * Here are the chip specific feature functions + */ + +static inline int simple_feature_tweak(struct device_node *node, int type, + int reg, u32 mask, int value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, type); + if (!macio) + return -ENODEV; + LOCK(flags); + if (value) + MACIO_BIS(reg, mask); + else + MACIO_BIC(reg, mask); + (void)MACIO_IN32(reg); + UNLOCK(flags); + + return 0; +} + +#ifndef CONFIG_POWER4 + +static long ohare_htw_scc_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long chan_mask; + unsigned long fcr; + unsigned long flags; + int htw, trans; + unsigned long rmask; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (!strcmp(node->name, "ch-a")) + chan_mask = MACIO_FLAG_SCCA_ON; + else if (!strcmp(node->name, "ch-b")) + chan_mask = MACIO_FLAG_SCCB_ON; + else + return -ENODEV; + + htw = (macio->type == macio_heathrow || macio->type == macio_paddington + || macio->type == macio_gatwick); + /* On these machines, the HRW_SCC_TRANS_EN_N bit mustn't be touched */ + trans = (pmac_mb.model_id != PMAC_TYPE_YOSEMITE && + pmac_mb.model_id != PMAC_TYPE_YIKES); + if (value) { +#ifdef CONFIG_ADB_PMU + if ((param & 0xfff) == PMAC_SCC_IRDA) + pmu_enable_irled(1); +#endif /* CONFIG_ADB_PMU */ + LOCK(flags); + fcr = MACIO_IN32(OHARE_FCR); + /* Check if scc cell need enabling */ + if (!(fcr & OH_SCC_ENABLE)) { + fcr |= OH_SCC_ENABLE; + if (htw) { + /* Side effect: this will also power up the + * modem, but it's too messy to figure out on which + * ports this controls the tranceiver and on which + * it controls the modem + */ + if (trans) + fcr &= ~HRW_SCC_TRANS_EN_N; + MACIO_OUT32(OHARE_FCR, fcr); + fcr |= (rmask = HRW_RESET_SCC); + MACIO_OUT32(OHARE_FCR, fcr); + } else { + fcr |= (rmask = OH_SCC_RESET); + MACIO_OUT32(OHARE_FCR, fcr); + } + UNLOCK(flags); + (void)MACIO_IN32(OHARE_FCR); + mdelay(15); + LOCK(flags); + fcr &= ~rmask; + MACIO_OUT32(OHARE_FCR, fcr); + } + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr |= OH_SCCA_IO; + if (chan_mask & MACIO_FLAG_SCCB_ON) + fcr |= OH_SCCB_IO; + MACIO_OUT32(OHARE_FCR, fcr); + macio->flags |= chan_mask; + UNLOCK(flags); + if (param & PMAC_SCC_FLAG_XMON) + macio->flags |= MACIO_FLAG_SCC_LOCKED; + } else { + if (macio->flags & MACIO_FLAG_SCC_LOCKED) + return -EPERM; + LOCK(flags); + fcr = MACIO_IN32(OHARE_FCR); + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr &= ~OH_SCCA_IO; + if (chan_mask & MACIO_FLAG_SCCB_ON) + fcr &= ~OH_SCCB_IO; + MACIO_OUT32(OHARE_FCR, fcr); + if ((fcr & (OH_SCCA_IO | OH_SCCB_IO)) == 0) { + fcr &= ~OH_SCC_ENABLE; + if (htw && trans) + fcr |= HRW_SCC_TRANS_EN_N; + MACIO_OUT32(OHARE_FCR, fcr); + } + macio->flags &= ~(chan_mask); + UNLOCK(flags); + mdelay(10); +#ifdef CONFIG_ADB_PMU + if ((param & 0xfff) == PMAC_SCC_IRDA) + pmu_enable_irled(0); +#endif /* CONFIG_ADB_PMU */ + } + return 0; +} + +static long ohare_floppy_enable(struct device_node *node, long param, + long value) +{ + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_FLOPPY_ENABLE, value); +} + +static long ohare_mesh_enable(struct device_node *node, long param, long value) +{ + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_MESH_ENABLE, value); +} + +static long ohare_ide_enable(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + /* For some reason, setting the bit in set_initial_features() + * doesn't stick. I'm still investigating... --BenH. + */ + if (value) + simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IOBUS_ENABLE, 1); + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_BAY_IDE_ENABLE, value); + default: + return -ENODEV; + } +} + +static long ohare_ide_reset(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_ohare, + OHARE_FCR, OH_IDE1_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long ohare_sleep_state(struct device_node *node, long param, long value) +{ + struct macio_chip* macio = &macio_chips[0]; + + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + if (value == 1) { + MACIO_BIC(OHARE_FCR, OH_IOBUS_ENABLE); + } else if (value == 0) { + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } + + return 0; +} + +static long heathrow_modem_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + macio = macio_find(node, macio_unknown); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1; + if (!value) { + LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + mdelay(250); + } + if (pmac_mb.model_id != PMAC_TYPE_YOSEMITE && + pmac_mb.model_id != PMAC_TYPE_YIKES) { + LOCK(flags); + if (value) + MACIO_BIC(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + else + MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(250); + } + if (value) { + LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(HRW_GPIO_MODEM_RESET, gpio | 1); + (void)MACIO_IN8(HRW_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long heathrow_floppy_enable(struct device_node *node, long param, + long value) +{ + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, + HRW_SWIM_ENABLE|HRW_BAY_FLOPPY_ENABLE, + value); +} + +static long heathrow_mesh_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, macio_unknown); + if (!macio) + return -ENODEV; + LOCK(flags); + /* Set clear mesh cell enable */ + if (value) + MACIO_BIS(HEATHROW_FCR, HRW_MESH_ENABLE); + else + MACIO_BIC(HEATHROW_FCR, HRW_MESH_ENABLE); + (void)MACIO_IN32(HEATHROW_FCR); + udelay(10); + /* Set/Clear termination power */ + if (value) + MACIO_BIC(HEATHROW_MBCR, 0x04000000); + else + MACIO_BIS(HEATHROW_MBCR, 0x04000000); + (void)MACIO_IN32(HEATHROW_MBCR); + udelay(10); + UNLOCK(flags); + + return 0; +} + +static long heathrow_ide_enable(struct device_node *node, long param, + long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_BAY_IDE_ENABLE, value); + default: + return -ENODEV; + } +} + +static long heathrow_ide_reset(struct device_node *node, long param, + long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_unknown, + HEATHROW_FCR, HRW_IDE1_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long heathrow_bmac_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (value) { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_BMAC_IO_ENABLE); + MACIO_BIS(HEATHROW_FCR, HRW_BMAC_RESET); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(10); + LOCK(flags); + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_RESET); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + mdelay(10); + } else { + LOCK(flags); + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE); + UNLOCK(flags); + } + return 0; +} + +static long heathrow_sound_enable(struct device_node *node, long param, + long value) +{ + struct macio_chip* macio; + unsigned long flags; + + /* B&W G3 and Yikes don't support that properly (the + * sound appear to never come back after beeing shut down). + */ + if (pmac_mb.model_id == PMAC_TYPE_YOSEMITE || + pmac_mb.model_id == PMAC_TYPE_YIKES) + return 0; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (value) { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); + UNLOCK(flags); + (void)MACIO_IN32(HEATHROW_FCR); + } else { + LOCK(flags); + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + UNLOCK(flags); + } + return 0; +} + +static u32 save_fcr[6]; +static u32 save_mbcr; +static u32 save_gpio_levels[2]; +static u8 save_gpio_extint[KEYLARGO_GPIO_EXTINT_CNT]; +static u8 save_gpio_normal[KEYLARGO_GPIO_CNT]; +static u32 save_unin_clock_ctl; +static struct dbdma_regs save_dbdma[13]; +static struct dbdma_regs save_alt_dbdma[13]; + +static void dbdma_save(struct macio_chip *macio, struct dbdma_regs *save) +{ + int i; + + /* Save state & config of DBDMA channels */ + for (i = 0; i < 13; i++) { + volatile struct dbdma_regs __iomem * chan = (void __iomem *) + (macio->base + ((0x8000+i*0x100)>>2)); + save[i].cmdptr_hi = in_le32(&chan->cmdptr_hi); + save[i].cmdptr = in_le32(&chan->cmdptr); + save[i].intr_sel = in_le32(&chan->intr_sel); + save[i].br_sel = in_le32(&chan->br_sel); + save[i].wait_sel = in_le32(&chan->wait_sel); + } +} + +static void dbdma_restore(struct macio_chip *macio, struct dbdma_regs *save) +{ + int i; + + /* Save state & config of DBDMA channels */ + for (i = 0; i < 13; i++) { + volatile struct dbdma_regs __iomem * chan = (void __iomem *) + (macio->base + ((0x8000+i*0x100)>>2)); + out_le32(&chan->control, (ACTIVE|DEAD|WAKE|FLUSH|PAUSE|RUN)<<16); + while (in_le32(&chan->status) & ACTIVE) + mb(); + out_le32(&chan->cmdptr_hi, save[i].cmdptr_hi); + out_le32(&chan->cmdptr, save[i].cmdptr); + out_le32(&chan->intr_sel, save[i].intr_sel); + out_le32(&chan->br_sel, save[i].br_sel); + out_le32(&chan->wait_sel, save[i].wait_sel); + } +} + +static void heathrow_sleep(struct macio_chip *macio, int secondary) +{ + if (secondary) { + dbdma_save(macio, save_alt_dbdma); + save_fcr[2] = MACIO_IN32(0x38); + save_fcr[3] = MACIO_IN32(0x3c); + } else { + dbdma_save(macio, save_dbdma); + save_fcr[0] = MACIO_IN32(0x38); + save_fcr[1] = MACIO_IN32(0x3c); + save_mbcr = MACIO_IN32(0x34); + /* Make sure sound is shut down */ + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_POWER_N); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + /* This seems to be necessary as well or the fan + * keeps coming up and battery drains fast */ + MACIO_BIC(HEATHROW_FCR, HRW_IOBUS_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_IDE0_RESET_N); + /* Make sure eth is down even if module or sleep + * won't work properly */ + MACIO_BIC(HEATHROW_FCR, HRW_BMAC_IO_ENABLE | HRW_BMAC_RESET); + } + /* Make sure modem is shut down */ + MACIO_OUT8(HRW_GPIO_MODEM_RESET, + MACIO_IN8(HRW_GPIO_MODEM_RESET) & ~1); + MACIO_BIS(HEATHROW_FCR, HRW_SCC_TRANS_EN_N); + MACIO_BIC(HEATHROW_FCR, OH_SCCA_IO|OH_SCCB_IO|HRW_SCC_ENABLE); + + /* Let things settle */ + (void)MACIO_IN32(HEATHROW_FCR); +} + +static void heathrow_wakeup(struct macio_chip *macio, int secondary) +{ + if (secondary) { + MACIO_OUT32(0x38, save_fcr[2]); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x3c, save_fcr[3]); + (void)MACIO_IN32(0x38); + mdelay(10); + dbdma_restore(macio, save_alt_dbdma); + } else { + MACIO_OUT32(0x38, save_fcr[0] | HRW_IOBUS_ENABLE); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x3c, save_fcr[1]); + (void)MACIO_IN32(0x38); + mdelay(1); + MACIO_OUT32(0x34, save_mbcr); + (void)MACIO_IN32(0x38); + mdelay(10); + dbdma_restore(macio, save_dbdma); + } +} + +static long heathrow_sleep_state(struct device_node *node, long param, + long value) +{ + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + if (value == 1) { + if (macio_chips[1].type == macio_gatwick) + heathrow_sleep(&macio_chips[0], 1); + heathrow_sleep(&macio_chips[0], 0); + } else if (value == 0) { + heathrow_wakeup(&macio_chips[0], 0); + if (macio_chips[1].type == macio_gatwick) + heathrow_wakeup(&macio_chips[0], 1); + } + return 0; +} + +static long core99_scc_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + unsigned long chan_mask; + u32 fcr; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + if (!strcmp(node->name, "ch-a")) + chan_mask = MACIO_FLAG_SCCA_ON; + else if (!strcmp(node->name, "ch-b")) + chan_mask = MACIO_FLAG_SCCB_ON; + else + return -ENODEV; + + if (value) { + int need_reset_scc = 0; + int need_reset_irda = 0; + + LOCK(flags); + fcr = MACIO_IN32(KEYLARGO_FCR0); + /* Check if scc cell need enabling */ + if (!(fcr & KL0_SCC_CELL_ENABLE)) { + fcr |= KL0_SCC_CELL_ENABLE; + need_reset_scc = 1; + } + if (chan_mask & MACIO_FLAG_SCCA_ON) { + fcr |= KL0_SCCA_ENABLE; + /* Don't enable line drivers for I2S modem */ + if ((param & 0xfff) == PMAC_SCC_I2S1) + fcr &= ~KL0_SCC_A_INTF_ENABLE; + else + fcr |= KL0_SCC_A_INTF_ENABLE; + } + if (chan_mask & MACIO_FLAG_SCCB_ON) { + fcr |= KL0_SCCB_ENABLE; + /* Perform irda specific inits */ + if ((param & 0xfff) == PMAC_SCC_IRDA) { + fcr &= ~KL0_SCC_B_INTF_ENABLE; + fcr |= KL0_IRDA_ENABLE; + fcr |= KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE; + fcr |= KL0_IRDA_SOURCE1_SEL; + fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0); + fcr &= ~(KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND); + need_reset_irda = 1; + } else + fcr |= KL0_SCC_B_INTF_ENABLE; + } + MACIO_OUT32(KEYLARGO_FCR0, fcr); + macio->flags |= chan_mask; + if (need_reset_scc) { + MACIO_BIS(KEYLARGO_FCR0, KL0_SCC_RESET); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(15); + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR0, KL0_SCC_RESET); + } + if (need_reset_irda) { + MACIO_BIS(KEYLARGO_FCR0, KL0_IRDA_RESET); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(15); + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR0, KL0_IRDA_RESET); + } + UNLOCK(flags); + if (param & PMAC_SCC_FLAG_XMON) + macio->flags |= MACIO_FLAG_SCC_LOCKED; + } else { + if (macio->flags & MACIO_FLAG_SCC_LOCKED) + return -EPERM; + LOCK(flags); + fcr = MACIO_IN32(KEYLARGO_FCR0); + if (chan_mask & MACIO_FLAG_SCCA_ON) + fcr &= ~KL0_SCCA_ENABLE; + if (chan_mask & MACIO_FLAG_SCCB_ON) { + fcr &= ~KL0_SCCB_ENABLE; + /* Perform irda specific clears */ + if ((param & 0xfff) == PMAC_SCC_IRDA) { + fcr &= ~KL0_IRDA_ENABLE; + fcr &= ~(KL0_IRDA_CLK32_ENABLE | KL0_IRDA_CLK19_ENABLE); + fcr &= ~(KL0_IRDA_FAST_CONNECT|KL0_IRDA_DEFAULT1|KL0_IRDA_DEFAULT0); + fcr &= ~(KL0_IRDA_SOURCE1_SEL|KL0_IRDA_SOURCE2_SEL|KL0_IRDA_HIGH_BAND); + } + } + MACIO_OUT32(KEYLARGO_FCR0, fcr); + if ((fcr & (KL0_SCCA_ENABLE | KL0_SCCB_ENABLE)) == 0) { + fcr &= ~KL0_SCC_CELL_ENABLE; + MACIO_OUT32(KEYLARGO_FCR0, fcr); + } + macio->flags &= ~(chan_mask); + UNLOCK(flags); + mdelay(10); + } + return 0; +} + +static long +core99_modem_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + /* Hack for internal USB modem */ + if (node == NULL) { + if (macio_chips[0].type != macio_keylargo) + return -ENODEV; + node = macio_chips[0].of_node; + } + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(KL_GPIO_MODEM_RESET); + gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE; + gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA; + + if (!value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + mdelay(250); + } + LOCK(flags); + if (value) { + MACIO_BIC(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR2); + mdelay(250); + } else { + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + UNLOCK(flags); + } + if (value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long +pangea_modem_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + u8 gpio; + unsigned long flags; + + /* Hack for internal USB modem */ + if (node == NULL) { + if (macio_chips[0].type != macio_pangea && + macio_chips[0].type != macio_intrepid) + return -ENODEV; + node = macio_chips[0].of_node; + } + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + gpio = MACIO_IN8(KL_GPIO_MODEM_RESET); + gpio |= KEYLARGO_GPIO_OUTPUT_ENABLE; + gpio &= ~KEYLARGO_GPIO_OUTOUT_DATA; + + if (!value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + UNLOCK(flags); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + mdelay(250); + } + LOCK(flags); + if (value) { + MACIO_OUT8(KL_GPIO_MODEM_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR2); + mdelay(250); + } else { + MACIO_OUT8(KL_GPIO_MODEM_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + UNLOCK(flags); + } + if (value) { + LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); LOCK(flags); + MACIO_OUT8(KL_GPIO_MODEM_RESET, gpio | KEYLARGO_GPIO_OUTOUT_DATA); + (void)MACIO_IN8(KL_GPIO_MODEM_RESET); + UNLOCK(flags); mdelay(250); + } + return 0; +} + +static long +core99_ata100_enable(struct device_node *node, long value) +{ + unsigned long flags; + struct pci_dev *pdev = NULL; + u8 pbus, pid; + + if (uninorth_rev < 0x24) + return -ENODEV; + + LOCK(flags); + if (value) + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100); + else + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_ATA100); + (void)UN_IN(UNI_N_CLOCK_CNTL); + UNLOCK(flags); + udelay(20); + + if (value) { + if (pci_device_from_OF_node(node, &pbus, &pid) == 0) + pdev = pci_find_slot(pbus, pid); + if (pdev == NULL) + return 0; + pci_enable_device(pdev); + pci_set_master(pdev); + } + return 0; +} + +static long +core99_ide_enable(struct device_node *node, long param, long value) +{ + /* Bus ID 0 to 2 are KeyLargo based IDE, busID 3 is U2 + * based ata-100 + */ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE0_ENABLE, value); + case 1: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE1_ENABLE, value); + case 2: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_UIDE_ENABLE, value); + case 3: + return core99_ata100_enable(node, value); + default: + return -ENODEV; + } +} + +static long +core99_ide_reset(struct device_node *node, long param, long value) +{ + switch(param) { + case 0: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE0_RESET_N, !value); + case 1: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_EIDE1_RESET_N, !value); + case 2: + return simple_feature_tweak(node, macio_unknown, + KEYLARGO_FCR1, KL1_UIDE_RESET_N, !value); + default: + return -ENODEV; + } +} + +static long +core99_gmac_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + LOCK(flags); + if (value) + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC); + else + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_GMAC); + (void)UN_IN(UNI_N_CLOCK_CNTL); + UNLOCK(flags); + udelay(20); + + return 0; +} + +static long +core99_gmac_phy_reset(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(KL_GPIO_ETH_PHY_RESET); + UNLOCK(flags); + mdelay(10); + LOCK(flags); + MACIO_OUT8(KL_GPIO_ETH_PHY_RESET, /*KEYLARGO_GPIO_OUTPUT_ENABLE | */ + KEYLARGO_GPIO_OUTOUT_DATA); + UNLOCK(flags); + mdelay(10); + + return 0; +} + +static long +core99_sound_chip_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + + /* Do a better probe code, screamer G4 desktops & + * iMacs can do that too, add a recalibrate in + * the driver as well + */ + if (pmac_mb.model_id == PMAC_TYPE_PISMO || + pmac_mb.model_id == PMAC_TYPE_TITANIUM) { + LOCK(flags); + if (value) + MACIO_OUT8(KL_GPIO_SOUND_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE | + KEYLARGO_GPIO_OUTOUT_DATA); + else + MACIO_OUT8(KL_GPIO_SOUND_POWER, + KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(KL_GPIO_SOUND_POWER); + UNLOCK(flags); + } + return 0; +} + +static long +core99_airport_enable(struct device_node *node, long param, long value) +{ + struct macio_chip* macio; + unsigned long flags; + int state; + + macio = macio_find(node, 0); + if (!macio) + return -ENODEV; + + /* Hint: we allow passing of macio itself for the sake of the + * sleep code + */ + if (node != macio->of_node && + (!node->parent || node->parent != macio->of_node)) + return -ENODEV; + state = (macio->flags & MACIO_FLAG_AIRPORT_ON) != 0; + if (value == state) + return 0; + if (value) { + /* This code is a reproduction of OF enable-cardslot + * and init-wireless methods, slightly hacked until + * I got it working. + */ + LOCK(flags); + MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 5); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf); + UNLOCK(flags); + mdelay(10); + LOCK(flags); + MACIO_OUT8(KEYLARGO_GPIO_0+0xf, 4); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xf); + UNLOCK(flags); + + mdelay(10); + + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xb, 0); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xb); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xa, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xa); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+0xd, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+0xd); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_0+0xd, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xd); + udelay(10); + MACIO_OUT8(KEYLARGO_GPIO_0+0xe, 0x28); + (void)MACIO_IN8(KEYLARGO_GPIO_0+0xe); + UNLOCK(flags); + udelay(10); + MACIO_OUT32(0x1c000, 0); + mdelay(1); + MACIO_OUT8(0x1a3e0, 0x41); + (void)MACIO_IN8(0x1a3e0); + udelay(10); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + UNLOCK(flags); + mdelay(100); + + macio->flags |= MACIO_FLAG_AIRPORT_ON; + } else { + LOCK(flags); + MACIO_BIC(KEYLARGO_FCR2, KL2_CARDSEL_16); + (void)MACIO_IN32(KEYLARGO_FCR2); + MACIO_OUT8(KL_GPIO_AIRPORT_0, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_1, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_2, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_3, 0); + MACIO_OUT8(KL_GPIO_AIRPORT_4, 0); + (void)MACIO_IN8(KL_GPIO_AIRPORT_4); + UNLOCK(flags); + + macio->flags &= ~MACIO_FLAG_AIRPORT_ON; + } + return 0; +} + +#ifdef CONFIG_SMP +static long +core99_reset_cpu(struct device_node *node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip *macio; + struct device_node *np; + const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0, + KL_GPIO_RESET_CPU1, + KL_GPIO_RESET_CPU2, + KL_GPIO_RESET_CPU3 }; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo) + return -ENODEV; + + np = find_path_device("/cpus"); + if (np == NULL) + return -ENODEV; + for (np = np->child; np != NULL; np = np->sibling) { + u32 *num = (u32 *)get_property(np, "reg", NULL); + u32 *rst = (u32 *)get_property(np, "soft-reset", NULL); + if (num == NULL || rst == NULL) + continue; + if (param == *num) { + reset_io = *rst; + break; + } + } + if (np == NULL || reset_io == 0) + reset_io = dflt_reset_lines[param]; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +static long +core99_usb_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio; + unsigned long flags; + char *prop; + int number; + u32 reg; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + prop = (char *)get_property(node, "AAPL,clock-id", NULL); + if (!prop) + return -ENODEV; + if (strncmp(prop, "usb0u048", 8) == 0) + number = 0; + else if (strncmp(prop, "usb1u148", 8) == 0) + number = 2; + else if (strncmp(prop, "usb2u248", 8) == 0) + number = 4; + else + return -ENODEV; + + /* Sorry for the brute-force locking, but this is only used during + * sleep and the timing seem to be critical + */ + LOCK(flags); + if (value) { + /* Turn ON */ + if (number == 0) { + MACIO_BIC(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + UNLOCK(flags); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE); + } else if (number == 2) { + MACIO_BIC(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1)); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE); + } else if (number == 4) { + MACIO_BIC(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1)); + UNLOCK(flags); + (void)MACIO_IN32(KEYLARGO_FCR1); + mdelay(1); + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR1, KL1_USB2_CELL_ENABLE); + } + if (number < 4) { + reg = MACIO_IN32(KEYLARGO_FCR4); + reg &= ~(KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) | + KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number)); + reg &= ~(KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) | + KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1)); + MACIO_OUT32(KEYLARGO_FCR4, reg); + (void)MACIO_IN32(KEYLARGO_FCR4); + udelay(10); + } else { + reg = MACIO_IN32(KEYLARGO_FCR3); + reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) | + KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0)); + reg &= ~(KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) | + KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1)); + MACIO_OUT32(KEYLARGO_FCR3, reg); + (void)MACIO_IN32(KEYLARGO_FCR3); + udelay(10); + } + if (macio->type == macio_intrepid) { + /* wait for clock stopped bits to clear */ + u32 test0 = 0, test1 = 0; + u32 status0, status1; + int timeout = 1000; + + UNLOCK(flags); + switch (number) { + case 0: + test0 = UNI_N_CLOCK_STOPPED_USB0; + test1 = UNI_N_CLOCK_STOPPED_USB0PCI; + break; + case 2: + test0 = UNI_N_CLOCK_STOPPED_USB1; + test1 = UNI_N_CLOCK_STOPPED_USB1PCI; + break; + case 4: + test0 = UNI_N_CLOCK_STOPPED_USB2; + test1 = UNI_N_CLOCK_STOPPED_USB2PCI; + break; + } + do { + if (--timeout <= 0) { + printk(KERN_ERR "core99_usb_enable: " + "Timeout waiting for clocks\n"); + break; + } + mdelay(1); + status0 = UN_IN(UNI_N_CLOCK_STOP_STATUS0); + status1 = UN_IN(UNI_N_CLOCK_STOP_STATUS1); + } while ((status0 & test0) | (status1 & test1)); + LOCK(flags); + } + } else { + /* Turn OFF */ + if (number < 4) { + reg = MACIO_IN32(KEYLARGO_FCR4); + reg |= KL4_PORT_WAKEUP_ENABLE(number) | KL4_PORT_RESUME_WAKE_EN(number) | + KL4_PORT_CONNECT_WAKE_EN(number) | KL4_PORT_DISCONNECT_WAKE_EN(number); + reg |= KL4_PORT_WAKEUP_ENABLE(number+1) | KL4_PORT_RESUME_WAKE_EN(number+1) | + KL4_PORT_CONNECT_WAKE_EN(number+1) | KL4_PORT_DISCONNECT_WAKE_EN(number+1); + MACIO_OUT32(KEYLARGO_FCR4, reg); + (void)MACIO_IN32(KEYLARGO_FCR4); + udelay(1); + } else { + reg = MACIO_IN32(KEYLARGO_FCR3); + reg |= KL3_IT_PORT_WAKEUP_ENABLE(0) | KL3_IT_PORT_RESUME_WAKE_EN(0) | + KL3_IT_PORT_CONNECT_WAKE_EN(0) | KL3_IT_PORT_DISCONNECT_WAKE_EN(0); + reg |= KL3_IT_PORT_WAKEUP_ENABLE(1) | KL3_IT_PORT_RESUME_WAKE_EN(1) | + KL3_IT_PORT_CONNECT_WAKE_EN(1) | KL3_IT_PORT_DISCONNECT_WAKE_EN(1); + MACIO_OUT32(KEYLARGO_FCR3, reg); + (void)MACIO_IN32(KEYLARGO_FCR3); + udelay(1); + } + if (number == 0) { + if (macio->type != macio_intrepid) + MACIO_BIC(KEYLARGO_FCR0, KL0_USB0_CELL_ENABLE); + (void)MACIO_IN32(KEYLARGO_FCR0); + udelay(1); + MACIO_BIS(KEYLARGO_FCR0, (KL0_USB0_PAD_SUSPEND0 | KL0_USB0_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + } else if (number == 2) { + if (macio->type != macio_intrepid) + MACIO_BIC(KEYLARGO_FCR0, KL0_USB1_CELL_ENABLE); + (void)MACIO_IN32(KEYLARGO_FCR0); + udelay(1); + MACIO_BIS(KEYLARGO_FCR0, (KL0_USB1_PAD_SUSPEND0 | KL0_USB1_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR0); + } else if (number == 4) { + udelay(1); + MACIO_BIS(KEYLARGO_FCR1, (KL1_USB2_PAD_SUSPEND0 | KL1_USB2_PAD_SUSPEND1)); + (void)MACIO_IN32(KEYLARGO_FCR1); + } + udelay(1); + } + UNLOCK(flags); + + return 0; +} + +static long +core99_firewire_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED)) + return -ENODEV; + + LOCK(flags); + if (value) { + UN_BIS(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW); + (void)UN_IN(UNI_N_CLOCK_CNTL); + } else { + UN_BIC(UNI_N_CLOCK_CNTL, UNI_N_CLOCK_CNTL_FW); + (void)UN_IN(UNI_N_CLOCK_CNTL); + } + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long +core99_firewire_cable_power(struct device_node *node, long param, long value) +{ + unsigned long flags; + struct macio_chip *macio; + + /* Trick: we allow NULL node */ + if ((pmac_mb.board_flags & PMAC_MB_HAS_FW_POWER) == 0) + return -ENODEV; + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + if (!(macio->flags & MACIO_FLAG_FW_SUPPORTED)) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 0); + MACIO_IN8(KL_GPIO_FW_CABLE_POWER); + udelay(10); + } else { + MACIO_OUT8(KL_GPIO_FW_CABLE_POWER , 4); + MACIO_IN8(KL_GPIO_FW_CABLE_POWER); udelay(10); + } + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long +intrepid_aack_delay_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + if (uninorth_rev < 0xd2) + return -ENODEV; + + LOCK(flags); + if (param) + UN_BIS(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE); + else + UN_BIC(UNI_N_AACK_DELAY, UNI_N_AACK_DELAY_ENABLE); + UNLOCK(flags); + + return 0; +} + + +#endif /* CONFIG_POWER4 */ + +static long +core99_read_gpio(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + + return MACIO_IN8(param); +} + + +static long +core99_write_gpio(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + + MACIO_OUT8(param, (u8)(value & 0xff)); + return 0; +} + +#ifdef CONFIG_POWER4 +static long g5_gmac_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + mb(); + k2_skiplist[0] = NULL; + } else { + k2_skiplist[0] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_GMAC_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long g5_fw_enable(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (node == NULL) + return -ENODEV; + + LOCK(flags); + if (value) { + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + mb(); + k2_skiplist[1] = NULL; + } else { + k2_skiplist[1] = node; + mb(); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_FW_CLK_ENABLE); + } + + UNLOCK(flags); + mdelay(1); + + return 0; +} + +static long g5_mpic_enable(struct device_node *node, long param, long value) +{ + unsigned long flags; + + if (node->parent == NULL || strcmp(node->parent->name, "u3")) + return 0; + + LOCK(flags); + UN_BIS(U3_TOGGLE_REG, U3_MPIC_RESET | U3_MPIC_OUTPUT_ENABLE); + UNLOCK(flags); + + return 0; +} + +static long g5_eth_phy_reset(struct device_node *node, long param, long value) +{ + struct macio_chip *macio = &macio_chips[0]; + struct device_node *phy; + int need_reset; + + /* + * We must not reset the combo PHYs, only the BCM5221 found in + * the iMac G5. + */ + phy = of_get_next_child(node, NULL); + if (!phy) + return -ENODEV; + need_reset = device_is_compatible(phy, "B5221"); + of_node_put(phy); + if (!need_reset) + return 0; + + /* PHY reset is GPIO 29, not in device-tree unfortunately */ + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, + KEYLARGO_GPIO_OUTPUT_ENABLE | KEYLARGO_GPIO_OUTOUT_DATA); + /* Thankfully, this is now always called at a time when we can + * schedule by sungem. + */ + msleep(10); + MACIO_OUT8(K2_GPIO_EXTINT_0 + 29, 0); + + return 0; +} + +static long g5_i2s_enable(struct device_node *node, long param, long value) +{ + /* Very crude implementation for now */ + struct macio_chip *macio = &macio_chips[0]; + unsigned long flags; + + if (value == 0) + return 0; /* don't disable yet */ + + LOCK(flags); + MACIO_BIS(KEYLARGO_FCR3, KL3_CLK45_ENABLE | KL3_CLK49_ENABLE | + KL3_I2S0_CLK18_ENABLE); + udelay(10); + MACIO_BIS(KEYLARGO_FCR1, K2_FCR1_I2S0_CELL_ENABLE | + K2_FCR1_I2S0_CLK_ENABLE_BIT | K2_FCR1_I2S0_ENABLE); + udelay(10); + MACIO_BIC(KEYLARGO_FCR1, K2_FCR1_I2S0_RESET); + UNLOCK(flags); + udelay(10); + + return 0; +} + + +#ifdef CONFIG_SMP +static long g5_reset_cpu(struct device_node *node, long param, long value) +{ + unsigned int reset_io = 0; + unsigned long flags; + struct macio_chip *macio; + struct device_node *np; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo2) + return -ENODEV; + + np = find_path_device("/cpus"); + if (np == NULL) + return -ENODEV; + for (np = np->child; np != NULL; np = np->sibling) { + u32 *num = (u32 *)get_property(np, "reg", NULL); + u32 *rst = (u32 *)get_property(np, "soft-reset", NULL); + if (num == NULL || rst == NULL) + continue; + if (param == *num) { + reset_io = *rst; + break; + } + } + if (np == NULL || reset_io == 0) + return -ENODEV; + + LOCK(flags); + MACIO_OUT8(reset_io, KEYLARGO_GPIO_OUTPUT_ENABLE); + (void)MACIO_IN8(reset_io); + udelay(1); + MACIO_OUT8(reset_io, 0); + (void)MACIO_IN8(reset_io); + UNLOCK(flags); + + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * This can be called from pmac_smp so isn't static + * + * This takes the second CPU off the bus on dual CPU machines + * running UP + */ +void g5_phy_disable_cpu1(void) +{ + UN_OUT(U3_API_PHY_CONFIG_1, 0); +} +#endif /* CONFIG_POWER4 */ + +#ifndef CONFIG_POWER4 + +static void +keylargo_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + if (sleep_mode) { + mdelay(1); + MACIO_BIS(KEYLARGO_FCR0, KL0_USB_REF_SUSPEND); + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(1); + } + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE | + KL0_IRDA_ENABLE | KL0_IRDA_CLK32_ENABLE | + KL0_IRDA_CLK19_ENABLE); + + MACIO_BIC(KEYLARGO_MBCR, KL_MBCR_MB0_DEV_MASK); + MACIO_BIS(KEYLARGO_MBCR, KL_MBCR_MB0_IDE_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT | + KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE | + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_EIDE0_ENABLE | KL1_EIDE0_RESET_N | + KL1_EIDE1_ENABLE | KL1_EIDE1_RESET_N | + KL1_UIDE_ENABLE); + + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + MACIO_BIC(KEYLARGO_FCR2, KL2_IOBUS_ENABLE); + + temp = MACIO_IN32(KEYLARGO_FCR3); + if (macio->rev >= 2) { + temp |= KL3_SHUTDOWN_PLL2X; + if (sleep_mode) + temp |= KL3_SHUTDOWN_PLL_TOTAL; + } + + temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 | + KL3_SHUTDOWN_PLLKW35; + if (sleep_mode) + temp |= KL3_SHUTDOWN_PLLKW12; + temp &= ~(KL3_CLK66_ENABLE | KL3_CLK49_ENABLE | KL3_CLK45_ENABLE + | KL3_CLK31_ENABLE | KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_VIA_CLK16_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1); +} + +static void +pangea_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE | + KL0_USB0_CELL_ENABLE | KL0_USB1_CELL_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + KL1_AUDIO_SEL_22MCLK | KL1_AUDIO_CLK_ENABLE_BIT | + KL1_AUDIO_CLK_OUT_ENABLE | KL1_AUDIO_CELL_ENABLE | + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE | + KL1_UIDE_ENABLE); + if (pmac_mb.board_flags & PMAC_MB_MOBILE) + MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N); + + MACIO_BIS(KEYLARGO_FCR2, KL2_ALT_DATA_OUT); + + temp = MACIO_IN32(KEYLARGO_FCR3); + temp |= KL3_SHUTDOWN_PLLKW6 | KL3_SHUTDOWN_PLLKW4 | + KL3_SHUTDOWN_PLLKW35; + temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | KL3_CLK31_ENABLE + | KL3_I2S0_CLK18_ENABLE | KL3_I2S1_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_VIA_CLK16_ENABLE | KL3_TIMER_CLK18_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); mdelay(1); +} + +static void +intrepid_shutdown(struct macio_chip *macio, int sleep_mode) +{ + u32 temp; + + MACIO_BIC(KEYLARGO_FCR0,KL0_SCCA_ENABLE | KL0_SCCB_ENABLE | + KL0_SCC_CELL_ENABLE); + + MACIO_BIC(KEYLARGO_FCR1, + /*KL1_USB2_CELL_ENABLE |*/ + KL1_I2S0_CELL_ENABLE | KL1_I2S0_CLK_ENABLE_BIT | + KL1_I2S0_ENABLE | KL1_I2S1_CELL_ENABLE | + KL1_I2S1_CLK_ENABLE_BIT | KL1_I2S1_ENABLE); + if (pmac_mb.board_flags & PMAC_MB_MOBILE) + MACIO_BIC(KEYLARGO_FCR1, KL1_UIDE_RESET_N); + + temp = MACIO_IN32(KEYLARGO_FCR3); + temp &= ~(KL3_CLK49_ENABLE | KL3_CLK45_ENABLE | + KL3_I2S1_CLK18_ENABLE | KL3_I2S0_CLK18_ENABLE); + if (sleep_mode) + temp &= ~(KL3_TIMER_CLK18_ENABLE | KL3_IT_VIA_CLK32_ENABLE); + MACIO_OUT32(KEYLARGO_FCR3, temp); + + /* Flush posted writes & wait a bit */ + (void)MACIO_IN32(KEYLARGO_FCR0); + mdelay(10); +} + + +void pmac_tweak_clock_spreading(int enable) +{ + struct macio_chip *macio = &macio_chips[0]; + + /* Hack for doing clock spreading on some machines PowerBooks and + * iBooks. This implements the "platform-do-clockspreading" OF + * property as decoded manually on various models. For safety, we also + * check the product ID in the device-tree in cases we'll whack the i2c + * chip to make reasonably sure we won't set wrong values in there + * + * Of course, ultimately, we have to implement a real parser for + * the platform-do-* stuff... + */ + + if (macio->type == macio_intrepid) { + if (enable) + UN_OUT(UNI_N_CLOCK_SPREADING, 2); + else + UN_OUT(UNI_N_CLOCK_SPREADING, 0); + mdelay(40); + } + + while (machine_is_compatible("PowerBook5,2") || + machine_is_compatible("PowerBook5,3") || + machine_is_compatible("PowerBook6,2") || + machine_is_compatible("PowerBook6,3")) { + struct device_node *ui2c = of_find_node_by_type(NULL, "i2c"); + struct device_node *dt = of_find_node_by_name(NULL, "device-tree"); + u8 buffer[9]; + u32 *productID; + int i, rc, changed = 0; + + if (dt == NULL) + break; + productID = (u32 *)get_property(dt, "pid#", NULL); + if (productID == NULL) + break; + while(ui2c) { + struct device_node *p = of_get_parent(ui2c); + if (p && !strcmp(p->name, "uni-n")) + break; + ui2c = of_find_node_by_type(ui2c, "i2c"); + } + if (ui2c == NULL) + break; + DBG("Trying to bump clock speed for PID: %08x...\n", *productID); + rc = pmac_low_i2c_open(ui2c, 1); + if (rc != 0) + break; + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); + DBG("read result: %d,", rc); + if (rc != 0) { + pmac_low_i2c_close(ui2c); + break; + } + for (i=0; i<9; i++) + DBG(" %02x", buffer[i]); + DBG("\n"); + + switch(*productID) { + case 0x1182: /* AlBook 12" rev 2 */ + case 0x1183: /* iBook G4 12" */ + buffer[0] = (buffer[0] & 0x8f) | 0x70; + buffer[2] = (buffer[2] & 0x7f) | 0x00; + buffer[5] = (buffer[5] & 0x80) | 0x31; + buffer[6] = (buffer[6] & 0x40) | 0xb0; + buffer[7] = (buffer[7] & 0x00) | (enable ? 0xc0 : 0xba); + buffer[8] = (buffer[8] & 0x00) | 0x30; + changed = 1; + break; + case 0x3142: /* AlBook 15" (ATI M10) */ + case 0x3143: /* AlBook 17" (ATI M10) */ + buffer[0] = (buffer[0] & 0xaf) | 0x50; + buffer[2] = (buffer[2] & 0x7f) | 0x00; + buffer[5] = (buffer[5] & 0x80) | 0x31; + buffer[6] = (buffer[6] & 0x40) | 0xb0; + buffer[7] = (buffer[7] & 0x00) | (enable ? 0xd0 : 0xc0); + buffer[8] = (buffer[8] & 0x00) | 0x30; + changed = 1; + break; + default: + DBG("i2c-hwclock: Machine model not handled\n"); + break; + } + if (!changed) { + pmac_low_i2c_close(ui2c); + break; + } + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_stdsub); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_write, 0x80, buffer, 9); + DBG("write result: %d,", rc); + pmac_low_i2c_setmode(ui2c, pmac_low_i2c_mode_combined); + rc = pmac_low_i2c_xfer(ui2c, 0xd2 | pmac_low_i2c_read, 0x80, buffer, 9); + DBG("read result: %d,", rc); + if (rc != 0) { + pmac_low_i2c_close(ui2c); + break; + } + for (i=0; i<9; i++) + DBG(" %02x", buffer[i]); + pmac_low_i2c_close(ui2c); + break; + } +} + + +static int +core99_sleep(void) +{ + struct macio_chip *macio; + int i; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + /* We power off the wireless slot in case it was not done + * by the driver. We don't power it on automatically however + */ + if (macio->flags & MACIO_FLAG_AIRPORT_ON) + core99_airport_enable(macio->of_node, 0, 0); + + /* We power off the FW cable. Should be done by the driver... */ + if (macio->flags & MACIO_FLAG_FW_SUPPORTED) { + core99_firewire_enable(NULL, 0, 0); + core99_firewire_cable_power(NULL, 0, 0); + } + + /* We make sure int. modem is off (in case driver lost it) */ + if (macio->type == macio_keylargo) + core99_modem_enable(macio->of_node, 0, 0); + else + pangea_modem_enable(macio->of_node, 0, 0); + + /* We make sure the sound is off as well */ + core99_sound_chip_enable(macio->of_node, 0, 0); + + /* + * Save various bits of KeyLargo + */ + + /* Save the state of the various GPIOs */ + save_gpio_levels[0] = MACIO_IN32(KEYLARGO_GPIO_LEVELS0); + save_gpio_levels[1] = MACIO_IN32(KEYLARGO_GPIO_LEVELS1); + for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++) + save_gpio_extint[i] = MACIO_IN8(KEYLARGO_GPIO_EXTINT_0+i); + for (i=0; i<KEYLARGO_GPIO_CNT; i++) + save_gpio_normal[i] = MACIO_IN8(KEYLARGO_GPIO_0+i); + + /* Save the FCRs */ + if (macio->type == macio_keylargo) + save_mbcr = MACIO_IN32(KEYLARGO_MBCR); + save_fcr[0] = MACIO_IN32(KEYLARGO_FCR0); + save_fcr[1] = MACIO_IN32(KEYLARGO_FCR1); + save_fcr[2] = MACIO_IN32(KEYLARGO_FCR2); + save_fcr[3] = MACIO_IN32(KEYLARGO_FCR3); + save_fcr[4] = MACIO_IN32(KEYLARGO_FCR4); + if (macio->type == macio_pangea || macio->type == macio_intrepid) + save_fcr[5] = MACIO_IN32(KEYLARGO_FCR5); + + /* Save state & config of DBDMA channels */ + dbdma_save(macio, save_dbdma); + + /* + * Turn off as much as we can + */ + if (macio->type == macio_pangea) + pangea_shutdown(macio, 1); + else if (macio->type == macio_intrepid) + intrepid_shutdown(macio, 1); + else if (macio->type == macio_keylargo) + keylargo_shutdown(macio, 1); + + /* + * Put the host bridge to sleep + */ + + save_unin_clock_ctl = UN_IN(UNI_N_CLOCK_CNTL); + /* Note: do not switch GMAC off, driver does it when necessary, WOL must keep it + * enabled ! + */ + UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl & + ~(/*UNI_N_CLOCK_CNTL_GMAC|*/UNI_N_CLOCK_CNTL_FW/*|UNI_N_CLOCK_CNTL_PCI*/)); + udelay(100); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING); + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_SLEEP); + mdelay(10); + + /* + * FIXME: A bit of black magic with OpenPIC (don't ask me why) + */ + if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) { + MACIO_BIS(0x506e0, 0x00400000); + MACIO_BIS(0x506e0, 0x80000000); + } + return 0; +} + +static int +core99_wake_up(void) +{ + struct macio_chip *macio; + int i; + + macio = &macio_chips[0]; + if (macio->type != macio_keylargo && macio->type != macio_pangea && + macio->type != macio_intrepid) + return -ENODEV; + + /* + * Wakeup the host bridge + */ + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL); + udelay(10); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING); + udelay(10); + + /* + * Restore KeyLargo + */ + + if (macio->type == macio_keylargo) { + MACIO_OUT32(KEYLARGO_MBCR, save_mbcr); + (void)MACIO_IN32(KEYLARGO_MBCR); udelay(10); + } + MACIO_OUT32(KEYLARGO_FCR0, save_fcr[0]); + (void)MACIO_IN32(KEYLARGO_FCR0); udelay(10); + MACIO_OUT32(KEYLARGO_FCR1, save_fcr[1]); + (void)MACIO_IN32(KEYLARGO_FCR1); udelay(10); + MACIO_OUT32(KEYLARGO_FCR2, save_fcr[2]); + (void)MACIO_IN32(KEYLARGO_FCR2); udelay(10); + MACIO_OUT32(KEYLARGO_FCR3, save_fcr[3]); + (void)MACIO_IN32(KEYLARGO_FCR3); udelay(10); + MACIO_OUT32(KEYLARGO_FCR4, save_fcr[4]); + (void)MACIO_IN32(KEYLARGO_FCR4); udelay(10); + if (macio->type == macio_pangea || macio->type == macio_intrepid) { + MACIO_OUT32(KEYLARGO_FCR5, save_fcr[5]); + (void)MACIO_IN32(KEYLARGO_FCR5); udelay(10); + } + + dbdma_restore(macio, save_dbdma); + + MACIO_OUT32(KEYLARGO_GPIO_LEVELS0, save_gpio_levels[0]); + MACIO_OUT32(KEYLARGO_GPIO_LEVELS1, save_gpio_levels[1]); + for (i=0; i<KEYLARGO_GPIO_EXTINT_CNT; i++) + MACIO_OUT8(KEYLARGO_GPIO_EXTINT_0+i, save_gpio_extint[i]); + for (i=0; i<KEYLARGO_GPIO_CNT; i++) + MACIO_OUT8(KEYLARGO_GPIO_0+i, save_gpio_normal[i]); + + /* FIXME more black magic with OpenPIC ... */ + if (pmac_mb.model_id == PMAC_TYPE_SAWTOOTH) { + MACIO_BIC(0x506e0, 0x00400000); + MACIO_BIC(0x506e0, 0x80000000); + } + + UN_OUT(UNI_N_CLOCK_CNTL, save_unin_clock_ctl); + udelay(100); + + return 0; +} + +static long +core99_sleep_state(struct device_node *node, long param, long value) +{ + /* Param == 1 means to enter the "fake sleep" mode that is + * used for CPU speed switch + */ + if (param == 1) { + if (value == 1) { + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_SLEEPING); + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_IDLE2); + } else { + UN_OUT(UNI_N_POWER_MGT, UNI_N_POWER_MGT_NORMAL); + udelay(10); + UN_OUT(UNI_N_HWINIT_STATE, UNI_N_HWINIT_STATE_RUNNING); + udelay(10); + } + return 0; + } + if ((pmac_mb.board_flags & PMAC_MB_CAN_SLEEP) == 0) + return -EPERM; + + if (value == 1) + return core99_sleep(); + else if (value == 0) + return core99_wake_up(); + return 0; +} + +#endif /* CONFIG_POWER4 */ + +static long +generic_dev_can_wake(struct device_node *node, long param, long value) +{ + /* Todo: eventually check we are really dealing with on-board + * video device ... + */ + + if (pmac_mb.board_flags & PMAC_MB_MAY_SLEEP) + pmac_mb.board_flags |= PMAC_MB_CAN_SLEEP; + return 0; +} + +static long generic_get_mb_info(struct device_node *node, long param, long value) +{ + switch(param) { + case PMAC_MB_INFO_MODEL: + return pmac_mb.model_id; + case PMAC_MB_INFO_FLAGS: + return pmac_mb.board_flags; + case PMAC_MB_INFO_NAME: + /* hack hack hack... but should work */ + *((const char **)value) = pmac_mb.model_name; + return 0; + } + return -EINVAL; +} + + +/* + * Table definitions + */ + +/* Used on any machine + */ +static struct feature_table_entry any_features[] = { + { PMAC_FTR_GET_MB_INFO, generic_get_mb_info }, + { PMAC_FTR_DEVICE_CAN_WAKE, generic_dev_can_wake }, + { 0, NULL } +}; + +#ifndef CONFIG_POWER4 + +/* OHare based motherboards. Currently, we only use these on the + * 2400,3400 and 3500 series powerbooks. Some older desktops seem + * to have issues with turning on/off those asic cells + */ +static struct feature_table_entry ohare_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_SWIM3_ENABLE, ohare_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, ohare_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, ohare_ide_enable}, + { PMAC_FTR_IDE_RESET, ohare_ide_reset}, + { PMAC_FTR_SLEEP_STATE, ohare_sleep_state }, + { 0, NULL } +}; + +/* Heathrow desktop machines (Beige G3). + * Separated as some features couldn't be properly tested + * and the serial port control bits appear to confuse it. + */ +static struct feature_table_entry heathrow_desktop_features[] = { + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { 0, NULL } +}; + +/* Heathrow based laptop, that is the Wallstreet and mainstreet + * powerbooks. + */ +static struct feature_table_entry heathrow_laptop_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, heathrow_modem_enable }, + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { PMAC_FTR_SOUND_CHIP_ENABLE, heathrow_sound_enable }, + { PMAC_FTR_SLEEP_STATE, heathrow_sleep_state }, + { 0, NULL } +}; + +/* Paddington based machines + * The lombard (101) powerbook, first iMac models, B&W G3 and Yikes G4. + */ +static struct feature_table_entry paddington_features[] = { + { PMAC_FTR_SCC_ENABLE, ohare_htw_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, heathrow_modem_enable }, + { PMAC_FTR_SWIM3_ENABLE, heathrow_floppy_enable }, + { PMAC_FTR_MESH_ENABLE, heathrow_mesh_enable }, + { PMAC_FTR_IDE_ENABLE, heathrow_ide_enable }, + { PMAC_FTR_IDE_RESET, heathrow_ide_reset }, + { PMAC_FTR_BMAC_ENABLE, heathrow_bmac_enable }, + { PMAC_FTR_SOUND_CHIP_ENABLE, heathrow_sound_enable }, + { PMAC_FTR_SLEEP_STATE, heathrow_sleep_state }, + { 0, NULL } +}; + +/* Core99 & MacRISC 2 machines (all machines released since the + * iBook (included), that is all AGP machines, except pangea + * chipset. The pangea chipset is the "combo" UniNorth/KeyLargo + * used on iBook2 & iMac "flow power". + */ +static struct feature_table_entry core99_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, core99_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, core99_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* RackMac + */ +static struct feature_table_entry rackmac_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, core99_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* Pangea features + */ +static struct feature_table_entry pangea_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, pangea_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +/* Intrepid features + */ +static struct feature_table_entry intrepid_features[] = { + { PMAC_FTR_SCC_ENABLE, core99_scc_enable }, + { PMAC_FTR_MODEM_ENABLE, pangea_modem_enable }, + { PMAC_FTR_IDE_ENABLE, core99_ide_enable }, + { PMAC_FTR_IDE_RESET, core99_ide_reset }, + { PMAC_FTR_GMAC_ENABLE, core99_gmac_enable }, + { PMAC_FTR_GMAC_PHY_RESET, core99_gmac_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, core99_sound_chip_enable }, + { PMAC_FTR_AIRPORT_ENABLE, core99_airport_enable }, + { PMAC_FTR_USB_ENABLE, core99_usb_enable }, + { PMAC_FTR_1394_ENABLE, core99_firewire_enable }, + { PMAC_FTR_1394_CABLE_POWER, core99_firewire_cable_power }, + { PMAC_FTR_SLEEP_STATE, core99_sleep_state }, + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { PMAC_FTR_AACK_DELAY_ENABLE, intrepid_aack_delay_enable }, + { 0, NULL } +}; + +#else /* CONFIG_POWER4 */ + +/* G5 features + */ +static struct feature_table_entry g5_features[] = { + { PMAC_FTR_GMAC_ENABLE, g5_gmac_enable }, + { PMAC_FTR_1394_ENABLE, g5_fw_enable }, + { PMAC_FTR_ENABLE_MPIC, g5_mpic_enable }, + { PMAC_FTR_GMAC_PHY_RESET, g5_eth_phy_reset }, + { PMAC_FTR_SOUND_CHIP_ENABLE, g5_i2s_enable }, +#ifdef CONFIG_SMP + { PMAC_FTR_RESET_CPU, g5_reset_cpu }, +#endif /* CONFIG_SMP */ + { PMAC_FTR_READ_GPIO, core99_read_gpio }, + { PMAC_FTR_WRITE_GPIO, core99_write_gpio }, + { 0, NULL } +}; + +#endif /* CONFIG_POWER4 */ + +static struct pmac_mb_def pmac_mb_defs[] = { +#ifndef CONFIG_POWER4 + /* + * Desktops + */ + + { "AAPL,8500", "PowerMac 8500/8600", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,9500", "PowerMac 9500/9600", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7200", "PowerMac 7200", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7300", "PowerMac 7200/7300", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,7500", "PowerMac 7500", + PMAC_TYPE_PSURGE, NULL, + 0 + }, + { "AAPL,ShinerESB", "Apple Network Server", + PMAC_TYPE_ANS, NULL, + 0 + }, + { "AAPL,e407", "Alchemy", + PMAC_TYPE_ALCHEMY, NULL, + 0 + }, + { "AAPL,e411", "Gazelle", + PMAC_TYPE_GAZELLE, NULL, + 0 + }, + { "AAPL,Gossamer", "PowerMac G3 (Gossamer)", + PMAC_TYPE_GOSSAMER, heathrow_desktop_features, + 0 + }, + { "AAPL,PowerMac G3", "PowerMac G3 (Silk)", + PMAC_TYPE_SILK, heathrow_desktop_features, + 0 + }, + { "PowerMac1,1", "Blue&White G3", + PMAC_TYPE_YOSEMITE, paddington_features, + 0 + }, + { "PowerMac1,2", "PowerMac G4 PCI Graphics", + PMAC_TYPE_YIKES, paddington_features, + 0 + }, + { "PowerMac2,1", "iMac FireWire", + PMAC_TYPE_FW_IMAC, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac2,2", "iMac FireWire", + PMAC_TYPE_FW_IMAC, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,1", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,2", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,3", "PowerMac G4 AGP Graphics", + PMAC_TYPE_SAWTOOTH, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac3,4", "PowerMac G4 Silver", + PMAC_TYPE_QUICKSILVER, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac3,5", "PowerMac G4 Silver", + PMAC_TYPE_QUICKSILVER, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac3,6", "PowerMac G4 Windtunnel", + PMAC_TYPE_WINDTUNNEL, core99_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac4,1", "iMac \"Flower Power\"", + PMAC_TYPE_PANGEA_IMAC, pangea_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac4,2", "Flat panel iMac", + PMAC_TYPE_FLAT_PANEL_IMAC, pangea_features, + PMAC_MB_CAN_SLEEP + }, + { "PowerMac4,4", "eMac", + PMAC_TYPE_EMAC, core99_features, + PMAC_MB_MAY_SLEEP + }, + { "PowerMac5,1", "PowerMac G4 Cube", + PMAC_TYPE_CUBE, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_OLD_CORE99 + }, + { "PowerMac6,1", "Flat panel iMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac6,3", "Flat panel iMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac6,4", "eMac", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP, + }, + { "PowerMac10,1", "Mac mini", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER, + }, + { "iMac,1", "iMac (first generation)", + PMAC_TYPE_ORIG_IMAC, paddington_features, + 0 + }, + + /* + * Xserve's + */ + + { "RackMac1,1", "XServe", + PMAC_TYPE_RACKMAC, rackmac_features, + 0, + }, + { "RackMac1,2", "XServe rev. 2", + PMAC_TYPE_RACKMAC, rackmac_features, + 0, + }, + + /* + * Laptops + */ + + { "AAPL,3400/2400", "PowerBook 3400", + PMAC_TYPE_HOOPER, ohare_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "AAPL,3500", "PowerBook 3500", + PMAC_TYPE_KANGA, ohare_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "AAPL,PowerBook1998", "PowerBook Wallstreet", + PMAC_TYPE_WALLSTREET, heathrow_laptop_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "PowerBook1,1", "PowerBook 101 (Lombard)", + PMAC_TYPE_101_PBOOK, paddington_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_MOBILE + }, + { "PowerBook2,1", "iBook (first generation)", + PMAC_TYPE_ORIG_IBOOK, core99_features, + PMAC_MB_CAN_SLEEP | PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook2,2", "iBook FireWire", + PMAC_TYPE_FW_IBOOK, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | + PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook3,1", "PowerBook Pismo", + PMAC_TYPE_PISMO, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | + PMAC_MB_OLD_CORE99 | PMAC_MB_MOBILE + }, + { "PowerBook3,2", "PowerBook Titanium", + PMAC_TYPE_TITANIUM, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,3", "PowerBook Titanium II", + PMAC_TYPE_TITANIUM2, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,4", "PowerBook Titanium III", + PMAC_TYPE_TITANIUM3, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook3,5", "PowerBook Titanium IV", + PMAC_TYPE_TITANIUM4, core99_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,1", "iBook 2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,2", "iBook 2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook4,3", "iBook 2 rev. 2", + PMAC_TYPE_IBOOK2, pangea_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE + }, + { "PowerBook5,1", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,2", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,3", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,4", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,5", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,6", "PowerBook G4 15\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook5,7", "PowerBook G4 17\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,1", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,2", "PowerBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,3", "iBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,4", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,5", "iBook G4", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, + { "PowerBook6,8", "PowerBook G4 12\"", + PMAC_TYPE_UNKNOWN_INTREPID, intrepid_features, + PMAC_MB_MAY_SLEEP | PMAC_MB_HAS_FW_POWER | PMAC_MB_MOBILE, + }, +#else /* CONFIG_POWER4 */ + { "PowerMac7,2", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, +#ifdef CONFIG_PPC64 + { "PowerMac7,3", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5, g5_features, + 0, + }, + { "PowerMac8,1", "iMac G5", + PMAC_TYPE_IMAC_G5, g5_features, + 0, + }, + { "PowerMac9,1", "PowerMac G5", + PMAC_TYPE_POWERMAC_G5_U3L, g5_features, + 0, + }, + { "RackMac3,1", "XServe G5", + PMAC_TYPE_XSERVE_G5, g5_features, + 0, + }, +#endif /* CONFIG_PPC64 */ +#endif /* CONFIG_POWER4 */ +}; + +/* + * The toplevel feature_call callback + */ +long pmac_do_feature_call(unsigned int selector, ...) +{ + struct device_node *node; + long param, value; + int i; + feature_call func = NULL; + va_list args; + + if (pmac_mb.features) + for (i=0; pmac_mb.features[i].function; i++) + if (pmac_mb.features[i].selector == selector) { + func = pmac_mb.features[i].function; + break; + } + if (!func) + for (i=0; any_features[i].function; i++) + if (any_features[i].selector == selector) { + func = any_features[i].function; + break; + } + if (!func) + return -ENODEV; + + va_start(args, selector); + node = (struct device_node*)va_arg(args, void*); + param = va_arg(args, long); + value = va_arg(args, long); + va_end(args); + + return func(node, param, value); +} + +static int __init probe_motherboard(void) +{ + int i; + struct macio_chip *macio = &macio_chips[0]; + const char *model = NULL; + struct device_node *dt; + + /* Lookup known motherboard type in device-tree. First try an + * exact match on the "model" property, then try a "compatible" + * match is none is found. + */ + dt = find_devices("device-tree"); + if (dt != NULL) + model = (const char *) get_property(dt, "model", NULL); + for(i=0; model && i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { + if (strcmp(model, pmac_mb_defs[i].model_string) == 0) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + for(i=0; i<(sizeof(pmac_mb_defs)/sizeof(struct pmac_mb_def)); i++) { + if (machine_is_compatible(pmac_mb_defs[i].model_string)) { + pmac_mb = pmac_mb_defs[i]; + goto found; + } + } + + /* Fallback to selection depending on mac-io chip type */ + switch(macio->type) { +#ifndef CONFIG_POWER4 + case macio_grand_central: + pmac_mb.model_id = PMAC_TYPE_PSURGE; + pmac_mb.model_name = "Unknown PowerSurge"; + break; + case macio_ohare: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_OHARE; + pmac_mb.model_name = "Unknown OHare-based"; + break; + case macio_heathrow: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_HEATHROW; + pmac_mb.model_name = "Unknown Heathrow-based"; + pmac_mb.features = heathrow_desktop_features; + break; + case macio_paddington: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PADDINGTON; + pmac_mb.model_name = "Unknown Paddington-based"; + pmac_mb.features = paddington_features; + break; + case macio_keylargo: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_CORE99; + pmac_mb.model_name = "Unknown Keylargo-based"; + pmac_mb.features = core99_features; + break; + case macio_pangea: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_PANGEA; + pmac_mb.model_name = "Unknown Pangea-based"; + pmac_mb.features = pangea_features; + break; + case macio_intrepid: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_INTREPID; + pmac_mb.model_name = "Unknown Intrepid-based"; + pmac_mb.features = intrepid_features; + break; +#else /* CONFIG_POWER4 */ + case macio_keylargo2: + pmac_mb.model_id = PMAC_TYPE_UNKNOWN_K2; + pmac_mb.model_name = "Unknown K2-based"; + pmac_mb.features = g5_features; + break; +#endif /* CONFIG_POWER4 */ + default: + return -ENODEV; + } +found: +#ifndef CONFIG_POWER4 + /* Fixup Hooper vs. Comet */ + if (pmac_mb.model_id == PMAC_TYPE_HOOPER) { + u32 __iomem * mach_id_ptr = ioremap(0xf3000034, 4); + if (!mach_id_ptr) + return -ENODEV; + /* Here, I used to disable the media-bay on comet. It + * appears this is wrong, the floppy connector is actually + * a kind of media-bay and works with the current driver. + */ + if (__raw_readl(mach_id_ptr) & 0x20000000UL) + pmac_mb.model_id = PMAC_TYPE_COMET; + iounmap(mach_id_ptr); + } +#endif /* CONFIG_POWER4 */ + +#ifdef CONFIG_6xx + /* Set default value of powersave_nap on machines that support it. + * It appears that uninorth rev 3 has a problem with it, we don't + * enable it on those. In theory, the flush-on-lock property is + * supposed to be set when not supported, but I'm not very confident + * that all Apple OF revs did it properly, I do it the paranoid way. + */ + while (uninorth_base && uninorth_rev > 3) { + struct device_node *np = find_path_device("/cpus"); + if (!np || !np->child) { + printk(KERN_WARNING "Can't find CPU(s) in device tree !\n"); + break; + } + np = np->child; + /* Nap mode not supported on SMP */ + if (np->sibling) + break; + /* Nap mode not supported if flush-on-lock property is present */ + if (get_property(np, "flush-on-lock", NULL)) + break; + powersave_nap = 1; + printk(KERN_INFO "Processor NAP mode on idle enabled.\n"); + break; + } + + /* On CPUs that support it (750FX), lowspeed by default during + * NAP mode + */ + powersave_lowspeed = 1; +#endif /* CONFIG_6xx */ +#ifdef CONFIG_POWER4 + powersave_nap = 1; +#endif + /* Check for "mobile" machine */ + if (model && (strncmp(model, "PowerBook", 9) == 0 + || strncmp(model, "iBook", 5) == 0)) + pmac_mb.board_flags |= PMAC_MB_MOBILE; + + + printk(KERN_INFO "PowerMac motherboard: %s\n", pmac_mb.model_name); + return 0; +} + +/* Initialize the Core99 UniNorth host bridge and memory controller + */ +static void __init probe_uninorth(void) +{ + unsigned long actrl; + + /* Locate core99 Uni-N */ + uninorth_node = of_find_node_by_name(NULL, "uni-n"); + /* Locate G5 u3 */ + if (uninorth_node == NULL) { + uninorth_node = of_find_node_by_name(NULL, "u3"); + uninorth_u3 = 1; + } + if (uninorth_node && uninorth_node->n_addrs > 0) { + unsigned long address = uninorth_node->addrs[0].address; + uninorth_base = ioremap(address, 0x40000); + uninorth_rev = in_be32(UN_REG(UNI_N_VERSION)); + if (uninorth_u3) + u3_ht = ioremap(address + U3_HT_CONFIG_BASE, 0x1000); + } else + uninorth_node = NULL; + + if (!uninorth_node) + return; + + printk(KERN_INFO "Found %s memory controller & host bridge, revision: %d\n", + uninorth_u3 ? "U3" : "UniNorth", uninorth_rev); + printk(KERN_INFO "Mapped at 0x%08lx\n", (unsigned long)uninorth_base); + + /* Set the arbitrer QAck delay according to what Apple does + */ + if (uninorth_rev < 0x11) { + actrl = UN_IN(UNI_N_ARB_CTRL) & ~UNI_N_ARB_CTRL_QACK_DELAY_MASK; + actrl |= ((uninorth_rev < 3) ? UNI_N_ARB_CTRL_QACK_DELAY105 : + UNI_N_ARB_CTRL_QACK_DELAY) << UNI_N_ARB_CTRL_QACK_DELAY_SHIFT; + UN_OUT(UNI_N_ARB_CTRL, actrl); + } + + /* Some more magic as done by them in recent MacOS X on UniNorth + * revs 1.5 to 2.O and Pangea. Seem to toggle the UniN Maxbus/PCI + * memory timeout + */ + if ((uninorth_rev >= 0x11 && uninorth_rev <= 0x24) || uninorth_rev == 0xc0) + UN_OUT(0x2160, UN_IN(0x2160) & 0x00ffffff); +} + +static void __init probe_one_macio(const char *name, const char *compat, int type) +{ + struct device_node* node; + int i; + volatile u32 __iomem * base; + u32* revp; + + node = find_devices(name); + if (!node || !node->n_addrs) + return; + if (compat) + do { + if (device_is_compatible(node, compat)) + break; + node = node->next; + } while (node); + if (!node) + return; + for(i=0; i<MAX_MACIO_CHIPS; i++) { + if (!macio_chips[i].of_node) + break; + if (macio_chips[i].of_node == node) + return; + } + if (i >= MAX_MACIO_CHIPS) { + printk(KERN_ERR "pmac_feature: Please increase MAX_MACIO_CHIPS !\n"); + printk(KERN_ERR "pmac_feature: %s skipped\n", node->full_name); + return; + } + base = ioremap(node->addrs[0].address, node->addrs[0].size); + if (!base) { + printk(KERN_ERR "pmac_feature: Can't map mac-io chip !\n"); + return; + } + if (type == macio_keylargo) { + u32 *did = (u32 *)get_property(node, "device-id", NULL); + if (*did == 0x00000025) + type = macio_pangea; + if (*did == 0x0000003e) + type = macio_intrepid; + } + macio_chips[i].of_node = node; + macio_chips[i].type = type; + macio_chips[i].base = base; + macio_chips[i].flags = MACIO_FLAG_SCCB_ON | MACIO_FLAG_SCCB_ON; + macio_chips[i].name = macio_names[type]; + revp = (u32 *)get_property(node, "revision-id", NULL); + if (revp) + macio_chips[i].rev = *revp; + printk(KERN_INFO "Found a %s mac-io controller, rev: %d, mapped at 0x%p\n", + macio_names[type], macio_chips[i].rev, macio_chips[i].base); +} + +static int __init +probe_macios(void) +{ + /* Warning, ordering is important */ + probe_one_macio("gc", NULL, macio_grand_central); + probe_one_macio("ohare", NULL, macio_ohare); + probe_one_macio("pci106b,7", NULL, macio_ohareII); + probe_one_macio("mac-io", "keylargo", macio_keylargo); + probe_one_macio("mac-io", "paddington", macio_paddington); + probe_one_macio("mac-io", "gatwick", macio_gatwick); + probe_one_macio("mac-io", "heathrow", macio_heathrow); + probe_one_macio("mac-io", "K2-Keylargo", macio_keylargo2); + + /* Make sure the "main" macio chip appear first */ + if (macio_chips[0].type == macio_gatwick + && macio_chips[1].type == macio_heathrow) { + struct macio_chip temp = macio_chips[0]; + macio_chips[0] = macio_chips[1]; + macio_chips[1] = temp; + } + if (macio_chips[0].type == macio_ohareII + && macio_chips[1].type == macio_ohare) { + struct macio_chip temp = macio_chips[0]; + macio_chips[0] = macio_chips[1]; + macio_chips[1] = temp; + } + macio_chips[0].lbus.index = 0; + macio_chips[1].lbus.index = 1; + + return (macio_chips[0].of_node == NULL) ? -ENODEV : 0; +} + +static void __init +initial_serial_shutdown(struct device_node *np) +{ + int len; + struct slot_names_prop { + int count; + char name[1]; + } *slots; + char *conn; + int port_type = PMAC_SCC_ASYNC; + int modem = 0; + + slots = (struct slot_names_prop *)get_property(np, "slot-names", &len); + conn = get_property(np, "AAPL,connector", &len); + if (conn && (strcmp(conn, "infrared") == 0)) + port_type = PMAC_SCC_IRDA; + else if (device_is_compatible(np, "cobalt")) + modem = 1; + else if (slots && slots->count > 0) { + if (strcmp(slots->name, "IrDA") == 0) + port_type = PMAC_SCC_IRDA; + else if (strcmp(slots->name, "Modem") == 0) + modem = 1; + } + if (modem) + pmac_call_feature(PMAC_FTR_MODEM_ENABLE, np, 0, 0); + pmac_call_feature(PMAC_FTR_SCC_ENABLE, np, port_type, 0); +} + +static void __init +set_initial_features(void) +{ + struct device_node *np; + + /* That hack appears to be necessary for some StarMax motherboards + * but I'm not too sure it was audited for side-effects on other + * ohare based machines... + * Since I still have difficulties figuring the right way to + * differenciate them all and since that hack was there for a long + * time, I'll keep it around + */ + if (macio_chips[0].type == macio_ohare && !find_devices("via-pmu")) { + struct macio_chip *macio = &macio_chips[0]; + MACIO_OUT32(OHARE_FCR, STARMAX_FEATURES); + } else if (macio_chips[0].type == macio_ohare) { + struct macio_chip *macio = &macio_chips[0]; + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } else if (macio_chips[1].type == macio_ohare) { + struct macio_chip *macio = &macio_chips[1]; + MACIO_BIS(OHARE_FCR, OH_IOBUS_ENABLE); + } + +#ifdef CONFIG_POWER4 + if (macio_chips[0].type == macio_keylargo2) { +#ifndef CONFIG_SMP + /* On SMP machines running UP, we have the second CPU eating + * bus cycles. We need to take it off the bus. This is done + * from pmac_smp for SMP kernels running on one CPU + */ + np = of_find_node_by_type(NULL, "cpu"); + if (np != NULL) + np = of_find_node_by_type(np, "cpu"); + if (np != NULL) { + g5_phy_disable_cpu1(); + of_node_put(np); + } +#endif /* CONFIG_SMP */ + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + np = of_find_node_by_name(NULL, "ethernet"); + while(np) { + if (device_is_compatible(np, "K2-GMAC")) + g5_gmac_enable(np, 0, 1); + np = of_find_node_by_name(np, "ethernet"); + } + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a batter way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + np = of_find_node_by_name(NULL, "firewire"); + while(np) { + if (device_is_compatible(np, "pci106b,5811")) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + g5_fw_enable(np, 0, 1); + } + np = of_find_node_by_name(np, "firewire"); + } + } +#else /* CONFIG_POWER4 */ + + if (macio_chips[0].type == macio_keylargo || + macio_chips[0].type == macio_pangea || + macio_chips[0].type == macio_intrepid) { + /* Enable GMAC for now for PCI probing. It will be disabled + * later on after PCI probe + */ + np = of_find_node_by_name(NULL, "ethernet"); + while(np) { + if (np->parent + && device_is_compatible(np->parent, "uni-north") + && device_is_compatible(np, "gmac")) + core99_gmac_enable(np, 0, 1); + np = of_find_node_by_name(np, "ethernet"); + } + + /* Enable FW before PCI probe. Will be disabled later on + * Note: We should have a batter way to check that we are + * dealing with uninorth internal cell and not a PCI cell + * on the external PCI. The code below works though. + */ + np = of_find_node_by_name(NULL, "firewire"); + while(np) { + if (np->parent + && device_is_compatible(np->parent, "uni-north") + && (device_is_compatible(np, "pci106b,18") || + device_is_compatible(np, "pci106b,30") || + device_is_compatible(np, "pci11c1,5811"))) { + macio_chips[0].flags |= MACIO_FLAG_FW_SUPPORTED; + core99_firewire_enable(np, 0, 1); + } + np = of_find_node_by_name(np, "firewire"); + } + + /* Enable ATA-100 before PCI probe. */ + np = of_find_node_by_name(NULL, "ata-6"); + while(np) { + if (np->parent + && device_is_compatible(np->parent, "uni-north") + && device_is_compatible(np, "kauai-ata")) { + core99_ata100_enable(np, 1); + } + np = of_find_node_by_name(np, "ata-6"); + } + + /* Switch airport off */ + np = find_devices("radio"); + while(np) { + if (np && np->parent == macio_chips[0].of_node) { + macio_chips[0].flags |= MACIO_FLAG_AIRPORT_ON; + core99_airport_enable(np, 0, 0); + } + np = np->next; + } + } + + /* On all machines that support sound PM, switch sound off */ + if (macio_chips[0].of_node) + pmac_do_feature_call(PMAC_FTR_SOUND_CHIP_ENABLE, + macio_chips[0].of_node, 0, 0); + + /* While on some desktop G3s, we turn it back on */ + if (macio_chips[0].of_node && macio_chips[0].type == macio_heathrow + && (pmac_mb.model_id == PMAC_TYPE_GOSSAMER || + pmac_mb.model_id == PMAC_TYPE_SILK)) { + struct macio_chip *macio = &macio_chips[0]; + MACIO_BIS(HEATHROW_FCR, HRW_SOUND_CLK_ENABLE); + MACIO_BIC(HEATHROW_FCR, HRW_SOUND_POWER_N); + } + + /* Some machine models need the clock chip to be properly setup for + * clock spreading now. This should be a platform function but we + * don't do these at the moment + */ + pmac_tweak_clock_spreading(1); + +#endif /* CONFIG_POWER4 */ + + /* On all machines, switch modem & serial ports off */ + np = find_devices("ch-a"); + while(np) { + initial_serial_shutdown(np); + np = np->next; + } + np = find_devices("ch-b"); + while(np) { + initial_serial_shutdown(np); + np = np->next; + } +} + +void __init +pmac_feature_init(void) +{ + /* Detect the UniNorth memory controller */ + probe_uninorth(); + + /* Probe mac-io controllers */ + if (probe_macios()) { + printk(KERN_WARNING "No mac-io chip found\n"); + return; + } + + /* Setup low-level i2c stuffs */ + pmac_init_low_i2c(); + + /* Probe machine type */ + if (probe_motherboard()) + printk(KERN_WARNING "Unknown PowerMac !\n"); + + /* Set some initial features (turn off some chips that will + * be later turned on) + */ + set_initial_features(); +} + +int __init pmac_feature_late_init(void) +{ +#if 0 + struct device_node *np; + + /* Request some resources late */ + if (uninorth_node) + request_OF_resource(uninorth_node, 0, NULL); + np = find_devices("hammerhead"); + if (np) + request_OF_resource(np, 0, NULL); + np = find_devices("interrupt-controller"); + if (np) + request_OF_resource(np, 0, NULL); +#endif + return 0; +} + +device_initcall(pmac_feature_late_init); + +#if 0 +static void dump_HT_speeds(char *name, u32 cfg, u32 frq) +{ + int freqs[16] = { 200,300,400,500,600,800,1000,0,0,0,0,0,0,0,0,0 }; + int bits[8] = { 8,16,0,32,2,4,0,0 }; + int freq = (frq >> 8) & 0xf; + + if (freqs[freq] == 0) + printk("%s: Unknown HT link frequency %x\n", name, freq); + else + printk("%s: %d MHz on main link, (%d in / %d out) bits width\n", + name, freqs[freq], + bits[(cfg >> 28) & 0x7], bits[(cfg >> 24) & 0x7]); +} + +void __init pmac_check_ht_link(void) +{ +#if 0 /* Disabled for now */ + u32 ufreq, freq, ucfg, cfg; + struct device_node *pcix_node; + u8 px_bus, px_devfn; + struct pci_controller *px_hose; + + (void)in_be32(u3_ht + U3_HT_LINK_COMMAND); + ucfg = cfg = in_be32(u3_ht + U3_HT_LINK_CONFIG); + ufreq = freq = in_be32(u3_ht + U3_HT_LINK_FREQ); + dump_HT_speeds("U3 HyperTransport", cfg, freq); + + pcix_node = of_find_compatible_node(NULL, "pci", "pci-x"); + if (pcix_node == NULL) { + printk("No PCI-X bridge found\n"); + return; + } + if (pci_device_from_OF_node(pcix_node, &px_bus, &px_devfn) != 0) { + printk("PCI-X bridge found but not matched to pci\n"); + return; + } + px_hose = pci_find_hose_for_OF_device(pcix_node); + if (px_hose == NULL) { + printk("PCI-X bridge found but not matched to host\n"); + return; + } + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc4, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xcc, &freq); + dump_HT_speeds("PCI-X HT Uplink", cfg, freq); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xc8, &cfg); + early_read_config_dword(px_hose, px_bus, px_devfn, 0xd0, &freq); + dump_HT_speeds("PCI-X HT Downlink", cfg, freq); +#endif +} + +#endif /* CONFIG_POWER4 */ + +/* + * Early video resume hook + */ + +static void (*pmac_early_vresume_proc)(void *data); +static void *pmac_early_vresume_data; + +void pmac_set_early_video_resume(void (*proc)(void *data), void *data) +{ + if (_machine != _MACH_Pmac) + return; + preempt_disable(); + pmac_early_vresume_proc = proc; + pmac_early_vresume_data = data; + preempt_enable(); +} +EXPORT_SYMBOL(pmac_set_early_video_resume); + +void pmac_call_early_video_resume(void) +{ + if (pmac_early_vresume_proc) + pmac_early_vresume_proc(pmac_early_vresume_data); +} + +/* + * AGP related suspend/resume code + */ + +static struct pci_dev *pmac_agp_bridge; +static int (*pmac_agp_suspend)(struct pci_dev *bridge); +static int (*pmac_agp_resume)(struct pci_dev *bridge); + +void pmac_register_agp_pm(struct pci_dev *bridge, + int (*suspend)(struct pci_dev *bridge), + int (*resume)(struct pci_dev *bridge)) +{ + if (suspend || resume) { + pmac_agp_bridge = bridge; + pmac_agp_suspend = suspend; + pmac_agp_resume = resume; + return; + } + if (bridge != pmac_agp_bridge) + return; + pmac_agp_suspend = pmac_agp_resume = NULL; + return; +} +EXPORT_SYMBOL(pmac_register_agp_pm); + +void pmac_suspend_agp_for_card(struct pci_dev *dev) +{ + if (pmac_agp_bridge == NULL || pmac_agp_suspend == NULL) + return; + if (pmac_agp_bridge->bus != dev->bus) + return; + pmac_agp_suspend(pmac_agp_bridge); +} +EXPORT_SYMBOL(pmac_suspend_agp_for_card); + +void pmac_resume_agp_for_card(struct pci_dev *dev) +{ + if (pmac_agp_bridge == NULL || pmac_agp_resume == NULL) + return; + if (pmac_agp_bridge->bus != dev->bus) + return; + pmac_agp_resume(pmac_agp_bridge); +} +EXPORT_SYMBOL(pmac_resume_agp_for_card); diff --git a/arch/powerpc/platforms/powermac/pmac_low_i2c.c b/arch/powerpc/platforms/powermac/pmac_low_i2c.c new file mode 100644 index 00000000000..f3f39e8e337 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_low_i2c.c @@ -0,0 +1,523 @@ +/* + * arch/ppc/platforms/pmac_low_i2c.c + * + * Copyright (C) 2003 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * This file contains some low-level i2c access routines that + * need to be used by various bits of the PowerMac platform code + * at times where the real asynchronous & interrupt driven driver + * cannot be used. The API borrows some semantics from the darwin + * driver in order to ease the implementation of the platform + * properties parser + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <asm/keylargo.h> +#include <asm/uninorth.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/pmac_low_i2c.h> + +#define MAX_LOW_I2C_HOST 4 + +#ifdef DEBUG +#define DBG(x...) do {\ + printk(KERN_DEBUG "KW:" x); \ + } while(0) +#else +#define DBG(x...) +#endif + +struct low_i2c_host; + +typedef int (*low_i2c_func_t)(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len); + +struct low_i2c_host +{ + struct device_node *np; /* OF device node */ + struct semaphore mutex; /* Access mutex for use by i2c-keywest */ + low_i2c_func_t func; /* Access function */ + unsigned int is_open : 1; /* Poor man's access control */ + int mode; /* Current mode */ + int channel; /* Current channel */ + int num_channels; /* Number of channels */ + void __iomem *base; /* For keywest-i2c, base address */ + int bsteps; /* And register stepping */ + int speed; /* And speed */ +}; + +static struct low_i2c_host low_i2c_hosts[MAX_LOW_I2C_HOST]; + +/* No locking is necessary on allocation, we are running way before + * anything can race with us + */ +static struct low_i2c_host *find_low_i2c_host(struct device_node *np) +{ + int i; + + for (i = 0; i < MAX_LOW_I2C_HOST; i++) + if (low_i2c_hosts[i].np == np) + return &low_i2c_hosts[i]; + return NULL; +} + +/* + * + * i2c-keywest implementation (UniNorth, U2, U3, Keylargo's) + * + */ + +/* + * Keywest i2c definitions borrowed from drivers/i2c/i2c-keywest.h, + * should be moved somewhere in include/asm-ppc/ + */ +/* Register indices */ +typedef enum { + reg_mode = 0, + reg_control, + reg_status, + reg_isr, + reg_ier, + reg_addr, + reg_subaddr, + reg_data +} reg_t; + + +/* Mode register */ +#define KW_I2C_MODE_100KHZ 0x00 +#define KW_I2C_MODE_50KHZ 0x01 +#define KW_I2C_MODE_25KHZ 0x02 +#define KW_I2C_MODE_DUMB 0x00 +#define KW_I2C_MODE_STANDARD 0x04 +#define KW_I2C_MODE_STANDARDSUB 0x08 +#define KW_I2C_MODE_COMBINED 0x0C +#define KW_I2C_MODE_MODE_MASK 0x0C +#define KW_I2C_MODE_CHAN_MASK 0xF0 + +/* Control register */ +#define KW_I2C_CTL_AAK 0x01 +#define KW_I2C_CTL_XADDR 0x02 +#define KW_I2C_CTL_STOP 0x04 +#define KW_I2C_CTL_START 0x08 + +/* Status register */ +#define KW_I2C_STAT_BUSY 0x01 +#define KW_I2C_STAT_LAST_AAK 0x02 +#define KW_I2C_STAT_LAST_RW 0x04 +#define KW_I2C_STAT_SDA 0x08 +#define KW_I2C_STAT_SCL 0x10 + +/* IER & ISR registers */ +#define KW_I2C_IRQ_DATA 0x01 +#define KW_I2C_IRQ_ADDR 0x02 +#define KW_I2C_IRQ_STOP 0x04 +#define KW_I2C_IRQ_START 0x08 +#define KW_I2C_IRQ_MASK 0x0F + +/* State machine states */ +enum { + state_idle, + state_addr, + state_read, + state_write, + state_stop, + state_dead +}; + +#define WRONG_STATE(name) do {\ + printk(KERN_DEBUG "KW: wrong state. Got %s, state: %s (isr: %02x)\n", \ + name, __kw_state_names[state], isr); \ + } while(0) + +static const char *__kw_state_names[] = { + "state_idle", + "state_addr", + "state_read", + "state_write", + "state_stop", + "state_dead" +}; + +static inline u8 __kw_read_reg(struct low_i2c_host *host, reg_t reg) +{ + return readb(host->base + (((unsigned int)reg) << host->bsteps)); +} + +static inline void __kw_write_reg(struct low_i2c_host *host, reg_t reg, u8 val) +{ + writeb(val, host->base + (((unsigned)reg) << host->bsteps)); + (void)__kw_read_reg(host, reg_subaddr); +} + +#define kw_write_reg(reg, val) __kw_write_reg(host, reg, val) +#define kw_read_reg(reg) __kw_read_reg(host, reg) + + +/* Don't schedule, the g5 fan controller is too + * timing sensitive + */ +static u8 kw_wait_interrupt(struct low_i2c_host* host) +{ + int i, j; + u8 isr; + + for (i = 0; i < 100000; i++) { + isr = kw_read_reg(reg_isr) & KW_I2C_IRQ_MASK; + if (isr != 0) + return isr; + + /* This code is used with the timebase frozen, we cannot rely + * on udelay ! For now, just use a bogus loop + */ + for (j = 1; j < 10000; j++) + mb(); + } + return isr; +} + +static int kw_handle_interrupt(struct low_i2c_host *host, int state, int rw, int *rc, u8 **data, int *len, u8 isr) +{ + u8 ack; + + DBG("kw_handle_interrupt(%s, isr: %x)\n", __kw_state_names[state], isr); + + if (isr == 0) { + if (state != state_stop) { + DBG("KW: Timeout !\n"); + *rc = -EIO; + goto stop; + } + if (state == state_stop) { + ack = kw_read_reg(reg_status); + if (!(ack & KW_I2C_STAT_BUSY)) { + state = state_idle; + kw_write_reg(reg_ier, 0x00); + } + } + return state; + } + + if (isr & KW_I2C_IRQ_ADDR) { + ack = kw_read_reg(reg_status); + if (state != state_addr) { + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + WRONG_STATE("KW_I2C_IRQ_ADDR"); + *rc = -EIO; + goto stop; + } + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + *rc = -ENODEV; + DBG("KW: NAK on address\n"); + return state_stop; + } else { + if (rw) { + state = state_read; + if (*len > 1) + kw_write_reg(reg_control, KW_I2C_CTL_AAK); + } else { + state = state_write; + kw_write_reg(reg_data, **data); + (*data)++; (*len)--; + } + } + kw_write_reg(reg_isr, KW_I2C_IRQ_ADDR); + } + + if (isr & KW_I2C_IRQ_DATA) { + if (state == state_read) { + **data = kw_read_reg(reg_data); + (*data)++; (*len)--; + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + if ((*len) == 0) + state = state_stop; + else if ((*len) == 1) + kw_write_reg(reg_control, 0); + } else if (state == state_write) { + ack = kw_read_reg(reg_status); + if ((ack & KW_I2C_STAT_LAST_AAK) == 0) { + DBG("KW: nack on data write\n"); + *rc = -EIO; + goto stop; + } else if (*len) { + kw_write_reg(reg_data, **data); + (*data)++; (*len)--; + } else { + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + state = state_stop; + *rc = 0; + } + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + } else { + kw_write_reg(reg_isr, KW_I2C_IRQ_DATA); + WRONG_STATE("KW_I2C_IRQ_DATA"); + if (state != state_stop) { + *rc = -EIO; + goto stop; + } + } + } + + if (isr & KW_I2C_IRQ_STOP) { + kw_write_reg(reg_isr, KW_I2C_IRQ_STOP); + if (state != state_stop) { + WRONG_STATE("KW_I2C_IRQ_STOP"); + *rc = -EIO; + } + return state_idle; + } + + if (isr & KW_I2C_IRQ_START) + kw_write_reg(reg_isr, KW_I2C_IRQ_START); + + return state; + + stop: + kw_write_reg(reg_control, KW_I2C_CTL_STOP); + return state_stop; +} + +static int keywest_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 subaddr, u8 *data, int len) +{ + u8 mode_reg = host->speed; + int state = state_addr; + int rc = 0; + + /* Setup mode & subaddress if any */ + switch(host->mode) { + case pmac_low_i2c_mode_dumb: + printk(KERN_ERR "low_i2c: Dumb mode not supported !\n"); + return -EINVAL; + case pmac_low_i2c_mode_std: + mode_reg |= KW_I2C_MODE_STANDARD; + break; + case pmac_low_i2c_mode_stdsub: + mode_reg |= KW_I2C_MODE_STANDARDSUB; + break; + case pmac_low_i2c_mode_combined: + mode_reg |= KW_I2C_MODE_COMBINED; + break; + } + + /* Setup channel & clear pending irqs */ + kw_write_reg(reg_isr, kw_read_reg(reg_isr)); + kw_write_reg(reg_mode, mode_reg | (host->channel << 4)); + kw_write_reg(reg_status, 0); + + /* Set up address and r/w bit */ + kw_write_reg(reg_addr, addr); + + /* Set up the sub address */ + if ((mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_STANDARDSUB + || (mode_reg & KW_I2C_MODE_MODE_MASK) == KW_I2C_MODE_COMBINED) + kw_write_reg(reg_subaddr, subaddr); + + /* Start sending address & disable interrupt*/ + kw_write_reg(reg_ier, 0 /*KW_I2C_IRQ_MASK*/); + kw_write_reg(reg_control, KW_I2C_CTL_XADDR); + + /* State machine, to turn into an interrupt handler */ + while(state != state_idle) { + u8 isr = kw_wait_interrupt(host); + state = kw_handle_interrupt(host, state, addr & 1, &rc, &data, &len, isr); + } + + return rc; +} + +static void keywest_low_i2c_add(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(NULL); + u32 *psteps, *prate, steps, aoffset = 0; + struct device_node *parent; + + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return; + } + memset(host, 0, sizeof(*host)); + + init_MUTEX(&host->mutex); + host->np = of_node_get(np); + psteps = (u32 *)get_property(np, "AAPL,address-step", NULL); + steps = psteps ? (*psteps) : 0x10; + for (host->bsteps = 0; (steps & 0x01) == 0; host->bsteps++) + steps >>= 1; + parent = of_get_parent(np); + host->num_channels = 1; + if (parent && parent->name[0] == 'u') { + host->num_channels = 2; + aoffset = 3; + } + /* Select interface rate */ + host->speed = KW_I2C_MODE_100KHZ; + prate = (u32 *)get_property(np, "AAPL,i2c-rate", NULL); + if (prate) switch(*prate) { + case 100: + host->speed = KW_I2C_MODE_100KHZ; + break; + case 50: + host->speed = KW_I2C_MODE_50KHZ; + break; + case 25: + host->speed = KW_I2C_MODE_25KHZ; + break; + } + + host->mode = pmac_low_i2c_mode_std; + host->base = ioremap(np->addrs[0].address + aoffset, + np->addrs[0].size); + host->func = keywest_low_i2c_func; +} + +/* + * + * PMU implementation + * + */ + + +#ifdef CONFIG_ADB_PMU + +static int pmu_low_i2c_func(struct low_i2c_host *host, u8 addr, u8 sub, u8 *data, int len) +{ + // TODO + return -ENODEV; +} + +static void pmu_low_i2c_add(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(NULL); + + if (host == NULL) { + printk(KERN_ERR "low_i2c: Can't allocate host for %s\n", + np->full_name); + return; + } + memset(host, 0, sizeof(*host)); + + init_MUTEX(&host->mutex); + host->np = of_node_get(np); + host->num_channels = 3; + host->mode = pmac_low_i2c_mode_std; + host->func = pmu_low_i2c_func; +} + +#endif /* CONFIG_ADB_PMU */ + +void __init pmac_init_low_i2c(void) +{ + struct device_node *np; + + /* Probe keywest-i2c busses */ + np = of_find_compatible_node(NULL, "i2c", "keywest-i2c"); + while(np) { + keywest_low_i2c_add(np); + np = of_find_compatible_node(np, "i2c", "keywest-i2c"); + } + +#ifdef CONFIG_ADB_PMU + /* Probe PMU busses */ + np = of_find_node_by_name(NULL, "via-pmu"); + if (np) + pmu_low_i2c_add(np); +#endif /* CONFIG_ADB_PMU */ + + /* TODO: Add CUDA support as well */ +} + +int pmac_low_i2c_lock(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + down(&host->mutex); + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_lock); + +int pmac_low_i2c_unlock(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + up(&host->mutex); + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_unlock); + + +int pmac_low_i2c_open(struct device_node *np, int channel) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + + if (channel >= host->num_channels) + return -EINVAL; + + down(&host->mutex); + host->is_open = 1; + host->channel = channel; + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_open); + +int pmac_low_i2c_close(struct device_node *np) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + + host->is_open = 0; + up(&host->mutex); + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_close); + +int pmac_low_i2c_setmode(struct device_node *np, int mode) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + WARN_ON(!host->is_open); + host->mode = mode; + + return 0; +} +EXPORT_SYMBOL(pmac_low_i2c_setmode); + +int pmac_low_i2c_xfer(struct device_node *np, u8 addrdir, u8 subaddr, u8 *data, int len) +{ + struct low_i2c_host *host = find_low_i2c_host(np); + + if (!host) + return -ENODEV; + WARN_ON(!host->is_open); + + return host->func(host, addrdir, subaddr, data, len); +} +EXPORT_SYMBOL(pmac_low_i2c_xfer); + diff --git a/arch/powerpc/platforms/powermac/pmac_nvram.c b/arch/powerpc/platforms/powermac/pmac_nvram.c new file mode 100644 index 00000000000..8c9b008c722 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_nvram.c @@ -0,0 +1,584 @@ +/* + * arch/ppc/platforms/pmac_nvram.c + * + * Copyright (C) 2002 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Todo: - add support for the OF persistent properties + */ +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/string.h> +#include <linux/nvram.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/errno.h> +#include <linux/adb.h> +#include <linux/pmu.h> +#include <linux/bootmem.h> +#include <linux/completion.h> +#include <linux/spinlock.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/system.h> +#include <asm/prom.h> +#include <asm/machdep.h> +#include <asm/nvram.h> + +#define DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +#define NVRAM_SIZE 0x2000 /* 8kB of non-volatile RAM */ + +#define CORE99_SIGNATURE 0x5a +#define CORE99_ADLER_START 0x14 + +/* On Core99, nvram is either a sharp, a micron or an AMD flash */ +#define SM_FLASH_STATUS_DONE 0x80 +#define SM_FLASH_STATUS_ERR 0x38 +#define SM_FLASH_CMD_ERASE_CONFIRM 0xd0 +#define SM_FLASH_CMD_ERASE_SETUP 0x20 +#define SM_FLASH_CMD_RESET 0xff +#define SM_FLASH_CMD_WRITE_SETUP 0x40 +#define SM_FLASH_CMD_CLEAR_STATUS 0x50 +#define SM_FLASH_CMD_READ_STATUS 0x70 + +/* CHRP NVRAM header */ +struct chrp_header { + u8 signature; + u8 cksum; + u16 len; + char name[12]; + u8 data[0]; +}; + +struct core99_header { + struct chrp_header hdr; + u32 adler; + u32 generation; + u32 reserved[2]; +}; + +/* + * Read and write the non-volatile RAM on PowerMacs and CHRP machines. + */ +static int nvram_naddrs; +static volatile unsigned char *nvram_addr; +static volatile unsigned char *nvram_data; +static int nvram_mult, is_core_99; +static int core99_bank = 0; +static int nvram_partitions[3]; +static DEFINE_SPINLOCK(nv_lock); + +extern int pmac_newworld; +extern int system_running; + +static int (*core99_write_bank)(int bank, u8* datas); +static int (*core99_erase_bank)(int bank); + +static char *nvram_image; + + +static unsigned char core99_nvram_read_byte(int addr) +{ + if (nvram_image == NULL) + return 0xff; + return nvram_image[addr]; +} + +static void core99_nvram_write_byte(int addr, unsigned char val) +{ + if (nvram_image == NULL) + return; + nvram_image[addr] = val; +} + + +static unsigned char direct_nvram_read_byte(int addr) +{ + return in_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult]); +} + +static void direct_nvram_write_byte(int addr, unsigned char val) +{ + out_8(&nvram_data[(addr & (NVRAM_SIZE - 1)) * nvram_mult], val); +} + + +static unsigned char indirect_nvram_read_byte(int addr) +{ + unsigned char val; + unsigned long flags; + + spin_lock_irqsave(&nv_lock, flags); + out_8(nvram_addr, addr >> 5); + val = in_8(&nvram_data[(addr & 0x1f) << 4]); + spin_unlock_irqrestore(&nv_lock, flags); + + return val; +} + +static void indirect_nvram_write_byte(int addr, unsigned char val) +{ + unsigned long flags; + + spin_lock_irqsave(&nv_lock, flags); + out_8(nvram_addr, addr >> 5); + out_8(&nvram_data[(addr & 0x1f) << 4], val); + spin_unlock_irqrestore(&nv_lock, flags); +} + + +#ifdef CONFIG_ADB_PMU + +static void pmu_nvram_complete(struct adb_request *req) +{ + if (req->arg) + complete((struct completion *)req->arg); +} + +static unsigned char pmu_nvram_read_byte(int addr) +{ + struct adb_request req; + DECLARE_COMPLETION(req_complete); + + req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL; + if (pmu_request(&req, pmu_nvram_complete, 3, PMU_READ_NVRAM, + (addr >> 8) & 0xff, addr & 0xff)) + return 0xff; + if (system_state == SYSTEM_RUNNING) + wait_for_completion(&req_complete); + while (!req.complete) + pmu_poll(); + return req.reply[0]; +} + +static void pmu_nvram_write_byte(int addr, unsigned char val) +{ + struct adb_request req; + DECLARE_COMPLETION(req_complete); + + req.arg = system_state == SYSTEM_RUNNING ? &req_complete : NULL; + if (pmu_request(&req, pmu_nvram_complete, 4, PMU_WRITE_NVRAM, + (addr >> 8) & 0xff, addr & 0xff, val)) + return; + if (system_state == SYSTEM_RUNNING) + wait_for_completion(&req_complete); + while (!req.complete) + pmu_poll(); +} + +#endif /* CONFIG_ADB_PMU */ + + +static u8 chrp_checksum(struct chrp_header* hdr) +{ + u8 *ptr; + u16 sum = hdr->signature; + for (ptr = (u8 *)&hdr->len; ptr < hdr->data; ptr++) + sum += *ptr; + while (sum > 0xFF) + sum = (sum & 0xFF) + (sum>>8); + return sum; +} + +static u32 core99_calc_adler(u8 *buffer) +{ + int cnt; + u32 low, high; + + buffer += CORE99_ADLER_START; + low = 1; + high = 0; + for (cnt=0; cnt<(NVRAM_SIZE-CORE99_ADLER_START); cnt++) { + if ((cnt % 5000) == 0) { + high %= 65521UL; + high %= 65521UL; + } + low += buffer[cnt]; + high += low; + } + low %= 65521UL; + high %= 65521UL; + + return (high << 16) | low; +} + +static u32 core99_check(u8* datas) +{ + struct core99_header* hdr99 = (struct core99_header*)datas; + + if (hdr99->hdr.signature != CORE99_SIGNATURE) { + DBG("Invalid signature\n"); + return 0; + } + if (hdr99->hdr.cksum != chrp_checksum(&hdr99->hdr)) { + DBG("Invalid checksum\n"); + return 0; + } + if (hdr99->adler != core99_calc_adler(datas)) { + DBG("Invalid adler\n"); + return 0; + } + return hdr99->generation; +} + +static int sm_erase_bank(int bank) +{ + int stat, i; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: Sharp/Micron Erasing bank %d...\n", bank); + + out_8(base, SM_FLASH_CMD_ERASE_SETUP); + out_8(base, SM_FLASH_CMD_ERASE_CONFIRM); + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: Sharp/Miron flash erase timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + + for (i=0; i<NVRAM_SIZE; i++) + if (base[i] != 0xff) { + printk(KERN_ERR "nvram: Sharp/Micron flash erase failed !\n"); + return -ENXIO; + } + return 0; +} + +static int sm_write_bank(int bank, u8* datas) +{ + int i, stat = 0; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: Sharp/Micron Writing bank %d...\n", bank); + + for (i=0; i<NVRAM_SIZE; i++) { + out_8(base+i, SM_FLASH_CMD_WRITE_SETUP); + udelay(1); + out_8(base+i, datas[i]); + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: Sharp/Micron flash write timeout !\n"); + break; + } + out_8(base, SM_FLASH_CMD_READ_STATUS); + stat = in_8(base); + } while (!(stat & SM_FLASH_STATUS_DONE)); + if (!(stat & SM_FLASH_STATUS_DONE)) + break; + } + out_8(base, SM_FLASH_CMD_CLEAR_STATUS); + out_8(base, SM_FLASH_CMD_RESET); + for (i=0; i<NVRAM_SIZE; i++) + if (base[i] != datas[i]) { + printk(KERN_ERR "nvram: Sharp/Micron flash write failed !\n"); + return -ENXIO; + } + return 0; +} + +static int amd_erase_bank(int bank) +{ + int i, stat = 0; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: AMD Erasing bank %d...\n", bank); + + /* Unlock 1 */ + out_8(base+0x555, 0xaa); + udelay(1); + /* Unlock 2 */ + out_8(base+0x2aa, 0x55); + udelay(1); + + /* Sector-Erase */ + out_8(base+0x555, 0x80); + udelay(1); + out_8(base+0x555, 0xaa); + udelay(1); + out_8(base+0x2aa, 0x55); + udelay(1); + out_8(base, 0x30); + udelay(1); + + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: AMD flash erase timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + for (i=0; i<NVRAM_SIZE; i++) + if (base[i] != 0xff) { + printk(KERN_ERR "nvram: AMD flash erase failed !\n"); + return -ENXIO; + } + return 0; +} + +static int amd_write_bank(int bank, u8* datas) +{ + int i, stat = 0; + unsigned long timeout; + + u8* base = (u8 *)nvram_data + core99_bank*NVRAM_SIZE; + + DBG("nvram: AMD Writing bank %d...\n", bank); + + for (i=0; i<NVRAM_SIZE; i++) { + /* Unlock 1 */ + out_8(base+0x555, 0xaa); + udelay(1); + /* Unlock 2 */ + out_8(base+0x2aa, 0x55); + udelay(1); + + /* Write single word */ + out_8(base+0x555, 0xa0); + udelay(1); + out_8(base+i, datas[i]); + + timeout = 0; + do { + if (++timeout > 1000000) { + printk(KERN_ERR "nvram: AMD flash write timeout !\n"); + break; + } + stat = in_8(base) ^ in_8(base); + } while (stat != 0); + if (stat != 0) + break; + } + + /* Reset */ + out_8(base, 0xf0); + udelay(1); + + for (i=0; i<NVRAM_SIZE; i++) + if (base[i] != datas[i]) { + printk(KERN_ERR "nvram: AMD flash write failed !\n"); + return -ENXIO; + } + return 0; +} + +static void __init lookup_partitions(void) +{ + u8 buffer[17]; + int i, offset; + struct chrp_header* hdr; + + if (pmac_newworld) { + nvram_partitions[pmac_nvram_OF] = -1; + nvram_partitions[pmac_nvram_XPRAM] = -1; + nvram_partitions[pmac_nvram_NR] = -1; + hdr = (struct chrp_header *)buffer; + + offset = 0; + buffer[16] = 0; + do { + for (i=0;i<16;i++) + buffer[i] = nvram_read_byte(offset+i); + if (!strcmp(hdr->name, "common")) + nvram_partitions[pmac_nvram_OF] = offset + 0x10; + if (!strcmp(hdr->name, "APL,MacOS75")) { + nvram_partitions[pmac_nvram_XPRAM] = offset + 0x10; + nvram_partitions[pmac_nvram_NR] = offset + 0x110; + } + offset += (hdr->len * 0x10); + } while(offset < NVRAM_SIZE); + } else { + nvram_partitions[pmac_nvram_OF] = 0x1800; + nvram_partitions[pmac_nvram_XPRAM] = 0x1300; + nvram_partitions[pmac_nvram_NR] = 0x1400; + } + DBG("nvram: OF partition at 0x%x\n", nvram_partitions[pmac_nvram_OF]); + DBG("nvram: XP partition at 0x%x\n", nvram_partitions[pmac_nvram_XPRAM]); + DBG("nvram: NR partition at 0x%x\n", nvram_partitions[pmac_nvram_NR]); +} + +static void core99_nvram_sync(void) +{ + struct core99_header* hdr99; + unsigned long flags; + + if (!is_core_99 || !nvram_data || !nvram_image) + return; + + spin_lock_irqsave(&nv_lock, flags); + if (!memcmp(nvram_image, (u8*)nvram_data + core99_bank*NVRAM_SIZE, + NVRAM_SIZE)) + goto bail; + + DBG("Updating nvram...\n"); + + hdr99 = (struct core99_header*)nvram_image; + hdr99->generation++; + hdr99->hdr.signature = CORE99_SIGNATURE; + hdr99->hdr.cksum = chrp_checksum(&hdr99->hdr); + hdr99->adler = core99_calc_adler(nvram_image); + core99_bank = core99_bank ? 0 : 1; + if (core99_erase_bank) + if (core99_erase_bank(core99_bank)) { + printk("nvram: Error erasing bank %d\n", core99_bank); + goto bail; + } + if (core99_write_bank) + if (core99_write_bank(core99_bank, nvram_image)) + printk("nvram: Error writing bank %d\n", core99_bank); + bail: + spin_unlock_irqrestore(&nv_lock, flags); + +#ifdef DEBUG + mdelay(2000); +#endif +} + +void __init pmac_nvram_init(void) +{ + struct device_node *dp; + + nvram_naddrs = 0; + + dp = find_devices("nvram"); + if (dp == NULL) { + printk(KERN_ERR "Can't find NVRAM device\n"); + return; + } + nvram_naddrs = dp->n_addrs; + is_core_99 = device_is_compatible(dp, "nvram,flash"); + if (is_core_99) { + int i; + u32 gen_bank0, gen_bank1; + + if (nvram_naddrs < 1) { + printk(KERN_ERR "nvram: no address\n"); + return; + } + nvram_image = alloc_bootmem(NVRAM_SIZE); + if (nvram_image == NULL) { + printk(KERN_ERR "nvram: can't allocate ram image\n"); + return; + } + nvram_data = ioremap(dp->addrs[0].address, NVRAM_SIZE*2); + nvram_naddrs = 1; /* Make sure we get the correct case */ + + DBG("nvram: Checking bank 0...\n"); + + gen_bank0 = core99_check((u8 *)nvram_data); + gen_bank1 = core99_check((u8 *)nvram_data + NVRAM_SIZE); + core99_bank = (gen_bank0 < gen_bank1) ? 1 : 0; + + DBG("nvram: gen0=%d, gen1=%d\n", gen_bank0, gen_bank1); + DBG("nvram: Active bank is: %d\n", core99_bank); + + for (i=0; i<NVRAM_SIZE; i++) + nvram_image[i] = nvram_data[i + core99_bank*NVRAM_SIZE]; + + ppc_md.nvram_read_val = core99_nvram_read_byte; + ppc_md.nvram_write_val = core99_nvram_write_byte; + ppc_md.nvram_sync = core99_nvram_sync; + /* + * Maybe we could be smarter here though making an exclusive list + * of known flash chips is a bit nasty as older OF didn't provide us + * with a useful "compatible" entry. A solution would be to really + * identify the chip using flash id commands and base ourselves on + * a list of known chips IDs + */ + if (device_is_compatible(dp, "amd-0137")) { + core99_erase_bank = amd_erase_bank; + core99_write_bank = amd_write_bank; + } else { + core99_erase_bank = sm_erase_bank; + core99_write_bank = sm_write_bank; + } + } else if (_machine == _MACH_chrp && nvram_naddrs == 1) { + nvram_data = ioremap(dp->addrs[0].address + isa_mem_base, + dp->addrs[0].size); + nvram_mult = 1; + ppc_md.nvram_read_val = direct_nvram_read_byte; + ppc_md.nvram_write_val = direct_nvram_write_byte; + } else if (nvram_naddrs == 1) { + nvram_data = ioremap(dp->addrs[0].address, dp->addrs[0].size); + nvram_mult = (dp->addrs[0].size + NVRAM_SIZE - 1) / NVRAM_SIZE; + ppc_md.nvram_read_val = direct_nvram_read_byte; + ppc_md.nvram_write_val = direct_nvram_write_byte; + } else if (nvram_naddrs == 2) { + nvram_addr = ioremap(dp->addrs[0].address, dp->addrs[0].size); + nvram_data = ioremap(dp->addrs[1].address, dp->addrs[1].size); + ppc_md.nvram_read_val = indirect_nvram_read_byte; + ppc_md.nvram_write_val = indirect_nvram_write_byte; + } else if (nvram_naddrs == 0 && sys_ctrler == SYS_CTRLER_PMU) { +#ifdef CONFIG_ADB_PMU + nvram_naddrs = -1; + ppc_md.nvram_read_val = pmu_nvram_read_byte; + ppc_md.nvram_write_val = pmu_nvram_write_byte; +#endif /* CONFIG_ADB_PMU */ + } else { + printk(KERN_ERR "Don't know how to access NVRAM with %d addresses\n", + nvram_naddrs); + } + lookup_partitions(); +} + +int pmac_get_partition(int partition) +{ + return nvram_partitions[partition]; +} + +u8 pmac_xpram_read(int xpaddr) +{ + int offset = nvram_partitions[pmac_nvram_XPRAM]; + + if (offset < 0) + return 0xff; + + return ppc_md.nvram_read_val(xpaddr + offset); +} + +void pmac_xpram_write(int xpaddr, u8 data) +{ + int offset = nvram_partitions[pmac_nvram_XPRAM]; + + if (offset < 0) + return; + + ppc_md.nvram_write_val(xpaddr + offset, data); +} + +EXPORT_SYMBOL(pmac_get_partition); +EXPORT_SYMBOL(pmac_xpram_read); +EXPORT_SYMBOL(pmac_xpram_write); diff --git a/arch/powerpc/platforms/powermac/pmac_pci.c b/arch/powerpc/platforms/powermac/pmac_pci.c new file mode 100644 index 00000000000..40bcd3e55af --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_pci.c @@ -0,0 +1,1341 @@ +/* + * Support for PCI bridges found on Power Macintoshes. + * At present the "bandit" and "chaos" bridges are supported. + * Fortunately you access configuration space in the same + * way with either bridge. + * + * Copyright (C) 2003 Benjamin Herrenschmuidt (benh@kernel.crashing.org) + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/delay.h> +#include <linux/string.h> +#include <linux/init.h> +#include <linux/bootmem.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> + +#undef DEBUG + +#ifdef DEBUG +#define DBG(x...) printk(x) +#else +#define DBG(x...) +#endif + +static int add_bridge(struct device_node *dev); +extern void pmac_check_ht_link(void); + +/* XXX Could be per-controller, but I don't think we risk anything by + * assuming we won't have both UniNorth and Bandit */ +static int has_uninorth; +#ifdef CONFIG_POWER4 +static struct pci_controller *u3_agp; +#endif /* CONFIG_POWER4 */ + +extern u8 pci_cache_line_size; +extern int pcibios_assign_bus_offset; + +struct device_node *k2_skiplist[2]; + +/* + * Magic constants for enabling cache coherency in the bandit/PSX bridge. + */ +#define BANDIT_DEVID_2 8 +#define BANDIT_REVID 3 + +#define BANDIT_DEVNUM 11 +#define BANDIT_MAGIC 0x50 +#define BANDIT_COHERENT 0x40 + +static int __init fixup_one_level_bus_range(struct device_node *node, int higher) +{ + for (; node != 0;node = node->sibling) { + int * bus_range; + unsigned int *class_code; + int len; + + /* For PCI<->PCI bridges or CardBus bridges, we go down */ + class_code = (unsigned int *) get_property(node, "class-code", NULL); + if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && + (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) + continue; + bus_range = (int *) get_property(node, "bus-range", &len); + if (bus_range != NULL && len > 2 * sizeof(int)) { + if (bus_range[1] > higher) + higher = bus_range[1]; + } + higher = fixup_one_level_bus_range(node->child, higher); + } + return higher; +} + +/* This routine fixes the "bus-range" property of all bridges in the + * system since they tend to have their "last" member wrong on macs + * + * Note that the bus numbers manipulated here are OF bus numbers, they + * are not Linux bus numbers. + */ +static void __init fixup_bus_range(struct device_node *bridge) +{ + int * bus_range; + int len; + + /* Lookup the "bus-range" property for the hose */ + bus_range = (int *) get_property(bridge, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s\n", + bridge->full_name); + return; + } + bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); +} + +/* + * Apple MacRISC (U3, UniNorth, Bandit, Chaos) PCI controllers. + * + * The "Bandit" version is present in all early PCI PowerMacs, + * and up to the first ones using Grackle. Some machines may + * have 2 bandit controllers (2 PCI busses). + * + * "Chaos" is used in some "Bandit"-type machines as a bridge + * for the separate display bus. It is accessed the same + * way as bandit, but cannot be probed for devices. It therefore + * has its own config access functions. + * + * The "UniNorth" version is present in all Core99 machines + * (iBook, G4, new IMacs, and all the recent Apple machines). + * It contains 3 controllers in one ASIC. + * + * The U3 is the bridge used on G5 machines. It contains an + * AGP bus which is dealt with the old UniNorth access routines + * and a HyperTransport bus which uses its own set of access + * functions. + */ + +#define MACRISC_CFA0(devfn, off) \ + ((1 << (unsigned long)PCI_SLOT(dev_fn)) \ + | (((unsigned long)PCI_FUNC(dev_fn)) << 8) \ + | (((unsigned long)(off)) & 0xFCUL)) + +#define MACRISC_CFA1(bus, devfn, off) \ + ((((unsigned long)(bus)) << 16) \ + |(((unsigned long)(devfn)) << 8) \ + |(((unsigned long)(off)) & 0xFCUL) \ + |1UL) + +static unsigned long macrisc_cfg_access(struct pci_controller* hose, + u8 bus, u8 dev_fn, u8 offset) +{ + unsigned int caddr; + + if (bus == hose->first_busno) { + if (dev_fn < (11 << 3)) + return 0; + caddr = MACRISC_CFA0(dev_fn, offset); + } else + caddr = MACRISC_CFA1(bus, dev_fn, offset); + + /* Uninorth will return garbage if we don't read back the value ! */ + do { + out_le32(hose->cfg_addr, caddr); + } while (in_le32(hose->cfg_addr) != caddr); + + offset &= has_uninorth ? 0x07 : 0x03; + return ((unsigned long)hose->cfg_data) + offset; +} + +static int macrisc_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose = bus->sysdata; + unsigned long addr; + + addr = macrisc_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int macrisc_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose = bus->sysdata; + unsigned long addr; + + addr = macrisc_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops macrisc_pci_ops = +{ + macrisc_read_config, + macrisc_write_config +}; + +/* + * Verifiy that a specific (bus, dev_fn) exists on chaos + */ +static int +chaos_validate_dev(struct pci_bus *bus, int devfn, int offset) +{ + struct device_node *np; + u32 *vendor, *device; + + np = pci_busdev_to_OF_node(bus, devfn); + if (np == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + vendor = (u32 *)get_property(np, "vendor-id", NULL); + device = (u32 *)get_property(np, "device-id", NULL); + if (vendor == NULL || device == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + if ((*vendor == 0x106b) && (*device == 3) && (offset >= 0x10) + && (offset != 0x14) && (offset != 0x18) && (offset <= 0x24)) + return PCIBIOS_BAD_REGISTER_NUMBER; + + return PCIBIOS_SUCCESSFUL; +} + +static int +chaos_read_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 *val) +{ + int result = chaos_validate_dev(bus, devfn, offset); + if (result == PCIBIOS_BAD_REGISTER_NUMBER) + *val = ~0U; + if (result != PCIBIOS_SUCCESSFUL) + return result; + return macrisc_read_config(bus, devfn, offset, len, val); +} + +static int +chaos_write_config(struct pci_bus *bus, unsigned int devfn, int offset, + int len, u32 val) +{ + int result = chaos_validate_dev(bus, devfn, offset); + if (result != PCIBIOS_SUCCESSFUL) + return result; + return macrisc_write_config(bus, devfn, offset, len, val); +} + +static struct pci_ops chaos_pci_ops = +{ + chaos_read_config, + chaos_write_config +}; + +#ifdef CONFIG_POWER4 + +/* + * These versions of U3 HyperTransport config space access ops do not + * implement self-view of the HT host yet + */ + +/* + * This function deals with some "special cases" devices. + * + * 0 -> No special case + * 1 -> Skip the device but act as if the access was successfull + * (return 0xff's on reads, eventually, cache config space + * accesses in a later version) + * -1 -> Hide the device (unsuccessful acess) + */ +static int u3_ht_skip_device(struct pci_controller *hose, + struct pci_bus *bus, unsigned int devfn) +{ + struct device_node *busdn, *dn; + int i; + + /* We only allow config cycles to devices that are in OF device-tree + * as we are apparently having some weird things going on with some + * revs of K2 on recent G5s + */ + if (bus->self) + busdn = pci_device_to_OF_node(bus->self); + else + busdn = hose->arch_data; + for (dn = busdn->child; dn; dn = dn->sibling) + if (dn->data && PCI_DN(dn)->devfn == devfn) + break; + if (dn == NULL) + return -1; + + /* + * When a device in K2 is powered down, we die on config + * cycle accesses. Fix that here. + */ + for (i=0; i<2; i++) + if (k2_skiplist[i] == dn) + return 1; + + return 0; +} + +#define U3_HT_CFA0(devfn, off) \ + ((((unsigned long)devfn) << 8) | offset) +#define U3_HT_CFA1(bus, devfn, off) \ + (U3_HT_CFA0(devfn, off) \ + + (((unsigned long)bus) << 16) \ + + 0x01000000UL) + +static unsigned long u3_ht_cfg_access(struct pci_controller* hose, + u8 bus, u8 devfn, u8 offset) +{ + if (bus == hose->first_busno) { + /* For now, we don't self probe U3 HT bridge */ + if (PCI_SLOT(devfn) == 0) + return 0; + return ((unsigned long)hose->cfg_data) + U3_HT_CFA0(devfn, offset); + } else + return ((unsigned long)hose->cfg_data) + U3_HT_CFA1(bus, devfn, offset); +} + +static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 *val) +{ + struct pci_controller *hose = bus->sysdata; + unsigned long addr; + + struct device_node *np = pci_busdev_to_OF_node(bus, devfn); + if (np == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + switch (len) { + case 1: + *val = 0xff; break; + case 2: + *val = 0xffff; break; + default: + *val = 0xfffffffful; break; + } + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + *val = in_8((u8 *)addr); + break; + case 2: + *val = in_le16((u16 *)addr); + break; + default: + *val = in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, + int offset, int len, u32 val) +{ + struct pci_controller *hose = bus->sysdata; + unsigned long addr; + + struct device_node *np = pci_busdev_to_OF_node(bus, devfn); + if (np == NULL) + return PCIBIOS_DEVICE_NOT_FOUND; + + addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); + if (!addr) + return PCIBIOS_DEVICE_NOT_FOUND; + + switch (u3_ht_skip_device(hose, bus, devfn)) { + case 0: + break; + case 1: + return PCIBIOS_SUCCESSFUL; + default: + return PCIBIOS_DEVICE_NOT_FOUND; + } + + /* + * Note: the caller has already checked that offset is + * suitably aligned and that len is 1, 2 or 4. + */ + switch (len) { + case 1: + out_8((u8 *)addr, val); + (void) in_8((u8 *)addr); + break; + case 2: + out_le16((u16 *)addr, val); + (void) in_le16((u16 *)addr); + break; + default: + out_le32((u32 *)addr, val); + (void) in_le32((u32 *)addr); + break; + } + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops u3_ht_pci_ops = +{ + u3_ht_read_config, + u3_ht_write_config +}; + +#endif /* CONFIG_POWER4 */ + +/* + * For a bandit bridge, turn on cache coherency if necessary. + * N.B. we could clean this up using the hose ops directly. + */ +static void __init +init_bandit(struct pci_controller *bp) +{ + unsigned int vendev, magic; + int rev; + + /* read the word at offset 0 in config space for device 11 */ + out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + PCI_VENDOR_ID); + udelay(2); + vendev = in_le32(bp->cfg_data); + if (vendev == (PCI_DEVICE_ID_APPLE_BANDIT << 16) + + PCI_VENDOR_ID_APPLE) { + /* read the revision id */ + out_le32(bp->cfg_addr, + (1UL << BANDIT_DEVNUM) + PCI_REVISION_ID); + udelay(2); + rev = in_8(bp->cfg_data); + if (rev != BANDIT_REVID) + printk(KERN_WARNING + "Unknown revision %d for bandit\n", rev); + } else if (vendev != (BANDIT_DEVID_2 << 16) + PCI_VENDOR_ID_APPLE) { + printk(KERN_WARNING "bandit isn't? (%x)\n", vendev); + return; + } + + /* read the word at offset 0x50 */ + out_le32(bp->cfg_addr, (1UL << BANDIT_DEVNUM) + BANDIT_MAGIC); + udelay(2); + magic = in_le32(bp->cfg_data); + if ((magic & BANDIT_COHERENT) != 0) + return; + magic |= BANDIT_COHERENT; + udelay(2); + out_le32(bp->cfg_data, magic); + printk(KERN_INFO "Cache coherency enabled for bandit/PSX\n"); +} + + +/* + * Tweak the PCI-PCI bridge chip on the blue & white G3s. + */ +static void __init +init_p2pbridge(void) +{ + struct device_node *p2pbridge; + struct pci_controller* hose; + u8 bus, devfn; + u16 val; + + /* XXX it would be better here to identify the specific + PCI-PCI bridge chip we have. */ + if ((p2pbridge = find_devices("pci-bridge")) == 0 + || p2pbridge->parent == NULL + || strcmp(p2pbridge->parent->name, "pci") != 0) + return; + if (pci_device_from_OF_node(p2pbridge, &bus, &devfn) < 0) { + DBG("Can't find PCI infos for PCI<->PCI bridge\n"); + return; + } + /* Warning: At this point, we have not yet renumbered all busses. + * So we must use OF walking to find out hose + */ + hose = pci_find_hose_for_OF_device(p2pbridge); + if (!hose) { + DBG("Can't find hose for PCI<->PCI bridge\n"); + return; + } + if (early_read_config_word(hose, bus, devfn, + PCI_BRIDGE_CONTROL, &val) < 0) { + printk(KERN_ERR "init_p2pbridge: couldn't read bridge control\n"); + return; + } + val &= ~PCI_BRIDGE_CTL_MASTER_ABORT; + early_write_config_word(hose, bus, devfn, PCI_BRIDGE_CONTROL, val); +} + +/* + * Some Apple desktop machines have a NEC PD720100A USB2 controller + * on the motherboard. Open Firmware, on these, will disable the + * EHCI part of it so it behaves like a pair of OHCI's. This fixup + * code re-enables it ;) + */ +static void __init +fixup_nec_usb2(void) +{ + struct device_node *nec; + + for (nec = NULL; (nec = of_find_node_by_name(nec, "usb")) != NULL;) { + struct pci_controller *hose; + u32 data, *prop; + u8 bus, devfn; + + prop = (u32 *)get_property(nec, "vendor-id", NULL); + if (prop == NULL) + continue; + if (0x1033 != *prop) + continue; + prop = (u32 *)get_property(nec, "device-id", NULL); + if (prop == NULL) + continue; + if (0x0035 != *prop) + continue; + prop = (u32 *)get_property(nec, "reg", NULL); + if (prop == NULL) + continue; + devfn = (prop[0] >> 8) & 0xff; + bus = (prop[0] >> 16) & 0xff; + if (PCI_FUNC(devfn) != 0) + continue; + hose = pci_find_hose_for_OF_device(nec); + if (!hose) + continue; + early_read_config_dword(hose, bus, devfn, 0xe4, &data); + if (data & 1UL) { + printk("Found NEC PD720100A USB2 chip with disabled EHCI, fixing up...\n"); + data &= ~1UL; + early_write_config_dword(hose, bus, devfn, 0xe4, data); + early_write_config_byte(hose, bus, devfn | 2, PCI_INTERRUPT_LINE, + nec->intrs[0].line); + } + } +} + +void __init +pmac_find_bridges(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n"); + return; + } + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) { + if (np->name == NULL) + continue; + if (strcmp(np->name, "bandit") == 0 + || strcmp(np->name, "chaos") == 0 + || strcmp(np->name, "pci") == 0) { + if (add_bridge(np) == 0) + of_node_get(np); + } + if (strcmp(np->name, "ht") == 0) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + + /* Probe HT last as it relies on the agp resources to be already + * setup + */ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + + init_p2pbridge(); + fixup_nec_usb2(); + + /* We are still having some issues with the Xserve G4, enabling + * some offset between bus number and domains for now when we + * assign all busses should help for now + */ + if (pci_assign_all_busses) + pcibios_assign_bus_offset = 0x10; + +#ifdef CONFIG_POWER4 + /* There is something wrong with DMA on U3/HT. I haven't figured out + * the details yet, but if I set the cache line size to 128 bytes like + * it should, I'm getting memory corruption caused by devices like + * sungem (even without the MWI bit set, but maybe sungem doesn't + * care). Right now, it appears that setting up a 64 bytes line size + * works properly, 64 bytes beeing the max transfer size of HT, I + * suppose this is related the way HT/PCI are hooked together. I still + * need to dive into more specs though to be really sure of what's + * going on. --BenH. + * + * Ok, apparently, it's just that HT can't do more than 64 bytes + * transactions. MWI seem to be meaningless there as well, it may + * be worth nop'ing out pci_set_mwi too though I haven't done that + * yet. + * + * Note that it's a bit different for whatever is in the AGP slot. + * For now, I don't care, but this can become a real issue, we + * should probably hook pci_set_mwi anyway to make sure it sets + * the real cache line size in there. + */ + if (machine_is_compatible("MacRISC4")) + pci_cache_line_size = 16; /* 64 bytes */ + + pmac_check_ht_link(); +#endif /* CONFIG_POWER4 */ +} + +#define GRACKLE_CFA(b, d, o) (0x80 | ((b) << 8) | ((d) << 16) \ + | (((o) & ~3) << 24)) + +#define GRACKLE_PICR1_STG 0x00000040 +#define GRACKLE_PICR1_LOOPSNOOP 0x00000010 + +/* N.B. this is called before bridges is initialized, so we can't + use grackle_pcibios_{read,write}_config_dword. */ +static inline void grackle_set_stg(struct pci_controller* bp, int enable) +{ + unsigned int val; + + out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); + val = in_le32(bp->cfg_data); + val = enable? (val | GRACKLE_PICR1_STG) : + (val & ~GRACKLE_PICR1_STG); + out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); + out_le32(bp->cfg_data, val); + (void)in_le32(bp->cfg_data); +} + +static inline void grackle_set_loop_snoop(struct pci_controller *bp, int enable) +{ + unsigned int val; + + out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); + val = in_le32(bp->cfg_data); + val = enable? (val | GRACKLE_PICR1_LOOPSNOOP) : + (val & ~GRACKLE_PICR1_LOOPSNOOP); + out_be32(bp->cfg_addr, GRACKLE_CFA(0, 0, 0xa8)); + out_le32(bp->cfg_data, val); + (void)in_le32(bp->cfg_data); +} + +static int __init +setup_uninorth(struct pci_controller* hose, struct reg_property* addr) +{ + pci_assign_all_busses = 1; + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000); + /* We "know" that the bridge at f2000000 has the PCI slots. */ + return addr->address == 0xf2000000; +} + +static void __init +setup_bandit(struct pci_controller* hose, struct reg_property* addr) +{ + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000); + init_bandit(hose); +} + +static void __init +setup_chaos(struct pci_controller* hose, struct reg_property* addr) +{ + /* assume a `chaos' bridge */ + hose->ops = &chaos_pci_ops; + hose->cfg_addr = ioremap(addr->address + 0x800000, 0x1000); + hose->cfg_data = ioremap(addr->address + 0xc00000, 0x1000); +} + +#ifdef CONFIG_POWER4 + +static void __init setup_u3_agp(struct pci_controller* hose) +{ + /* On G5, we move AGP up to high bus number so we don't need + * to reassign bus numbers for HT. If we ever have P2P bridges + * on AGP, we'll have to move pci_assign_all_busses to the + * pci_controller structure so we enable it for AGP and not for + * HT childs. + * We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->first_busno = 0xf0; + hose->last_busno = 0xff; + has_uninorth = 1; + hose->ops = ¯isc_pci_ops; + hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); + hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); + + u3_agp = hose; +} + +static void __init setup_u3_ht(struct pci_controller* hose) +{ + struct device_node *np = (struct device_node *)hose->arch_data; + int i, cur; + + hose->ops = &u3_ht_pci_ops; + + /* We hard code the address because of the different size of + * the reg address cell, we shall fix that by killing struct + * reg_property and using some accessor functions instead + */ + hose->cfg_data = (volatile unsigned char *)ioremap(0xf2000000, 0x02000000); + + /* + * /ht node doesn't expose a "ranges" property, so we "remove" regions that + * have been allocated to AGP. So far, this version of the code doesn't assign + * any of the 0xfxxxxxxx "fine" memory regions to /ht. + * We need to fix that sooner or later by either parsing all child "ranges" + * properties or figuring out the U3 address space decoding logic and + * then read its configuration register (if any). + */ + hose->io_base_phys = 0xf4000000; + hose->io_base_virt = ioremap(hose->io_base_phys, 0x00400000); + isa_io_base = (unsigned long) hose->io_base_virt; + hose->io_resource.name = np->full_name; + hose->io_resource.start = 0; + hose->io_resource.end = 0x003fffff; + hose->io_resource.flags = IORESOURCE_IO; + hose->pci_mem_offset = 0; + hose->first_busno = 0; + hose->last_busno = 0xef; + hose->mem_resources[0].name = np->full_name; + hose->mem_resources[0].start = 0x80000000; + hose->mem_resources[0].end = 0xefffffff; + hose->mem_resources[0].flags = IORESOURCE_MEM; + + if (u3_agp == NULL) { + DBG("U3 has no AGP, using full resource range\n"); + return; + } + + /* We "remove" the AGP resources from the resources allocated to HT, that + * is we create "holes". However, that code does assumptions that so far + * happen to be true (cross fingers...), typically that resources in the + * AGP node are properly ordered + */ + cur = 0; + for (i=0; i<3; i++) { + struct resource *res = &u3_agp->mem_resources[i]; + if (res->flags != IORESOURCE_MEM) + continue; + /* We don't care about "fine" resources */ + if (res->start >= 0xf0000000) + continue; + /* Check if it's just a matter of "shrinking" us in one direction */ + if (hose->mem_resources[cur].start == res->start) { + DBG("U3/HT: shrink start of %d, %08lx -> %08lx\n", + cur, hose->mem_resources[cur].start, res->end + 1); + hose->mem_resources[cur].start = res->end + 1; + continue; + } + if (hose->mem_resources[cur].end == res->end) { + DBG("U3/HT: shrink end of %d, %08lx -> %08lx\n", + cur, hose->mem_resources[cur].end, res->start - 1); + hose->mem_resources[cur].end = res->start - 1; + continue; + } + /* No, it's not the case, we need a hole */ + if (cur == 2) { + /* not enough resources to make a hole, we drop part of the range */ + printk(KERN_WARNING "Running out of resources for /ht host !\n"); + hose->mem_resources[cur].end = res->start - 1; + continue; + } + cur++; + DBG("U3/HT: hole, %d end at %08lx, %d start at %08lx\n", + cur-1, res->start - 1, cur, res->end + 1); + hose->mem_resources[cur].name = np->full_name; + hose->mem_resources[cur].flags = IORESOURCE_MEM; + hose->mem_resources[cur].start = res->end + 1; + hose->mem_resources[cur].end = hose->mem_resources[cur-1].end; + hose->mem_resources[cur-1].end = res->start - 1; + } +} + +#endif /* CONFIG_POWER4 */ + +void __init +setup_grackle(struct pci_controller *hose) +{ + setup_indirect_pci(hose, 0xfec00000, 0xfee00000); + if (machine_is_compatible("AAPL,PowerBook1998")) + grackle_set_loop_snoop(hose, 1); +#if 0 /* Disabled for now, HW problems ??? */ + grackle_set_stg(hose, 1); +#endif +} + +static void __init pmac_process_bridge_OF_ranges(struct pci_controller *hose, + struct device_node *dev, int primary) +{ + static unsigned int static_lc_ranges[2024]; + unsigned int *dt_ranges, *lc_ranges, *ranges, *prev; + unsigned int size; + int rlen = 0, orig_rlen; + int memno = 0; + struct resource *res; + int np, na = prom_n_addr_cells(dev); + + np = na + 5; + + /* First we try to merge ranges to fix a problem with some pmacs + * that can have more than 3 ranges, fortunately using contiguous + * addresses -- BenH + */ + dt_ranges = (unsigned int *) get_property(dev, "ranges", &rlen); + if (!dt_ranges) + return; + /* lc_ranges = alloc_bootmem(rlen);*/ + lc_ranges = static_lc_ranges; + if (!lc_ranges) + return; /* what can we do here ? */ + memcpy(lc_ranges, dt_ranges, rlen); + orig_rlen = rlen; + + /* Let's work on a copy of the "ranges" property instead of damaging + * the device-tree image in memory + */ + ranges = lc_ranges; + prev = NULL; + while ((rlen -= np * sizeof(unsigned int)) >= 0) { + if (prev) { + if (prev[0] == ranges[0] && prev[1] == ranges[1] && + (prev[2] + prev[na+4]) == ranges[2] && + (prev[na+2] + prev[na+4]) == ranges[na+2]) { + prev[na+4] += ranges[na+4]; + ranges[0] = 0; + ranges += np; + continue; + } + } + prev = ranges; + ranges += np; + } + + /* + * The ranges property is laid out as an array of elements, + * each of which comprises: + * cells 0 - 2: a PCI address + * cells 3 or 3+4: a CPU physical address + * (size depending on dev->n_addr_cells) + * cells 4+5 or 5+6: the size of the range + */ + ranges = lc_ranges; + rlen = orig_rlen; + while (ranges && (rlen -= np * sizeof(unsigned int)) >= 0) { + res = NULL; + size = ranges[na+4]; + switch (ranges[0] >> 24) { + case 1: /* I/O space */ + if (ranges[2] != 0) + break; + hose->io_base_phys = ranges[na+2]; + /* limit I/O space to 16MB */ + if (size > 0x01000000) + size = 0x01000000; + hose->io_base_virt = ioremap(ranges[na+2], size); + if (primary) + isa_io_base = (unsigned long) hose->io_base_virt; + res = &hose->io_resource; + res->flags = IORESOURCE_IO; + res->start = ranges[2]; + break; + case 2: /* memory space */ + memno = 0; + if (ranges[1] == 0 && ranges[2] == 0 + && ranges[na+4] <= (16 << 20)) { + /* 1st 16MB, i.e. ISA memory area */ +#if 0 + if (primary) + isa_mem_base = ranges[na+2]; +#endif + memno = 1; + } + while (memno < 3 && hose->mem_resources[memno].flags) + ++memno; + if (memno == 0) + hose->pci_mem_offset = ranges[na+2] - ranges[2]; + if (memno < 3) { + res = &hose->mem_resources[memno]; + res->flags = IORESOURCE_MEM; + res->start = ranges[na+2]; + } + break; + } + if (res != NULL) { + res->name = dev->full_name; + res->end = res->start + size - 1; + res->parent = NULL; + res->sibling = NULL; + res->child = NULL; + } + ranges += np; + } +} + +/* + * We assume that if we have a G3 powermac, we have one bridge called + * "pci" (a MPC106) and no bandit or chaos bridges, and contrariwise, + * if we have one or more bandit or chaos bridges, we don't have a MPC106. + */ +static int __init add_bridge(struct device_node *dev) +{ + int len; + struct pci_controller *hose; + struct reg_property *addr; + char* disp_name; + int *bus_range; + int primary = 1; + + DBG("Adding PCI host bridge %s\n", dev->full_name); + + addr = (struct reg_property *) get_property(dev, "reg", &len); + if (addr == NULL || len < sizeof(*addr)) { + printk(KERN_WARNING "Can't use %s: no address\n", + dev->full_name); + return -ENODEV; + } + bus_range = (int *) get_property(dev, "bus-range", &len); + if (bus_range == NULL || len < 2 * sizeof(int)) { + printk(KERN_WARNING "Can't get bus-range for %s, assume bus 0\n", + dev->full_name); + } + + hose = pcibios_alloc_controller(); + if (!hose) + return -ENOMEM; + hose->arch_data = dev; + hose->first_busno = bus_range ? bus_range[0] : 0; + hose->last_busno = bus_range ? bus_range[1] : 0xff; + + disp_name = NULL; +#ifdef CONFIG_POWER4 + if (device_is_compatible(dev, "u3-agp")) { + setup_u3_agp(hose, addr); + disp_name = "U3-AGP"; + primary = 0; + } else if (device_is_compatible(dev, "u3-ht")) { + setup_u3_ht(hose, addr); + disp_name = "U3-HT"; + primary = 1; + } else +#endif /* CONFIG_POWER4 */ + if (device_is_compatible(dev, "uni-north")) { + primary = setup_uninorth(hose, addr); + disp_name = "UniNorth"; + } else if (strcmp(dev->name, "pci") == 0) { + /* XXX assume this is a mpc106 (grackle) */ + setup_grackle(hose); + disp_name = "Grackle (MPC106)"; + } else if (strcmp(dev->name, "bandit") == 0) { + setup_bandit(hose, addr); + disp_name = "Bandit"; + } else if (strcmp(dev->name, "chaos") == 0) { + setup_chaos(hose, addr); + disp_name = "Chaos"; + primary = 0; + } + printk(KERN_INFO "Found %s PCI host bridge at 0x%08x. Firmware bus number: %d->%d\n", + disp_name, addr->address, hose->first_busno, hose->last_busno); + DBG(" ->Hose at 0x%p, cfg_addr=0x%p,cfg_data=0x%p\n", + hose, hose->cfg_addr, hose->cfg_data); + + /* Interpret the "ranges" property */ + /* This also maps the I/O region and sets isa_io/mem_base */ + pci_process_bridge_OF_ranges(hose, dev, primary); + + /* Fixup "bus-range" OF property */ + fixup_bus_range(dev); + + return 0; +} + +static void __init +pcibios_fixup_OF_interrupts(void) +{ + struct pci_dev* dev = NULL; + + /* + * Open Firmware often doesn't initialize the + * PCI_INTERRUPT_LINE config register properly, so we + * should find the device node and apply the interrupt + * obtained from the OF device-tree + */ + for_each_pci_dev(dev) { + struct device_node *node; + node = pci_device_to_OF_node(dev); + /* this is the node, see if it has interrupts */ + if (node && node->n_intrs > 0) + dev->irq = node->intrs[0].line; + pci_write_config_byte(dev, PCI_INTERRUPT_LINE, dev->irq); + } +} + +void __init +pmac_pcibios_fixup(void) +{ + /* Fixup interrupts according to OF tree */ + pcibios_fixup_OF_interrupts(); +} + +int +pmac_pci_enable_device_hook(struct pci_dev *dev, int initial) +{ + struct device_node* node; + int updatecfg = 0; + int uninorth_child; + + node = pci_device_to_OF_node(dev); + + /* We don't want to enable USB controllers absent from the OF tree + * (iBook second controller) + */ + if (dev->vendor == PCI_VENDOR_ID_APPLE + && (dev->class == ((PCI_CLASS_SERIAL_USB << 8) | 0x10)) + && !node) { + printk(KERN_INFO "Apple USB OHCI %s disabled by firmware\n", + pci_name(dev)); + return -EINVAL; + } + + if (!node) + return 0; + + uninorth_child = node->parent && + device_is_compatible(node->parent, "uni-north"); + + /* Firewire & GMAC were disabled after PCI probe, the driver is + * claiming them, we must re-enable them now. + */ + if (uninorth_child && !strcmp(node->name, "firewire") && + (device_is_compatible(node, "pci106b,18") || + device_is_compatible(node, "pci106b,30") || + device_is_compatible(node, "pci11c1,5811"))) { + pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, node, 0, 1); + pmac_call_feature(PMAC_FTR_1394_ENABLE, node, 0, 1); + updatecfg = 1; + } + if (uninorth_child && !strcmp(node->name, "ethernet") && + device_is_compatible(node, "gmac")) { + pmac_call_feature(PMAC_FTR_GMAC_ENABLE, node, 0, 1); + updatecfg = 1; + } + + if (updatecfg) { + u16 cmd; + + /* + * Make sure PCI is correctly configured + * + * We use old pci_bios versions of the function since, by + * default, gmac is not powered up, and so will be absent + * from the kernel initial PCI lookup. + * + * Should be replaced by 2.4 new PCI mechanisms and really + * register the device. + */ + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE; + pci_write_config_word(dev, PCI_COMMAND, cmd); + pci_write_config_byte(dev, PCI_LATENCY_TIMER, 16); + pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, pci_cache_line_size); + } + + return 0; +} + +/* We power down some devices after they have been probed. They'll + * be powered back on later on + */ +void __init +pmac_pcibios_after_init(void) +{ + struct device_node* nd; + +#ifdef CONFIG_BLK_DEV_IDE + struct pci_dev *dev = NULL; + + /* OF fails to initialize IDE controllers on macs + * (and maybe other machines) + * + * Ideally, this should be moved to the IDE layer, but we need + * to check specifically with Andre Hedrick how to do it cleanly + * since the common IDE code seem to care about the fact that the + * BIOS may have disabled a controller. + * + * -- BenH + */ + for_each_pci_dev(dev) { + if ((dev->class >> 16) == PCI_BASE_CLASS_STORAGE) + pci_enable_device(dev); + } +#endif /* CONFIG_BLK_DEV_IDE */ + + nd = find_devices("firewire"); + while (nd) { + if (nd->parent && (device_is_compatible(nd, "pci106b,18") || + device_is_compatible(nd, "pci106b,30") || + device_is_compatible(nd, "pci11c1,5811")) + && device_is_compatible(nd->parent, "uni-north")) { + pmac_call_feature(PMAC_FTR_1394_ENABLE, nd, 0, 0); + pmac_call_feature(PMAC_FTR_1394_CABLE_POWER, nd, 0, 0); + } + nd = nd->next; + } + nd = find_devices("ethernet"); + while (nd) { + if (nd->parent && device_is_compatible(nd, "gmac") + && device_is_compatible(nd->parent, "uni-north")) + pmac_call_feature(PMAC_FTR_GMAC_ENABLE, nd, 0, 0); + nd = nd->next; + } +} + +#ifdef CONFIG_PPC64 +static void __init pmac_fixup_phb_resources(void) +{ + struct pci_controller *hose, *tmp; + + list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { + unsigned long offset = (unsigned long)hose->io_base_virt - pci_io_base; + hose->io_resource.start += offset; + hose->io_resource.end += offset; + printk(KERN_INFO "PCI Host %d, io start: %lx; io end: %lx\n", + hose->global_number, + hose->io_resource.start, hose->io_resource.end); + } +} + +void __init pmac_pci_init(void) +{ + struct device_node *np, *root; + struct device_node *ht = NULL; + + /* Probe root PCI hosts, that is on U3 the AGP host and the + * HyperTransport host. That one is actually "kept" around + * and actually added last as it's resource management relies + * on the AGP resources to have been setup first + */ + root = of_find_node_by_path("/"); + if (root == NULL) { + printk(KERN_CRIT "pmac_find_bridges: can't find root of device tree\n"); + return; + } + for (np = NULL; (np = of_get_next_child(root, np)) != NULL;) { + if (np->name == NULL) + continue; + if (strcmp(np->name, "pci") == 0) { + if (add_bridge(np) == 0) + of_node_get(np); + } + if (strcmp(np->name, "ht") == 0) { + of_node_get(np); + ht = np; + } + } + of_node_put(root); + + /* Now setup the HyperTransport host if we found any + */ + if (ht && add_bridge(ht) != 0) + of_node_put(ht); + + /* Fixup the IO resources on our host bridges as the common code + * does it only for childs of the host bridges + */ + pmac_fixup_phb_resources(); + + /* Setup the linkage between OF nodes and PHBs */ + pci_devs_phb_init(); + + /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We + * assume there is no P2P bridge on the AGP bus, which should be a + * safe assumptions hopefully. + */ + if (u3_agp) { + struct device_node *np = u3_agp->arch_data; + PCI_DN(np)->busno = 0xf0; + for (np = np->child; np; np = np->sibling) + PCI_DN(np)->busno = 0xf0; + } + + pmac_check_ht_link(); + + /* Tell pci.c to not use the common resource allocation mecanism */ + pci_probe_only = 1; + + /* Allow all IO */ + io_page_mask = -1; +} +#endif + +#ifdef CONFIG_PPC32 +void pmac_pci_fixup_cardbus(struct pci_dev* dev) +{ + if (_machine != _MACH_Pmac) + return; + /* + * Fix the interrupt routing on the various cardbus bridges + * used on powerbooks + */ + if (dev->vendor != PCI_VENDOR_ID_TI) + return; + if (dev->device == PCI_DEVICE_ID_TI_1130 || + dev->device == PCI_DEVICE_ID_TI_1131) { + u8 val; + /* Enable PCI interrupt */ + if (pci_read_config_byte(dev, 0x91, &val) == 0) + pci_write_config_byte(dev, 0x91, val | 0x30); + /* Disable ISA interrupt mode */ + if (pci_read_config_byte(dev, 0x92, &val) == 0) + pci_write_config_byte(dev, 0x92, val & ~0x06); + } + if (dev->device == PCI_DEVICE_ID_TI_1210 || + dev->device == PCI_DEVICE_ID_TI_1211 || + dev->device == PCI_DEVICE_ID_TI_1410 || + dev->device == PCI_DEVICE_ID_TI_1510) { + u8 val; + /* 0x8c == TI122X_IRQMUX, 2 says to route the INTA + signal out the MFUNC0 pin */ + if (pci_read_config_byte(dev, 0x8c, &val) == 0) + pci_write_config_byte(dev, 0x8c, (val & ~0x0f) | 2); + /* Disable ISA interrupt mode */ + if (pci_read_config_byte(dev, 0x92, &val) == 0) + pci_write_config_byte(dev, 0x92, val & ~0x06); + } +} + +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_TI, PCI_ANY_ID, pmac_pci_fixup_cardbus); + +void pmac_pci_fixup_pciata(struct pci_dev* dev) +{ + u8 progif = 0; + + /* + * On PowerMacs, we try to switch any PCI ATA controller to + * fully native mode + */ + if (_machine != _MACH_Pmac) + return; + /* Some controllers don't have the class IDE */ + if (dev->vendor == PCI_VENDOR_ID_PROMISE) + switch(dev->device) { + case PCI_DEVICE_ID_PROMISE_20246: + case PCI_DEVICE_ID_PROMISE_20262: + case PCI_DEVICE_ID_PROMISE_20263: + case PCI_DEVICE_ID_PROMISE_20265: + case PCI_DEVICE_ID_PROMISE_20267: + case PCI_DEVICE_ID_PROMISE_20268: + case PCI_DEVICE_ID_PROMISE_20269: + case PCI_DEVICE_ID_PROMISE_20270: + case PCI_DEVICE_ID_PROMISE_20271: + case PCI_DEVICE_ID_PROMISE_20275: + case PCI_DEVICE_ID_PROMISE_20276: + case PCI_DEVICE_ID_PROMISE_20277: + goto good; + } + /* Others, check PCI class */ + if ((dev->class >> 8) != PCI_CLASS_STORAGE_IDE) + return; + good: + pci_read_config_byte(dev, PCI_CLASS_PROG, &progif); + if ((progif & 5) != 5) { + printk(KERN_INFO "Forcing PCI IDE into native mode: %s\n", pci_name(dev)); + (void) pci_write_config_byte(dev, PCI_CLASS_PROG, progif|5); + if (pci_read_config_byte(dev, PCI_CLASS_PROG, &progif) || + (progif & 5) != 5) + printk(KERN_ERR "Rewrite of PROGIF failed !\n"); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pmac_pci_fixup_pciata); +#endif + +/* + * Disable second function on K2-SATA, it's broken + * and disable IO BARs on first one + */ +static void fixup_k2_sata(struct pci_dev* dev) +{ + int i; + u16 cmd; + + if (PCI_FUNC(dev->devfn) > 0) { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~(PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 6; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + } + } else { + pci_read_config_word(dev, PCI_COMMAND, &cmd); + cmd &= ~PCI_COMMAND_IO; + pci_write_config_word(dev, PCI_COMMAND, cmd); + for (i = 0; i < 5; i++) { + dev->resource[i].start = dev->resource[i].end = 0; + dev->resource[i].flags = 0; + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0 + 4 * i, 0); + } + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SERVERWORKS, 0x0240, fixup_k2_sata); + diff --git a/arch/powerpc/platforms/powermac/pmac_pic.c b/arch/powerpc/platforms/powermac/pmac_pic.c new file mode 100644 index 00000000000..bf3e1899a4c --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_pic.c @@ -0,0 +1,655 @@ +/* + * Support for the interrupt controllers found on Power Macintosh, + * currently Apple's "Grand Central" interrupt controller in all + * it's incarnations. OpenPIC support used on newer machines is + * in a separate file + * + * Copyright (C) 1997 Paul Mackerras (paulus@samba.org) + * + * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <linux/stddef.h> +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/signal.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/sysdev.h> +#include <linux/adb.h> +#include <linux/pmu.h> + +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/smp.h> +#include <asm/prom.h> +#include <asm/pci-bridge.h> +#include <asm/time.h> +#include <asm/open_pic.h> +#include <asm/xmon.h> +#include <asm/pmac_feature.h> +#include <asm/mpic.h> + +#include "pmac_pic.h" + +/* + * XXX this should be in xmon.h, but putting it there means xmon.h + * has to include <linux/interrupt.h> (to get irqreturn_t), which + * causes all sorts of problems. -- paulus + */ +extern irqreturn_t xmon_irq(int, void *, struct pt_regs *); + +struct pmac_irq_hw { + unsigned int event; + unsigned int enable; + unsigned int ack; + unsigned int level; +}; + +/* Default addresses */ +static volatile struct pmac_irq_hw *pmac_irq_hw[4] = { + (struct pmac_irq_hw *) 0xf3000020, + (struct pmac_irq_hw *) 0xf3000010, + (struct pmac_irq_hw *) 0xf4000020, + (struct pmac_irq_hw *) 0xf4000010, +}; + +#define GC_LEVEL_MASK 0x3ff00000 +#define OHARE_LEVEL_MASK 0x1ff00000 +#define HEATHROW_LEVEL_MASK 0x1ff00000 + +static int max_irqs; +static int max_real_irqs; +static u32 level_mask[4]; + +static DEFINE_SPINLOCK(pmac_pic_lock); + + +#define GATWICK_IRQ_POOL_SIZE 10 +static struct interrupt_info gatwick_int_pool[GATWICK_IRQ_POOL_SIZE]; + +/* + * Mark an irq as "lost". This is only used on the pmac + * since it can lose interrupts (see pmac_set_irq_mask). + * -- Cort + */ +void +__set_lost(unsigned long irq_nr, int nokick) +{ + if (!test_and_set_bit(irq_nr, ppc_lost_interrupts)) { + atomic_inc(&ppc_n_lost_interrupts); + if (!nokick) + set_dec(1); + } +} + +static void +pmac_mask_and_ack_irq(unsigned int irq_nr) +{ + unsigned long bit = 1UL << (irq_nr & 0x1f); + int i = irq_nr >> 5; + unsigned long flags; + + if ((unsigned)irq_nr >= max_irqs) + return; + + clear_bit(irq_nr, ppc_cached_irq_mask); + if (test_and_clear_bit(irq_nr, ppc_lost_interrupts)) + atomic_dec(&ppc_n_lost_interrupts); + spin_lock_irqsave(&pmac_pic_lock, flags); + out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); + out_le32(&pmac_irq_hw[i]->ack, bit); + do { + /* make sure ack gets to controller before we enable + interrupts */ + mb(); + } while((in_le32(&pmac_irq_hw[i]->enable) & bit) + != (ppc_cached_irq_mask[i] & bit)); + spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +static void pmac_set_irq_mask(unsigned int irq_nr, int nokicklost) +{ + unsigned long bit = 1UL << (irq_nr & 0x1f); + int i = irq_nr >> 5; + unsigned long flags; + + if ((unsigned)irq_nr >= max_irqs) + return; + + spin_lock_irqsave(&pmac_pic_lock, flags); + /* enable unmasked interrupts */ + out_le32(&pmac_irq_hw[i]->enable, ppc_cached_irq_mask[i]); + + do { + /* make sure mask gets to controller before we + return to user */ + mb(); + } while((in_le32(&pmac_irq_hw[i]->enable) & bit) + != (ppc_cached_irq_mask[i] & bit)); + + /* + * Unfortunately, setting the bit in the enable register + * when the device interrupt is already on *doesn't* set + * the bit in the flag register or request another interrupt. + */ + if (bit & ppc_cached_irq_mask[i] & in_le32(&pmac_irq_hw[i]->level)) + __set_lost((ulong)irq_nr, nokicklost); + spin_unlock_irqrestore(&pmac_pic_lock, flags); +} + +/* When an irq gets requested for the first client, if it's an + * edge interrupt, we clear any previous one on the controller + */ +static unsigned int pmac_startup_irq(unsigned int irq_nr) +{ + unsigned long bit = 1UL << (irq_nr & 0x1f); + int i = irq_nr >> 5; + + if ((irq_desc[irq_nr].status & IRQ_LEVEL) == 0) + out_le32(&pmac_irq_hw[i]->ack, bit); + set_bit(irq_nr, ppc_cached_irq_mask); + pmac_set_irq_mask(irq_nr, 0); + + return 0; +} + +static void pmac_mask_irq(unsigned int irq_nr) +{ + clear_bit(irq_nr, ppc_cached_irq_mask); + pmac_set_irq_mask(irq_nr, 0); + mb(); +} + +static void pmac_unmask_irq(unsigned int irq_nr) +{ + set_bit(irq_nr, ppc_cached_irq_mask); + pmac_set_irq_mask(irq_nr, 0); +} + +static void pmac_end_irq(unsigned int irq_nr) +{ + if (!(irq_desc[irq_nr].status & (IRQ_DISABLED|IRQ_INPROGRESS)) + && irq_desc[irq_nr].action) { + set_bit(irq_nr, ppc_cached_irq_mask); + pmac_set_irq_mask(irq_nr, 1); + } +} + + +struct hw_interrupt_type pmac_pic = { + .typename = " PMAC-PIC ", + .startup = pmac_startup_irq, + .enable = pmac_unmask_irq, + .disable = pmac_mask_irq, + .ack = pmac_mask_and_ack_irq, + .end = pmac_end_irq, +}; + +struct hw_interrupt_type gatwick_pic = { + .typename = " GATWICK ", + .startup = pmac_startup_irq, + .enable = pmac_unmask_irq, + .disable = pmac_mask_irq, + .ack = pmac_mask_and_ack_irq, + .end = pmac_end_irq, +}; + +static irqreturn_t gatwick_action(int cpl, void *dev_id, struct pt_regs *regs) +{ + int irq, bits; + + for (irq = max_irqs; (irq -= 32) >= max_real_irqs; ) { + int i = irq >> 5; + bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; + /* We must read level interrupts from the level register */ + bits |= (in_le32(&pmac_irq_hw[i]->level) & level_mask[i]); + bits &= ppc_cached_irq_mask[i]; + if (bits == 0) + continue; + irq += __ilog2(bits); + __do_IRQ(irq, regs); + return IRQ_HANDLED; + } + printk("gatwick irq not from gatwick pic\n"); + return IRQ_NONE; +} + +int +pmac_get_irq(struct pt_regs *regs) +{ + int irq; + unsigned long bits = 0; + +#ifdef CONFIG_SMP + void psurge_smp_message_recv(struct pt_regs *); + + /* IPI's are a hack on the powersurge -- Cort */ + if ( smp_processor_id() != 0 ) { + psurge_smp_message_recv(regs); + return -2; /* ignore, already handled */ + } +#endif /* CONFIG_SMP */ + for (irq = max_real_irqs; (irq -= 32) >= 0; ) { + int i = irq >> 5; + bits = in_le32(&pmac_irq_hw[i]->event) | ppc_lost_interrupts[i]; + /* We must read level interrupts from the level register */ + bits |= (in_le32(&pmac_irq_hw[i]->level) & level_mask[i]); + bits &= ppc_cached_irq_mask[i]; + if (bits == 0) + continue; + irq += __ilog2(bits); + break; + } + + return irq; +} + +/* This routine will fix some missing interrupt values in the device tree + * on the gatwick mac-io controller used by some PowerBooks + */ +static void __init +pmac_fix_gatwick_interrupts(struct device_node *gw, int irq_base) +{ + struct device_node *node; + int count; + + memset(gatwick_int_pool, 0, sizeof(gatwick_int_pool)); + node = gw->child; + count = 0; + while(node) + { + /* Fix SCC */ + if (strcasecmp(node->name, "escc") == 0) + if (node->child) { + if (node->child->n_intrs < 3) { + node->child->intrs = &gatwick_int_pool[count]; + count += 3; + } + node->child->n_intrs = 3; + node->child->intrs[0].line = 15+irq_base; + node->child->intrs[1].line = 4+irq_base; + node->child->intrs[2].line = 5+irq_base; + printk(KERN_INFO "irq: fixed SCC on second controller (%d,%d,%d)\n", + node->child->intrs[0].line, + node->child->intrs[1].line, + node->child->intrs[2].line); + } + /* Fix media-bay & left SWIM */ + if (strcasecmp(node->name, "media-bay") == 0) { + struct device_node* ya_node; + + if (node->n_intrs == 0) + node->intrs = &gatwick_int_pool[count++]; + node->n_intrs = 1; + node->intrs[0].line = 29+irq_base; + printk(KERN_INFO "irq: fixed media-bay on second controller (%d)\n", + node->intrs[0].line); + + ya_node = node->child; + while(ya_node) + { + if (strcasecmp(ya_node->name, "floppy") == 0) { + if (ya_node->n_intrs < 2) { + ya_node->intrs = &gatwick_int_pool[count]; + count += 2; + } + ya_node->n_intrs = 2; + ya_node->intrs[0].line = 19+irq_base; + ya_node->intrs[1].line = 1+irq_base; + printk(KERN_INFO "irq: fixed floppy on second controller (%d,%d)\n", + ya_node->intrs[0].line, ya_node->intrs[1].line); + } + if (strcasecmp(ya_node->name, "ata4") == 0) { + if (ya_node->n_intrs < 2) { + ya_node->intrs = &gatwick_int_pool[count]; + count += 2; + } + ya_node->n_intrs = 2; + ya_node->intrs[0].line = 14+irq_base; + ya_node->intrs[1].line = 3+irq_base; + printk(KERN_INFO "irq: fixed ide on second controller (%d,%d)\n", + ya_node->intrs[0].line, ya_node->intrs[1].line); + } + ya_node = ya_node->sibling; + } + } + node = node->sibling; + } + if (count > 10) { + printk("WARNING !! Gatwick interrupt pool overflow\n"); + printk(" GATWICK_IRQ_POOL_SIZE = %d\n", GATWICK_IRQ_POOL_SIZE); + printk(" requested = %d\n", count); + } +} + +/* + * The PowerBook 3400/2400/3500 can have a combo ethernet/modem + * card which includes an ohare chip that acts as a second interrupt + * controller. If we find this second ohare, set it up and fix the + * interrupt value in the device tree for the ethernet chip. + */ +static int __init enable_second_ohare(void) +{ + unsigned char bus, devfn; + unsigned short cmd; + unsigned long addr; + struct device_node *irqctrler = find_devices("pci106b,7"); + struct device_node *ether; + + if (irqctrler == NULL || irqctrler->n_addrs <= 0) + return -1; + addr = (unsigned long) ioremap(irqctrler->addrs[0].address, 0x40); + pmac_irq_hw[1] = (volatile struct pmac_irq_hw *)(addr + 0x20); + max_irqs = 64; + if (pci_device_from_OF_node(irqctrler, &bus, &devfn) == 0) { + struct pci_controller* hose = pci_find_hose_for_OF_device(irqctrler); + if (!hose) + printk(KERN_ERR "Can't find PCI hose for OHare2 !\n"); + else { + early_read_config_word(hose, bus, devfn, PCI_COMMAND, &cmd); + cmd |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER; + cmd &= ~PCI_COMMAND_IO; + early_write_config_word(hose, bus, devfn, PCI_COMMAND, cmd); + } + } + + /* Fix interrupt for the modem/ethernet combo controller. The number + in the device tree (27) is bogus (correct for the ethernet-only + board but not the combo ethernet/modem board). + The real interrupt is 28 on the second controller -> 28+32 = 60. + */ + ether = find_devices("pci1011,14"); + if (ether && ether->n_intrs > 0) { + ether->intrs[0].line = 60; + printk(KERN_INFO "irq: Fixed ethernet IRQ to %d\n", + ether->intrs[0].line); + } + + /* Return the interrupt number of the cascade */ + return irqctrler->intrs[0].line; +} + +static int pmac_u3_cascade(struct pt_regs *regs, void *data) +{ + return mpic_get_one_irq((struct mpic *)data, regs); +} + +#ifdef CONFIG_XMON +static struct irqaction xmon_action = { + .handler = xmon_irq, + .flags = 0, + .mask = CPU_MASK_NONE, + .name = "NMI - XMON" +}; +#endif + +static struct irqaction gatwick_cascade_action = { + .handler = gatwick_action, + .flags = SA_INTERRUPT, + .mask = CPU_MASK_NONE, + .name = "cascade", +}; + +void __init pmac_pic_init(void) +{ + int i; + struct device_node *irqctrler = NULL; + struct device_node *irqctrler2 = NULL; + struct device_node *np; + unsigned long addr; + int irq_cascade = -1; + struct mpic *mpic1, *mpic2; + + /* We first try to detect Apple's new Core99 chipset, since mac-io + * is quite different on those machines and contains an IBM MPIC2. + */ + np = find_type_devices("open-pic"); + while (np) { + if (np->parent && !strcmp(np->parent->name, "u3")) + irqctrler2 = np; + else + irqctrler = np; + np = np->next; + } + if (irqctrler != NULL && irqctrler->n_addrs > 0) { + unsigned char senses[128]; + + printk(KERN_INFO "PowerMac using OpenPIC irq controller at 0x%08x\n", + (unsigned int)irqctrler->addrs[0].address); + + prom_get_irq_senses(senses, 0, 128); + mpic1 = mpic_alloc(irqctrler->addrs[0].address, + MPIC_PRIMARY | MPIC_WANTS_RESET, + 0, 0, 128, 256, senses, 128, " K2-MPIC "); + BUG_ON(mpic1 == NULL); + mpic_init(mpic1); + + if (irqctrler2 != NULL && irqctrler2->n_intrs > 0 && + irqctrler2->n_addrs > 0) { + printk(KERN_INFO "Slave OpenPIC at 0x%08x hooked on IRQ %d\n", + (u32)irqctrler2->addrs[0].address, + irqctrler2->intrs[0].line); + + pmac_call_feature(PMAC_FTR_ENABLE_MPIC, irqctrler2, 0, 0); + prom_get_irq_senses(senses, 128, 128 + 128); + + /* We don't need to set MPIC_BROKEN_U3 here since we don't have + * hypertransport interrupts routed to it + */ + mpic2 = mpic_alloc(irqctrler2->addrs[0].address, + MPIC_BIG_ENDIAN | MPIC_WANTS_RESET, + 0, 128, 128, 0, senses, 128, " U3-MPIC "); + BUG_ON(mpic2 == NULL); + mpic_init(mpic2); + mpic_setup_cascade(irqctrler2->intrs[0].line, + pmac_u3_cascade, mpic2); + } + } + + /* Get the level/edge settings, assume if it's not + * a Grand Central nor an OHare, then it's an Heathrow + * (or Paddington). + */ + if (find_devices("gc")) + level_mask[0] = GC_LEVEL_MASK; + else if (find_devices("ohare")) { + level_mask[0] = OHARE_LEVEL_MASK; + /* We might have a second cascaded ohare */ + level_mask[1] = OHARE_LEVEL_MASK; + } else { + level_mask[0] = HEATHROW_LEVEL_MASK; + level_mask[1] = 0; + /* We might have a second cascaded heathrow */ + level_mask[2] = HEATHROW_LEVEL_MASK; + level_mask[3] = 0; + } + + /* + * G3 powermacs and 1999 G3 PowerBooks have 64 interrupts, + * 1998 G3 Series PowerBooks have 128, + * other powermacs have 32. + * The combo ethernet/modem card for the Powerstar powerbooks + * (2400/3400/3500, ohare based) has a second ohare chip + * effectively making a total of 64. + */ + max_irqs = max_real_irqs = 32; + irqctrler = find_devices("mac-io"); + if (irqctrler) + { + max_real_irqs = 64; + if (irqctrler->next) + max_irqs = 128; + else + max_irqs = 64; + } + for ( i = 0; i < max_real_irqs ; i++ ) + irq_desc[i].handler = &pmac_pic; + + /* get addresses of first controller */ + if (irqctrler) { + if (irqctrler->n_addrs > 0) { + addr = (unsigned long) + ioremap(irqctrler->addrs[0].address, 0x40); + for (i = 0; i < 2; ++i) + pmac_irq_hw[i] = (volatile struct pmac_irq_hw*) + (addr + (2 - i) * 0x10); + } + + /* get addresses of second controller */ + irqctrler = irqctrler->next; + if (irqctrler && irqctrler->n_addrs > 0) { + addr = (unsigned long) + ioremap(irqctrler->addrs[0].address, 0x40); + for (i = 2; i < 4; ++i) + pmac_irq_hw[i] = (volatile struct pmac_irq_hw*) + (addr + (4 - i) * 0x10); + irq_cascade = irqctrler->intrs[0].line; + if (device_is_compatible(irqctrler, "gatwick")) + pmac_fix_gatwick_interrupts(irqctrler, max_real_irqs); + } + } else { + /* older powermacs have a GC (grand central) or ohare at + f3000000, with interrupt control registers at f3000020. */ + addr = (unsigned long) ioremap(0xf3000000, 0x40); + pmac_irq_hw[0] = (volatile struct pmac_irq_hw *) (addr + 0x20); + } + + /* PowerBooks 3400 and 3500 can have a second controller in a second + ohare chip, on the combo ethernet/modem card */ + if (machine_is_compatible("AAPL,3400/2400") + || machine_is_compatible("AAPL,3500")) + irq_cascade = enable_second_ohare(); + + /* disable all interrupts in all controllers */ + for (i = 0; i * 32 < max_irqs; ++i) + out_le32(&pmac_irq_hw[i]->enable, 0); + /* mark level interrupts */ + for (i = 0; i < max_irqs; i++) + if (level_mask[i >> 5] & (1UL << (i & 0x1f))) + irq_desc[i].status = IRQ_LEVEL; + + /* get interrupt line of secondary interrupt controller */ + if (irq_cascade >= 0) { + printk(KERN_INFO "irq: secondary controller on irq %d\n", + (int)irq_cascade); + for ( i = max_real_irqs ; i < max_irqs ; i++ ) + irq_desc[i].handler = &gatwick_pic; + setup_irq(irq_cascade, &gatwick_cascade_action); + } + printk("System has %d possible interrupts\n", max_irqs); + if (max_irqs != max_real_irqs) + printk(KERN_DEBUG "%d interrupts on main controller\n", + max_real_irqs); + +#ifdef CONFIG_XMON + setup_irq(20, &xmon_action); +#endif /* CONFIG_XMON */ +} + +#ifdef CONFIG_PM +/* + * These procedures are used in implementing sleep on the powerbooks. + * sleep_save_intrs() saves the states of all interrupt enables + * and disables all interrupts except for the nominated one. + * sleep_restore_intrs() restores the states of all interrupt enables. + */ +unsigned long sleep_save_mask[2]; + +/* This used to be passed by the PMU driver but that link got + * broken with the new driver model. We use this tweak for now... + */ +static int pmacpic_find_viaint(void) +{ + int viaint = -1; + +#ifdef CONFIG_ADB_PMU + struct device_node *np; + + if (pmu_get_model() != PMU_OHARE_BASED) + goto not_found; + np = of_find_node_by_name(NULL, "via-pmu"); + if (np == NULL) + goto not_found; + viaint = np->intrs[0].line; +#endif /* CONFIG_ADB_PMU */ + +not_found: + return viaint; +} + +static int pmacpic_suspend(struct sys_device *sysdev, pm_message_t state) +{ + int viaint = pmacpic_find_viaint(); + + sleep_save_mask[0] = ppc_cached_irq_mask[0]; + sleep_save_mask[1] = ppc_cached_irq_mask[1]; + ppc_cached_irq_mask[0] = 0; + ppc_cached_irq_mask[1] = 0; + if (viaint > 0) + set_bit(viaint, ppc_cached_irq_mask); + out_le32(&pmac_irq_hw[0]->enable, ppc_cached_irq_mask[0]); + if (max_real_irqs > 32) + out_le32(&pmac_irq_hw[1]->enable, ppc_cached_irq_mask[1]); + (void)in_le32(&pmac_irq_hw[0]->event); + /* make sure mask gets to controller before we return to caller */ + mb(); + (void)in_le32(&pmac_irq_hw[0]->enable); + + return 0; +} + +static int pmacpic_resume(struct sys_device *sysdev) +{ + int i; + + out_le32(&pmac_irq_hw[0]->enable, 0); + if (max_real_irqs > 32) + out_le32(&pmac_irq_hw[1]->enable, 0); + mb(); + for (i = 0; i < max_real_irqs; ++i) + if (test_bit(i, sleep_save_mask)) + pmac_unmask_irq(i); + + return 0; +} + +#endif /* CONFIG_PM */ + +static struct sysdev_class pmacpic_sysclass = { + set_kset_name("pmac_pic"), +}; + +static struct sys_device device_pmacpic = { + .id = 0, + .cls = &pmacpic_sysclass, +}; + +static struct sysdev_driver driver_pmacpic = { +#ifdef CONFIG_PM + .suspend = &pmacpic_suspend, + .resume = &pmacpic_resume, +#endif /* CONFIG_PM */ +}; + +static int __init init_pmacpic_sysfs(void) +{ + if (max_irqs == 0) + return -ENODEV; + + printk(KERN_DEBUG "Registering pmac pic with sysfs...\n"); + sysdev_class_register(&pmacpic_sysclass); + sysdev_register(&device_pmacpic); + sysdev_driver_register(&pmacpic_sysclass, &driver_pmacpic); + return 0; +} + +subsys_initcall(init_pmacpic_sysfs); + diff --git a/arch/powerpc/platforms/powermac/pmac_pic.h b/arch/powerpc/platforms/powermac/pmac_pic.h new file mode 100644 index 00000000000..664103dfeef --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_pic.h @@ -0,0 +1,11 @@ +#ifndef __PPC_PLATFORMS_PMAC_PIC_H +#define __PPC_PLATFORMS_PMAC_PIC_H + +#include <linux/irq.h> + +extern struct hw_interrupt_type pmac_pic; + +void pmac_pic_init(void); +int pmac_get_irq(struct pt_regs *regs); + +#endif /* __PPC_PLATFORMS_PMAC_PIC_H */ diff --git a/arch/powerpc/platforms/powermac/pmac_setup.c b/arch/powerpc/platforms/powermac/pmac_setup.c new file mode 100644 index 00000000000..dbc921a084c --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_setup.c @@ -0,0 +1,662 @@ +/* + * arch/ppc/platforms/setup.c + * + * PowerPC version + * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) + * + * Adapted for Power Macintosh by Paul Mackerras + * Copyright (C) 1996 Paul Mackerras (paulus@cs.anu.edu.au) + * + * Derived from "arch/alpha/kernel/setup.c" + * Copyright (C) 1995 Linus Torvalds + * + * Maintained by Benjamin Herrenschmidt (benh@kernel.crashing.org) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +/* + * bootup setup stuff.. + */ + +#include <linux/config.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/stddef.h> +#include <linux/unistd.h> +#include <linux/ptrace.h> +#include <linux/slab.h> +#include <linux/user.h> +#include <linux/a.out.h> +#include <linux/tty.h> +#include <linux/string.h> +#include <linux/delay.h> +#include <linux/ioport.h> +#include <linux/major.h> +#include <linux/initrd.h> +#include <linux/vt_kern.h> +#include <linux/console.h> +#include <linux/ide.h> +#include <linux/pci.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <linux/irq.h> +#include <linux/seq_file.h> +#include <linux/root_dev.h> +#include <linux/bitops.h> +#include <linux/suspend.h> + +#include <asm/reg.h> +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/system.h> +#include <asm/pgtable.h> +#include <asm/io.h> +#include <asm/pci-bridge.h> +#include <asm/ohare.h> +#include <asm/mediabay.h> +#include <asm/machdep.h> +#include <asm/dma.h> +#include <asm/bootx.h> +#include <asm/cputable.h> +#include <asm/btext.h> +#include <asm/pmac_feature.h> +#include <asm/time.h> +#include <asm/of_device.h> +#include <asm/mmu_context.h> + +#include "pmac_pic.h" + +#undef SHOW_GATWICK_IRQS + +extern long pmac_time_init(void); +extern unsigned long pmac_get_rtc_time(void); +extern int pmac_set_rtc_time(unsigned long nowtime); +extern void pmac_read_rtc_time(void); +extern void pmac_calibrate_decr(void); +extern void pmac_pcibios_fixup(void); +extern void pmac_find_bridges(void); +extern unsigned long pmac_ide_get_base(int index); +extern void pmac_ide_init_hwif_ports(hw_regs_t *hw, + unsigned long data_port, unsigned long ctrl_port, int *irq); + +extern void pmac_nvram_update(void); +extern unsigned char pmac_nvram_read_byte(int addr); +extern void pmac_nvram_write_byte(int addr, unsigned char val); +extern int pmac_pci_enable_device_hook(struct pci_dev *dev, int initial); +extern void pmac_pcibios_after_init(void); +extern int of_show_percpuinfo(struct seq_file *m, int i); + +unsigned char drive_info; + +int ppc_override_l2cr = 0; +int ppc_override_l2cr_value; +int has_l2cache = 0; + +static int current_root_goodness = -1; + +extern int pmac_newworld; + +#define DEFAULT_ROOT_DEVICE Root_SDA1 /* sda1 - slightly silly choice */ + +extern void zs_kgdb_hook(int tty_num); +static void ohare_init(void); +#ifdef CONFIG_BOOTX_TEXT +static void pmac_progress(char *s, unsigned short hex); +#endif + +sys_ctrler_t sys_ctrler = SYS_CTRLER_UNKNOWN; + +#ifdef CONFIG_SMP +extern struct smp_ops_t psurge_smp_ops; +extern struct smp_ops_t core99_smp_ops; +#endif /* CONFIG_SMP */ + +static int +pmac_show_cpuinfo(struct seq_file *m) +{ + struct device_node *np; + char *pp; + int plen; + int mbmodel = pmac_call_feature(PMAC_FTR_GET_MB_INFO, + NULL, PMAC_MB_INFO_MODEL, 0); + unsigned int mbflags = (unsigned int)pmac_call_feature(PMAC_FTR_GET_MB_INFO, + NULL, PMAC_MB_INFO_FLAGS, 0); + char* mbname; + + if (pmac_call_feature(PMAC_FTR_GET_MB_INFO, NULL, PMAC_MB_INFO_NAME, (int)&mbname) != 0) + mbname = "Unknown"; + + /* find motherboard type */ + seq_printf(m, "machine\t\t: "); + np = find_devices("device-tree"); + if (np != NULL) { + pp = (char *) get_property(np, "model", NULL); + if (pp != NULL) + seq_printf(m, "%s\n", pp); + else + seq_printf(m, "PowerMac\n"); + pp = (char *) get_property(np, "compatible", &plen); + if (pp != NULL) { + seq_printf(m, "motherboard\t:"); + while (plen > 0) { + int l = strlen(pp) + 1; + seq_printf(m, " %s", pp); + plen -= l; + pp += l; + } + seq_printf(m, "\n"); + } + } else + seq_printf(m, "PowerMac\n"); + + /* print parsed model */ + seq_printf(m, "detected as\t: %d (%s)\n", mbmodel, mbname); + seq_printf(m, "pmac flags\t: %08x\n", mbflags); + + /* find l2 cache info */ + np = find_devices("l2-cache"); + if (np == 0) + np = find_type_devices("cache"); + if (np != 0) { + unsigned int *ic = (unsigned int *) + get_property(np, "i-cache-size", NULL); + unsigned int *dc = (unsigned int *) + get_property(np, "d-cache-size", NULL); + seq_printf(m, "L2 cache\t:"); + has_l2cache = 1; + if (get_property(np, "cache-unified", NULL) != 0 && dc) { + seq_printf(m, " %dK unified", *dc / 1024); + } else { + if (ic) + seq_printf(m, " %dK instruction", *ic / 1024); + if (dc) + seq_printf(m, "%s %dK data", + (ic? " +": ""), *dc / 1024); + } + pp = get_property(np, "ram-type", NULL); + if (pp) + seq_printf(m, " %s", pp); + seq_printf(m, "\n"); + } + + /* find ram info */ + np = find_devices("memory"); + if (np != 0) { + int n; + struct reg_property *reg = (struct reg_property *) + get_property(np, "reg", &n); + + if (reg != 0) { + unsigned long total = 0; + + for (n /= sizeof(struct reg_property); n > 0; --n) + total += (reg++)->size; + seq_printf(m, "memory\t\t: %luMB\n", total >> 20); + } + } + + /* Checks "l2cr-value" property in the registry */ + np = find_devices("cpus"); + if (np == 0) + np = find_type_devices("cpu"); + if (np != 0) { + unsigned int *l2cr = (unsigned int *) + get_property(np, "l2cr-value", NULL); + if (l2cr != 0) { + seq_printf(m, "l2cr override\t: 0x%x\n", *l2cr); + } + } + + /* Indicate newworld/oldworld */ + seq_printf(m, "pmac-generation\t: %s\n", + pmac_newworld ? "NewWorld" : "OldWorld"); + + + return 0; +} + +static int +pmac_show_percpuinfo(struct seq_file *m, int i) +{ +#ifdef CONFIG_CPU_FREQ_PMAC + extern unsigned int pmac_get_one_cpufreq(int i); + unsigned int freq = pmac_get_one_cpufreq(i); + if (freq != 0) { + seq_printf(m, "clock\t\t: %dMHz\n", freq/1000); + return 0; + } +#endif /* CONFIG_CPU_FREQ_PMAC */ + return of_show_percpuinfo(m, i); +} + +static volatile u32 *sysctrl_regs; + +void __init +pmac_setup_arch(void) +{ + struct device_node *cpu; + int *fp; + unsigned long pvr; + + pvr = PVR_VER(mfspr(SPRN_PVR)); + + /* Set loops_per_jiffy to a half-way reasonable value, + for use until calibrate_delay gets called. */ + cpu = find_type_devices("cpu"); + if (cpu != 0) { + fp = (int *) get_property(cpu, "clock-frequency", NULL); + if (fp != 0) { + if (pvr == 4 || pvr >= 8) + /* 604, G3, G4 etc. */ + loops_per_jiffy = *fp / HZ; + else + /* 601, 603, etc. */ + loops_per_jiffy = *fp / (2*HZ); + } else + loops_per_jiffy = 50000000 / HZ; + } + + /* this area has the CPU identification register + and some registers used by smp boards */ + sysctrl_regs = (volatile u32 *) ioremap(0xf8000000, 0x1000); + ohare_init(); + + /* Lookup PCI hosts */ + pmac_find_bridges(); + + /* Checks "l2cr-value" property in the registry */ + if (cpu_has_feature(CPU_FTR_L2CR)) { + struct device_node *np = find_devices("cpus"); + if (np == 0) + np = find_type_devices("cpu"); + if (np != 0) { + unsigned int *l2cr = (unsigned int *) + get_property(np, "l2cr-value", NULL); + if (l2cr != 0) { + ppc_override_l2cr = 1; + ppc_override_l2cr_value = *l2cr; + _set_L2CR(0); + _set_L2CR(ppc_override_l2cr_value); + } + } + } + + if (ppc_override_l2cr) + printk(KERN_INFO "L2CR overriden (0x%x), backside cache is %s\n", + ppc_override_l2cr_value, (ppc_override_l2cr_value & 0x80000000) + ? "enabled" : "disabled"); + +#ifdef CONFIG_KGDB + zs_kgdb_hook(0); +#endif + +#ifdef CONFIG_ADB_CUDA + find_via_cuda(); +#else + if (find_devices("via-cuda")) { + printk("WARNING ! Your machine is Cuda based but your kernel\n"); + printk(" wasn't compiled with CONFIG_ADB_CUDA option !\n"); + } +#endif +#ifdef CONFIG_ADB_PMU + find_via_pmu(); +#else + if (find_devices("via-pmu")) { + printk("WARNING ! Your machine is PMU based but your kernel\n"); + printk(" wasn't compiled with CONFIG_ADB_PMU option !\n"); + } +#endif +#ifdef CONFIG_NVRAM + pmac_nvram_init(); +#endif +#ifdef CONFIG_BLK_DEV_INITRD + if (initrd_start) + ROOT_DEV = Root_RAM0; + else +#endif + ROOT_DEV = DEFAULT_ROOT_DEVICE; + +#ifdef CONFIG_SMP + /* Check for Core99 */ + if (find_devices("uni-n") || find_devices("u3")) + ppc_md.smp_ops = &core99_smp_ops; + else + ppc_md.smp_ops = &psurge_smp_ops; +#endif /* CONFIG_SMP */ + + pci_create_OF_bus_map(); +} + +static void __init ohare_init(void) +{ + /* + * Turn on the L2 cache. + * We assume that we have a PSX memory controller iff + * we have an ohare I/O controller. + */ + if (find_devices("ohare") != NULL) { + if (((sysctrl_regs[2] >> 24) & 0xf) >= 3) { + if (sysctrl_regs[4] & 0x10) + sysctrl_regs[4] |= 0x04000020; + else + sysctrl_regs[4] |= 0x04000000; + if(has_l2cache) + printk(KERN_INFO "Level 2 cache enabled\n"); + } + } +} + +extern char *bootpath; +extern char *bootdevice; +void *boot_host; +int boot_target; +int boot_part; +extern dev_t boot_dev; + +#ifdef CONFIG_SCSI +void __init +note_scsi_host(struct device_node *node, void *host) +{ + int l; + char *p; + + l = strlen(node->full_name); + if (bootpath != NULL && bootdevice != NULL + && strncmp(node->full_name, bootdevice, l) == 0 + && (bootdevice[l] == '/' || bootdevice[l] == 0)) { + boot_host = host; + /* + * There's a bug in OF 1.0.5. (Why am I not surprised.) + * If you pass a path like scsi/sd@1:0 to canon, it returns + * something like /bandit@F2000000/gc@10/53c94@10000/sd@0,0 + * That is, the scsi target number doesn't get preserved. + * So we pick the target number out of bootpath and use that. + */ + p = strstr(bootpath, "/sd@"); + if (p != NULL) { + p += 4; + boot_target = simple_strtoul(p, NULL, 10); + p = strchr(p, ':'); + if (p != NULL) + boot_part = simple_strtoul(p + 1, NULL, 10); + } + } +} +#endif + +#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC) +static dev_t __init +find_ide_boot(void) +{ + char *p; + int n; + dev_t __init pmac_find_ide_boot(char *bootdevice, int n); + + if (bootdevice == NULL) + return 0; + p = strrchr(bootdevice, '/'); + if (p == NULL) + return 0; + n = p - bootdevice; + + return pmac_find_ide_boot(bootdevice, n); +} +#endif /* CONFIG_BLK_DEV_IDE && CONFIG_BLK_DEV_IDE_PMAC */ + +static void __init +find_boot_device(void) +{ +#if defined(CONFIG_BLK_DEV_IDE) && defined(CONFIG_BLK_DEV_IDE_PMAC) + boot_dev = find_ide_boot(); +#endif +} + +static int initializing = 1; +/* TODO: Merge the suspend-to-ram with the common code !!! + * currently, this is a stub implementation for suspend-to-disk + * only + */ + +#ifdef CONFIG_SOFTWARE_SUSPEND + +static int pmac_pm_prepare(suspend_state_t state) +{ + printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state); + + return 0; +} + +static int pmac_pm_enter(suspend_state_t state) +{ + printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state); + + /* Giveup the lazy FPU & vec so we don't have to back them + * up from the low level code + */ + enable_kernel_fp(); + +#ifdef CONFIG_ALTIVEC + if (cur_cpu_spec[0]->cpu_features & CPU_FTR_ALTIVEC) + enable_kernel_altivec(); +#endif /* CONFIG_ALTIVEC */ + + return 0; +} + +static int pmac_pm_finish(suspend_state_t state) +{ + printk(KERN_DEBUG "%s(%d)\n", __FUNCTION__, state); + + /* Restore userland MMU context */ + set_context(current->active_mm->context, current->active_mm->pgd); + + return 0; +} + +static struct pm_ops pmac_pm_ops = { + .pm_disk_mode = PM_DISK_SHUTDOWN, + .prepare = pmac_pm_prepare, + .enter = pmac_pm_enter, + .finish = pmac_pm_finish, +}; + +#endif /* CONFIG_SOFTWARE_SUSPEND */ + +static int pmac_late_init(void) +{ + initializing = 0; +#ifdef CONFIG_SOFTWARE_SUSPEND + pm_set_ops(&pmac_pm_ops); +#endif /* CONFIG_SOFTWARE_SUSPEND */ + return 0; +} + +late_initcall(pmac_late_init); + +/* can't be __init - can be called whenever a disk is first accessed */ +void +note_bootable_part(dev_t dev, int part, int goodness) +{ + static int found_boot = 0; + char *p; + + if (!initializing) + return; + if ((goodness <= current_root_goodness) && + ROOT_DEV != DEFAULT_ROOT_DEVICE) + return; + p = strstr(saved_command_line, "root="); + if (p != NULL && (p == saved_command_line || p[-1] == ' ')) + return; + + if (!found_boot) { + find_boot_device(); + found_boot = 1; + } + if (!boot_dev || dev == boot_dev) { + ROOT_DEV = dev + part; + boot_dev = 0; + current_root_goodness = goodness; + } +} + +static void +pmac_restart(char *cmd) +{ +#ifdef CONFIG_ADB_CUDA + struct adb_request req; +#endif /* CONFIG_ADB_CUDA */ + + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + cuda_request(&req, NULL, 2, CUDA_PACKET, + CUDA_RESET_SYSTEM); + for (;;) + cuda_poll(); + break; +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + pmu_restart(); + break; +#endif /* CONFIG_ADB_PMU */ + default: ; + } +} + +static void +pmac_power_off(void) +{ +#ifdef CONFIG_ADB_CUDA + struct adb_request req; +#endif /* CONFIG_ADB_CUDA */ + + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + cuda_request(&req, NULL, 2, CUDA_PACKET, + CUDA_POWERDOWN); + for (;;) + cuda_poll(); + break; +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + pmu_shutdown(); + break; +#endif /* CONFIG_ADB_PMU */ + default: ; + } +} + +static void +pmac_halt(void) +{ + pmac_power_off(); +} + +void __init +pmac_init(unsigned long r3, unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7) +{ + /* isa_io_base gets set in pmac_find_bridges */ + isa_mem_base = PMAC_ISA_MEM_BASE; + pci_dram_offset = PMAC_PCI_DRAM_OFFSET; + ISA_DMA_THRESHOLD = ~0L; + DMA_MODE_READ = 1; + DMA_MODE_WRITE = 2; + + ppc_md.setup_arch = pmac_setup_arch; + ppc_md.show_cpuinfo = pmac_show_cpuinfo; + ppc_md.show_percpuinfo = pmac_show_percpuinfo; + ppc_md.irq_canonicalize = NULL; + ppc_md.init_IRQ = pmac_pic_init; + ppc_md.get_irq = pmac_get_irq; /* Changed later on ... */ + + ppc_md.pcibios_fixup = pmac_pcibios_fixup; + ppc_md.pcibios_enable_device_hook = pmac_pci_enable_device_hook; + ppc_md.pcibios_after_init = pmac_pcibios_after_init; + ppc_md.phys_mem_access_prot = pci_phys_mem_access_prot; + + ppc_md.restart = pmac_restart; + ppc_md.power_off = pmac_power_off; + ppc_md.halt = pmac_halt; + + ppc_md.time_init = pmac_time_init; + ppc_md.set_rtc_time = pmac_set_rtc_time; + ppc_md.get_rtc_time = pmac_get_rtc_time; + ppc_md.calibrate_decr = pmac_calibrate_decr; + + ppc_md.feature_call = pmac_do_feature_call; + +#if defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) +#ifdef CONFIG_BLK_DEV_IDE_PMAC + ppc_ide_md.ide_init_hwif = pmac_ide_init_hwif_ports; + ppc_ide_md.default_io_base = pmac_ide_get_base; +#endif /* CONFIG_BLK_DEV_IDE_PMAC */ +#endif /* defined(CONFIG_BLK_DEV_IDE) || defined(CONFIG_BLK_DEV_IDE_MODULE) */ + +#ifdef CONFIG_BOOTX_TEXT + ppc_md.progress = pmac_progress; +#endif /* CONFIG_BOOTX_TEXT */ + + if (ppc_md.progress) ppc_md.progress("pmac_init(): exit", 0); + +} + +#ifdef CONFIG_BOOTX_TEXT +static void __init +pmac_progress(char *s, unsigned short hex) +{ + if (boot_text_mapped) { + btext_drawstring(s); + btext_drawchar('\n'); + } +} +#endif /* CONFIG_BOOTX_TEXT */ + +static int __init +pmac_declare_of_platform_devices(void) +{ + struct device_node *np; + + np = find_devices("uni-n"); + if (np) { + for (np = np->child; np != NULL; np = np->sibling) + if (strncmp(np->name, "i2c", 3) == 0) { + of_platform_device_create(np, "uni-n-i2c", + NULL); + break; + } + } + np = find_devices("u3"); + if (np) { + for (np = np->child; np != NULL; np = np->sibling) + if (strncmp(np->name, "i2c", 3) == 0) { + of_platform_device_create(np, "u3-i2c", + NULL); + break; + } + } + + np = find_devices("valkyrie"); + if (np) + of_platform_device_create(np, "valkyrie", NULL); + np = find_devices("platinum"); + if (np) + of_platform_device_create(np, "platinum", NULL); + + return 0; +} + +device_initcall(pmac_declare_of_platform_devices); diff --git a/arch/powerpc/platforms/powermac/pmac_sleep.S b/arch/powerpc/platforms/powermac/pmac_sleep.S new file mode 100644 index 00000000000..88419c77ac4 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_sleep.S @@ -0,0 +1,396 @@ +/* + * This file contains sleep low-level functions for PowerBook G3. + * Copyright (C) 1999 Benjamin Herrenschmidt (benh@kernel.crashing.org) + * and Paul Mackerras (paulus@samba.org). + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/page.h> +#include <asm/ppc_asm.h> +#include <asm/cputable.h> +#include <asm/cache.h> +#include <asm/thread_info.h> +#include <asm/asm-offsets.h> + +#define MAGIC 0x4c617273 /* 'Lars' */ + +/* + * Structure for storing CPU registers on the stack. + */ +#define SL_SP 0 +#define SL_PC 4 +#define SL_MSR 8 +#define SL_SDR1 0xc +#define SL_SPRG0 0x10 /* 4 sprg's */ +#define SL_DBAT0 0x20 +#define SL_IBAT0 0x28 +#define SL_DBAT1 0x30 +#define SL_IBAT1 0x38 +#define SL_DBAT2 0x40 +#define SL_IBAT2 0x48 +#define SL_DBAT3 0x50 +#define SL_IBAT3 0x58 +#define SL_TB 0x60 +#define SL_R2 0x68 +#define SL_CR 0x6c +#define SL_R12 0x70 /* r12 to r31 */ +#define SL_SIZE (SL_R12 + 80) + + .section .text + .align 5 + +#if defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ_PMAC) + +/* This gets called by via-pmu.c late during the sleep process. + * The PMU was already send the sleep command and will shut us down + * soon. We need to save all that is needed and setup the wakeup + * vector that will be called by the ROM on wakeup + */ +_GLOBAL(low_sleep_handler) +#ifndef CONFIG_6xx + blr +#else + mflr r0 + stw r0,4(r1) + stwu r1,-SL_SIZE(r1) + mfcr r0 + stw r0,SL_CR(r1) + stw r2,SL_R2(r1) + stmw r12,SL_R12(r1) + + /* Save MSR & SDR1 */ + mfmsr r4 + stw r4,SL_MSR(r1) + mfsdr1 r4 + stw r4,SL_SDR1(r1) + + /* Get a stable timebase and save it */ +1: mftbu r4 + stw r4,SL_TB(r1) + mftb r5 + stw r5,SL_TB+4(r1) + mftbu r3 + cmpw r3,r4 + bne 1b + + /* Save SPRGs */ + mfsprg r4,0 + stw r4,SL_SPRG0(r1) + mfsprg r4,1 + stw r4,SL_SPRG0+4(r1) + mfsprg r4,2 + stw r4,SL_SPRG0+8(r1) + mfsprg r4,3 + stw r4,SL_SPRG0+12(r1) + + /* Save BATs */ + mfdbatu r4,0 + stw r4,SL_DBAT0(r1) + mfdbatl r4,0 + stw r4,SL_DBAT0+4(r1) + mfdbatu r4,1 + stw r4,SL_DBAT1(r1) + mfdbatl r4,1 + stw r4,SL_DBAT1+4(r1) + mfdbatu r4,2 + stw r4,SL_DBAT2(r1) + mfdbatl r4,2 + stw r4,SL_DBAT2+4(r1) + mfdbatu r4,3 + stw r4,SL_DBAT3(r1) + mfdbatl r4,3 + stw r4,SL_DBAT3+4(r1) + mfibatu r4,0 + stw r4,SL_IBAT0(r1) + mfibatl r4,0 + stw r4,SL_IBAT0+4(r1) + mfibatu r4,1 + stw r4,SL_IBAT1(r1) + mfibatl r4,1 + stw r4,SL_IBAT1+4(r1) + mfibatu r4,2 + stw r4,SL_IBAT2(r1) + mfibatl r4,2 + stw r4,SL_IBAT2+4(r1) + mfibatu r4,3 + stw r4,SL_IBAT3(r1) + mfibatl r4,3 + stw r4,SL_IBAT3+4(r1) + + /* Backup various CPU config stuffs */ + bl __save_cpu_setup + + /* The ROM can wake us up via 2 different vectors: + * - On wallstreet & lombard, we must write a magic + * value 'Lars' at address 4 and a pointer to a + * memory location containing the PC to resume from + * at address 0. + * - On Core99, we must store the wakeup vector at + * address 0x80 and eventually it's parameters + * at address 0x84. I've have some trouble with those + * parameters however and I no longer use them. + */ + lis r5,grackle_wake_up@ha + addi r5,r5,grackle_wake_up@l + tophys(r5,r5) + stw r5,SL_PC(r1) + lis r4,KERNELBASE@h + tophys(r5,r1) + addi r5,r5,SL_PC + lis r6,MAGIC@ha + addi r6,r6,MAGIC@l + stw r5,0(r4) + stw r6,4(r4) + /* Setup stuffs at 0x80-0x84 for Core99 */ + lis r3,core99_wake_up@ha + addi r3,r3,core99_wake_up@l + tophys(r3,r3) + stw r3,0x80(r4) + stw r5,0x84(r4) + /* Store a pointer to our backup storage into + * a kernel global + */ + lis r3,sleep_storage@ha + addi r3,r3,sleep_storage@l + stw r5,0(r3) + + .globl low_cpu_die +low_cpu_die: + /* Flush & disable all caches */ + bl flush_disable_caches + + /* Turn off data relocation. */ + mfmsr r3 /* Save MSR in r7 */ + rlwinm r3,r3,0,28,26 /* Turn off DR bit */ + sync + mtmsr r3 + isync + +BEGIN_FTR_SECTION + /* Flush any pending L2 data prefetches to work around HW bug */ + sync + lis r3,0xfff0 + lwz r0,0(r3) /* perform cache-inhibited load to ROM */ + sync /* (caches are disabled at this point) */ +END_FTR_SECTION_IFSET(CPU_FTR_SPEC7450) + +/* + * Set the HID0 and MSR for sleep. + */ + mfspr r2,SPRN_HID0 + rlwinm r2,r2,0,10,7 /* clear doze, nap */ + oris r2,r2,HID0_SLEEP@h + sync + isync + mtspr SPRN_HID0,r2 + sync + +/* This loop puts us back to sleep in case we have a spurrious + * wakeup so that the host bridge properly stays asleep. The + * CPU will be turned off, either after a known time (about 1 + * second) on wallstreet & lombard, or as soon as the CPU enters + * SLEEP mode on core99 + */ + mfmsr r2 + oris r2,r2,MSR_POW@h +1: sync + mtmsr r2 + isync + b 1b + +/* + * Here is the resume code. + */ + + +/* + * Core99 machines resume here + * r4 has the physical address of SL_PC(sp) (unused) + */ +_GLOBAL(core99_wake_up) + /* Make sure HID0 no longer contains any sleep bit and that data cache + * is disabled + */ + mfspr r3,SPRN_HID0 + rlwinm r3,r3,0,11,7 /* clear SLEEP, NAP, DOZE bits */ + rlwinm 3,r3,0,18,15 /* clear DCE, ICE */ + mtspr SPRN_HID0,r3 + sync + isync + + /* sanitize MSR */ + mfmsr r3 + ori r3,r3,MSR_EE|MSR_IP + xori r3,r3,MSR_EE|MSR_IP + sync + isync + mtmsr r3 + sync + isync + + /* Recover sleep storage */ + lis r3,sleep_storage@ha + addi r3,r3,sleep_storage@l + tophys(r3,r3) + lwz r1,0(r3) + + /* Pass thru to older resume code ... */ +/* + * Here is the resume code for older machines. + * r1 has the physical address of SL_PC(sp). + */ + +grackle_wake_up: + + /* Restore the kernel's segment registers before + * we do any r1 memory access as we are not sure they + * are in a sane state above the first 256Mb region + */ + li r0,16 /* load up segment register values */ + mtctr r0 /* for context 0 */ + lis r3,0x2000 /* Ku = 1, VSID = 0 */ + li r4,0 +3: mtsrin r3,r4 + addi r3,r3,0x111 /* increment VSID */ + addis r4,r4,0x1000 /* address of next segment */ + bdnz 3b + sync + isync + + subi r1,r1,SL_PC + + /* Restore various CPU config stuffs */ + bl __restore_cpu_setup + + /* Make sure all FPRs have been initialized */ + bl reloc_offset + bl __init_fpu_registers + + /* Invalidate & enable L1 cache, we don't care about + * whatever the ROM may have tried to write to memory + */ + bl __inval_enable_L1 + + /* Restore the BATs, and SDR1. Then we can turn on the MMU. */ + lwz r4,SL_SDR1(r1) + mtsdr1 r4 + lwz r4,SL_SPRG0(r1) + mtsprg 0,r4 + lwz r4,SL_SPRG0+4(r1) + mtsprg 1,r4 + lwz r4,SL_SPRG0+8(r1) + mtsprg 2,r4 + lwz r4,SL_SPRG0+12(r1) + mtsprg 3,r4 + + lwz r4,SL_DBAT0(r1) + mtdbatu 0,r4 + lwz r4,SL_DBAT0+4(r1) + mtdbatl 0,r4 + lwz r4,SL_DBAT1(r1) + mtdbatu 1,r4 + lwz r4,SL_DBAT1+4(r1) + mtdbatl 1,r4 + lwz r4,SL_DBAT2(r1) + mtdbatu 2,r4 + lwz r4,SL_DBAT2+4(r1) + mtdbatl 2,r4 + lwz r4,SL_DBAT3(r1) + mtdbatu 3,r4 + lwz r4,SL_DBAT3+4(r1) + mtdbatl 3,r4 + lwz r4,SL_IBAT0(r1) + mtibatu 0,r4 + lwz r4,SL_IBAT0+4(r1) + mtibatl 0,r4 + lwz r4,SL_IBAT1(r1) + mtibatu 1,r4 + lwz r4,SL_IBAT1+4(r1) + mtibatl 1,r4 + lwz r4,SL_IBAT2(r1) + mtibatu 2,r4 + lwz r4,SL_IBAT2+4(r1) + mtibatl 2,r4 + lwz r4,SL_IBAT3(r1) + mtibatu 3,r4 + lwz r4,SL_IBAT3+4(r1) + mtibatl 3,r4 + +BEGIN_FTR_SECTION + li r4,0 + mtspr SPRN_DBAT4U,r4 + mtspr SPRN_DBAT4L,r4 + mtspr SPRN_DBAT5U,r4 + mtspr SPRN_DBAT5L,r4 + mtspr SPRN_DBAT6U,r4 + mtspr SPRN_DBAT6L,r4 + mtspr SPRN_DBAT7U,r4 + mtspr SPRN_DBAT7L,r4 + mtspr SPRN_IBAT4U,r4 + mtspr SPRN_IBAT4L,r4 + mtspr SPRN_IBAT5U,r4 + mtspr SPRN_IBAT5L,r4 + mtspr SPRN_IBAT6U,r4 + mtspr SPRN_IBAT6L,r4 + mtspr SPRN_IBAT7U,r4 + mtspr SPRN_IBAT7L,r4 +END_FTR_SECTION_IFSET(CPU_FTR_HAS_HIGH_BATS) + + /* Flush all TLBs */ + lis r4,0x1000 +1: addic. r4,r4,-0x1000 + tlbie r4 + blt 1b + sync + + /* restore the MSR and turn on the MMU */ + lwz r3,SL_MSR(r1) + bl turn_on_mmu + + /* get back the stack pointer */ + tovirt(r1,r1) + + /* Restore TB */ + li r3,0 + mttbl r3 + lwz r3,SL_TB(r1) + lwz r4,SL_TB+4(r1) + mttbu r3 + mttbl r4 + + /* Restore the callee-saved registers and return */ + lwz r0,SL_CR(r1) + mtcr r0 + lwz r2,SL_R2(r1) + lmw r12,SL_R12(r1) + addi r1,r1,SL_SIZE + lwz r0,4(r1) + mtlr r0 + blr + +turn_on_mmu: + mflr r4 + tovirt(r4,r4) + mtsrr0 r4 + mtsrr1 r3 + sync + isync + rfi + +#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */ + + .section .data + .balign L1_CACHE_LINE_SIZE +sleep_storage: + .long 0 + .balign L1_CACHE_LINE_SIZE, 0 + +#endif /* CONFIG_6xx */ + .section .text diff --git a/arch/powerpc/platforms/powermac/pmac_smp.c b/arch/powerpc/platforms/powermac/pmac_smp.c new file mode 100644 index 00000000000..995e9095d86 --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_smp.c @@ -0,0 +1,716 @@ +/* + * SMP support for power macintosh. + * + * We support both the old "powersurge" SMP architecture + * and the current Core99 (G4 PowerMac) machines. + * + * Note that we don't support the very first rev. of + * Apple/DayStar 2 CPUs board, the one with the funky + * watchdog. Hopefully, none of these should be there except + * maybe internally to Apple. I should probably still add some + * code to detect this card though and disable SMP. --BenH. + * + * Support Macintosh G4 SMP by Troy Benjegerdes (hozer@drgw.net) + * and Ben Herrenschmidt <benh@kernel.crashing.org>. + * + * Support for DayStar quad CPU cards + * Copyright (C) XLR8, Inc. 1994-2000 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include <linux/config.h> +#include <linux/kernel.h> +#include <linux/sched.h> +#include <linux/smp.h> +#include <linux/smp_lock.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/errno.h> +#include <linux/hardirq.h> +#include <linux/cpu.h> + +#include <asm/ptrace.h> +#include <asm/atomic.h> +#include <asm/irq.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include <asm/sections.h> +#include <asm/io.h> +#include <asm/prom.h> +#include <asm/smp.h> +#include <asm/residual.h> +#include <asm/machdep.h> +#include <asm/pmac_feature.h> +#include <asm/time.h> +#include <asm/open_pic.h> +#include <asm/cacheflush.h> +#include <asm/keylargo.h> + +/* + * Powersurge (old powermac SMP) support. + */ + +extern void __secondary_start_pmac_0(void); + +/* Addresses for powersurge registers */ +#define HAMMERHEAD_BASE 0xf8000000 +#define HHEAD_CONFIG 0x90 +#define HHEAD_SEC_INTR 0xc0 + +/* register for interrupting the primary processor on the powersurge */ +/* N.B. this is actually the ethernet ROM! */ +#define PSURGE_PRI_INTR 0xf3019000 + +/* register for storing the start address for the secondary processor */ +/* N.B. this is the PCI config space address register for the 1st bridge */ +#define PSURGE_START 0xf2800000 + +/* Daystar/XLR8 4-CPU card */ +#define PSURGE_QUAD_REG_ADDR 0xf8800000 + +#define PSURGE_QUAD_IRQ_SET 0 +#define PSURGE_QUAD_IRQ_CLR 1 +#define PSURGE_QUAD_IRQ_PRIMARY 2 +#define PSURGE_QUAD_CKSTOP_CTL 3 +#define PSURGE_QUAD_PRIMARY_ARB 4 +#define PSURGE_QUAD_BOARD_ID 6 +#define PSURGE_QUAD_WHICH_CPU 7 +#define PSURGE_QUAD_CKSTOP_RDBK 8 +#define PSURGE_QUAD_RESET_CTL 11 + +#define PSURGE_QUAD_OUT(r, v) (out_8(quad_base + ((r) << 4) + 4, (v))) +#define PSURGE_QUAD_IN(r) (in_8(quad_base + ((r) << 4) + 4) & 0x0f) +#define PSURGE_QUAD_BIS(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) | (v))) +#define PSURGE_QUAD_BIC(r, v) (PSURGE_QUAD_OUT((r), PSURGE_QUAD_IN(r) & ~(v))) + +/* virtual addresses for the above */ +static volatile u8 __iomem *hhead_base; +static volatile u8 __iomem *quad_base; +static volatile u32 __iomem *psurge_pri_intr; +static volatile u8 __iomem *psurge_sec_intr; +static volatile u32 __iomem *psurge_start; + +/* values for psurge_type */ +#define PSURGE_NONE -1 +#define PSURGE_DUAL 0 +#define PSURGE_QUAD_OKEE 1 +#define PSURGE_QUAD_COTTON 2 +#define PSURGE_QUAD_ICEGRASS 3 + +/* what sort of powersurge board we have */ +static int psurge_type = PSURGE_NONE; + +/* L2 and L3 cache settings to pass from CPU0 to CPU1 */ +volatile static long int core99_l2_cache; +volatile static long int core99_l3_cache; + +/* Timebase freeze GPIO */ +static unsigned int core99_tb_gpio; + +/* Sync flag for HW tb sync */ +static volatile int sec_tb_reset = 0; +static unsigned int pri_tb_hi, pri_tb_lo; +static unsigned int pri_tb_stamp; + +static void __devinit core99_init_caches(int cpu) +{ + if (!cpu_has_feature(CPU_FTR_L2CR)) + return; + + if (cpu == 0) { + core99_l2_cache = _get_L2CR(); + printk("CPU0: L2CR is %lx\n", core99_l2_cache); + } else { + printk("CPU%d: L2CR was %lx\n", cpu, _get_L2CR()); + _set_L2CR(0); + _set_L2CR(core99_l2_cache); + printk("CPU%d: L2CR set to %lx\n", cpu, core99_l2_cache); + } + + if (!cpu_has_feature(CPU_FTR_L3CR)) + return; + + if (cpu == 0){ + core99_l3_cache = _get_L3CR(); + printk("CPU0: L3CR is %lx\n", core99_l3_cache); + } else { + printk("CPU%d: L3CR was %lx\n", cpu, _get_L3CR()); + _set_L3CR(0); + _set_L3CR(core99_l3_cache); + printk("CPU%d: L3CR set to %lx\n", cpu, core99_l3_cache); + } +} + +/* + * Set and clear IPIs for powersurge. + */ +static inline void psurge_set_ipi(int cpu) +{ + if (psurge_type == PSURGE_NONE) + return; + if (cpu == 0) + in_be32(psurge_pri_intr); + else if (psurge_type == PSURGE_DUAL) + out_8(psurge_sec_intr, 0); + else + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_SET, 1 << cpu); +} + +static inline void psurge_clr_ipi(int cpu) +{ + if (cpu > 0) { + switch(psurge_type) { + case PSURGE_DUAL: + out_8(psurge_sec_intr, ~0); + case PSURGE_NONE: + break; + default: + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, 1 << cpu); + } + } +} + +/* + * On powersurge (old SMP powermac architecture) we don't have + * separate IPIs for separate messages like openpic does. Instead + * we have a bitmap for each processor, where a 1 bit means that + * the corresponding message is pending for that processor. + * Ideally each cpu's entry would be in a different cache line. + * -- paulus. + */ +static unsigned long psurge_smp_message[NR_CPUS]; + +void psurge_smp_message_recv(struct pt_regs *regs) +{ + int cpu = smp_processor_id(); + int msg; + + /* clear interrupt */ + psurge_clr_ipi(cpu); + + if (num_online_cpus() < 2) + return; + + /* make sure there is a message there */ + for (msg = 0; msg < 4; msg++) + if (test_and_clear_bit(msg, &psurge_smp_message[cpu])) + smp_message_recv(msg, regs); +} + +irqreturn_t psurge_primary_intr(int irq, void *d, struct pt_regs *regs) +{ + psurge_smp_message_recv(regs); + return IRQ_HANDLED; +} + +static void smp_psurge_message_pass(int target, int msg, unsigned long data, + int wait) +{ + int i; + + if (num_online_cpus() < 2) + return; + + for (i = 0; i < NR_CPUS; i++) { + if (!cpu_online(i)) + continue; + if (target == MSG_ALL + || (target == MSG_ALL_BUT_SELF && i != smp_processor_id()) + || target == i) { + set_bit(msg, &psurge_smp_message[i]); + psurge_set_ipi(i); + } + } +} + +/* + * Determine a quad card presence. We read the board ID register, we + * force the data bus to change to something else, and we read it again. + * It it's stable, then the register probably exist (ugh !) + */ +static int __init psurge_quad_probe(void) +{ + int type; + unsigned int i; + + type = PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID); + if (type < PSURGE_QUAD_OKEE || type > PSURGE_QUAD_ICEGRASS + || type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID)) + return PSURGE_DUAL; + + /* looks OK, try a slightly more rigorous test */ + /* bogus is not necessarily cacheline-aligned, + though I don't suppose that really matters. -- paulus */ + for (i = 0; i < 100; i++) { + volatile u32 bogus[8]; + bogus[(0+i)%8] = 0x00000000; + bogus[(1+i)%8] = 0x55555555; + bogus[(2+i)%8] = 0xFFFFFFFF; + bogus[(3+i)%8] = 0xAAAAAAAA; + bogus[(4+i)%8] = 0x33333333; + bogus[(5+i)%8] = 0xCCCCCCCC; + bogus[(6+i)%8] = 0xCCCCCCCC; + bogus[(7+i)%8] = 0x33333333; + wmb(); + asm volatile("dcbf 0,%0" : : "r" (bogus) : "memory"); + mb(); + if (type != PSURGE_QUAD_IN(PSURGE_QUAD_BOARD_ID)) + return PSURGE_DUAL; + } + return type; +} + +static void __init psurge_quad_init(void) +{ + int procbits; + + if (ppc_md.progress) ppc_md.progress("psurge_quad_init", 0x351); + procbits = ~PSURGE_QUAD_IN(PSURGE_QUAD_WHICH_CPU); + if (psurge_type == PSURGE_QUAD_ICEGRASS) + PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits); + else + PSURGE_QUAD_BIC(PSURGE_QUAD_CKSTOP_CTL, procbits); + mdelay(33); + out_8(psurge_sec_intr, ~0); + PSURGE_QUAD_OUT(PSURGE_QUAD_IRQ_CLR, procbits); + PSURGE_QUAD_BIS(PSURGE_QUAD_RESET_CTL, procbits); + if (psurge_type != PSURGE_QUAD_ICEGRASS) + PSURGE_QUAD_BIS(PSURGE_QUAD_CKSTOP_CTL, procbits); + PSURGE_QUAD_BIC(PSURGE_QUAD_PRIMARY_ARB, procbits); + mdelay(33); + PSURGE_QUAD_BIC(PSURGE_QUAD_RESET_CTL, procbits); + mdelay(33); + PSURGE_QUAD_BIS(PSURGE_QUAD_PRIMARY_ARB, procbits); + mdelay(33); +} + +static int __init smp_psurge_probe(void) +{ + int i, ncpus; + + /* We don't do SMP on the PPC601 -- paulus */ + if (PVR_VER(mfspr(SPRN_PVR)) == 1) + return 1; + + /* + * The powersurge cpu board can be used in the generation + * of powermacs that have a socket for an upgradeable cpu card, + * including the 7500, 8500, 9500, 9600. + * The device tree doesn't tell you if you have 2 cpus because + * OF doesn't know anything about the 2nd processor. + * Instead we look for magic bits in magic registers, + * in the hammerhead memory controller in the case of the + * dual-cpu powersurge board. -- paulus. + */ + if (find_devices("hammerhead") == NULL) + return 1; + + hhead_base = ioremap(HAMMERHEAD_BASE, 0x800); + quad_base = ioremap(PSURGE_QUAD_REG_ADDR, 1024); + psurge_sec_intr = hhead_base + HHEAD_SEC_INTR; + + psurge_type = psurge_quad_probe(); + if (psurge_type != PSURGE_DUAL) { + psurge_quad_init(); + /* All released cards using this HW design have 4 CPUs */ + ncpus = 4; + } else { + iounmap(quad_base); + if ((in_8(hhead_base + HHEAD_CONFIG) & 0x02) == 0) { + /* not a dual-cpu card */ + iounmap(hhead_base); + psurge_type = PSURGE_NONE; + return 1; + } + ncpus = 2; + } + + psurge_start = ioremap(PSURGE_START, 4); + psurge_pri_intr = ioremap(PSURGE_PRI_INTR, 4); + + /* this is not actually strictly necessary -- paulus. */ + for (i = 1; i < ncpus; ++i) + smp_hw_index[i] = i; + + if (ppc_md.progress) ppc_md.progress("smp_psurge_probe - done", 0x352); + + return ncpus; +} + +static void __init smp_psurge_kick_cpu(int nr) +{ + unsigned long start = __pa(__secondary_start_pmac_0) + nr * 8; + unsigned long a; + + /* may need to flush here if secondary bats aren't setup */ + for (a = KERNELBASE; a < KERNELBASE + 0x800000; a += 32) + asm volatile("dcbf 0,%0" : : "r" (a) : "memory"); + asm volatile("sync"); + + if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu", 0x353); + + out_be32(psurge_start, start); + mb(); + + psurge_set_ipi(nr); + udelay(10); + psurge_clr_ipi(nr); + + if (ppc_md.progress) ppc_md.progress("smp_psurge_kick_cpu - done", 0x354); +} + +/* + * With the dual-cpu powersurge board, the decrementers and timebases + * of both cpus are frozen after the secondary cpu is started up, + * until we give the secondary cpu another interrupt. This routine + * uses this to get the timebases synchronized. + * -- paulus. + */ +static void __init psurge_dual_sync_tb(int cpu_nr) +{ + int t; + + set_dec(tb_ticks_per_jiffy); + set_tb(0, 0); + last_jiffy_stamp(cpu_nr) = 0; + + if (cpu_nr > 0) { + mb(); + sec_tb_reset = 1; + return; + } + + /* wait for the secondary to have reset its TB before proceeding */ + for (t = 10000000; t > 0 && !sec_tb_reset; --t) + ; + + /* now interrupt the secondary, starting both TBs */ + psurge_set_ipi(1); + + smp_tb_synchronized = 1; +} + +static struct irqaction psurge_irqaction = { + .handler = psurge_primary_intr, + .flags = SA_INTERRUPT, + .mask = CPU_MASK_NONE, + .name = "primary IPI", +}; + +static void __init smp_psurge_setup_cpu(int cpu_nr) +{ + + if (cpu_nr == 0) { + /* If we failed to start the second CPU, we should still + * send it an IPI to start the timebase & DEC or we might + * have them stuck. + */ + if (num_online_cpus() < 2) { + if (psurge_type == PSURGE_DUAL) + psurge_set_ipi(1); + return; + } + /* reset the entry point so if we get another intr we won't + * try to startup again */ + out_be32(psurge_start, 0x100); + if (setup_irq(30, &psurge_irqaction)) + printk(KERN_ERR "Couldn't get primary IPI interrupt"); + } + + if (psurge_type == PSURGE_DUAL) + psurge_dual_sync_tb(cpu_nr); +} + +void __init smp_psurge_take_timebase(void) +{ + /* Dummy implementation */ +} + +void __init smp_psurge_give_timebase(void) +{ + /* Dummy implementation */ +} + +static int __init smp_core99_probe(void) +{ +#ifdef CONFIG_6xx + extern int powersave_nap; +#endif + struct device_node *cpus, *firstcpu; + int i, ncpus = 0, boot_cpu = -1; + u32 *tbprop = NULL; + + if (ppc_md.progress) ppc_md.progress("smp_core99_probe", 0x345); + cpus = firstcpu = find_type_devices("cpu"); + while(cpus != NULL) { + u32 *regprop = (u32 *)get_property(cpus, "reg", NULL); + char *stateprop = (char *)get_property(cpus, "state", NULL); + if (regprop != NULL && stateprop != NULL && + !strncmp(stateprop, "running", 7)) + boot_cpu = *regprop; + ++ncpus; + cpus = cpus->next; + } + if (boot_cpu == -1) + printk(KERN_WARNING "Couldn't detect boot CPU !\n"); + if (boot_cpu != 0) + printk(KERN_WARNING "Boot CPU is %d, unsupported setup !\n", boot_cpu); + + if (machine_is_compatible("MacRISC4")) { + extern struct smp_ops_t core99_smp_ops; + + core99_smp_ops.take_timebase = smp_generic_take_timebase; + core99_smp_ops.give_timebase = smp_generic_give_timebase; + } else { + if (firstcpu != NULL) + tbprop = (u32 *)get_property(firstcpu, "timebase-enable", NULL); + if (tbprop) + core99_tb_gpio = *tbprop; + else + core99_tb_gpio = KL_GPIO_TB_ENABLE; + } + + if (ncpus > 1) { + mpic_request_ipis(); + for (i = 1; i < ncpus; ++i) + smp_hw_index[i] = i; +#ifdef CONFIG_6xx + powersave_nap = 0; +#endif + core99_init_caches(0); + } + + return ncpus; +} + +static void __devinit smp_core99_kick_cpu(int nr) +{ + unsigned long save_vector, new_vector; + unsigned long flags; + + volatile unsigned long *vector + = ((volatile unsigned long *)(KERNELBASE+0x100)); + if (nr < 0 || nr > 3) + return; + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu", 0x346); + + local_irq_save(flags); + local_irq_disable(); + + /* Save reset vector */ + save_vector = *vector; + + /* Setup fake reset vector that does + * b __secondary_start_pmac_0 + nr*8 - KERNELBASE + */ + new_vector = (unsigned long) __secondary_start_pmac_0 + nr * 8; + *vector = 0x48000002 + new_vector - KERNELBASE; + + /* flush data cache and inval instruction cache */ + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + /* Put some life in our friend */ + pmac_call_feature(PMAC_FTR_RESET_CPU, NULL, nr, 0); + + /* FIXME: We wait a bit for the CPU to take the exception, I should + * instead wait for the entry code to set something for me. Well, + * ideally, all that crap will be done in prom.c and the CPU left + * in a RAM-based wait loop like CHRP. + */ + mdelay(1); + + /* Restore our exception vector */ + *vector = save_vector; + flush_icache_range((unsigned long) vector, (unsigned long) vector + 4); + + local_irq_restore(flags); + if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347); +} + +static void __devinit smp_core99_setup_cpu(int cpu_nr) +{ + /* Setup L2/L3 */ + if (cpu_nr != 0) + core99_init_caches(cpu_nr); + + /* Setup openpic */ + mpic_setup_this_cpu(); + + if (cpu_nr == 0) { +#ifdef CONFIG_POWER4 + extern void g5_phy_disable_cpu1(void); + + /* If we didn't start the second CPU, we must take + * it off the bus + */ + if (machine_is_compatible("MacRISC4") && + num_online_cpus() < 2) + g5_phy_disable_cpu1(); +#endif /* CONFIG_POWER4 */ + if (ppc_md.progress) ppc_md.progress("core99_setup_cpu 0 done", 0x349); + } +} + +/* not __init, called in sleep/wakeup code */ +void smp_core99_take_timebase(void) +{ + unsigned long flags; + + /* tell the primary we're here */ + sec_tb_reset = 1; + mb(); + + /* wait for the primary to set pri_tb_hi/lo */ + while (sec_tb_reset < 2) + mb(); + + /* set our stuff the same as the primary */ + local_irq_save(flags); + set_dec(1); + set_tb(pri_tb_hi, pri_tb_lo); + last_jiffy_stamp(smp_processor_id()) = pri_tb_stamp; + mb(); + + /* tell the primary we're done */ + sec_tb_reset = 0; + mb(); + local_irq_restore(flags); +} + +/* not __init, called in sleep/wakeup code */ +void smp_core99_give_timebase(void) +{ + unsigned long flags; + unsigned int t; + + /* wait for the secondary to be in take_timebase */ + for (t = 100000; t > 0 && !sec_tb_reset; --t) + udelay(10); + if (!sec_tb_reset) { + printk(KERN_WARNING "Timeout waiting sync on second CPU\n"); + return; + } + + /* freeze the timebase and read it */ + /* disable interrupts so the timebase is disabled for the + shortest possible time */ + local_irq_save(flags); + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 4); + pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); + mb(); + pri_tb_hi = get_tbu(); + pri_tb_lo = get_tbl(); + pri_tb_stamp = last_jiffy_stamp(smp_processor_id()); + mb(); + + /* tell the secondary we're ready */ + sec_tb_reset = 2; + mb(); + + /* wait for the secondary to have taken it */ + for (t = 100000; t > 0 && sec_tb_reset; --t) + udelay(10); + if (sec_tb_reset) + printk(KERN_WARNING "Timeout waiting sync(2) on second CPU\n"); + else + smp_tb_synchronized = 1; + + /* Now, restart the timebase by leaving the GPIO to an open collector */ + pmac_call_feature(PMAC_FTR_WRITE_GPIO, NULL, core99_tb_gpio, 0); + pmac_call_feature(PMAC_FTR_READ_GPIO, NULL, core99_tb_gpio, 0); + local_irq_restore(flags); +} + +void smp_core99_message_pass(int target, int msg, unsigned long data, int wait) +{ + cpumask_t mask = CPU_MASK_ALL; + /* make sure we're sending something that translates to an IPI */ + if (msg > 0x3) { + printk("SMP %d: smp_message_pass: unknown msg %d\n", + smp_processor_id(), msg); + return; + } + switch (target) { + case MSG_ALL: + mpic_send_ipi(msg, mask); + break; + case MSG_ALL_BUT_SELF: + cpu_clear(smp_processor_id(), mask); + mpic_send_ipi(msg, mask); + break; + default: + mpic_send_ipi(msg, cpumask_of_cpu(target)); + break; + } +} + + +/* PowerSurge-style Macs */ +struct smp_ops_t psurge_smp_ops = { + .message_pass = smp_psurge_message_pass, + .probe = smp_psurge_probe, + .kick_cpu = smp_psurge_kick_cpu, + .setup_cpu = smp_psurge_setup_cpu, + .give_timebase = smp_psurge_give_timebase, + .take_timebase = smp_psurge_take_timebase, +}; + +/* Core99 Macs (dual G4s) */ +struct smp_ops_t core99_smp_ops = { + .message_pass = smp_core99_message_pass, + .probe = smp_core99_probe, + .kick_cpu = smp_core99_kick_cpu, + .setup_cpu = smp_core99_setup_cpu, + .give_timebase = smp_core99_give_timebase, + .take_timebase = smp_core99_take_timebase, +}; + +#ifdef CONFIG_HOTPLUG_CPU + +int __cpu_disable(void) +{ + cpu_clear(smp_processor_id(), cpu_online_map); + + /* XXX reset cpu affinity here */ + openpic_set_priority(0xf); + asm volatile("mtdec %0" : : "r" (0x7fffffff)); + mb(); + udelay(20); + asm volatile("mtdec %0" : : "r" (0x7fffffff)); + return 0; +} + +extern void low_cpu_die(void) __attribute__((noreturn)); /* in pmac_sleep.S */ +static int cpu_dead[NR_CPUS]; + +void cpu_die(void) +{ + local_irq_disable(); + cpu_dead[smp_processor_id()] = 1; + mb(); + low_cpu_die(); +} + +void __cpu_die(unsigned int cpu) +{ + int timeout; + + timeout = 1000; + while (!cpu_dead[cpu]) { + if (--timeout == 0) { + printk("CPU %u refused to die!\n", cpu); + break; + } + msleep(1); + } + cpu_callin_map[cpu] = 0; + cpu_dead[cpu] = 0; +} + +#endif diff --git a/arch/powerpc/platforms/powermac/pmac_time.c b/arch/powerpc/platforms/powermac/pmac_time.c new file mode 100644 index 00000000000..ff6adff36cb --- /dev/null +++ b/arch/powerpc/platforms/powermac/pmac_time.c @@ -0,0 +1,291 @@ +/* + * Support for periodic interrupts (100 per second) and for getting + * the current time from the RTC on Power Macintoshes. + * + * We use the decrementer register for our periodic interrupts. + * + * Paul Mackerras August 1996. + * Copyright (C) 1996 Paul Mackerras. + */ +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/kernel.h> +#include <linux/param.h> +#include <linux/string.h> +#include <linux/mm.h> +#include <linux/init.h> +#include <linux/time.h> +#include <linux/adb.h> +#include <linux/cuda.h> +#include <linux/pmu.h> +#include <linux/hardirq.h> + +#include <asm/sections.h> +#include <asm/prom.h> +#include <asm/system.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/machdep.h> +#include <asm/time.h> +#include <asm/nvram.h> + +/* Apparently the RTC stores seconds since 1 Jan 1904 */ +#define RTC_OFFSET 2082844800 + +/* + * Calibrate the decrementer frequency with the VIA timer 1. + */ +#define VIA_TIMER_FREQ_6 4700000 /* time 1 frequency * 6 */ + +/* VIA registers */ +#define RS 0x200 /* skip between registers */ +#define T1CL (4*RS) /* Timer 1 ctr/latch (low 8 bits) */ +#define T1CH (5*RS) /* Timer 1 counter (high 8 bits) */ +#define T1LL (6*RS) /* Timer 1 latch (low 8 bits) */ +#define T1LH (7*RS) /* Timer 1 latch (high 8 bits) */ +#define ACR (11*RS) /* Auxiliary control register */ +#define IFR (13*RS) /* Interrupt flag register */ + +/* Bits in ACR */ +#define T1MODE 0xc0 /* Timer 1 mode */ +#define T1MODE_CONT 0x40 /* continuous interrupts */ + +/* Bits in IFR and IER */ +#define T1_INT 0x40 /* Timer 1 interrupt */ + +extern struct timezone sys_tz; + +long __init +pmac_time_init(void) +{ +#ifdef CONFIG_NVRAM + s32 delta = 0; + int dst; + + delta = ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x9)) << 16; + delta |= ((s32)pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xa)) << 8; + delta |= pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0xb); + if (delta & 0x00800000UL) + delta |= 0xFF000000UL; + dst = ((pmac_xpram_read(PMAC_XPRAM_MACHINE_LOC + 0x8) & 0x80) != 0); + printk("GMT Delta read from XPRAM: %d minutes, DST: %s\n", delta/60, + dst ? "on" : "off"); + return delta; +#else + return 0; +#endif +} + +unsigned long +pmac_get_rtc_time(void) +{ +#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) + struct adb_request req; + unsigned long now; +#endif + + /* Get the time from the RTC */ + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0) + return 0; + while (!req.complete) + cuda_poll(); + if (req.reply_len != 7) + printk(KERN_ERR "pmac_get_rtc_time: got %d byte reply\n", + req.reply_len); + now = (req.reply[3] << 24) + (req.reply[4] << 16) + + (req.reply[5] << 8) + req.reply[6]; + return now - RTC_OFFSET; +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0) + return 0; + while (!req.complete) + pmu_poll(); + if (req.reply_len != 4) + printk(KERN_ERR "pmac_get_rtc_time: got %d byte reply\n", + req.reply_len); + now = (req.reply[0] << 24) + (req.reply[1] << 16) + + (req.reply[2] << 8) + req.reply[3]; + return now - RTC_OFFSET; +#endif /* CONFIG_ADB_PMU */ + default: ; + } + return 0; +} + +int +pmac_set_rtc_time(unsigned long nowtime) +{ +#if defined(CONFIG_ADB_CUDA) || defined(CONFIG_ADB_PMU) + struct adb_request req; +#endif + + nowtime += RTC_OFFSET; + + switch (sys_ctrler) { +#ifdef CONFIG_ADB_CUDA + case SYS_CTRLER_CUDA: + if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME, + nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) + return 0; + while (!req.complete) + cuda_poll(); + if ((req.reply_len != 3) && (req.reply_len != 7)) + printk(KERN_ERR "pmac_set_rtc_time: got %d byte reply\n", + req.reply_len); + return 1; +#endif /* CONFIG_ADB_CUDA */ +#ifdef CONFIG_ADB_PMU + case SYS_CTRLER_PMU: + if (pmu_request(&req, NULL, 5, PMU_SET_RTC, + nowtime >> 24, nowtime >> 16, nowtime >> 8, nowtime) < 0) + return 0; + while (!req.complete) + pmu_poll(); + if (req.reply_len != 0) + printk(KERN_ERR "pmac_set_rtc_time: got %d byte reply\n", + req.reply_len); + return 1; +#endif /* CONFIG_ADB_PMU */ + default: + return 0; + } +} + +/* + * Calibrate the decrementer register using VIA timer 1. + * This is used both on powermacs and CHRP machines. + */ +int __init +via_calibrate_decr(void) +{ + struct device_node *vias; + volatile unsigned char __iomem *via; + int count = VIA_TIMER_FREQ_6 / 100; + unsigned int dstart, dend; + + vias = find_devices("via-cuda"); + if (vias == 0) + vias = find_devices("via-pmu"); + if (vias == 0) + vias = find_devices("via"); + if (vias == 0 || vias->n_addrs == 0) + return 0; + via = ioremap(vias->addrs[0].address, vias->addrs[0].size); + + /* set timer 1 for continuous interrupts */ + out_8(&via[ACR], (via[ACR] & ~T1MODE) | T1MODE_CONT); + /* set the counter to a small value */ + out_8(&via[T1CH], 2); + /* set the latch to `count' */ + out_8(&via[T1LL], count); + out_8(&via[T1LH], count >> 8); + /* wait until it hits 0 */ + while ((in_8(&via[IFR]) & T1_INT) == 0) + ; + dstart = get_dec(); + /* clear the interrupt & wait until it hits 0 again */ + in_8(&via[T1CL]); + while ((in_8(&via[IFR]) & T1_INT) == 0) + ; + dend = get_dec(); + + tb_ticks_per_jiffy = (dstart - dend) / (6 * (HZ/100)); + tb_to_us = mulhwu_scale_factor(dstart - dend, 60000); + + printk(KERN_INFO "via_calibrate_decr: ticks per jiffy = %u (%u ticks)\n", + tb_ticks_per_jiffy, dstart - dend); + + iounmap(via); + + return 1; +} + +#ifdef CONFIG_PM +/* + * Reset the time after a sleep. + */ +static int +time_sleep_notify(struct pmu_sleep_notifier *self, int when) +{ + static unsigned long time_diff; + unsigned long flags; + unsigned long seq; + + switch (when) { + case PBOOK_SLEEP_NOW: + do { + seq = read_seqbegin_irqsave(&xtime_lock, flags); + time_diff = xtime.tv_sec - pmac_get_rtc_time(); + } while (read_seqretry_irqrestore(&xtime_lock, seq, flags)); + break; + case PBOOK_WAKE: + write_seqlock_irqsave(&xtime_lock, flags); + xtime.tv_sec = pmac_get_rtc_time() + time_diff; + xtime.tv_nsec = 0; + last_rtc_update = xtime.tv_sec; + write_sequnlock_irqrestore(&xtime_lock, flags); + break; + } + return PBOOK_SLEEP_OK; +} + +static struct pmu_sleep_notifier time_sleep_notifier = { + time_sleep_notify, SLEEP_LEVEL_MISC, +}; +#endif /* CONFIG_PM */ + +/* + * Query the OF and get the decr frequency. + * This was taken from the pmac time_init() when merging the prep/pmac + * time functions. + */ +void __init +pmac_calibrate_decr(void) +{ + struct device_node *cpu; + unsigned int freq, *fp; + +#ifdef CONFIG_PM + pmu_register_sleep_notifier(&time_sleep_notifier); +#endif /* CONFIG_PM */ + + /* We assume MacRISC2 machines have correct device-tree + * calibration. That's better since the VIA itself seems + * to be slightly off. --BenH + */ + if (!machine_is_compatible("MacRISC2") && + !machine_is_compatible("MacRISC3") && + !machine_is_compatible("MacRISC4")) + if (via_calibrate_decr()) + return; + + /* Special case: QuickSilver G4s seem to have a badly calibrated + * timebase-frequency in OF, VIA is much better on these. We should + * probably implement calibration based on the KL timer on these + * machines anyway... -BenH + */ + if (machine_is_compatible("PowerMac3,5")) + if (via_calibrate_decr()) + return; + /* + * The cpu node should have a timebase-frequency property + * to tell us the rate at which the decrementer counts. + */ + cpu = find_type_devices("cpu"); + if (cpu == 0) + panic("can't find cpu node in time_init"); + fp = (unsigned int *) get_property(cpu, "timebase-frequency", NULL); + if (fp == 0) + panic("can't get cpu timebase frequency"); + freq = *fp; + printk("time_init: decrementer frequency = %u.%.6u MHz\n", + freq/1000000, freq%1000000); + tb_ticks_per_jiffy = freq / HZ; + tb_to_us = mulhwu_scale_factor(freq, 1000000); +} diff --git a/arch/powerpc/platforms/prep/Kconfig b/arch/powerpc/platforms/prep/Kconfig new file mode 100644 index 00000000000..673ac47a162 --- /dev/null +++ b/arch/powerpc/platforms/prep/Kconfig @@ -0,0 +1,22 @@ + +config PREP_RESIDUAL + bool "Support for PReP Residual Data" + depends on PPC_PREP + help + Some PReP systems have residual data passed to the kernel by the + firmware. This allows detection of memory size, devices present and + other useful pieces of information. Sometimes this information is + not present or incorrect, in which case it could lead to the machine + behaving incorrectly. If this happens, either disable PREP_RESIDUAL + or pass the 'noresidual' option to the kernel. + + If you are running a PReP system, say Y here, otherwise say N. + +config PROC_PREPRESIDUAL + bool "Support for reading of PReP Residual Data in /proc" + depends on PREP_RESIDUAL && PROC_FS + help + Enabling this option will create a /proc/residual file which allows + you to get at the residual data on PReP systems. You will need a tool + (lsresidual) to parse it. If you aren't on a PReP system, you don't + want this. diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig new file mode 100644 index 00000000000..7a3b6fc4d97 --- /dev/null +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -0,0 +1,47 @@ + +config PPC_SPLPAR + depends on PPC_PSERIES + bool "Support for shared-processor logical partitions" + default n + help + Enabling this option will make the kernel run more efficiently + on logically-partitioned pSeries systems which use shared + processors, that is, which share physical processors between + two or more partitions. + +config HMT + bool "Hardware multithreading" + depends on SMP && PPC_PSERIES && BROKEN + help + This option enables hardware multithreading on RS64 cpus. + pSeries systems p620 and p660 have such a cpu type. + +config EEH + bool "PCI Extended Error Handling (EEH)" if EMBEDDED + depends on PPC_PSERIES + default y if !EMBEDDED + +config PPC_RTAS + bool + depends on PPC_PSERIES || PPC_BPA + default y + +config RTAS_PROC + bool "Proc interface to RTAS" + depends on PPC_RTAS + default y + +config RTAS_FLASH + tristate "Firmware flash interface" + depends on PPC64 && RTAS_PROC + +config SCANLOG + tristate "Scanlog dump interface" + depends on RTAS_PROC && PPC_PSERIES + +config LPARCFG + tristate "LPAR Configuration Data" + depends on PPC_PSERIES || PPC_ISERIES + help + Provide system capacity information via human readable + <key word>=<value> pairs through a /proc/ppc64/lparcfg interface. diff --git a/arch/powerpc/sysdev/Makefile b/arch/powerpc/sysdev/Makefile new file mode 100644 index 00000000000..26bdcd9a2a4 --- /dev/null +++ b/arch/powerpc/sysdev/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_MPIC) += mpic.o diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c new file mode 100644 index 00000000000..c660e7d7c64 --- /dev/null +++ b/arch/powerpc/sysdev/mpic.c @@ -0,0 +1,904 @@ +/* + * arch/powerpc/kernel/mpic.c + * + * Driver for interrupt controllers following the OpenPIC standard, the + * common implementation beeing IBM's MPIC. This driver also can deal + * with various broken implementations of this HW. + * + * Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp. + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#undef DEBUG + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/bootmem.h> +#include <linux/spinlock.h> +#include <linux/pci.h> + +#include <asm/ptrace.h> +#include <asm/signal.h> +#include <asm/io.h> +#include <asm/pgtable.h> +#include <asm/irq.h> +#include <asm/machdep.h> +#include <asm/mpic.h> +#include <asm/smp.h> + +#ifdef DEBUG +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static struct mpic *mpics; +static struct mpic *mpic_primary; +static DEFINE_SPINLOCK(mpic_lock); + + +/* + * Register accessor functions + */ + + +static inline u32 _mpic_read(unsigned int be, volatile u32 __iomem *base, + unsigned int reg) +{ + if (be) + return in_be32(base + (reg >> 2)); + else + return in_le32(base + (reg >> 2)); +} + +static inline void _mpic_write(unsigned int be, volatile u32 __iomem *base, + unsigned int reg, u32 value) +{ + if (be) + out_be32(base + (reg >> 2), value); + else + out_le32(base + (reg >> 2), value); +} + +static inline u32 _mpic_ipi_read(struct mpic *mpic, unsigned int ipi) +{ + unsigned int be = (mpic->flags & MPIC_BIG_ENDIAN) != 0; + unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10); + + if (mpic->flags & MPIC_BROKEN_IPI) + be = !be; + return _mpic_read(be, mpic->gregs, offset); +} + +static inline void _mpic_ipi_write(struct mpic *mpic, unsigned int ipi, u32 value) +{ + unsigned int offset = MPIC_GREG_IPI_VECTOR_PRI_0 + (ipi * 0x10); + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->gregs, offset, value); +} + +static inline u32 _mpic_cpu_read(struct mpic *mpic, unsigned int reg) +{ + unsigned int cpu = 0; + + if (mpic->flags & MPIC_PRIMARY) + cpu = hard_smp_processor_id(); + + return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg); +} + +static inline void _mpic_cpu_write(struct mpic *mpic, unsigned int reg, u32 value) +{ + unsigned int cpu = 0; + + if (mpic->flags & MPIC_PRIMARY) + cpu = hard_smp_processor_id(); + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->cpuregs[cpu], reg, value); +} + +static inline u32 _mpic_irq_read(struct mpic *mpic, unsigned int src_no, unsigned int reg) +{ + unsigned int isu = src_no >> mpic->isu_shift; + unsigned int idx = src_no & mpic->isu_mask; + + return _mpic_read(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu], + reg + (idx * MPIC_IRQ_STRIDE)); +} + +static inline void _mpic_irq_write(struct mpic *mpic, unsigned int src_no, + unsigned int reg, u32 value) +{ + unsigned int isu = src_no >> mpic->isu_shift; + unsigned int idx = src_no & mpic->isu_mask; + + _mpic_write(mpic->flags & MPIC_BIG_ENDIAN, mpic->isus[isu], + reg + (idx * MPIC_IRQ_STRIDE), value); +} + +#define mpic_read(b,r) _mpic_read(mpic->flags & MPIC_BIG_ENDIAN,(b),(r)) +#define mpic_write(b,r,v) _mpic_write(mpic->flags & MPIC_BIG_ENDIAN,(b),(r),(v)) +#define mpic_ipi_read(i) _mpic_ipi_read(mpic,(i)) +#define mpic_ipi_write(i,v) _mpic_ipi_write(mpic,(i),(v)) +#define mpic_cpu_read(i) _mpic_cpu_read(mpic,(i)) +#define mpic_cpu_write(i,v) _mpic_cpu_write(mpic,(i),(v)) +#define mpic_irq_read(s,r) _mpic_irq_read(mpic,(s),(r)) +#define mpic_irq_write(s,r,v) _mpic_irq_write(mpic,(s),(r),(v)) + + +/* + * Low level utility functions + */ + + + +/* Check if we have one of those nice broken MPICs with a flipped endian on + * reads from IPI registers + */ +static void __init mpic_test_broken_ipi(struct mpic *mpic) +{ + u32 r; + + mpic_write(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0, MPIC_VECPRI_MASK); + r = mpic_read(mpic->gregs, MPIC_GREG_IPI_VECTOR_PRI_0); + + if (r == le32_to_cpu(MPIC_VECPRI_MASK)) { + printk(KERN_INFO "mpic: Detected reversed IPI registers\n"); + mpic->flags |= MPIC_BROKEN_IPI; + } +} + +#ifdef CONFIG_MPIC_BROKEN_U3 + +/* Test if an interrupt is sourced from HyperTransport (used on broken U3s) + * to force the edge setting on the MPIC and do the ack workaround. + */ +static inline int mpic_is_ht_interrupt(struct mpic *mpic, unsigned int source_no) +{ + if (source_no >= 128 || !mpic->fixups) + return 0; + return mpic->fixups[source_no].base != NULL; +} + +static inline void mpic_apic_end_irq(struct mpic *mpic, unsigned int source_no) +{ + struct mpic_irq_fixup *fixup = &mpic->fixups[source_no]; + u32 tmp; + + spin_lock(&mpic->fixup_lock); + writeb(0x11 + 2 * fixup->irq, fixup->base); + tmp = readl(fixup->base + 2); + writel(tmp | 0x80000000ul, fixup->base + 2); + /* config writes shouldn't be posted but let's be safe ... */ + (void)readl(fixup->base + 2); + spin_unlock(&mpic->fixup_lock); +} + + +static void __init mpic_amd8111_read_irq(struct mpic *mpic, u8 __iomem *devbase) +{ + int i, irq; + u32 tmp; + + printk(KERN_INFO "mpic: - Workarounds on AMD 8111 @ %p\n", devbase); + + for (i=0; i < 24; i++) { + writeb(0x10 + 2*i, devbase + 0xf2); + tmp = readl(devbase + 0xf4); + if ((tmp & 0x1) || !(tmp & 0x20)) + continue; + irq = (tmp >> 16) & 0xff; + mpic->fixups[irq].irq = i; + mpic->fixups[irq].base = devbase + 0xf2; + } +} + +static void __init mpic_amd8131_read_irq(struct mpic *mpic, u8 __iomem *devbase) +{ + int i, irq; + u32 tmp; + + printk(KERN_INFO "mpic: - Workarounds on AMD 8131 @ %p\n", devbase); + + for (i=0; i < 4; i++) { + writeb(0x10 + 2*i, devbase + 0xba); + tmp = readl(devbase + 0xbc); + if ((tmp & 0x1) || !(tmp & 0x20)) + continue; + irq = (tmp >> 16) & 0xff; + mpic->fixups[irq].irq = i; + mpic->fixups[irq].base = devbase + 0xba; + } +} + +static void __init mpic_scan_ioapics(struct mpic *mpic) +{ + unsigned int devfn; + u8 __iomem *cfgspace; + + printk(KERN_INFO "mpic: Setting up IO-APICs workarounds for U3\n"); + + /* Allocate fixups array */ + mpic->fixups = alloc_bootmem(128 * sizeof(struct mpic_irq_fixup)); + BUG_ON(mpic->fixups == NULL); + memset(mpic->fixups, 0, 128 * sizeof(struct mpic_irq_fixup)); + + /* Init spinlock */ + spin_lock_init(&mpic->fixup_lock); + + /* Map u3 config space. We assume all IO-APICs are on the primary bus + * and slot will never be above "0xf" so we only need to map 32k + */ + cfgspace = (unsigned char __iomem *)ioremap(0xf2000000, 0x8000); + BUG_ON(cfgspace == NULL); + + /* Now we scan all slots. We do a very quick scan, we read the header type, + * vendor ID and device ID only, that's plenty enough + */ + for (devfn = 0; devfn < PCI_DEVFN(0x10,0); devfn ++) { + u8 __iomem *devbase = cfgspace + (devfn << 8); + u8 hdr_type = readb(devbase + PCI_HEADER_TYPE); + u32 l = readl(devbase + PCI_VENDOR_ID); + u16 vendor_id, device_id; + int multifunc = 0; + + DBG("devfn %x, l: %x\n", devfn, l); + + /* If no device, skip */ + if (l == 0xffffffff || l == 0x00000000 || + l == 0x0000ffff || l == 0xffff0000) + goto next; + + /* Check if it's a multifunction device (only really used + * to function 0 though + */ + multifunc = !!(hdr_type & 0x80); + vendor_id = l & 0xffff; + device_id = (l >> 16) & 0xffff; + + /* If a known device, go to fixup setup code */ + if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7460) + mpic_amd8111_read_irq(mpic, devbase); + if (vendor_id == PCI_VENDOR_ID_AMD && device_id == 0x7450) + mpic_amd8131_read_irq(mpic, devbase); + next: + /* next device, if function 0 */ + if ((PCI_FUNC(devfn) == 0) && !multifunc) + devfn += 7; + } +} + +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + +/* Find an mpic associated with a given linux interrupt */ +static struct mpic *mpic_find(unsigned int irq, unsigned int *is_ipi) +{ + struct mpic *mpic = mpics; + + while(mpic) { + /* search IPIs first since they may override the main interrupts */ + if (irq >= mpic->ipi_offset && irq < (mpic->ipi_offset + 4)) { + if (is_ipi) + *is_ipi = 1; + return mpic; + } + if (irq >= mpic->irq_offset && + irq < (mpic->irq_offset + mpic->irq_count)) { + if (is_ipi) + *is_ipi = 0; + return mpic; + } + mpic = mpic -> next; + } + return NULL; +} + +/* Convert a cpu mask from logical to physical cpu numbers. */ +static inline u32 mpic_physmask(u32 cpumask) +{ + int i; + u32 mask = 0; + + for (i = 0; i < NR_CPUS; ++i, cpumask >>= 1) + mask |= (cpumask & 1) << get_hard_smp_processor_id(i); + return mask; +} + +#ifdef CONFIG_SMP +/* Get the mpic structure from the IPI number */ +static inline struct mpic * mpic_from_ipi(unsigned int ipi) +{ + return container_of(irq_desc[ipi].handler, struct mpic, hc_ipi); +} +#endif + +/* Get the mpic structure from the irq number */ +static inline struct mpic * mpic_from_irq(unsigned int irq) +{ + return container_of(irq_desc[irq].handler, struct mpic, hc_irq); +} + +/* Send an EOI */ +static inline void mpic_eoi(struct mpic *mpic) +{ + mpic_cpu_write(MPIC_CPU_EOI, 0); + (void)mpic_cpu_read(MPIC_CPU_WHOAMI); +} + +#ifdef CONFIG_SMP +static irqreturn_t mpic_ipi_action(int irq, void *dev_id, struct pt_regs *regs) +{ + struct mpic *mpic = dev_id; + + smp_message_recv(irq - mpic->ipi_offset, regs); + return IRQ_HANDLED; +} +#endif /* CONFIG_SMP */ + +/* + * Linux descriptor level callbacks + */ + + +static void mpic_enable_irq(unsigned int irq) +{ + unsigned int loops = 100000; + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = irq - mpic->irq_offset; + + DBG("%s: enable_irq: %d (src %d)\n", mpic->name, irq, src); + + mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & ~MPIC_VECPRI_MASK); + + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { + printk(KERN_ERR "mpic_enable_irq timeout\n"); + break; + } + } while(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK); +} + +static void mpic_disable_irq(unsigned int irq) +{ + unsigned int loops = 100000; + struct mpic *mpic = mpic_from_irq(irq); + unsigned int src = irq - mpic->irq_offset; + + DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src); + + mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI, + mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) | MPIC_VECPRI_MASK); + + /* make sure mask gets to controller before we return to user */ + do { + if (!loops--) { + printk(KERN_ERR "mpic_enable_irq timeout\n"); + break; + } + } while(!(mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) & MPIC_VECPRI_MASK)); +} + +static void mpic_end_irq(unsigned int irq) +{ + struct mpic *mpic = mpic_from_irq(irq); + + DBG("%s: end_irq: %d\n", mpic->name, irq); + + /* We always EOI on end_irq() even for edge interrupts since that + * should only lower the priority, the MPIC should have properly + * latched another edge interrupt coming in anyway + */ + +#ifdef CONFIG_MPIC_BROKEN_U3 + if (mpic->flags & MPIC_BROKEN_U3) { + unsigned int src = irq - mpic->irq_offset; + if (mpic_is_ht_interrupt(mpic, src)) + mpic_apic_end_irq(mpic, src); + } +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + mpic_eoi(mpic); +} + +#ifdef CONFIG_SMP + +static void mpic_enable_ipi(unsigned int irq) +{ + struct mpic *mpic = mpic_from_ipi(irq); + unsigned int src = irq - mpic->ipi_offset; + + DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src); + mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK); +} + +static void mpic_disable_ipi(unsigned int irq) +{ + /* NEVER disable an IPI... that's just plain wrong! */ +} + +static void mpic_end_ipi(unsigned int irq) +{ + struct mpic *mpic = mpic_from_ipi(irq); + + /* + * IPIs are marked IRQ_PER_CPU. This has the side effect of + * preventing the IRQ_PENDING/IRQ_INPROGRESS logic from + * applying to them. We EOI them late to avoid re-entering. + * We mark IPI's with SA_INTERRUPT as they must run with + * irqs disabled. + */ + mpic_eoi(mpic); +} + +#endif /* CONFIG_SMP */ + +static void mpic_set_affinity(unsigned int irq, cpumask_t cpumask) +{ + struct mpic *mpic = mpic_from_irq(irq); + + cpumask_t tmp; + + cpus_and(tmp, cpumask, cpu_online_map); + + mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_DESTINATION, + mpic_physmask(cpus_addr(tmp)[0])); +} + + +/* + * Exported functions + */ + + +struct mpic * __init mpic_alloc(unsigned long phys_addr, + unsigned int flags, + unsigned int isu_size, + unsigned int irq_offset, + unsigned int irq_count, + unsigned int ipi_offset, + unsigned char *senses, + unsigned int senses_count, + const char *name) +{ + struct mpic *mpic; + u32 reg; + const char *vers; + int i; + + mpic = alloc_bootmem(sizeof(struct mpic)); + if (mpic == NULL) + return NULL; + + + memset(mpic, 0, sizeof(struct mpic)); + mpic->name = name; + + mpic->hc_irq.typename = name; + mpic->hc_irq.enable = mpic_enable_irq; + mpic->hc_irq.disable = mpic_disable_irq; + mpic->hc_irq.end = mpic_end_irq; + if (flags & MPIC_PRIMARY) + mpic->hc_irq.set_affinity = mpic_set_affinity; +#ifdef CONFIG_SMP + mpic->hc_ipi.typename = name; + mpic->hc_ipi.enable = mpic_enable_ipi; + mpic->hc_ipi.disable = mpic_disable_ipi; + mpic->hc_ipi.end = mpic_end_ipi; +#endif /* CONFIG_SMP */ + + mpic->flags = flags; + mpic->isu_size = isu_size; + mpic->irq_offset = irq_offset; + mpic->irq_count = irq_count; + mpic->ipi_offset = ipi_offset; + mpic->num_sources = 0; /* so far */ + mpic->senses = senses; + mpic->senses_count = senses_count; + + /* Map the global registers */ + mpic->gregs = ioremap(phys_addr + MPIC_GREG_BASE, 0x1000); + mpic->tmregs = mpic->gregs + (MPIC_TIMER_BASE >> 2); + BUG_ON(mpic->gregs == NULL); + + /* Reset */ + if (flags & MPIC_WANTS_RESET) { + mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0, + mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + | MPIC_GREG_GCONF_RESET); + while( mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + & MPIC_GREG_GCONF_RESET) + mb(); + } + + /* Read feature register, calculate num CPUs and, for non-ISU + * MPICs, num sources as well. On ISU MPICs, sources are counted + * as ISUs are added + */ + reg = mpic_read(mpic->gregs, MPIC_GREG_FEATURE_0); + mpic->num_cpus = ((reg & MPIC_GREG_FEATURE_LAST_CPU_MASK) + >> MPIC_GREG_FEATURE_LAST_CPU_SHIFT) + 1; + if (isu_size == 0) + mpic->num_sources = ((reg & MPIC_GREG_FEATURE_LAST_SRC_MASK) + >> MPIC_GREG_FEATURE_LAST_SRC_SHIFT) + 1; + + /* Map the per-CPU registers */ + for (i = 0; i < mpic->num_cpus; i++) { + mpic->cpuregs[i] = ioremap(phys_addr + MPIC_CPU_BASE + + i * MPIC_CPU_STRIDE, 0x1000); + BUG_ON(mpic->cpuregs[i] == NULL); + } + + /* Initialize main ISU if none provided */ + if (mpic->isu_size == 0) { + mpic->isu_size = mpic->num_sources; + mpic->isus[0] = ioremap(phys_addr + MPIC_IRQ_BASE, + MPIC_IRQ_STRIDE * mpic->isu_size); + BUG_ON(mpic->isus[0] == NULL); + } + mpic->isu_shift = 1 + __ilog2(mpic->isu_size - 1); + mpic->isu_mask = (1 << mpic->isu_shift) - 1; + + /* Display version */ + switch (reg & MPIC_GREG_FEATURE_VERSION_MASK) { + case 1: + vers = "1.0"; + break; + case 2: + vers = "1.2"; + break; + case 3: + vers = "1.3"; + break; + default: + vers = "<unknown>"; + break; + } + printk(KERN_INFO "mpic: Setting up MPIC \"%s\" version %s at %lx, max %d CPUs\n", + name, vers, phys_addr, mpic->num_cpus); + printk(KERN_INFO "mpic: ISU size: %d, shift: %d, mask: %x\n", mpic->isu_size, + mpic->isu_shift, mpic->isu_mask); + + mpic->next = mpics; + mpics = mpic; + + if (flags & MPIC_PRIMARY) + mpic_primary = mpic; + + return mpic; +} + +void __init mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, + unsigned long phys_addr) +{ + unsigned int isu_first = isu_num * mpic->isu_size; + + BUG_ON(isu_num >= MPIC_MAX_ISU); + + mpic->isus[isu_num] = ioremap(phys_addr, MPIC_IRQ_STRIDE * mpic->isu_size); + if ((isu_first + mpic->isu_size) > mpic->num_sources) + mpic->num_sources = isu_first + mpic->isu_size; +} + +void __init mpic_setup_cascade(unsigned int irq, mpic_cascade_t handler, + void *data) +{ + struct mpic *mpic = mpic_find(irq, NULL); + unsigned long flags; + + /* Synchronization here is a bit dodgy, so don't try to replace cascade + * interrupts on the fly too often ... but normally it's set up at boot. + */ + spin_lock_irqsave(&mpic_lock, flags); + if (mpic->cascade) + mpic_disable_irq(mpic->cascade_vec + mpic->irq_offset); + mpic->cascade = NULL; + wmb(); + mpic->cascade_vec = irq - mpic->irq_offset; + mpic->cascade_data = data; + wmb(); + mpic->cascade = handler; + mpic_enable_irq(irq); + spin_unlock_irqrestore(&mpic_lock, flags); +} + +void __init mpic_init(struct mpic *mpic) +{ + int i; + + BUG_ON(mpic->num_sources == 0); + + printk(KERN_INFO "mpic: Initializing for %d sources\n", mpic->num_sources); + + /* Set current processor priority to max */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); + + /* Initialize timers: just disable them all */ + for (i = 0; i < 4; i++) { + mpic_write(mpic->tmregs, + i * MPIC_TIMER_STRIDE + MPIC_TIMER_DESTINATION, 0); + mpic_write(mpic->tmregs, + i * MPIC_TIMER_STRIDE + MPIC_TIMER_VECTOR_PRI, + MPIC_VECPRI_MASK | + (MPIC_VEC_TIMER_0 + i)); + } + + /* Initialize IPIs to our reserved vectors and mark them disabled for now */ + mpic_test_broken_ipi(mpic); + for (i = 0; i < 4; i++) { + mpic_ipi_write(i, + MPIC_VECPRI_MASK | + (10 << MPIC_VECPRI_PRIORITY_SHIFT) | + (MPIC_VEC_IPI_0 + i)); +#ifdef CONFIG_SMP + if (!(mpic->flags & MPIC_PRIMARY)) + continue; + irq_desc[mpic->ipi_offset+i].status |= IRQ_PER_CPU; + irq_desc[mpic->ipi_offset+i].handler = &mpic->hc_ipi; + +#endif /* CONFIG_SMP */ + } + + /* Initialize interrupt sources */ + if (mpic->irq_count == 0) + mpic->irq_count = mpic->num_sources; + +#ifdef CONFIG_MPIC_BROKEN_U3 + /* Do the ioapic fixups on U3 broken mpic */ + DBG("MPIC flags: %x\n", mpic->flags); + if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY)) + mpic_scan_ioapics(mpic); +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + for (i = 0; i < mpic->num_sources; i++) { + /* start with vector = source number, and masked */ + u32 vecpri = MPIC_VECPRI_MASK | i | (8 << MPIC_VECPRI_PRIORITY_SHIFT); + int level = 0; + + /* if it's an IPI, we skip it */ + if ((mpic->irq_offset + i) >= (mpic->ipi_offset + i) && + (mpic->irq_offset + i) < (mpic->ipi_offset + i + 4)) + continue; + + /* do senses munging */ + if (mpic->senses && i < mpic->senses_count) { + if (mpic->senses[i] & IRQ_SENSE_LEVEL) + vecpri |= MPIC_VECPRI_SENSE_LEVEL; + if (mpic->senses[i] & IRQ_POLARITY_POSITIVE) + vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; + } else + vecpri |= MPIC_VECPRI_SENSE_LEVEL; + + /* remember if it was a level interrupts */ + level = (vecpri & MPIC_VECPRI_SENSE_LEVEL); + + /* deal with broken U3 */ + if (mpic->flags & MPIC_BROKEN_U3) { +#ifdef CONFIG_MPIC_BROKEN_U3 + if (mpic_is_ht_interrupt(mpic, i)) { + vecpri &= ~(MPIC_VECPRI_SENSE_MASK | + MPIC_VECPRI_POLARITY_MASK); + vecpri |= MPIC_VECPRI_POLARITY_POSITIVE; + } +#else + printk(KERN_ERR "mpic: BROKEN_U3 set, but CONFIG doesn't match\n"); +#endif + } + + DBG("setup source %d, vecpri: %08x, level: %d\n", i, vecpri, + (level != 0)); + + /* init hw */ + mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri); + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + 1 << hard_smp_processor_id()); + + /* init linux descriptors */ + if (i < mpic->irq_count) { + irq_desc[mpic->irq_offset+i].status = level ? IRQ_LEVEL : 0; + irq_desc[mpic->irq_offset+i].handler = &mpic->hc_irq; + } + } + + /* Init spurrious vector */ + mpic_write(mpic->gregs, MPIC_GREG_SPURIOUS, MPIC_VEC_SPURRIOUS); + + /* Disable 8259 passthrough */ + mpic_write(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0, + mpic_read(mpic->gregs, MPIC_GREG_GLOBAL_CONF_0) + | MPIC_GREG_GCONF_8259_PTHROU_DIS); + + /* Set current processor priority to 0 */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0); +} + + + +void mpic_irq_set_priority(unsigned int irq, unsigned int pri) +{ + int is_ipi; + struct mpic *mpic = mpic_find(irq, &is_ipi); + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&mpic_lock, flags); + if (is_ipi) { + reg = mpic_ipi_read(irq - mpic->ipi_offset) & MPIC_VECPRI_PRIORITY_MASK; + mpic_ipi_write(irq - mpic->ipi_offset, + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + } else { + reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI) + & MPIC_VECPRI_PRIORITY_MASK; + mpic_irq_write(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI, + reg | (pri << MPIC_VECPRI_PRIORITY_SHIFT)); + } + spin_unlock_irqrestore(&mpic_lock, flags); +} + +unsigned int mpic_irq_get_priority(unsigned int irq) +{ + int is_ipi; + struct mpic *mpic = mpic_find(irq, &is_ipi); + unsigned long flags; + u32 reg; + + spin_lock_irqsave(&mpic_lock, flags); + if (is_ipi) + reg = mpic_ipi_read(irq - mpic->ipi_offset); + else + reg = mpic_irq_read(irq - mpic->irq_offset, MPIC_IRQ_VECTOR_PRI); + spin_unlock_irqrestore(&mpic_lock, flags); + return (reg & MPIC_VECPRI_PRIORITY_MASK) >> MPIC_VECPRI_PRIORITY_SHIFT; +} + +void mpic_setup_this_cpu(void) +{ +#ifdef CONFIG_SMP + struct mpic *mpic = mpic_primary; + unsigned long flags; + u32 msk = 1 << hard_smp_processor_id(); + unsigned int i; + + BUG_ON(mpic == NULL); + + DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); + + spin_lock_irqsave(&mpic_lock, flags); + + /* let the mpic know we want intrs. default affinity is 0xffffffff + * until changed via /proc. That's how it's done on x86. If we want + * it differently, then we should make sure we also change the default + * values of irq_affinity in irq.c. + */ + if (distribute_irqs) { + for (i = 0; i < mpic->num_sources ; i++) + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + mpic_irq_read(i, MPIC_IRQ_DESTINATION) | msk); + } + + /* Set current processor priority to 0 */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0); + + spin_unlock_irqrestore(&mpic_lock, flags); +#endif /* CONFIG_SMP */ +} + +int mpic_cpu_get_priority(void) +{ + struct mpic *mpic = mpic_primary; + + return mpic_cpu_read(MPIC_CPU_CURRENT_TASK_PRI); +} + +void mpic_cpu_set_priority(int prio) +{ + struct mpic *mpic = mpic_primary; + + prio &= MPIC_CPU_TASKPRI_MASK; + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, prio); +} + +/* + * XXX: someone who knows mpic should check this. + * do we need to eoi the ipi including for kexec cpu here (see xics comments)? + * or can we reset the mpic in the new kernel? + */ +void mpic_teardown_this_cpu(int secondary) +{ + struct mpic *mpic = mpic_primary; + unsigned long flags; + u32 msk = 1 << hard_smp_processor_id(); + unsigned int i; + + BUG_ON(mpic == NULL); + + DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id()); + spin_lock_irqsave(&mpic_lock, flags); + + /* let the mpic know we don't want intrs. */ + for (i = 0; i < mpic->num_sources ; i++) + mpic_irq_write(i, MPIC_IRQ_DESTINATION, + mpic_irq_read(i, MPIC_IRQ_DESTINATION) & ~msk); + + /* Set current processor priority to max */ + mpic_cpu_write(MPIC_CPU_CURRENT_TASK_PRI, 0xf); + + spin_unlock_irqrestore(&mpic_lock, flags); +} + + +void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no); + + mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10, + mpic_physmask(cpu_mask & cpus_addr(cpu_online_map)[0])); +} + +int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs) +{ + u32 irq; + + irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK; + DBG("%s: get_one_irq(): %d\n", mpic->name, irq); + + if (mpic->cascade && irq == mpic->cascade_vec) { + DBG("%s: cascading ...\n", mpic->name); + irq = mpic->cascade(regs, mpic->cascade_data); + mpic_eoi(mpic); + return irq; + } + if (unlikely(irq == MPIC_VEC_SPURRIOUS)) + return -1; + if (irq < MPIC_VEC_IPI_0) + return irq + mpic->irq_offset; + DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0); + return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset; +} + +int mpic_get_irq(struct pt_regs *regs) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + return mpic_get_one_irq(mpic, regs); +} + + +#ifdef CONFIG_SMP +void mpic_request_ipis(void) +{ + struct mpic *mpic = mpic_primary; + + BUG_ON(mpic == NULL); + + printk("requesting IPIs ... \n"); + + /* IPIs are marked SA_INTERRUPT as they must run with irqs disabled */ + request_irq(mpic->ipi_offset+0, mpic_ipi_action, SA_INTERRUPT, + "IPI0 (call function)", mpic); + request_irq(mpic->ipi_offset+1, mpic_ipi_action, SA_INTERRUPT, + "IPI1 (reschedule)", mpic); + request_irq(mpic->ipi_offset+2, mpic_ipi_action, SA_INTERRUPT, + "IPI2 (unused)", mpic); + request_irq(mpic->ipi_offset+3, mpic_ipi_action, SA_INTERRUPT, + "IPI3 (debugger break)", mpic); + + printk("IPIs requested... \n"); +} +#endif /* CONFIG_SMP */ diff --git a/arch/ppc/kernel/Makefile b/arch/ppc/kernel/Makefile index abf10dcb787..0649540bc7d 100644 --- a/arch/ppc/kernel/Makefile +++ b/arch/ppc/kernel/Makefile @@ -1,6 +1,7 @@ # # Makefile for the linux kernel. # +ifneq ($(CONFIG_PPC_MERGE),y) extra-$(CONFIG_PPC_STD_MMU) := head.o extra-$(CONFIG_40x) := head_4xx.o @@ -15,9 +16,8 @@ extra-y += vmlinux.lds obj-y := entry.o traps.o irq.o idle.o time.o misc.o \ process.o signal.o ptrace.o align.o \ semaphore.o syscalls.o setup.o \ - cputable.o ppc_htab.o + cputable.o ppc_htab.o perfmon.o obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o -obj-$(CONFIG_E500) += perfmon.o obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o obj-$(CONFIG_POWER4) += cpu_setup_power4.o obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o @@ -38,3 +38,23 @@ endif # These are here while we do the architecture merge vecemu-y += ../../powerpc/kernel/vecemu.o + +else +obj-y := entry.o irq.o idle.o time.o misc.o \ + signal.o ptrace.o align.o \ + syscalls.o setup.o \ + cputable.o perfmon.o +obj-$(CONFIG_6xx) += l2cr.o cpu_setup_6xx.o +obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o +obj-$(CONFIG_POWER4) += cpu_setup_power4.o +obj-$(CONFIG_MODULES) += module.o ppc_ksyms.o +obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-mapping.o +obj-$(CONFIG_PCI) += pci.o +obj-$(CONFIG_KGDB) += ppc-stub.o +obj-$(CONFIG_SMP) += smp.o smp-tbsync.o +obj-$(CONFIG_TAU) += temp.o +ifndef CONFIG_E200 +obj-$(CONFIG_FSL_BOOKE) += perfmon_fsl_booke.o +endif +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +endif diff --git a/arch/ppc/kernel/perfmon.c b/arch/ppc/kernel/perfmon.c index 91e2786ea09..f9b27d939f7 100644 --- a/arch/ppc/kernel/perfmon.c +++ b/arch/ppc/kernel/perfmon.c @@ -45,7 +45,7 @@ static void dummy_perf(struct pt_regs *regs) mtpmr(PMRN_PMGC0, pmgc0); } -#else +#elif CONFIG_6xx /* Ensure exceptions are disabled */ static void dummy_perf(struct pt_regs *regs) @@ -55,6 +55,10 @@ static void dummy_perf(struct pt_regs *regs) mmcr0 &= ~MMCR0_PMXE; mtspr(SPRN_MMCR0, mmcr0); } +#else +static void dummy_perf(struct pt_regs *regs) +{ +} #endif void (*perf_irq)(struct pt_regs *) = dummy_perf; diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index 8b06b8e0033..1b891b806f3 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -71,7 +71,8 @@ struct ide_machdep_calls ppc_ide_md; unsigned long boot_mem_size; unsigned long ISA_DMA_THRESHOLD; -unsigned long DMA_MODE_READ, DMA_MODE_WRITE; +unsigned int DMA_MODE_READ; +unsigned int DMA_MODE_WRITE; #ifdef CONFIG_PPC_MULTIPLATFORM int _machine = 0; @@ -82,6 +83,8 @@ extern void pmac_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7); extern void chrp_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7); + +dev_t boot_dev; #endif /* CONFIG_PPC_MULTIPLATFORM */ #ifdef CONFIG_MAGIC_SYSRQ @@ -404,11 +407,13 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5, _machine = _MACH_prep; } +#ifdef CONFIG_PPC_PREP /* not much more to do here, if prep */ if (_machine == _MACH_prep) { prep_init(r3, r4, r5, r6, r7); return; } +#endif /* prom_init has already been called from __start */ if (boot_infos) @@ -479,12 +484,16 @@ platform_init(unsigned long r3, unsigned long r4, unsigned long r5, #endif /* CONFIG_ADB */ switch (_machine) { +#ifdef CONFIG_PPC_PMAC case _MACH_Pmac: pmac_init(r3, r4, r5, r6, r7); break; +#endif +#ifdef CONFIG_PPC_CHRP case _MACH_chrp: chrp_init(r3, r4, r5, r6, r7); break; +#endif } } diff --git a/arch/ppc/platforms/pmac_setup.c b/arch/ppc/platforms/pmac_setup.c index 12cbc85555d..1ad779ecc8f 100644 --- a/arch/ppc/platforms/pmac_setup.c +++ b/arch/ppc/platforms/pmac_setup.c @@ -719,7 +719,8 @@ pmac_declare_of_platform_devices(void) if (np) { for (np = np->child; np != NULL; np = np->sibling) if (strncmp(np->name, "i2c", 3) == 0) { - of_platform_device_create(np, "uni-n-i2c"); + of_platform_device_create(np, "uni-n-i2c", + NULL); break; } } @@ -727,17 +728,18 @@ pmac_declare_of_platform_devices(void) if (np) { for (np = np->child; np != NULL; np = np->sibling) if (strncmp(np->name, "i2c", 3) == 0) { - of_platform_device_create(np, "u3-i2c"); + of_platform_device_create(np, "u3-i2c", + NULL); break; } } np = find_devices("valkyrie"); if (np) - of_platform_device_create(np, "valkyrie"); + of_platform_device_create(np, "valkyrie", NULL); np = find_devices("platinum"); if (np) - of_platform_device_create(np, "platinum"); + of_platform_device_create(np, "platinum", NULL); return 0; } diff --git a/arch/ppc/platforms/prep_setup.c b/arch/ppc/platforms/prep_setup.c index fccafbcd4b5..8bc734fe668 100644 --- a/arch/ppc/platforms/prep_setup.c +++ b/arch/ppc/platforms/prep_setup.c @@ -89,9 +89,6 @@ extern void prep_tiger1_setup_pci(char *irq_edge_mask_lo, char *irq_edge_mask_hi #define cached_21 (((char *)(ppc_cached_irq_mask))[3]) #define cached_A1 (((char *)(ppc_cached_irq_mask))[2]) -/* for the mac fs */ -dev_t boot_dev; - #ifdef CONFIG_SOUND_CS4232 long ppc_cs4232_dma, ppc_cs4232_dma2; #endif diff --git a/arch/ppc/syslib/Makefile b/arch/ppc/syslib/Makefile index b8d08f33f7e..1b0a84931af 100644 --- a/arch/ppc/syslib/Makefile +++ b/arch/ppc/syslib/Makefile @@ -5,6 +5,7 @@ CFLAGS_prom_init.o += -fPIC CFLAGS_btext.o += -fPIC +ifneq ($(CONFIG_PPC_MERGE),y) wdt-mpc8xx-$(CONFIG_8xx_WDT) += m8xx_wdt.o obj-$(CONFIG_PPCBUG_NVRAM) += prep_nvram.o @@ -109,3 +110,16 @@ obj-$(CONFIG_PPC_MPC52xx) += mpc52xx_setup.o mpc52xx_pic.o \ ifeq ($(CONFIG_PPC_MPC52xx),y) obj-$(CONFIG_PCI) += mpc52xx_pci.o endif + +else +# Stuff still needed by the merged powerpc sources + +obj-$(CONFIG_PPCBUG_NVRAM) += prep_nvram.o +obj-$(CONFIG_PPC_OF) += prom_init.o prom.o of_device.o +obj-$(CONFIG_PPC_PMAC) += indirect_pci.o +obj-$(CONFIG_PPC_CHRP) += indirect_pci.o i8259.o +obj-$(CONFIG_PPC_PREP) += indirect_pci.o i8259.o todc_time.o +obj-$(CONFIG_BOOTX_TEXT) += btext.o +obj-$(CONFIG_MPC10X_BRIDGE) += mpc10x_common.o indirect_pci.o ppc_sys.o + +endif diff --git a/arch/ppc/syslib/of_device.c b/arch/ppc/syslib/of_device.c index da8a0f2128d..93c7231ea70 100644 --- a/arch/ppc/syslib/of_device.c +++ b/arch/ppc/syslib/of_device.c @@ -234,7 +234,9 @@ void of_device_unregister(struct of_device *ofdev) device_unregister(&ofdev->dev); } -struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) +struct of_device* of_platform_device_create(struct device_node *np, + const char *bus_id, + struct device *parent) { struct of_device *dev; u32 *reg; @@ -247,7 +249,7 @@ struct of_device* of_platform_device_create(struct device_node *np, const char * dev->node = of_node_get(np); dev->dma_mask = 0xffffffffUL; dev->dev.dma_mask = &dev->dma_mask; - dev->dev.parent = NULL; + dev->dev.parent = parent; dev->dev.bus = &of_platform_bus_type; dev->dev.release = of_release_dev; diff --git a/arch/ppc/syslib/ppc85xx_setup.c b/arch/ppc/syslib/ppc85xx_setup.c index b7242f1bd93..832b8bf99ae 100644 --- a/arch/ppc/syslib/ppc85xx_setup.c +++ b/arch/ppc/syslib/ppc85xx_setup.c @@ -184,8 +184,8 @@ mpc85xx_setup_pci1(struct pci_controller *hose) pci->powar1 = 0x80044000 | (__ilog2(MPC85XX_PCI1_UPPER_MEM - MPC85XX_PCI1_LOWER_MEM + 1) - 1); - /* Setup outboud IO windows @ MPC85XX_PCI1_IO_BASE */ - pci->potar2 = 0x00000000; + /* Setup outbound IO windows @ MPC85XX_PCI1_IO_BASE */ + pci->potar2 = (MPC85XX_PCI1_LOWER_IO >> 12) & 0x000fffff; pci->potear2 = 0x00000000; pci->powbar2 = (MPC85XX_PCI1_IO_BASE >> 12) & 0x000fffff; /* Enable, IO R/W */ @@ -235,8 +235,8 @@ mpc85xx_setup_pci2(struct pci_controller *hose) pci->powar1 = 0x80044000 | (__ilog2(MPC85XX_PCI2_UPPER_MEM - MPC85XX_PCI2_LOWER_MEM + 1) - 1); - /* Setup outboud IO windows @ MPC85XX_PCI2_IO_BASE */ - pci->potar2 = 0x00000000; + /* Setup outbound IO windows @ MPC85XX_PCI2_IO_BASE */ + pci->potar2 = (MPC85XX_PCI2_LOWER_IO >> 12) & 0x000fffff;; pci->potear2 = 0x00000000; pci->powbar2 = (MPC85XX_PCI2_IO_BASE >> 12) & 0x000fffff; /* Enable, IO R/W */ diff --git a/arch/ppc64/Makefile b/arch/ppc64/Makefile index a8877881bb4..40675b3f924 100644 --- a/arch/ppc64/Makefile +++ b/arch/ppc64/Makefile @@ -107,7 +107,7 @@ install: vmlinux $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ defaultimage-$(CONFIG_PPC_PSERIES) := zImage -defaultimage-$(CONFIG_PPC_PMAC) := vmlinux +defaultimage-$(CONFIG_PPC_PMAC) := zImage.vmode defaultimage-$(CONFIG_PPC_MAPLE) := zImage defaultimage-$(CONFIG_PPC_ISERIES) := vmlinux KBUILD_IMAGE := $(defaultimage-y) diff --git a/arch/ppc64/kernel/of_device.c b/arch/ppc64/kernel/of_device.c index da580812ddf..9f200f0f2ad 100644 --- a/arch/ppc64/kernel/of_device.c +++ b/arch/ppc64/kernel/of_device.c @@ -233,7 +233,9 @@ void of_device_unregister(struct of_device *ofdev) device_unregister(&ofdev->dev); } -struct of_device* of_platform_device_create(struct device_node *np, const char *bus_id) +struct of_device* of_platform_device_create(struct device_node *np, + const char *bus_id, + struct device *parent) { struct of_device *dev; @@ -245,7 +247,7 @@ struct of_device* of_platform_device_create(struct device_node *np, const char * dev->node = np; dev->dma_mask = 0xffffffffUL; dev->dev.dma_mask = &dev->dma_mask; - dev->dev.parent = NULL; + dev->dev.parent = parent; dev->dev.bus = &of_platform_bus_type; dev->dev.release = of_release_dev; @@ -259,6 +261,7 @@ struct of_device* of_platform_device_create(struct device_node *np, const char * return dev; } + EXPORT_SYMBOL(of_match_device); EXPORT_SYMBOL(of_platform_bus_type); EXPORT_SYMBOL(of_register_driver); diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c index 2b5e622732f..5914f61a152 100644 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ b/arch/ppc64/kernel/pSeries_iommu.c @@ -281,8 +281,10 @@ static void iommu_table_setparms(struct pci_controller *phb, tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; /* Test if we are going over 2GB of DMA space */ - if (phb->dma_window_base_cur + phb->dma_window_size > (1L << 31)) + if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { + udbg_printf("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); panic("PCI_DMA: Unexpected number of IOAs under this PHB.\n"); + } phb->dma_window_base_cur += phb->dma_window_size; @@ -326,92 +328,85 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, static void iommu_bus_setup_pSeries(struct pci_bus *bus) { - struct device_node *dn, *pdn; - struct pci_dn *pci; + struct device_node *dn; struct iommu_table *tbl; + struct device_node *isa_dn, *isa_dn_orig; + struct device_node *tmp; + struct pci_dn *pci; + int children; DBG("iommu_bus_setup_pSeries, bus %p, bus->self %p\n", bus, bus->self); - /* For each (root) bus, we carve up the available DMA space in 256MB - * pieces. Since each piece is used by one (sub) bus/device, that would - * give a maximum of 7 devices per PHB. In most cases, this is plenty. - * - * The exception is on Python PHBs (pre-POWER4). Here we don't have EADS - * bridges below the PHB to allocate the sectioned tables to, so instead - * we allocate a 1GB table at the PHB level. + dn = pci_bus_to_OF_node(bus); + pci = PCI_DN(dn); + + if (bus->self) { + /* This is not a root bus, any setup will be done for the + * device-side of the bridge in iommu_dev_setup_pSeries(). + */ + return; + } + + /* Check if the ISA bus on the system is under + * this PHB. */ + isa_dn = isa_dn_orig = of_find_node_by_type(NULL, "isa"); - dn = pci_bus_to_OF_node(bus); - pci = dn->data; - - if (!bus->self) { - /* Root bus */ - if (is_python(dn)) { - unsigned int *iohole; - - DBG("Python root bus %s\n", bus->name); - - iohole = (unsigned int *)get_property(dn, "io-hole", 0); - - if (iohole) { - /* On first bus we need to leave room for the - * ISA address space. Just skip the first 256MB - * alltogether. This leaves 768MB for the window. - */ - DBG("PHB has io-hole, reserving 256MB\n"); - pci->phb->dma_window_size = 3 << 28; - pci->phb->dma_window_base_cur = 1 << 28; - } else { - /* 1GB window by default */ - pci->phb->dma_window_size = 1 << 30; - pci->phb->dma_window_base_cur = 0; - } - - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - iommu_table_setparms(pci->phb, dn, tbl); - pci->iommu_table = iommu_init_table(tbl); - } else { - /* Do a 128MB table at root. This is used for the IDE - * controller on some SMP-mode POWER4 machines. It - * doesn't hurt to allocate it on other machines - * -- it'll just be unused since new tables are - * allocated on the EADS level. - * - * Allocate at offset 128MB to avoid having to deal - * with ISA holes; 128MB table for IDE is plenty. - */ - pci->phb->dma_window_size = 1 << 27; - pci->phb->dma_window_base_cur = 1 << 27; - - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - iommu_table_setparms(pci->phb, dn, tbl); - pci->iommu_table = iommu_init_table(tbl); - - /* All child buses have 256MB tables */ - pci->phb->dma_window_size = 1 << 28; - } - } else { - pdn = pci_bus_to_OF_node(bus->parent); + while (isa_dn && isa_dn != dn) + isa_dn = isa_dn->parent; + + if (isa_dn_orig) + of_node_put(isa_dn_orig); - if (!bus->parent->self && !is_python(pdn)) { - struct iommu_table *tbl; - /* First child and not python means this is the EADS - * level. Allocate new table for this slot with 256MB - * window. - */ + /* Count number of direct PCI children of the PHB. + * All PCI device nodes have class-code property, so it's + * an easy way to find them. + */ + for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) + if (get_property(tmp, "class-code", NULL)) + children++; - tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + DBG("Children: %d\n", children); - iommu_table_setparms(pci->phb, dn, tbl); + /* Calculate amount of DMA window per slot. Each window must be + * a power of two (due to pci_alloc_consistent requirements). + * + * Keep 256MB aside for PHBs with ISA. + */ - pci->iommu_table = iommu_init_table(tbl); - } else { - /* Lower than first child or under python, use parent table */ - pci->iommu_table = PCI_DN(pdn)->iommu_table; - } + if (!isa_dn) { + /* No ISA/IDE - just set window size and return */ + pci->phb->dma_window_size = 0x80000000ul; /* To be divided */ + + while (pci->phb->dma_window_size * children > 0x80000000ul) + pci->phb->dma_window_size >>= 1; + DBG("No ISA/IDE, window size is 0x%lx\n", + pci->phb->dma_window_size); + pci->phb->dma_window_base_cur = 0; + + return; } + + /* If we have ISA, then we probably have an IDE + * controller too. Allocate a 128MB table but + * skip the first 128MB to avoid stepping on ISA + * space. + */ + pci->phb->dma_window_size = 0x8000000ul; + pci->phb->dma_window_base_cur = 0x8000000ul; + + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + iommu_table_setparms(pci->phb, dn, tbl); + pci->iommu_table = iommu_init_table(tbl); + + /* Divide the rest (1.75GB) among the children */ + pci->phb->dma_window_size = 0x80000000ul; + while (pci->phb->dma_window_size * children > 0x70000000ul) + pci->phb->dma_window_size >>= 1; + + DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); + } @@ -462,21 +457,36 @@ static void iommu_bus_setup_pSeriesLP(struct pci_bus *bus) static void iommu_dev_setup_pSeries(struct pci_dev *dev) { struct device_node *dn, *mydn; + struct iommu_table *tbl; + + DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, pci_name(dev)); - DBG("iommu_dev_setup_pSeries, dev %p (%s)\n", dev, dev->pretty_name); - /* Now copy the iommu_table ptr from the bus device down to the - * pci device_node. This means get_iommu_table() won't need to search - * up the device tree to find it. - */ mydn = dn = pci_device_to_OF_node(dev); + /* If we're the direct child of a root bus, then we need to allocate + * an iommu table ourselves. The bus setup code should have setup + * the window sizes already. + */ + if (!dev->bus->self) { + DBG(" --> first child, no bridge. Allocating iommu table.\n"); + tbl = kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + iommu_table_setparms(PCI_DN(dn)->phb, dn, tbl); + PCI_DN(mydn)->iommu_table = iommu_init_table(tbl); + + return; + } + + /* If this device is further down the bus tree, search upwards until + * an already allocated iommu table is found and use that. + */ + while (dn && dn->data && PCI_DN(dn)->iommu_table == NULL) dn = dn->parent; if (dn && dn->data) { PCI_DN(mydn)->iommu_table = PCI_DN(dn)->iommu_table; } else { - DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, dev->pretty_name); + DBG("iommu_dev_setup_pSeries, dev %p (%s) has no iommu table\n", dev, pci_name(dev)); } } @@ -510,7 +520,7 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) int *dma_window = NULL; struct pci_dn *pci; - DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, dev->pretty_name); + DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev)); /* dev setup for LPAR is a little tricky, since the device tree might * contain the dma-window properties per-device and not neccesarily @@ -532,9 +542,8 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) * slots on POWER4 machines. */ if (dma_window == NULL || pdn->parent == NULL) { - /* Fall back to regular (non-LPAR) dev setup */ - DBG("No dma window for device, falling back to regular setup\n"); - iommu_dev_setup_pSeries(dev); + DBG("No dma window for device, linking to parent\n"); + PCI_DN(dn)->iommu_table = PCI_DN(pdn)->iommu_table; return; } else { DBG("Found DMA window, allocating table\n"); diff --git a/arch/ppc64/kernel/pci.c b/arch/ppc64/kernel/pci.c index 861138ad092..ff4be1da69d 100644 --- a/arch/ppc64/kernel/pci.c +++ b/arch/ppc64/kernel/pci.c @@ -246,11 +246,14 @@ static unsigned int pci_parse_of_flags(u32 addr0) unsigned int flags = 0; if (addr0 & 0x02000000) { - flags |= IORESOURCE_MEM; + flags = IORESOURCE_MEM | PCI_BASE_ADDRESS_SPACE_MEMORY; + flags |= (addr0 >> 22) & PCI_BASE_ADDRESS_MEM_TYPE_64; + flags |= (addr0 >> 28) & PCI_BASE_ADDRESS_MEM_TYPE_1M; if (addr0 & 0x40000000) - flags |= IORESOURCE_PREFETCH; + flags |= IORESOURCE_PREFETCH + | PCI_BASE_ADDRESS_MEM_PREFETCH; } else if (addr0 & 0x01000000) - flags |= IORESOURCE_IO; + flags = IORESOURCE_IO | PCI_BASE_ADDRESS_SPACE_IO; return flags; } diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index 1f5118078f7..bb0c3bfbb7e 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -434,15 +434,23 @@ static int pmac_check_legacy_ioport(unsigned int baseport) static int __init pmac_declare_of_platform_devices(void) { - struct device_node *np; + struct device_node *np, *npp; - np = find_devices("u3"); - if (np) { - for (np = np->child; np != NULL; np = np->sibling) + npp = of_find_node_by_name(NULL, "u3"); + if (npp) { + for (np = NULL; (np = of_get_next_child(npp, np)) != NULL;) { if (strncmp(np->name, "i2c", 3) == 0) { - of_platform_device_create(np, "u3-i2c"); + of_platform_device_create(np, "u3-i2c", NULL); + of_node_put(np); break; } + } + of_node_put(npp); + } + npp = of_find_node_by_type(NULL, "smu"); + if (npp) { + of_platform_device_create(npp, "smu", NULL); + of_node_put(npp); } return 0; diff --git a/arch/ppc64/kernel/pmac_time.c b/arch/ppc64/kernel/pmac_time.c index 6c8c99295e7..9d8c97decd3 100644 --- a/arch/ppc64/kernel/pmac_time.c +++ b/arch/ppc64/kernel/pmac_time.c @@ -84,7 +84,7 @@ void pmac_get_rtc_time(struct rtc_time *tm) #ifdef CONFIG_PMAC_SMU case SYS_CTRLER_SMU: - smu_get_rtc_time(tm); + smu_get_rtc_time(tm, 1); break; #endif /* CONFIG_PMAC_SMU */ default: @@ -128,7 +128,7 @@ int pmac_set_rtc_time(struct rtc_time *tm) #ifdef CONFIG_PMAC_SMU case SYS_CTRLER_SMU: - return smu_set_rtc_time(tm); + return smu_set_rtc_time(tm, 1); #endif /* CONFIG_PMAC_SMU */ default: return -ENODEV; diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index 9979919cdf9..f252670874a 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -1711,6 +1711,7 @@ static void __init flatten_device_tree(void) unsigned long offset = reloc_offset(); unsigned long mem_start, mem_end, room; struct boot_param_header *hdr; + struct prom_t *_prom = PTRRELOC(&prom); char *namep; u64 *rsvmap; @@ -1765,6 +1766,7 @@ static void __init flatten_device_tree(void) RELOC(dt_struct_end) = PAGE_ALIGN(mem_start); /* Finish header */ + hdr->boot_cpuid_phys = _prom->cpu; hdr->magic = OF_DT_HEADER; hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); @@ -1854,7 +1856,6 @@ static void __init prom_find_boot_cpu(void) cpu_pkg = call_prom("instance-to-package", 1, 1, prom_cpu); - prom_setprop(cpu_pkg, "linux,boot-cpu", NULL, 0); prom_getprop(cpu_pkg, "reg", &getprop_rval, sizeof(getprop_rval)); _prom->cpu = getprop_rval; diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c index 85ed3188a91..b1c044ca575 100644 --- a/arch/ppc64/kernel/ptrace.c +++ b/arch/ppc64/kernel/ptrace.c @@ -219,6 +219,7 @@ int sys_ptrace(long request, long pid, long addr, long data) case PTRACE_SET_DEBUGREG: ret = ptrace_set_debugreg(child, addr, data); + break; case PTRACE_DETACH: ret = ptrace_detach(child, data); diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index 29b074505d3..874cf96938f 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -342,15 +342,14 @@ static void native_flush_hash_range(unsigned long number, int local) hpte_t *hptep; unsigned long hpte_v; struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch); - - /* XXX fix for large ptes */ - unsigned long large = 0; + unsigned long large; local_irq_save(flags); j = 0; for (i = 0; i < number; i++) { va = batch->vaddr[j]; + large = pte_huge(batch->pte[i]); if (large) vpn = va >> HPAGE_SHIFT; else diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index 338771ec70d..0ea0994ed97 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c @@ -710,10 +710,13 @@ repeat: hpte_group = ((~hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; slot = ppc_md.hpte_insert(hpte_group, va, prpn, - HPTE_V_LARGE, rflags); + HPTE_V_LARGE | + HPTE_V_SECONDARY, + rflags); if (slot == -1) { if (mftb() & 0x1) - hpte_group = ((hash & htab_hash_mask) * HPTES_PER_GROUP) & ~0x7UL; + hpte_group = ((hash & htab_hash_mask) * + HPTES_PER_GROUP)&~0x7UL; ppc_md.hpte_remove(hpte_group); goto repeat; diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 3e0badb820c..b4834952785 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -42,19 +42,15 @@ * executing (see inherit_locked_prom_mappings() rant). */ sparc64_vpte_nucleus: - /* Load 0xf0000000, which is LOW_OBP_ADDRESS. */ - mov 0xf, %g5 - sllx %g5, 28, %g5 - - /* Is addr >= LOW_OBP_ADDRESS? */ + /* Note that kvmap below has verified that the address is + * in the range MODULES_VADDR --> VMALLOC_END already. So + * here we need only check if it is an OBP address or not. + */ + sethi %hi(LOW_OBP_ADDRESS), %g5 cmp %g4, %g5 blu,pn %xcc, sparc64_vpte_patchme1 mov 0x1, %g5 - - /* Load 0x100000000, which is HI_OBP_ADDRESS. */ sllx %g5, 32, %g5 - - /* Is addr < HI_OBP_ADDRESS? */ cmp %g4, %g5 blu,pn %xcc, obp_iaddr_patch nop @@ -156,26 +152,29 @@ obp_daddr_patch: * rather, use information saved during inherit_prom_mappings() using 8k * pagesize. */ + .align 32 kvmap: - /* Load 0xf0000000, which is LOW_OBP_ADDRESS. */ - mov 0xf, %g5 - sllx %g5, 28, %g5 + sethi %hi(MODULES_VADDR), %g5 + cmp %g4, %g5 + blu,pn %xcc, longpath + mov (VMALLOC_END >> 24), %g5 + sllx %g5, 24, %g5 + cmp %g4, %g5 + bgeu,pn %xcc, longpath + nop - /* Is addr >= LOW_OBP_ADDRESS? */ +kvmap_check_obp: + sethi %hi(LOW_OBP_ADDRESS), %g5 cmp %g4, %g5 - blu,pn %xcc, vmalloc_addr + blu,pn %xcc, kvmap_vmalloc_addr mov 0x1, %g5 - - /* Load 0x100000000, which is HI_OBP_ADDRESS. */ sllx %g5, 32, %g5 - - /* Is addr < HI_OBP_ADDRESS? */ cmp %g4, %g5 blu,pn %xcc, obp_daddr_patch nop -vmalloc_addr: - /* If we get here, a vmalloc addr accessed, load kernel VPTE. */ +kvmap_vmalloc_addr: + /* If we get here, a vmalloc addr was accessed, load kernel VPTE. */ ldxa [%g3 + %g6] ASI_N, %g5 brgez,pn %g5, longpath nop diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c index 23ad839d113..5efbff90d66 100644 --- a/arch/sparc64/kernel/ptrace.c +++ b/arch/sparc64/kernel/ptrace.c @@ -30,6 +30,7 @@ #include <asm/psrcompat.h> #include <asm/visasm.h> #include <asm/spitfire.h> +#include <asm/page.h> /* Returning from ptrace is a bit tricky because the syscall return * low level code assumes any value returned which is negative and @@ -128,20 +129,20 @@ void flush_ptrace_access(struct vm_area_struct *vma, struct page *page, * is mapped to in the user's address space, we can skip the * D-cache flush. */ - if ((uaddr ^ kaddr) & (1UL << 13)) { + if ((uaddr ^ (unsigned long) kaddr) & (1UL << 13)) { unsigned long start = __pa(kaddr); unsigned long end = start + len; if (tlb_type == spitfire) { for (; start < end; start += 32) - spitfire_put_dcache_tag(va & 0x3fe0, 0x0); + spitfire_put_dcache_tag(start & 0x3fe0, 0x0); } else { for (; start < end; start += 32) __asm__ __volatile__( "stxa %%g0, [%0] %1\n\t" "membar #Sync" : /* no outputs */ - : "r" (va), + : "r" (start), "i" (ASI_DCACHE_INVALIDATE)); } } diff --git a/arch/sparc64/kernel/una_asm.S b/arch/sparc64/kernel/una_asm.S index cbb40585253..da48400bcc9 100644 --- a/arch/sparc64/kernel/una_asm.S +++ b/arch/sparc64/kernel/una_asm.S @@ -17,7 +17,7 @@ kernel_unaligned_trap_fault: __do_int_store: rd %asi, %o4 wr %o3, 0, %asi - ldx [%o2], %g3 + mov %o2, %g3 cmp %o1, 2 be,pn %icc, 2f cmp %o1, 4 diff --git a/arch/sparc64/kernel/unaligned.c b/arch/sparc64/kernel/unaligned.c index da9739f0d43..42718f6a7d3 100644 --- a/arch/sparc64/kernel/unaligned.c +++ b/arch/sparc64/kernel/unaligned.c @@ -184,13 +184,14 @@ extern void do_int_load(unsigned long *dest_reg, int size, unsigned long *saddr, int is_signed, int asi); extern void __do_int_store(unsigned long *dst_addr, int size, - unsigned long *src_val, int asi); + unsigned long src_val, int asi); static inline void do_int_store(int reg_num, int size, unsigned long *dst_addr, - struct pt_regs *regs, int asi) + struct pt_regs *regs, int asi, int orig_asi) { unsigned long zero = 0; - unsigned long *src_val = &zero; + unsigned long *src_val_p = &zero; + unsigned long src_val; if (size == 16) { size = 8; @@ -198,7 +199,25 @@ static inline void do_int_store(int reg_num, int size, unsigned long *dst_addr, (unsigned)fetch_reg(reg_num, regs) : 0)) << 32) | (unsigned)fetch_reg(reg_num + 1, regs); } else if (reg_num) { - src_val = fetch_reg_addr(reg_num, regs); + src_val_p = fetch_reg_addr(reg_num, regs); + } + src_val = *src_val_p; + if (unlikely(asi != orig_asi)) { + switch (size) { + case 2: + src_val = swab16(src_val); + break; + case 4: + src_val = swab32(src_val); + break; + case 8: + src_val = swab64(src_val); + break; + case 16: + default: + BUG(); + break; + }; } __do_int_store(dst_addr, size, src_val, asi); } @@ -276,6 +295,7 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u kernel_mna_trap_fault(); } else { unsigned long addr; + int orig_asi, asi; addr = compute_effective_address(regs, insn, ((insn >> 25) & 0x1f)); @@ -285,18 +305,48 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn, u regs->tpc, dirstrings[dir], addr, size, regs->u_regs[UREG_RETPC]); #endif + orig_asi = asi = decode_asi(insn, regs); + switch (asi) { + case ASI_NL: + case ASI_AIUPL: + case ASI_AIUSL: + case ASI_PL: + case ASI_SL: + case ASI_PNFL: + case ASI_SNFL: + asi &= ~0x08; + break; + }; switch (dir) { case load: do_int_load(fetch_reg_addr(((insn>>25)&0x1f), regs), size, (unsigned long *) addr, - decode_signedness(insn), - decode_asi(insn, regs)); + decode_signedness(insn), asi); + if (unlikely(asi != orig_asi)) { + unsigned long val_in = *(unsigned long *) addr; + switch (size) { + case 2: + val_in = swab16(val_in); + break; + case 4: + val_in = swab32(val_in); + break; + case 8: + val_in = swab64(val_in); + break; + case 16: + default: + BUG(); + break; + }; + *(unsigned long *) addr = val_in; + } break; case store: do_int_store(((insn>>25)&0x1f), size, (unsigned long *) addr, regs, - decode_asi(insn, regs)); + asi, orig_asi); break; default: diff --git a/arch/um/Makefile b/arch/um/Makefile index ce987266dac..5b5af95721a 100644 --- a/arch/um/Makefile +++ b/arch/um/Makefile @@ -53,9 +53,13 @@ SYS_DIR := $(ARCH_DIR)/include/sysdep-$(SUBARCH) # -Dvmap=kernel_vmap affects everything, and prevents anything from # referencing the libpcap.o symbol so named. +# +# Same things for in6addr_loopback - found in libc. CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \ - $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap + $(ARCH_INCLUDE) $(MODE_INCLUDE) -Dvmap=kernel_vmap \ + -Din6addr_loopback=kernel_in6addr_loopback + AFLAGS += $(ARCH_INCLUDE) USER_CFLAGS := $(patsubst -I%,,$(CFLAGS)) diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c index 14a12d6b3df..16e7dc89f61 100644 --- a/arch/um/drivers/chan_kern.c +++ b/arch/um/drivers/chan_kern.c @@ -19,18 +19,44 @@ #include "line.h" #include "os.h" -#ifdef CONFIG_NOCONFIG_CHAN +/* XXX: could well be moved to somewhere else, if needed. */ +static int my_printf(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +static int my_printf(const char * fmt, ...) +{ + /* Yes, can be called on atomic context.*/ + char *buf = kmalloc(4096, GFP_ATOMIC); + va_list args; + int r; + + if (!buf) { + /* We print directly fmt. + * Yes, yes, yes, feel free to complain. */ + r = strlen(fmt); + } else { + va_start(args, fmt); + r = vsprintf(buf, fmt, args); + va_end(args); + fmt = buf; + } -/* The printk's here are wrong because we are complaining that there is no - * output device, but printk is printing to that output device. The user will - * never see the error. printf would be better, except it can't run on a - * kernel stack because it will overflow it. - * Use printk for now since that will avoid crashing. - */ + if (r) + r = os_write_file(1, fmt, r); + return r; + +} + +#ifdef CONFIG_NOCONFIG_CHAN +/* Despite its name, there's no added trailing newline. */ +static int my_puts(const char * buf) +{ + return os_write_file(1, buf, strlen(buf)); +} static void *not_configged_init(char *str, int device, struct chan_opts *opts) { - printk(KERN_ERR "Using a channel type which is configured out of " + my_puts("Using a channel type which is configured out of " "UML\n"); return(NULL); } @@ -38,27 +64,27 @@ static void *not_configged_init(char *str, int device, struct chan_opts *opts) static int not_configged_open(int input, int output, int primary, void *data, char **dev_out) { - printk(KERN_ERR "Using a channel type which is configured out of " + my_puts("Using a channel type which is configured out of " "UML\n"); return(-ENODEV); } static void not_configged_close(int fd, void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + my_puts("Using a channel type which is configured out of " "UML\n"); } static int not_configged_read(int fd, char *c_out, void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + my_puts("Using a channel type which is configured out of " "UML\n"); return(-EIO); } static int not_configged_write(int fd, const char *buf, int len, void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + my_puts("Using a channel type which is configured out of " "UML\n"); return(-EIO); } @@ -66,7 +92,7 @@ static int not_configged_write(int fd, const char *buf, int len, void *data) static int not_configged_console_write(int fd, const char *buf, int len, void *data) { - printk(KERN_ERR "Using a channel type which is configured out of " + my_puts("Using a channel type which is configured out of " "UML\n"); return(-EIO); } @@ -74,14 +100,14 @@ static int not_configged_console_write(int fd, const char *buf, int len, static int not_configged_window_size(int fd, void *data, unsigned short *rows, unsigned short *cols) { - printk(KERN_ERR "Using a channel type which is configured out of " + my_puts("Using a channel type which is configured out of " "UML\n"); return(-ENODEV); } static void not_configged_free(void *data) { - printf(KERN_ERR "Using a channel type which is configured out of " + my_puts("Using a channel type which is configured out of " "UML\n"); } @@ -457,7 +483,7 @@ static struct chan *parse_chan(char *str, int pri, int device, } } if(ops == NULL){ - printk(KERN_ERR "parse_chan couldn't parse \"%s\"\n", + my_printf("parse_chan couldn't parse \"%s\"\n", str); return(NULL); } @@ -465,7 +491,7 @@ static struct chan *parse_chan(char *str, int pri, int device, data = (*ops->init)(str, device, opts); if(data == NULL) return(NULL); - chan = kmalloc(sizeof(*chan), GFP_KERNEL); + chan = kmalloc(sizeof(*chan), GFP_ATOMIC); if(chan == NULL) return(NULL); *chan = ((struct chan) { .list = LIST_HEAD_INIT(chan->list), .primary = 1, diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index 310c1f823f2..04383f98f4d 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -23,7 +23,7 @@ static struct mconsole_command commands[] = { { "reboot", mconsole_reboot, MCONSOLE_PROC }, { "config", mconsole_config, MCONSOLE_PROC }, { "remove", mconsole_remove, MCONSOLE_PROC }, - { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, + { "sysrq", mconsole_sysrq, MCONSOLE_PROC }, { "help", mconsole_help, MCONSOLE_INTR }, { "cad", mconsole_cad, MCONSOLE_INTR }, { "stop", mconsole_stop, MCONSOLE_PROC }, diff --git a/arch/um/include/common-offsets.h b/arch/um/include/common-offsets.h index 0aa620970ad..782ac3a3baf 100644 --- a/arch/um/include/common-offsets.h +++ b/arch/um/include/common-offsets.h @@ -12,4 +12,6 @@ DEFINE_STR(UM_KERN_WARNING, KERN_WARNING); DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE); DEFINE_STR(UM_KERN_INFO, KERN_INFO); DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG); -DEFINE(HOST_ELF_CLASS, ELF_CLASS); +DEFINE(UM_ELF_CLASS, ELF_CLASS); +DEFINE(UM_ELFCLASS32, ELFCLASS32); +DEFINE(UM_ELFCLASS64, ELFCLASS64); diff --git a/arch/um/include/user.h b/arch/um/include/user.h index 57ee9e26122..0f865ef4691 100644 --- a/arch/um/include/user.h +++ b/arch/um/include/user.h @@ -14,7 +14,9 @@ extern void *um_kmalloc_atomic(int size); extern void kfree(void *ptr); extern int in_aton(char *str); extern int open_gdb_chan(void); -extern int strlcpy(char *, const char *, int); +/* These use size_t, however unsigned long is correct on both i386 and x86_64. */ +extern unsigned long strlcpy(char *, const char *, unsigned long); +extern unsigned long strlcat(char *, const char *, unsigned long); extern void *um_vmalloc(int size); extern void vfree(void *ptr); diff --git a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c index 39cf568ccfa..ea65db679e9 100644 --- a/arch/um/kernel/process_kern.c +++ b/arch/um/kernel/process_kern.c @@ -82,7 +82,8 @@ unsigned long alloc_stack(int order, int atomic) unsigned long page; int flags = GFP_KERNEL; - if(atomic) flags |= GFP_ATOMIC; + if (atomic) + flags = GFP_ATOMIC; page = __get_free_pages(flags, order); if(page == 0) return(0); diff --git a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c index e89218958f3..a52751108aa 100644 --- a/arch/um/kernel/sigio_user.c +++ b/arch/um/kernel/sigio_user.c @@ -340,7 +340,7 @@ static int setup_initial_poll(int fd) { struct pollfd *p; - p = um_kmalloc(sizeof(struct pollfd)); + p = um_kmalloc_atomic(sizeof(struct pollfd)); if(p == NULL){ printk("setup_initial_poll : failed to allocate poll\n"); return(-1); diff --git a/arch/um/kernel/skas/include/uaccess-skas.h b/arch/um/kernel/skas/include/uaccess-skas.h index 6ee3f3902e6..7da0c2def0e 100644 --- a/arch/um/kernel/skas/include/uaccess-skas.h +++ b/arch/um/kernel/skas/include/uaccess-skas.h @@ -18,12 +18,6 @@ ((unsigned long) (addr) + (size) <= FIXADDR_USER_END) && \ ((unsigned long) (addr) + (size) >= (unsigned long)(addr)))) -static inline int verify_area_skas(int type, const void __user * addr, - unsigned long size) -{ - return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); -} - extern int copy_from_user_skas(void *to, const void __user *from, int n); extern int copy_to_user_skas(void __user *to, const void *from, int n); extern int strncpy_from_user_skas(char *dst, const char __user *src, int count); diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 0a562c3c0fd..f5b0636f9ad 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -193,12 +193,12 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, r = pte_read(*npte); w = pte_write(*npte); x = pte_exec(*npte); - if(!pte_dirty(*npte)) - w = 0; - if(!pte_young(*npte)){ - r = 0; - w = 0; - } + if (!pte_young(*npte)) { + r = 0; + w = 0; + } else if (!pte_dirty(*npte)) { + w = 0; + } if(force || pte_newpage(*npte)){ if(pte_present(*npte)) ret = add_mmap(addr, diff --git a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c index 87cc6fd76ce..d297429ac36 100644 --- a/arch/um/kernel/trap_kern.c +++ b/arch/um/kernel/trap_kern.c @@ -18,6 +18,7 @@ #include "asm/a.out.h" #include "asm/current.h" #include "asm/irq.h" +#include "sysdep/sigcontext.h" #include "user_util.h" #include "kern_util.h" #include "kern.h" @@ -39,6 +40,12 @@ int handle_page_fault(unsigned long address, unsigned long ip, int err = -EFAULT; *code_out = SEGV_MAPERR; + + /* If the fault was during atomic operation, don't take the fault, just + * fail. */ + if (in_atomic()) + goto out_nosemaphore; + down_read(&mm->mmap_sem); vma = find_vma(mm, address); if(!vma) @@ -89,6 +96,7 @@ survive: flush_tlb_page(vma, address); out: up_read(&mm->mmap_sem); +out_nosemaphore: return(err); /* @@ -125,7 +133,15 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, void *sc) } else if(current->mm == NULL) panic("Segfault with no mm"); - err = handle_page_fault(address, ip, is_write, is_user, &si.si_code); + + if (SEGV_IS_FIXABLE(&fi)) + err = handle_page_fault(address, ip, is_write, is_user, &si.si_code); + else { + err = -EFAULT; + /* A thread accessed NULL, we get a fault, but CR2 is invalid. + * This code is used in __do_copy_from_user() of TT mode. */ + address = 0; + } catcher = current->thread.fault_catcher; if(!err) diff --git a/arch/um/kernel/tt/include/uaccess-tt.h b/arch/um/kernel/tt/include/uaccess-tt.h index aa6db384af8..dc2ebfa8c54 100644 --- a/arch/um/kernel/tt/include/uaccess-tt.h +++ b/arch/um/kernel/tt/include/uaccess-tt.h @@ -33,12 +33,6 @@ extern unsigned long uml_physmem; (((unsigned long) (addr) <= ((unsigned long) (addr) + (size))) && \ (under_task_size(addr, size) || is_stack(addr, size)))) -static inline int verify_area_tt(int type, const void __user * addr, - unsigned long size) -{ - return(access_ok_tt(type, addr, size) ? 0 : -EFAULT); -} - extern unsigned long get_fault_addr(void); extern int __do_copy_from_user(void *to, const void *from, int n, diff --git a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c index 0de05a268b2..cfaa373a6e7 100644 --- a/arch/um/kernel/tt/process_kern.c +++ b/arch/um/kernel/tt/process_kern.c @@ -23,10 +23,11 @@ #include "mem_user.h" #include "tlb.h" #include "mode.h" +#include "mode_kern.h" #include "init.h" #include "tt.h" -int switch_to_tt(void *prev, void *next, void *last) +void switch_to_tt(void *prev, void *next) { struct task_struct *from, *to, *prev_sched; unsigned long flags; diff --git a/arch/um/kernel/tt/uaccess_user.c b/arch/um/kernel/tt/uaccess_user.c index f01475512ec..8c220f054b6 100644 --- a/arch/um/kernel/tt/uaccess_user.c +++ b/arch/um/kernel/tt/uaccess_user.c @@ -22,8 +22,15 @@ int __do_copy_from_user(void *to, const void *from, int n, __do_copy, &faulted); TASK_REGS(get_current())->tt = save; - if(!faulted) return(0); - else return(n - (fault - (unsigned long) from)); + if(!faulted) + return 0; + else if (fault) + return n - (fault - (unsigned long) from); + else + /* In case of a general protection fault, we don't have the + * fault address, so NULL is used instead. Pretend we didn't + * copy anything. */ + return n; } static void __do_strncpy(void *dst, const void *src, int count) diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c index 186c2888501..0b21d59ba0c 100644 --- a/arch/um/kernel/umid.c +++ b/arch/um/kernel/umid.c @@ -31,6 +31,8 @@ static char *uml_dir = UML_DIR; /* Changed by set_umid */ static int umid_is_random = 1; static int umid_inited = 0; +/* Have we created the files? Should we remove them? */ +static int umid_owned = 0; static int make_umid(int (*printer)(const char *fmt, ...)); @@ -82,20 +84,21 @@ int __init umid_file_name(char *name, char *buf, int len) extern int tracing_pid; -static int __init create_pid_file(void) +static void __init create_pid_file(void) { char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; char pid[sizeof("nnnnn\0")]; int fd, n; - if(umid_file_name("pid", file, sizeof(file))) return 0; + if(umid_file_name("pid", file, sizeof(file))) + return; fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), 0644); if(fd < 0){ printf("Open of machine pid file \"%s\" failed: %s\n", file, strerror(-fd)); - return 0; + return; } sprintf(pid, "%d\n", os_getpid()); @@ -103,7 +106,6 @@ static int __init create_pid_file(void) if(n != strlen(pid)) printf("Write of pid file failed - err = %d\n", -n); os_close_file(fd); - return 0; } static int actually_do_remove(char *dir) @@ -147,7 +149,8 @@ static int actually_do_remove(char *dir) void remove_umid_dir(void) { char dir[strlen(uml_dir) + UMID_LEN + 1]; - if(!umid_inited) return; + if (!umid_owned) + return; sprintf(dir, "%s%s", uml_dir, umid); actually_do_remove(dir); @@ -155,11 +158,12 @@ void remove_umid_dir(void) char *get_umid(int only_if_set) { - if(only_if_set && umid_is_random) return(NULL); - return(umid); + if(only_if_set && umid_is_random) + return NULL; + return umid; } -int not_dead_yet(char *dir) +static int not_dead_yet(char *dir) { char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; char pid[sizeof("nnnnn\0")], *end; @@ -193,7 +197,8 @@ int not_dead_yet(char *dir) (p == CHOOSE_MODE(tracing_pid, os_getpid()))) dead = 1; } - if(!dead) return(1); + if(!dead) + return(1); return(actually_do_remove(dir)); } @@ -232,16 +237,13 @@ static int __init make_uml_dir(void) strlcpy(dir, home, sizeof(dir)); uml_dir++; } + strlcat(dir, uml_dir, sizeof(dir)); len = strlen(dir); - strncat(dir, uml_dir, sizeof(dir) - len); - len = strlen(dir); - if((len > 0) && (len < sizeof(dir) - 1) && (dir[len - 1] != '/')){ - dir[len] = '/'; - dir[len + 1] = '\0'; - } + if (len > 0 && dir[len - 1] != '/') + strlcat(dir, "/", sizeof(dir)); uml_dir = malloc(strlen(dir) + 1); - if(uml_dir == NULL){ + if (uml_dir == NULL) { printf("make_uml_dir : malloc failed, errno = %d\n", errno); exit(1); } @@ -286,6 +288,7 @@ static int __init make_umid(int (*printer)(const char *fmt, ...)) if(errno == EEXIST){ if(not_dead_yet(tmp)){ (*printer)("umid '%s' is in use\n", umid); + umid_owned = 0; return(-1); } err = mkdir(tmp, 0777); @@ -296,7 +299,8 @@ static int __init make_umid(int (*printer)(const char *fmt, ...)) return(-1); } - return(0); + umid_owned = 1; + return 0; } __uml_setup("uml_dir=", set_uml_dir, @@ -309,7 +313,8 @@ static int __init make_umid_setup(void) /* one function with the ordering we need ... */ make_uml_dir(); make_umid(printf); - return create_pid_file(); + create_pid_file(); + return 0; } __uml_postsetup(make_umid_setup); diff --git a/arch/um/kernel/user_util.c b/arch/um/kernel/user_util.c index a25f3ea11fd..41d17c71511 100644 --- a/arch/um/kernel/user_util.c +++ b/arch/um/kernel/user_util.c @@ -128,6 +128,12 @@ void setup_machinename(char *machine_out) struct utsname host; uname(&host); +#if defined(UML_CONFIG_UML_X86) && !defined(UML_CONFIG_64BIT) + if (!strcmp(host.machine, "x86_64")) { + strcpy(machine_out, "i686"); + return; + } +#endif strcpy(machine_out, host.machine); } diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c index 298d5632128..f6e64026f99 100644 --- a/arch/um/os-Linux/aio.c +++ b/arch/um/os-Linux/aio.c @@ -144,6 +144,7 @@ static int aio_thread(void *arg) "errno = %d\n", errno); } else { + /* This is safe as we've just a pointer here. */ aio = (struct aio_context *) (long) event.data; if(update_aio(aio, event.res)){ do_aio(ctx, aio); diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c index e770cb02957..ab33cb3c74e 100644 --- a/arch/um/os-Linux/elf_aux.c +++ b/arch/um/os-Linux/elf_aux.c @@ -14,7 +14,8 @@ #include "mem_user.h" #include <kernel-offsets.h> -#if HOST_ELF_CLASS == ELFCLASS32 +/* Use the one from the kernel - the host may miss it, if having old headers. */ +#if UM_ELF_CLASS == UM_ELFCLASS32 typedef Elf32_auxv_t elf_auxv_t; #else typedef Elf64_auxv_t elf_auxv_t; diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index d32413e4b4c..d9c52387c4a 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -3,6 +3,7 @@ * Licensed under the GPL */ +#include <unistd.h> #include <stdio.h> #include <errno.h> #include <signal.h> diff --git a/arch/um/sys-i386/ldt.c b/arch/um/sys-i386/ldt.c index bd3c34aa52e..36b5c2c1328 100644 --- a/arch/um/sys-i386/ldt.c +++ b/arch/um/sys-i386/ldt.c @@ -83,6 +83,7 @@ int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount) goto out; } p = buf; + break; default: res = -ENOSYS; goto out; diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 0969d570f3b..21afa69a086 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -308,7 +308,7 @@ config HPET_TIMER present. The HPET provides a stable time base on SMP systems, unlike the TSC, but it is more expensive to access, as it is off-chip. You can find the HPET spec at - <http://www.intel.com/labs/platcomp/hpet/hpetspec.htm>. + <http://www.intel.com/hardwaredesign/hpetspec.htm>. config X86_PM_TIMER bool "PM timer" diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c index 09887c96e9a..de19501aa80 100644 --- a/arch/xtensa/kernel/pci.c +++ b/arch/xtensa/kernel/pci.c @@ -402,8 +402,8 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, __pci_mmap_set_flags(dev, vma, mmap_state); __pci_mmap_set_pgprot(dev, vma, mmap_state, write_combine); - ret = io_remap_page_range(vma, vma->vm_start, vma->vm_pgoff<<PAGE_SHIFT, - vma->vm_end - vma->vm_start, vma->vm_page_prot); + ret = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + vma->vm_end - vma->vm_start,vma->vm_page_prot); return ret; } diff --git a/arch/xtensa/kernel/platform.c b/arch/xtensa/kernel/platform.c index cf136278444..03674daabc6 100644 --- a/arch/xtensa/kernel/platform.c +++ b/arch/xtensa/kernel/platform.c @@ -39,7 +39,7 @@ _F(int, pcibios_fixup, (void), { return 0; }); _F(int, get_rtc_time, (time_t* t), { return 0; }); _F(int, set_rtc_time, (time_t t), { return 0; }); -#if CONFIG_XTENSA_CALIBRATE_CCOUNT +#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT _F(void, calibrate_ccount, (void), { printk ("ERROR: Cannot calibrate cpu frequency! Assuming 100MHz.\n"); diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index c83bb0d4178..08ef6d82ee5 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -457,7 +457,7 @@ int dump_task_fpu(struct pt_regs *regs, struct task_struct *task, elf_fpregset_t *r) { /* see asm/coprocessor.h for this magic number 16 */ -#if TOTAL_CPEXTRA_SIZE > 16 +#if XTENSA_CP_EXTRA_SIZE > 16 do_save_fpregs (r, regs, task); /* For now, bit 16 means some extra state may be present: */ diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c index 1f5bf5d624e..513ed8d6776 100644 --- a/arch/xtensa/kernel/setup.c +++ b/arch/xtensa/kernel/setup.c @@ -304,7 +304,7 @@ void __init setup_arch(char **cmdline_p) # endif #endif -#if CONFIG_PCI +#ifdef CONFIG_PCI platform_pcibios_init(); #endif } diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index dc42cede939..e252b61e45a 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -182,7 +182,7 @@ restore_cpextra (struct _cpstate *buf) struct task_struct *tsk = current; release_all_cp(tsk); - return __copy_from_user(tsk->thread.cpextra, buf, TOTAL_CPEXTRA_SIZE); + return __copy_from_user(tsk->thread.cpextra, buf, XTENSA_CP_EXTRA_SIZE); #endif return 0; } diff --git a/arch/xtensa/kernel/time.c b/arch/xtensa/kernel/time.c index 1ac7d5ce745..8e423d1335c 100644 --- a/arch/xtensa/kernel/time.c +++ b/arch/xtensa/kernel/time.c @@ -68,7 +68,7 @@ void __init time_init(void) * speed for the CALIBRATE. */ -#if CONFIG_XTENSA_CALIBRATE_CCOUNT +#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT printk("Calibrating CPU frequency "); platform_calibrate_ccount(); printk("%d.%02d MHz\n", (int)ccount_per_jiffy/(1000000/HZ), diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index 56aace84aae..5a91d6c9e66 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -239,7 +239,7 @@ void __init mem_init(void) high_memory = (void *) __va(max_mapnr << PAGE_SHIFT); highmemsize = 0; -#if CONFIG_HIGHMEM +#ifdef CONFIG_HIGHMEM #error HIGHGMEM not implemented in init.c #endif diff --git a/drivers/acorn/char/pcf8583.c b/drivers/acorn/char/pcf8583.c index 141b4c237a5..2b850e5860a 100644 --- a/drivers/acorn/char/pcf8583.c +++ b/drivers/acorn/char/pcf8583.c @@ -23,12 +23,13 @@ static struct i2c_driver pcf8583_driver; static unsigned short ignore[] = { I2C_CLIENT_END }; static unsigned short normal_addr[] = { 0x50, I2C_CLIENT_END }; +static unsigned short *forces[] = { NULL }; static struct i2c_client_address_data addr_data = { .normal_i2c = normal_addr, .probe = ignore, .ignore = ignore, - .force = ignore, + .forces = forces, }; #define DAT(x) ((unsigned int)(x->dev.driver_data)) diff --git a/drivers/base/class.c b/drivers/base/class.c index 3b112e3542f..ce23dc8c18c 100644 --- a/drivers/base/class.c +++ b/drivers/base/class.c @@ -669,6 +669,7 @@ void class_device_destroy(struct class *cls, dev_t devt) int class_device_rename(struct class_device *class_dev, char *new_name) { int error = 0; + char *old_class_name = NULL, *new_class_name = NULL; class_dev = class_device_get(class_dev); if (!class_dev) @@ -677,12 +678,24 @@ int class_device_rename(struct class_device *class_dev, char *new_name) pr_debug("CLASS: renaming '%s' to '%s'\n", class_dev->class_id, new_name); + if (class_dev->dev) + old_class_name = make_class_name(class_dev); + strlcpy(class_dev->class_id, new_name, KOBJ_NAME_LEN); error = kobject_rename(&class_dev->kobj, new_name); + if (class_dev->dev) { + new_class_name = make_class_name(class_dev); + sysfs_create_link(&class_dev->dev->kobj, &class_dev->kobj, + new_class_name); + sysfs_remove_link(&class_dev->dev->kobj, old_class_name); + } class_device_put(class_dev); + kfree(old_class_name); + kfree(new_class_name); + return error; } diff --git a/drivers/base/dd.c b/drivers/base/dd.c index d5bbce38282..3565e979530 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -40,6 +40,9 @@ */ void device_bind_driver(struct device * dev) { + if (klist_node_attached(&dev->knode_driver)) + return; + pr_debug("bound device '%s' to driver '%s'\n", dev->bus_id, dev->driver->name); klist_add_tail(&dev->knode_driver, &dev->driver->klist_devices); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index c56f995aada..486b6e1c7df 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -483,9 +483,6 @@ static int cciss_open(struct inode *inode, struct file *filep) printk(KERN_DEBUG "cciss_open %s\n", inode->i_bdev->bd_disk->disk_name); #endif /* CCISS_DEBUG */ - if (host->busy_initializing) - return -EBUSY; - if (host->busy_initializing || drv->busy_configuring) return -EBUSY; /* @@ -2991,6 +2988,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON); cciss_procinit(i); + hba[i]->busy_initializing = 0; for(j=0; j < NWD; j++) { /* mfm */ drive_info_struct *drv = &(hba[i]->drv[j]); @@ -3033,7 +3031,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev, add_disk(disk); } - hba[i]->busy_initializing = 0; return(1); clean4: diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c index 483d71b10cf..baedac52294 100644 --- a/drivers/block/ll_rw_blk.c +++ b/drivers/block/ll_rw_blk.c @@ -2373,44 +2373,6 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) EXPORT_SYMBOL(blkdev_issue_flush); -/** - * blkdev_scsi_issue_flush_fn - issue flush for SCSI devices - * @q: device queue - * @disk: gendisk - * @error_sector: error offset - * - * Description: - * Devices understanding the SCSI command set, can use this function as - * a helper for issuing a cache flush. Note: driver is required to store - * the error offset (in case of error flushing) in ->sector of struct - * request. - */ -int blkdev_scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, - sector_t *error_sector) -{ - struct request *rq = blk_get_request(q, WRITE, __GFP_WAIT); - int ret; - - rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER; - rq->sector = 0; - memset(rq->cmd, 0, sizeof(rq->cmd)); - rq->cmd[0] = 0x35; - rq->cmd_len = 12; - rq->data = NULL; - rq->data_len = 0; - rq->timeout = 60 * HZ; - - ret = blk_execute_rq(q, disk, rq, 0); - - if (ret && error_sector) - *error_sector = rq->sector; - - blk_put_request(rq); - return ret; -} - -EXPORT_SYMBOL(blkdev_scsi_issue_flush_fn); - static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io) { int rw = rq_data_dir(rq); diff --git a/drivers/block/ub.c b/drivers/block/ub.c index aa0bf7ee008..ed4d5006fe6 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -172,7 +172,7 @@ struct bulk_cs_wrap { */ struct ub_dev; -#define UB_MAX_REQ_SG 4 +#define UB_MAX_REQ_SG 9 /* cdrecord requires 32KB and maybe a header */ #define UB_MAX_SECTORS 64 /* @@ -387,7 +387,7 @@ struct ub_dev { struct bulk_cs_wrap work_bcs; struct usb_ctrlrequest work_cr; - int sg_stat[UB_MAX_REQ_SG+1]; + int sg_stat[6]; struct ub_scsi_trace tr; }; @@ -525,12 +525,13 @@ static ssize_t ub_diag_show(struct device *dev, struct device_attribute *attr, "qlen %d qmax %d\n", sc->cmd_queue.qlen, sc->cmd_queue.qmax); cnt += sprintf(page + cnt, - "sg %d %d %d %d %d\n", + "sg %d %d %d %d %d .. %d\n", sc->sg_stat[0], sc->sg_stat[1], sc->sg_stat[2], sc->sg_stat[3], - sc->sg_stat[4]); + sc->sg_stat[4], + sc->sg_stat[5]); list_for_each (p, &sc->luns) { lun = list_entry(p, struct ub_lun, link); @@ -835,7 +836,7 @@ static int ub_cmd_build_block(struct ub_dev *sc, struct ub_lun *lun, return -1; } cmd->nsg = n_elem; - sc->sg_stat[n_elem]++; + sc->sg_stat[n_elem < 5 ? n_elem : 5]++; /* * build the command @@ -891,7 +892,7 @@ static int ub_cmd_build_packet(struct ub_dev *sc, struct ub_lun *lun, return -1; } cmd->nsg = n_elem; - sc->sg_stat[n_elem]++; + sc->sg_stat[n_elem < 5 ? n_elem : 5]++; memcpy(&cmd->cdb, rq->cmd, rq->cmd_len); cmd->cdb_len = rq->cmd_len; @@ -1010,7 +1011,6 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) sc->last_pipe = sc->send_bulk_pipe; usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->send_bulk_pipe, bcb, US_BULK_CB_WRAP_LEN, ub_urb_complete, sc); - sc->work_urb.transfer_flags = 0; /* Fill what we shouldn't be filling, because usb-storage did so. */ sc->work_urb.actual_length = 0; @@ -1019,7 +1019,6 @@ static int ub_scsi_cmd_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { /* XXX Clear stalls */ - printk("ub: cmd #%d start failed (%d)\n", cmd->tag, rc); /* P3 */ ub_complete(&sc->work_done); return rc; } @@ -1190,11 +1189,9 @@ static void ub_scsi_urb_compl(struct ub_dev *sc, struct ub_scsi_cmd *cmd) return; } if (urb->status != 0) { - printk("ub: cmd #%d cmd status (%d)\n", cmd->tag, urb->status); /* P3 */ goto Bad_End; } if (urb->actual_length != US_BULK_CB_WRAP_LEN) { - printk("ub: cmd #%d xferred %d\n", cmd->tag, urb->actual_length); /* P3 */ /* XXX Must do reset here to unconfuse the device */ goto Bad_End; } @@ -1395,14 +1392,12 @@ static void ub_data_start(struct ub_dev *sc, struct ub_scsi_cmd *cmd) usb_fill_bulk_urb(&sc->work_urb, sc->dev, pipe, page_address(sg->page) + sg->offset, sg->length, ub_urb_complete, sc); - sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; if ((rc = usb_submit_urb(&sc->work_urb, GFP_ATOMIC)) != 0) { /* XXX Clear stalls */ - printk("ub: data #%d submit failed (%d)\n", cmd->tag, rc); /* P3 */ ub_complete(&sc->work_done); ub_state_done(sc, cmd, rc); return; @@ -1442,7 +1437,6 @@ static int __ub_state_stat(struct ub_dev *sc, struct ub_scsi_cmd *cmd) sc->last_pipe = sc->recv_bulk_pipe; usb_fill_bulk_urb(&sc->work_urb, sc->dev, sc->recv_bulk_pipe, &sc->work_bcs, US_BULK_CS_WRAP_LEN, ub_urb_complete, sc); - sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -1563,7 +1557,6 @@ static int ub_submit_clear_stall(struct ub_dev *sc, struct ub_scsi_cmd *cmd, usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe, (unsigned char*) cr, NULL, 0, ub_urb_complete, sc); - sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -2000,17 +1993,16 @@ static int ub_sync_getmaxlun(struct ub_dev *sc) usb_fill_control_urb(&sc->work_urb, sc->dev, sc->recv_ctrl_pipe, (unsigned char*) cr, p, 1, ub_probe_urb_complete, &compl); - sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; if ((rc = usb_submit_urb(&sc->work_urb, GFP_KERNEL)) != 0) { if (rc == -EPIPE) { - printk("%s: Stall at GetMaxLUN, using 1 LUN\n", + printk("%s: Stall submitting GetMaxLUN, using 1 LUN\n", sc->name); /* P3 */ } else { - printk(KERN_WARNING + printk(KERN_NOTICE "%s: Unable to submit GetMaxLUN (%d)\n", sc->name, rc); } @@ -2028,6 +2020,18 @@ static int ub_sync_getmaxlun(struct ub_dev *sc) del_timer_sync(&timer); usb_kill_urb(&sc->work_urb); + if ((rc = sc->work_urb.status) < 0) { + if (rc == -EPIPE) { + printk("%s: Stall at GetMaxLUN, using 1 LUN\n", + sc->name); /* P3 */ + } else { + printk(KERN_NOTICE + "%s: Error at GetMaxLUN (%d)\n", + sc->name, rc); + } + goto err_io; + } + if (sc->work_urb.actual_length != 1) { printk("%s: GetMaxLUN returned %d bytes\n", sc->name, sc->work_urb.actual_length); /* P3 */ @@ -2048,6 +2052,7 @@ static int ub_sync_getmaxlun(struct ub_dev *sc) kfree(p); return nluns; +err_io: err_submit: kfree(p); err_alloc: @@ -2080,7 +2085,6 @@ static int ub_probe_clear_stall(struct ub_dev *sc, int stalled_pipe) usb_fill_control_urb(&sc->work_urb, sc->dev, sc->send_ctrl_pipe, (unsigned char*) cr, NULL, 0, ub_probe_urb_complete, &compl); - sc->work_urb.transfer_flags = 0; sc->work_urb.actual_length = 0; sc->work_urb.error_count = 0; sc->work_urb.status = 0; @@ -2213,8 +2217,10 @@ static int ub_probe(struct usb_interface *intf, * This is needed to clear toggles. It is a problem only if we do * `rmmod ub && modprobe ub` without disconnects, but we like that. */ +#if 0 /* iPod Mini fails if we do this (big white iPod works) */ ub_probe_clear_stall(sc, sc->recv_bulk_pipe); ub_probe_clear_stall(sc, sc->send_bulk_pipe); +#endif /* * The way this is used by the startup code is a little specific. @@ -2241,10 +2247,10 @@ static int ub_probe(struct usb_interface *intf, for (i = 0; i < 3; i++) { if ((rc = ub_sync_getmaxlun(sc)) < 0) { /* - * Some devices (i.e. Iomega Zip100) need this -- - * apparently the bulk pipes get STALLed when the - * GetMaxLUN request is processed. - * XXX I have a ZIP-100, verify it does this. + * This segment is taken from usb-storage. They say + * that ZIP-100 needs this, but my own ZIP-100 works + * fine without this. + * Still, it does not seem to hurt anything. */ if (rc == -EPIPE) { ub_probe_clear_stall(sc, sc->recv_bulk_pipe); @@ -2313,7 +2319,7 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum) disk->first_minor = lun->id * UB_MINORS_PER_MAJOR; disk->fops = &ub_bd_fops; disk->private_data = lun; - disk->driverfs_dev = &sc->intf->dev; /* XXX Many to one ok? */ + disk->driverfs_dev = &sc->intf->dev; rc = -ENOMEM; if ((q = blk_init_queue(ub_request_fn, &sc->lock)) == NULL) @@ -2466,9 +2472,6 @@ static int __init ub_init(void) { int rc; - /* P3 */ printk("ub: sizeof ub_scsi_cmd %zu ub_dev %zu ub_lun %zu\n", - sizeof(struct ub_scsi_cmd), sizeof(struct ub_dev), sizeof(struct ub_lun)); - if ((rc = register_blkdev(UB_MAJOR, DRV_NAME)) != 0) goto err_regblkdev; devfs_mk_dir(DEVFS_NAME); diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index de0379b6d50..c055bb630ff 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -273,7 +273,6 @@ static int hpet_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_IO; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - addr = __pa(addr); if (io_remap_pfn_range(vma, vma->vm_start, addr >> PAGE_SHIFT, PAGE_SIZE, vma->vm_page_prot)) { diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 463351d4f94..32fa82c78c7 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2620,7 +2620,7 @@ void ipmi_smi_msg_received(ipmi_smi_t intf, spin_lock_irqsave(&(intf->waiting_msgs_lock), flags); if (!list_empty(&(intf->waiting_msgs))) { list_add_tail(&(msg->link), &(intf->waiting_msgs)); - spin_unlock(&(intf->waiting_msgs_lock)); + spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags); goto out_unlock; } spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags); @@ -2629,9 +2629,9 @@ void ipmi_smi_msg_received(ipmi_smi_t intf, if (rv > 0) { /* Could not handle the message now, just add it to a list to handle later. */ - spin_lock(&(intf->waiting_msgs_lock)); + spin_lock_irqsave(&(intf->waiting_msgs_lock), flags); list_add_tail(&(msg->link), &(intf->waiting_msgs)); - spin_unlock(&(intf->waiting_msgs_lock)); + spin_unlock_irqrestore(&(intf->waiting_msgs_lock), flags); } else if (rv == 0) { ipmi_free_smi_msg(msg); } diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 7e72e922b41..db358cfa7cb 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -418,12 +418,11 @@ config SENSORS_HDAPS help This driver provides support for the IBM Hard Drive Active Protection System (hdaps), which provides an accelerometer and other misc. data. - Supported laptops include the IBM ThinkPad T41, T42, T43, and R51. - The accelerometer data is readable via sysfs. + ThinkPads starting with the R50, T41, and X40 are supported. The + accelerometer data is readable via sysfs. - This driver also provides an input class device, allowing the - laptop to act as a pinball machine-esque mouse. This is off by - default but enabled via sysfs or the module parameter "mousedev". + This driver also provides an absolute input class device, allowing + the laptop to act as a pinball machine-esque joystick. Say Y here if you have an applicable laptop and want to experience the awesome power of hdaps. diff --git a/drivers/hwmon/hdaps.c b/drivers/hwmon/hdaps.c index 4c56411f399..7f010761382 100644 --- a/drivers/hwmon/hdaps.c +++ b/drivers/hwmon/hdaps.c @@ -4,9 +4,9 @@ * Copyright (C) 2005 Robert Love <rml@novell.com> * Copyright (C) 2005 Jesper Juhl <jesper.juhl@gmail.com> * - * The HardDisk Active Protection System (hdaps) is present in the IBM ThinkPad - * T41, T42, T43, R50, R50p, R51, and X40, at least. It provides a basic - * two-axis accelerometer and other data, such as the device's temperature. + * The HardDisk Active Protection System (hdaps) is present in IBM ThinkPads + * starting with the R40, T41, and X40. It provides a basic two-axis + * accelerometer and other data, such as the device's temperature. * * This driver is based on the document by Mark A. Smith available at * http://www.almaden.ibm.com/cs/people/marksmith/tpaps.html and a lot of trial @@ -487,24 +487,19 @@ static struct attribute_group hdaps_attribute_group = { /* Module stuff */ -/* - * XXX: We should be able to return nonzero and halt the detection process. - * But there is a bug in dmi_check_system() where a nonzero return from the - * first match will result in a return of failure from dmi_check_system(). - * I fixed this; the patch is 2.6-git. Once in a released tree, we can make - * hdaps_dmi_match_invert() return hdaps_dmi_match(), which in turn returns 1. - */ +/* hdaps_dmi_match - found a match. return one, short-circuiting the hunt. */ static int hdaps_dmi_match(struct dmi_system_id *id) { printk(KERN_INFO "hdaps: %s detected.\n", id->ident); - return 0; + return 1; } +/* hdaps_dmi_match_invert - found an inverted match. */ static int hdaps_dmi_match_invert(struct dmi_system_id *id) { hdaps_invert = 1; printk(KERN_INFO "hdaps: inverting axis readings.\n"); - return 0; + return hdaps_dmi_match(id); } #define HDAPS_DMI_MATCH_NORMAL(model) { \ @@ -534,6 +529,7 @@ static int __init hdaps_init(void) HDAPS_DMI_MATCH_INVERT("ThinkPad R50p"), HDAPS_DMI_MATCH_NORMAL("ThinkPad R50"), HDAPS_DMI_MATCH_NORMAL("ThinkPad R51"), + HDAPS_DMI_MATCH_NORMAL("ThinkPad R52"), HDAPS_DMI_MATCH_INVERT("ThinkPad T41p"), HDAPS_DMI_MATCH_NORMAL("ThinkPad T41"), HDAPS_DMI_MATCH_INVERT("ThinkPad T42p"), @@ -541,6 +537,7 @@ static int __init hdaps_init(void) HDAPS_DMI_MATCH_NORMAL("ThinkPad T43"), HDAPS_DMI_MATCH_NORMAL("ThinkPad X40"), HDAPS_DMI_MATCH_NORMAL("ThinkPad X41 Tablet"), + HDAPS_DMI_MATCH_NORMAL("ThinkPad X41"), { .ident = NULL } }; diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 8334496a7e0..3badfec75b1 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -245,6 +245,18 @@ config I2C_KEYWEST This support is also available as a module. If so, the module will be called i2c-keywest. +config I2C_PMAC_SMU + tristate "Powermac SMU I2C interface" + depends on I2C && PMAC_SMU + help + This supports the use of the I2C interface in the SMU + chip on recent Apple machines like the iMac G5. It is used + among others by the thermal control driver for those machines. + Say Y if you have such a machine. + + This support is also available as a module. If so, the module + will be called i2c-pmac-smu. + config I2C_MPC tristate "MPC107/824x/85xx/52xx" depends on I2C && PPC32 diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile index 980b3e98367..f1df00f66c6 100644 --- a/drivers/i2c/busses/Makefile +++ b/drivers/i2c/busses/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_I2C_ITE) += i2c-ite.o obj-$(CONFIG_I2C_IXP2000) += i2c-ixp2000.o obj-$(CONFIG_I2C_IXP4XX) += i2c-ixp4xx.o obj-$(CONFIG_I2C_KEYWEST) += i2c-keywest.o +obj-$(CONFIG_I2C_PMAC_SMU) += i2c-pmac-smu.o obj-$(CONFIG_I2C_MPC) += i2c-mpc.o obj-$(CONFIG_I2C_MV64XXX) += i2c-mv64xxx.o obj-$(CONFIG_I2C_NFORCE2) += i2c-nforce2.o diff --git a/drivers/i2c/busses/i2c-pmac-smu.c b/drivers/i2c/busses/i2c-pmac-smu.c new file mode 100644 index 00000000000..8a9f5648a23 --- /dev/null +++ b/drivers/i2c/busses/i2c-pmac-smu.c @@ -0,0 +1,316 @@ +/* + i2c Support for Apple SMU Controller + + Copyright (c) 2005 Benjamin Herrenschmidt, IBM Corp. + <benh@kernel.crashing.org> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/i2c.h> +#include <linux/init.h> +#include <linux/completion.h> +#include <linux/device.h> +#include <asm/prom.h> +#include <asm/of_device.h> +#include <asm/smu.h> + +static int probe; + +MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>"); +MODULE_DESCRIPTION("I2C driver for Apple's SMU"); +MODULE_LICENSE("GPL"); +module_param(probe, bool, 0); + + +/* Physical interface */ +struct smu_iface +{ + struct i2c_adapter adapter; + struct completion complete; + u32 busid; +}; + +static void smu_i2c_done(struct smu_i2c_cmd *cmd, void *misc) +{ + struct smu_iface *iface = misc; + complete(&iface->complete); +} + +/* + * SMBUS-type transfer entrypoint + */ +static s32 smu_smbus_xfer( struct i2c_adapter* adap, + u16 addr, + unsigned short flags, + char read_write, + u8 command, + int size, + union i2c_smbus_data* data) +{ + struct smu_iface *iface = i2c_get_adapdata(adap); + struct smu_i2c_cmd cmd; + int rc = 0; + int read = (read_write == I2C_SMBUS_READ); + + cmd.info.bus = iface->busid; + cmd.info.devaddr = (addr << 1) | (read ? 0x01 : 0x00); + + /* Prepare datas & select mode */ + switch (size) { + case I2C_SMBUS_QUICK: + cmd.info.type = SMU_I2C_TRANSFER_SIMPLE; + cmd.info.datalen = 0; + break; + case I2C_SMBUS_BYTE: + cmd.info.type = SMU_I2C_TRANSFER_SIMPLE; + cmd.info.datalen = 1; + if (!read) + cmd.info.data[0] = data->byte; + break; + case I2C_SMBUS_BYTE_DATA: + cmd.info.type = SMU_I2C_TRANSFER_STDSUB; + cmd.info.datalen = 1; + cmd.info.sublen = 1; + cmd.info.subaddr[0] = command; + cmd.info.subaddr[1] = 0; + cmd.info.subaddr[2] = 0; + if (!read) + cmd.info.data[0] = data->byte; + break; + case I2C_SMBUS_WORD_DATA: + cmd.info.type = SMU_I2C_TRANSFER_STDSUB; + cmd.info.datalen = 2; + cmd.info.sublen = 1; + cmd.info.subaddr[0] = command; + cmd.info.subaddr[1] = 0; + cmd.info.subaddr[2] = 0; + if (!read) { + cmd.info.data[0] = data->byte & 0xff; + cmd.info.data[1] = (data->byte >> 8) & 0xff; + } + break; + /* Note that these are broken vs. the expected smbus API where + * on reads, the lenght is actually returned from the function, + * but I think the current API makes no sense and I don't want + * any driver that I haven't verified for correctness to go + * anywhere near a pmac i2c bus anyway ... + */ + case I2C_SMBUS_BLOCK_DATA: + cmd.info.type = SMU_I2C_TRANSFER_STDSUB; + cmd.info.datalen = data->block[0] + 1; + if (cmd.info.datalen > 6) + return -EINVAL; + if (!read) + memcpy(cmd.info.data, data->block, cmd.info.datalen); + cmd.info.sublen = 1; + cmd.info.subaddr[0] = command; + cmd.info.subaddr[1] = 0; + cmd.info.subaddr[2] = 0; + break; + case I2C_SMBUS_I2C_BLOCK_DATA: + cmd.info.type = SMU_I2C_TRANSFER_STDSUB; + cmd.info.datalen = data->block[0]; + if (cmd.info.datalen > 7) + return -EINVAL; + if (!read) + memcpy(cmd.info.data, &data->block[1], + cmd.info.datalen); + cmd.info.sublen = 1; + cmd.info.subaddr[0] = command; + cmd.info.subaddr[1] = 0; + cmd.info.subaddr[2] = 0; + break; + + default: + return -EINVAL; + } + + /* Turn a standardsub read into a combined mode access */ + if (read_write == I2C_SMBUS_READ && + cmd.info.type == SMU_I2C_TRANSFER_STDSUB) + cmd.info.type = SMU_I2C_TRANSFER_COMBINED; + + /* Finish filling command and submit it */ + cmd.done = smu_i2c_done; + cmd.misc = iface; + rc = smu_queue_i2c(&cmd); + if (rc < 0) + return rc; + wait_for_completion(&iface->complete); + rc = cmd.status; + + if (!read || rc < 0) + return rc; + + switch (size) { + case I2C_SMBUS_BYTE: + case I2C_SMBUS_BYTE_DATA: + data->byte = cmd.info.data[0]; + break; + case I2C_SMBUS_WORD_DATA: + data->word = ((u16)cmd.info.data[1]) << 8; + data->word |= cmd.info.data[0]; + break; + /* Note that these are broken vs. the expected smbus API where + * on reads, the lenght is actually returned from the function, + * but I think the current API makes no sense and I don't want + * any driver that I haven't verified for correctness to go + * anywhere near a pmac i2c bus anyway ... + */ + case I2C_SMBUS_BLOCK_DATA: + case I2C_SMBUS_I2C_BLOCK_DATA: + memcpy(&data->block[0], cmd.info.data, cmd.info.datalen); + break; + } + + return rc; +} + +static u32 +smu_smbus_func(struct i2c_adapter * adapter) +{ + return I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE | + I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA | + I2C_FUNC_SMBUS_BLOCK_DATA; +} + +/* For now, we only handle combined mode (smbus) */ +static struct i2c_algorithm smu_algorithm = { + .smbus_xfer = smu_smbus_xfer, + .functionality = smu_smbus_func, +}; + +static int create_iface(struct device_node *np, struct device *dev) +{ + struct smu_iface* iface; + u32 *reg, busid; + int rc; + + reg = (u32 *)get_property(np, "reg", NULL); + if (reg == NULL) { + printk(KERN_ERR "i2c-pmac-smu: can't find bus number !\n"); + return -ENXIO; + } + busid = *reg; + + iface = kmalloc(sizeof(struct smu_iface), GFP_KERNEL); + if (iface == NULL) { + printk(KERN_ERR "i2c-pmac-smu: can't allocate inteface !\n"); + return -ENOMEM; + } + memset(iface, 0, sizeof(struct smu_iface)); + init_completion(&iface->complete); + iface->busid = busid; + + dev_set_drvdata(dev, iface); + + sprintf(iface->adapter.name, "smu-i2c-%02x", busid); + iface->adapter.algo = &smu_algorithm; + iface->adapter.algo_data = NULL; + iface->adapter.client_register = NULL; + iface->adapter.client_unregister = NULL; + i2c_set_adapdata(&iface->adapter, iface); + iface->adapter.dev.parent = dev; + + rc = i2c_add_adapter(&iface->adapter); + if (rc) { + printk(KERN_ERR "i2c-pamc-smu.c: Adapter %s registration " + "failed\n", iface->adapter.name); + i2c_set_adapdata(&iface->adapter, NULL); + } + + if (probe) { + unsigned char addr; + printk("Probe: "); + for (addr = 0x00; addr <= 0x7f; addr++) { + if (i2c_smbus_xfer(&iface->adapter,addr, + 0,0,0,I2C_SMBUS_QUICK,NULL) >= 0) + printk("%02x ", addr); + } + printk("\n"); + } + + printk(KERN_INFO "SMU i2c bus %x registered\n", busid); + + return 0; +} + +static int dispose_iface(struct device *dev) +{ + struct smu_iface *iface = dev_get_drvdata(dev); + int rc; + + rc = i2c_del_adapter(&iface->adapter); + i2c_set_adapdata(&iface->adapter, NULL); + /* We aren't that prepared to deal with this... */ + if (rc) + printk("i2c-pmac-smu.c: Failed to remove bus %s !\n", + iface->adapter.name); + dev_set_drvdata(dev, NULL); + kfree(iface); + + return 0; +} + + +static int create_iface_of_platform(struct of_device* dev, + const struct of_device_id *match) +{ + return create_iface(dev->node, &dev->dev); +} + + +static int dispose_iface_of_platform(struct of_device* dev) +{ + return dispose_iface(&dev->dev); +} + + +static struct of_device_id i2c_smu_match[] = +{ + { + .compatible = "smu-i2c", + }, + {}, +}; +static struct of_platform_driver i2c_smu_of_platform_driver = +{ + .name = "i2c-smu", + .match_table = i2c_smu_match, + .probe = create_iface_of_platform, + .remove = dispose_iface_of_platform +}; + + +static int __init i2c_pmac_smu_init(void) +{ + of_register_driver(&i2c_smu_of_platform_driver); + return 0; +} + + +static void __exit i2c_pmac_smu_cleanup(void) +{ + of_unregister_driver(&i2c_smu_of_platform_driver); +} + +module_init(i2c_pmac_smu_init); +module_exit(i2c_pmac_smu_cleanup); diff --git a/drivers/infiniband/core/mad_rmpp.c b/drivers/infiniband/core/mad_rmpp.c index 2bd8b1cc57c..e23836d0e21 100644 --- a/drivers/infiniband/core/mad_rmpp.c +++ b/drivers/infiniband/core/mad_rmpp.c @@ -412,8 +412,8 @@ static inline int get_mad_len(struct mad_rmpp_recv *rmpp_recv) hdr_size = data_offset(rmpp_mad->mad_hdr.mgmt_class); data_size = sizeof(struct ib_rmpp_mad) - hdr_size; - pad = data_size - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); - if (pad > data_size || pad < 0) + pad = IB_MGMT_RMPP_DATA - be32_to_cpu(rmpp_mad->rmpp_hdr.paylen_newwin); + if (pad > IB_MGMT_RMPP_DATA || pad < 0) pad = 0; return hdr_size + rmpp_recv->seg_num * data_size - pad; @@ -583,6 +583,7 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) { struct ib_rmpp_mad *rmpp_mad; int timeout; + u32 paylen; rmpp_mad = (struct ib_rmpp_mad *)mad_send_wr->send_wr.wr.ud.mad_hdr; ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE); @@ -590,11 +591,9 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) if (mad_send_wr->seg_num == 1) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_FIRST; - rmpp_mad->rmpp_hdr.paylen_newwin = - cpu_to_be32(mad_send_wr->total_seg * - (sizeof(struct ib_rmpp_mad) - - offsetof(struct ib_rmpp_mad, data)) - - mad_send_wr->pad); + paylen = mad_send_wr->total_seg * IB_MGMT_RMPP_DATA - + mad_send_wr->pad; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); mad_send_wr->sg_list[0].length = sizeof(struct ib_rmpp_mad); } else { mad_send_wr->send_wr.num_sge = 2; @@ -608,10 +607,8 @@ static int send_next_seg(struct ib_mad_send_wr_private *mad_send_wr) if (mad_send_wr->seg_num == mad_send_wr->total_seg) { rmpp_mad->rmpp_hdr.rmpp_rtime_flags |= IB_MGMT_RMPP_FLAG_LAST; - rmpp_mad->rmpp_hdr.paylen_newwin = - cpu_to_be32(sizeof(struct ib_rmpp_mad) - - offsetof(struct ib_rmpp_mad, data) - - mad_send_wr->pad); + paylen = IB_MGMT_RMPP_DATA - mad_send_wr->pad; + rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(paylen); } /* 2 seconds for an ACK until we can find the packet lifetime */ diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 7c2f03057dd..a64d6b4dcc1 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -334,10 +334,11 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, ret = -EINVAL; goto err_ah; } - /* Validate that management class can support RMPP */ + + /* Validate that the management class can support RMPP */ if (rmpp_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_ADM) { hdr_len = offsetof(struct ib_sa_mad, data); - data_len = length; + data_len = length - hdr_len; } else if ((rmpp_mad->mad_hdr.mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) && (rmpp_mad->mad_hdr.mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)) { hdr_len = offsetof(struct ib_vendor_mad, data); diff --git a/drivers/infiniband/hw/mthca/mthca_eq.c b/drivers/infiniband/hw/mthca/mthca_eq.c index 18f0981eb0c..78152a8ad17 100644 --- a/drivers/infiniband/hw/mthca/mthca_eq.c +++ b/drivers/infiniband/hw/mthca/mthca_eq.c @@ -476,12 +476,8 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, int i; u8 status; - /* Make sure EQ size is aligned to a power of 2 size. */ - for (i = 1; i < nent; i <<= 1) - ; /* nothing */ - nent = i; - - eq->dev = dev; + eq->dev = dev; + eq->nent = roundup_pow_of_two(max(nent, 2)); eq->page_list = kmalloc(npages * sizeof *eq->page_list, GFP_KERNEL); @@ -512,7 +508,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, memset(eq->page_list[i].buf, 0, PAGE_SIZE); } - for (i = 0; i < nent; ++i) + for (i = 0; i < eq->nent; ++i) set_eqe_hw(get_eqe(eq, i)); eq->eqn = mthca_alloc(&dev->eq_table.alloc); @@ -528,8 +524,6 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, if (err) goto err_out_free_eq; - eq->nent = nent; - memset(eq_context, 0, sizeof *eq_context); eq_context->flags = cpu_to_be32(MTHCA_EQ_STATUS_OK | MTHCA_EQ_OWNER_HW | @@ -538,7 +532,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, if (mthca_is_memfree(dev)) eq_context->flags |= cpu_to_be32(MTHCA_EQ_STATE_ARBEL); - eq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + eq_context->logsize_usrpage = cpu_to_be32((ffs(eq->nent) - 1) << 24); if (mthca_is_memfree(dev)) { eq_context->arbel_pd = cpu_to_be32(dev->driver_pd.pd_num); } else { @@ -569,7 +563,7 @@ static int __devinit mthca_create_eq(struct mthca_dev *dev, dev->eq_table.arm_mask |= eq->eqn_mask; mthca_dbg(dev, "Allocated EQ %d with %d entries\n", - eq->eqn, nent); + eq->eqn, eq->nent); return err; diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c index bcef06bf15e..5fa00669f9b 100644 --- a/drivers/infiniband/hw/mthca/mthca_qp.c +++ b/drivers/infiniband/hw/mthca/mthca_qp.c @@ -227,7 +227,6 @@ static void mthca_wq_init(struct mthca_wq *wq) wq->last_comp = wq->max - 1; wq->head = 0; wq->tail = 0; - wq->last = NULL; } void mthca_qp_event(struct mthca_dev *dev, u32 qpn, @@ -687,7 +686,7 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask) } if (attr_mask & IB_QP_TIMEOUT) { - qp_context->pri_path.ackto = attr->timeout; + qp_context->pri_path.ackto = attr->timeout << 3; qp_param->opt_param_mask |= cpu_to_be32(MTHCA_QP_OPTPAR_ACK_TIMEOUT); } @@ -1103,6 +1102,9 @@ static int mthca_alloc_qp_common(struct mthca_dev *dev, } } + qp->sq.last = get_send_wqe(qp, qp->sq.max - 1); + qp->rq.last = get_recv_wqe(qp, qp->rq.max - 1); + return 0; } @@ -1583,15 +1585,13 @@ int mthca_tavor_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - if (prev_wqe) { - ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32(((ind << qp->sq.wqe_shift) + - qp->send_wqe_offset) | - mthca_opcode[wr->opcode]); - wmb(); - ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size); - } + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | + mthca_opcode[wr->opcode]); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32((size0 ? 0 : MTHCA_NEXT_DBD) | size); if (!size0) { size0 = size; @@ -1688,13 +1688,11 @@ int mthca_tavor_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr, qp->wrid[ind] = wr->wr_id; - if (likely(prev_wqe)) { - ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32((ind << qp->rq.wqe_shift) | 1); - wmb(); - ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32(MTHCA_NEXT_DBD | size); - } + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32((ind << qp->rq.wqe_shift) | 1); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD | size); if (!size0) size0 = size; @@ -1905,15 +1903,13 @@ int mthca_arbel_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, goto out; } - if (likely(prev_wqe)) { - ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32(((ind << qp->sq.wqe_shift) + - qp->send_wqe_offset) | - mthca_opcode[wr->opcode]); - wmb(); - ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32(MTHCA_NEXT_DBD | size); - } + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32(((ind << qp->sq.wqe_shift) + + qp->send_wqe_offset) | + mthca_opcode[wr->opcode]); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD | size); if (!size0) { size0 = size; @@ -2127,5 +2123,6 @@ void __devexit mthca_cleanup_qp_table(struct mthca_dev *dev) for (i = 0; i < 2; ++i) mthca_CONF_SPECIAL_QP(dev, i, 0, &status); + mthca_array_cleanup(&dev->qp_table.qp, dev->limits.num_qps); mthca_alloc_cleanup(&dev->qp_table.alloc); } diff --git a/drivers/infiniband/hw/mthca/mthca_srq.c b/drivers/infiniband/hw/mthca/mthca_srq.c index 75cd2d84ef1..18998d48c53 100644 --- a/drivers/infiniband/hw/mthca/mthca_srq.c +++ b/drivers/infiniband/hw/mthca/mthca_srq.c @@ -172,6 +172,8 @@ static int mthca_alloc_srq_buf(struct mthca_dev *dev, struct mthca_pd *pd, scatter->lkey = cpu_to_be32(MTHCA_INVAL_LKEY); } + srq->last = get_wqe(srq, srq->max - 1); + return 0; } @@ -189,7 +191,6 @@ int mthca_alloc_srq(struct mthca_dev *dev, struct mthca_pd *pd, srq->max = attr->max_wr; srq->max_gs = attr->max_sge; - srq->last = NULL; srq->counter = 0; if (mthca_is_memfree(dev)) @@ -409,7 +410,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, mthca_err(dev, "SRQ %06x full\n", srq->srqn); err = -ENOMEM; *bad_wr = wr; - return nreq; + break; } wqe = get_wqe(srq, ind); @@ -427,7 +428,7 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, err = -EINVAL; *bad_wr = wr; srq->last = prev_wqe; - return nreq; + break; } for (i = 0; i < wr->num_sge; ++i) { @@ -446,20 +447,16 @@ int mthca_tavor_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, ((struct mthca_data_seg *) wqe)->addr = 0; } - if (likely(prev_wqe)) { - ((struct mthca_next_seg *) prev_wqe)->nda_op = - cpu_to_be32((ind << srq->wqe_shift) | 1); - wmb(); - ((struct mthca_next_seg *) prev_wqe)->ee_nds = - cpu_to_be32(MTHCA_NEXT_DBD); - } + ((struct mthca_next_seg *) prev_wqe)->nda_op = + cpu_to_be32((ind << srq->wqe_shift) | 1); + wmb(); + ((struct mthca_next_seg *) prev_wqe)->ee_nds = + cpu_to_be32(MTHCA_NEXT_DBD); srq->wrid[ind] = wr->wr_id; srq->first_free = next_ind; } - return nreq; - if (likely(nreq)) { __be32 doorbell[2]; @@ -503,7 +500,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, mthca_err(dev, "SRQ %06x full\n", srq->srqn); err = -ENOMEM; *bad_wr = wr; - return nreq; + break; } wqe = get_wqe(srq, ind); @@ -519,7 +516,7 @@ int mthca_arbel_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, if (unlikely(wr->num_sge > srq->max_gs)) { err = -EINVAL; *bad_wr = wr; - return nreq; + break; } for (i = 0; i < wr->num_sge; ++i) { diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index bea960b8191..4ea1c1ca85b 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -257,7 +257,7 @@ void ipoib_mcast_send(struct net_device *dev, union ib_gid *mgid, void ipoib_mcast_restart_task(void *dev_ptr); int ipoib_mcast_start_thread(struct net_device *dev); -int ipoib_mcast_stop_thread(struct net_device *dev); +int ipoib_mcast_stop_thread(struct net_device *dev, int flush); void ipoib_mcast_dev_down(struct net_device *dev); void ipoib_mcast_dev_flush(struct net_device *dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ef0e3894863..f7440096b5e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -432,7 +432,7 @@ int ipoib_ib_dev_down(struct net_device *dev) flush_workqueue(ipoib_workqueue); } - ipoib_mcast_stop_thread(dev); + ipoib_mcast_stop_thread(dev, 1); /* * Flush the multicast groups first so we stop any multicast joins. The @@ -599,7 +599,7 @@ void ipoib_ib_dev_cleanup(struct net_device *dev) ipoib_dbg(priv, "cleaning up ib_dev\n"); - ipoib_mcast_stop_thread(dev); + ipoib_mcast_stop_thread(dev, 1); /* Delete the broadcast address and the local address */ ipoib_mcast_dev_down(dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 49d120d2b92..704f48e0b6a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -1005,6 +1005,7 @@ debug_failed: register_failed: ib_unregister_event_handler(&priv->event_handler); + flush_scheduled_work(); event_failed: ipoib_dev_cleanup(priv->dev); @@ -1057,6 +1058,7 @@ static void ipoib_remove_one(struct ib_device *device) list_for_each_entry_safe(priv, tmp, dev_list, list) { ib_unregister_event_handler(&priv->event_handler); + flush_scheduled_work(); unregister_netdev(priv->dev); ipoib_dev_cleanup(priv->dev); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index aca7aea18a6..36ce29836bf 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -145,7 +145,7 @@ static struct ipoib_mcast *ipoib_mcast_alloc(struct net_device *dev, mcast->dev = dev; mcast->created = jiffies; - mcast->backoff = HZ; + mcast->backoff = 1; mcast->logcount = 0; INIT_LIST_HEAD(&mcast->list); @@ -396,7 +396,7 @@ static void ipoib_mcast_join_complete(int status, IPOIB_GID_ARG(mcast->mcmember.mgid), status); if (!status && !ipoib_mcast_join_finish(mcast, mcmember)) { - mcast->backoff = HZ; + mcast->backoff = 1; down(&mcast_mutex); if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) queue_work(ipoib_workqueue, &priv->mcast_task); @@ -496,7 +496,7 @@ static void ipoib_mcast_join(struct net_device *dev, struct ipoib_mcast *mcast, if (test_bit(IPOIB_MCAST_RUN, &priv->flags)) queue_delayed_work(ipoib_workqueue, &priv->mcast_task, - mcast->backoff); + mcast->backoff * HZ); up(&mcast_mutex); } else mcast->query_id = ret; @@ -598,7 +598,7 @@ int ipoib_mcast_start_thread(struct net_device *dev) return 0; } -int ipoib_mcast_stop_thread(struct net_device *dev) +int ipoib_mcast_stop_thread(struct net_device *dev, int flush) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_mcast *mcast; @@ -610,7 +610,8 @@ int ipoib_mcast_stop_thread(struct net_device *dev) cancel_delayed_work(&priv->mcast_task); up(&mcast_mutex); - flush_workqueue(ipoib_workqueue); + if (flush) + flush_workqueue(ipoib_workqueue); if (priv->broadcast && priv->broadcast->query) { ib_sa_cancel_query(priv->broadcast->query_id, priv->broadcast->query); @@ -832,7 +833,7 @@ void ipoib_mcast_restart_task(void *dev_ptr) ipoib_dbg_mcast(priv, "restarting multicast task\n"); - ipoib_mcast_stop_thread(dev); + ipoib_mcast_stop_thread(dev, 0); spin_lock_irqsave(&priv->lock, flags); diff --git a/drivers/input/input.c b/drivers/input/input.c index 88636a20452..14ae5583e19 100644 --- a/drivers/input/input.c +++ b/drivers/input/input.c @@ -308,6 +308,7 @@ static struct input_device_id *input_match_device(struct input_device_id *id, st MATCH_BIT(ledbit, LED_MAX); MATCH_BIT(sndbit, SND_MAX); MATCH_BIT(ffbit, FF_MAX); + MATCH_BIT(swbit, SW_MAX); return id; } diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index 0a2536d6240..657817a591f 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -209,9 +209,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode) bcs->mode = mode; // Cancel all USB transfers on this B channel - b_out->urb[0]->transfer_flags |= URB_ASYNC_UNLINK; usb_unlink_urb(b_out->urb[0]); - b_out->urb[1]->transfer_flags |= URB_ASYNC_UNLINK; usb_unlink_urb(b_out->urb[1]); b_out->busy = 0; diff --git a/drivers/isdn/hisax/st5481_usb.c b/drivers/isdn/hisax/st5481_usb.c index ffd5b2d4555..89fbeb58485 100644 --- a/drivers/isdn/hisax/st5481_usb.c +++ b/drivers/isdn/hisax/st5481_usb.c @@ -645,9 +645,7 @@ void st5481_in_mode(struct st5481_in *in, int mode) in->mode = mode; - in->urb[0]->transfer_flags |= URB_ASYNC_UNLINK; usb_unlink_urb(in->urb[0]); - in->urb[1]->transfer_flags |= URB_ASYNC_UNLINK; usb_unlink_urb(in->urb[1]); if (in->mode != L1_MODE_NULL) { diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index fb535737d17..a85ac18dd21 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -8,21 +8,15 @@ */ /* - * For now, this driver includes: - * - RTC get & set - * - reboot & shutdown commands - * all synchronous with IRQ disabled (ugh) - * * TODO: - * rework in a way the PMU driver works, that is asynchronous - * with a queue of commands. I'll do that as soon as I have an - * SMU based machine at hand. Some more cleanup is needed too, - * like maybe fitting it into a platform device, etc... - * Also check what's up with cache coherency, and if we really - * can't do better than flushing the cache, maybe build a table - * of command len/reply len like the PMU driver to only flush - * what is actually necessary. - * --BenH. + * - maybe add timeout to commands ? + * - blocking version of time functions + * - polling version of i2c commands (including timer that works with + * interrutps off) + * - maybe avoid some data copies with i2c by directly using the smu cmd + * buffer and a lower level internal interface + * - understand SMU -> CPU events and implement reception of them via + * the userland interface */ #include <linux/config.h> @@ -36,6 +30,11 @@ #include <linux/jiffies.h> #include <linux/interrupt.h> #include <linux/rtc.h> +#include <linux/completion.h> +#include <linux/miscdevice.h> +#include <linux/delay.h> +#include <linux/sysdev.h> +#include <linux/poll.h> #include <asm/byteorder.h> #include <asm/io.h> @@ -45,8 +44,13 @@ #include <asm/smu.h> #include <asm/sections.h> #include <asm/abs_addr.h> +#include <asm/uaccess.h> +#include <asm/of_device.h> + +#define VERSION "0.6" +#define AUTHOR "(c) 2005 Benjamin Herrenschmidt, IBM Corp." -#define DEBUG_SMU 1 +#undef DEBUG_SMU #ifdef DEBUG_SMU #define DPRINTK(fmt, args...) do { printk(KERN_DEBUG fmt , ##args); } while (0) @@ -57,20 +61,30 @@ /* * This is the command buffer passed to the SMU hardware */ +#define SMU_MAX_DATA 254 + struct smu_cmd_buf { u8 cmd; u8 length; - u8 data[0x0FFE]; + u8 data[SMU_MAX_DATA]; }; struct smu_device { spinlock_t lock; struct device_node *of_node; - int db_ack; /* doorbell ack GPIO */ - int db_req; /* doorbell req GPIO */ + struct of_device *of_dev; + int doorbell; /* doorbell gpio */ u32 __iomem *db_buf; /* doorbell buffer */ + int db_irq; + int msg; + int msg_irq; struct smu_cmd_buf *cmd_buf; /* command buffer virtual */ u32 cmd_buf_abs; /* command buffer absolute */ + struct list_head cmd_list; + struct smu_cmd *cmd_cur; /* pending command */ + struct list_head cmd_i2c_list; + struct smu_i2c_cmd *cmd_i2c_cur; /* pending i2c command */ + struct timer_list i2c_timer; }; /* @@ -79,113 +93,243 @@ struct smu_device { */ static struct smu_device *smu; + /* - * SMU low level communication stuff + * SMU driver low level stuff */ -static inline int smu_cmd_stat(struct smu_cmd_buf *cmd_buf, u8 cmd_ack) -{ - rmb(); - return cmd_buf->cmd == cmd_ack && cmd_buf->length != 0; -} -static inline u8 smu_save_ack_cmd(struct smu_cmd_buf *cmd_buf) +static void smu_start_cmd(void) { - return (~cmd_buf->cmd) & 0xff; -} + unsigned long faddr, fend; + struct smu_cmd *cmd; -static void smu_send_cmd(struct smu_device *dev) -{ - /* SMU command buf is currently cacheable, we need a physical - * address. This isn't exactly a DMA mapping here, I suspect + if (list_empty(&smu->cmd_list)) + return; + + /* Fetch first command in queue */ + cmd = list_entry(smu->cmd_list.next, struct smu_cmd, link); + smu->cmd_cur = cmd; + list_del(&cmd->link); + + DPRINTK("SMU: starting cmd %x, %d bytes data\n", cmd->cmd, + cmd->data_len); + DPRINTK("SMU: data buffer: %02x %02x %02x %02x ...\n", + ((u8 *)cmd->data_buf)[0], ((u8 *)cmd->data_buf)[1], + ((u8 *)cmd->data_buf)[2], ((u8 *)cmd->data_buf)[3]); + + /* Fill the SMU command buffer */ + smu->cmd_buf->cmd = cmd->cmd; + smu->cmd_buf->length = cmd->data_len; + memcpy(smu->cmd_buf->data, cmd->data_buf, cmd->data_len); + + /* Flush command and data to RAM */ + faddr = (unsigned long)smu->cmd_buf; + fend = faddr + smu->cmd_buf->length + 2; + flush_inval_dcache_range(faddr, fend); + + /* This isn't exactly a DMA mapping here, I suspect * the SMU is actually communicating with us via i2c to the * northbridge or the CPU to access RAM. */ - writel(dev->cmd_buf_abs, dev->db_buf); + writel(smu->cmd_buf_abs, smu->db_buf); /* Ring the SMU doorbell */ - pmac_do_feature_call(PMAC_FTR_WRITE_GPIO, NULL, dev->db_req, 4); - pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, dev->db_req, 4); + pmac_do_feature_call(PMAC_FTR_WRITE_GPIO, NULL, smu->doorbell, 4); } -static int smu_cmd_done(struct smu_device *dev) + +static irqreturn_t smu_db_intr(int irq, void *arg, struct pt_regs *regs) { - unsigned long wait = 0; - int gpio; + unsigned long flags; + struct smu_cmd *cmd; + void (*done)(struct smu_cmd *cmd, void *misc) = NULL; + void *misc = NULL; + u8 gpio; + int rc = 0; - /* Check the SMU doorbell */ - do { - gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, - NULL, dev->db_ack); - if ((gpio & 7) == 7) - return 0; - udelay(100); - } while(++wait < 10000); + /* SMU completed the command, well, we hope, let's make sure + * of it + */ + spin_lock_irqsave(&smu->lock, flags); - printk(KERN_ERR "SMU timeout !\n"); - return -ENXIO; + gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, smu->doorbell); + if ((gpio & 7) != 7) + return IRQ_HANDLED; + + cmd = smu->cmd_cur; + smu->cmd_cur = NULL; + if (cmd == NULL) + goto bail; + + if (rc == 0) { + unsigned long faddr; + int reply_len; + u8 ack; + + /* CPU might have brought back the cache line, so we need + * to flush again before peeking at the SMU response. We + * flush the entire buffer for now as we haven't read the + * reply lenght (it's only 2 cache lines anyway) + */ + faddr = (unsigned long)smu->cmd_buf; + flush_inval_dcache_range(faddr, faddr + 256); + + /* Now check ack */ + ack = (~cmd->cmd) & 0xff; + if (ack != smu->cmd_buf->cmd) { + DPRINTK("SMU: incorrect ack, want %x got %x\n", + ack, smu->cmd_buf->cmd); + rc = -EIO; + } + reply_len = rc == 0 ? smu->cmd_buf->length : 0; + DPRINTK("SMU: reply len: %d\n", reply_len); + if (reply_len > cmd->reply_len) { + printk(KERN_WARNING "SMU: reply buffer too small," + "got %d bytes for a %d bytes buffer\n", + reply_len, cmd->reply_len); + reply_len = cmd->reply_len; + } + cmd->reply_len = reply_len; + if (cmd->reply_buf && reply_len) + memcpy(cmd->reply_buf, smu->cmd_buf->data, reply_len); + } + + /* Now complete the command. Write status last in order as we lost + * ownership of the command structure as soon as it's no longer -1 + */ + done = cmd->done; + misc = cmd->misc; + mb(); + cmd->status = rc; + bail: + /* Start next command if any */ + smu_start_cmd(); + spin_unlock_irqrestore(&smu->lock, flags); + + /* Call command completion handler if any */ + if (done) + done(cmd, misc); + + /* It's an edge interrupt, nothing to do */ + return IRQ_HANDLED; } -static int smu_do_cmd(struct smu_device *dev) + +static irqreturn_t smu_msg_intr(int irq, void *arg, struct pt_regs *regs) { - int rc; - u8 cmd_ack; + /* I don't quite know what to do with this one, we seem to never + * receive it, so I suspect we have to arm it someway in the SMU + * to start getting events that way. + */ + + printk(KERN_INFO "SMU: message interrupt !\n"); - DPRINTK("SMU do_cmd %02x len=%d %02x\n", - dev->cmd_buf->cmd, dev->cmd_buf->length, - dev->cmd_buf->data[0]); + /* It's an edge interrupt, nothing to do */ + return IRQ_HANDLED; +} - cmd_ack = smu_save_ack_cmd(dev->cmd_buf); - /* Clear cmd_buf cache lines */ - flush_inval_dcache_range((unsigned long)dev->cmd_buf, - ((unsigned long)dev->cmd_buf) + - sizeof(struct smu_cmd_buf)); - smu_send_cmd(dev); - rc = smu_cmd_done(dev); - if (rc == 0) - rc = smu_cmd_stat(dev->cmd_buf, cmd_ack) ? 0 : -1; +/* + * Queued command management. + * + */ - DPRINTK("SMU do_cmd %02x len=%d %02x => %d (%02x)\n", - dev->cmd_buf->cmd, dev->cmd_buf->length, - dev->cmd_buf->data[0], rc, cmd_ack); +int smu_queue_cmd(struct smu_cmd *cmd) +{ + unsigned long flags; - return rc; + if (smu == NULL) + return -ENODEV; + if (cmd->data_len > SMU_MAX_DATA || + cmd->reply_len > SMU_MAX_DATA) + return -EINVAL; + + cmd->status = 1; + spin_lock_irqsave(&smu->lock, flags); + list_add_tail(&cmd->link, &smu->cmd_list); + if (smu->cmd_cur == NULL) + smu_start_cmd(); + spin_unlock_irqrestore(&smu->lock, flags); + + return 0; } +EXPORT_SYMBOL(smu_queue_cmd); -/* RTC low level commands */ -static inline int bcd2hex (int n) + +int smu_queue_simple(struct smu_simple_cmd *scmd, u8 command, + unsigned int data_len, + void (*done)(struct smu_cmd *cmd, void *misc), + void *misc, ...) { - return (((n & 0xf0) >> 4) * 10) + (n & 0xf); + struct smu_cmd *cmd = &scmd->cmd; + va_list list; + int i; + + if (data_len > sizeof(scmd->buffer)) + return -EINVAL; + + memset(scmd, 0, sizeof(*scmd)); + cmd->cmd = command; + cmd->data_len = data_len; + cmd->data_buf = scmd->buffer; + cmd->reply_len = sizeof(scmd->buffer); + cmd->reply_buf = scmd->buffer; + cmd->done = done; + cmd->misc = misc; + + va_start(list, misc); + for (i = 0; i < data_len; ++i) + scmd->buffer[i] = (u8)va_arg(list, int); + va_end(list); + + return smu_queue_cmd(cmd); } +EXPORT_SYMBOL(smu_queue_simple); -static inline int hex2bcd (int n) + +void smu_poll(void) { - return ((n / 10) << 4) + (n % 10); + u8 gpio; + + if (smu == NULL) + return; + + gpio = pmac_do_feature_call(PMAC_FTR_READ_GPIO, NULL, smu->doorbell); + if ((gpio & 7) == 7) + smu_db_intr(smu->db_irq, smu, NULL); } +EXPORT_SYMBOL(smu_poll); + -#if 0 -static inline void smu_fill_set_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf) +void smu_done_complete(struct smu_cmd *cmd, void *misc) { - cmd_buf->cmd = 0x8e; - cmd_buf->length = 8; - cmd_buf->data[0] = 0x00; - memset(cmd_buf->data + 1, 0, 7); + struct completion *comp = misc; + + complete(comp); } +EXPORT_SYMBOL(smu_done_complete); + -static inline void smu_fill_get_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf) +void smu_spinwait_cmd(struct smu_cmd *cmd) { - cmd_buf->cmd = 0x8e; - cmd_buf->length = 1; - cmd_buf->data[0] = 0x01; + while(cmd->status == 1) + smu_poll(); +} +EXPORT_SYMBOL(smu_spinwait_cmd); + + +/* RTC low level commands */ +static inline int bcd2hex (int n) +{ + return (((n & 0xf0) >> 4) * 10) + (n & 0xf); } -static inline void smu_fill_dis_pwrup_timer_cmd(struct smu_cmd_buf *cmd_buf) + +static inline int hex2bcd (int n) { - cmd_buf->cmd = 0x8e; - cmd_buf->length = 1; - cmd_buf->data[0] = 0x02; + return ((n / 10) << 4) + (n % 10); } -#endif + static inline void smu_fill_set_rtc_cmd(struct smu_cmd_buf *cmd_buf, struct rtc_time *time) @@ -202,100 +346,96 @@ static inline void smu_fill_set_rtc_cmd(struct smu_cmd_buf *cmd_buf, cmd_buf->data[7] = hex2bcd(time->tm_year - 100); } -static inline void smu_fill_get_rtc_cmd(struct smu_cmd_buf *cmd_buf) -{ - cmd_buf->cmd = 0x8e; - cmd_buf->length = 1; - cmd_buf->data[0] = 0x81; -} -static void smu_parse_get_rtc_reply(struct smu_cmd_buf *cmd_buf, - struct rtc_time *time) +int smu_get_rtc_time(struct rtc_time *time, int spinwait) { - time->tm_sec = bcd2hex(cmd_buf->data[0]); - time->tm_min = bcd2hex(cmd_buf->data[1]); - time->tm_hour = bcd2hex(cmd_buf->data[2]); - time->tm_wday = bcd2hex(cmd_buf->data[3]); - time->tm_mday = bcd2hex(cmd_buf->data[4]); - time->tm_mon = bcd2hex(cmd_buf->data[5]) - 1; - time->tm_year = bcd2hex(cmd_buf->data[6]) + 100; -} - -int smu_get_rtc_time(struct rtc_time *time) -{ - unsigned long flags; + struct smu_simple_cmd cmd; int rc; if (smu == NULL) return -ENODEV; memset(time, 0, sizeof(struct rtc_time)); - spin_lock_irqsave(&smu->lock, flags); - smu_fill_get_rtc_cmd(smu->cmd_buf); - rc = smu_do_cmd(smu); - if (rc == 0) - smu_parse_get_rtc_reply(smu->cmd_buf, time); - spin_unlock_irqrestore(&smu->lock, flags); + rc = smu_queue_simple(&cmd, SMU_CMD_RTC_COMMAND, 1, NULL, NULL, + SMU_CMD_RTC_GET_DATETIME); + if (rc) + return rc; + smu_spinwait_simple(&cmd); - return rc; + time->tm_sec = bcd2hex(cmd.buffer[0]); + time->tm_min = bcd2hex(cmd.buffer[1]); + time->tm_hour = bcd2hex(cmd.buffer[2]); + time->tm_wday = bcd2hex(cmd.buffer[3]); + time->tm_mday = bcd2hex(cmd.buffer[4]); + time->tm_mon = bcd2hex(cmd.buffer[5]) - 1; + time->tm_year = bcd2hex(cmd.buffer[6]) + 100; + + return 0; } -int smu_set_rtc_time(struct rtc_time *time) + +int smu_set_rtc_time(struct rtc_time *time, int spinwait) { - unsigned long flags; + struct smu_simple_cmd cmd; int rc; if (smu == NULL) return -ENODEV; - spin_lock_irqsave(&smu->lock, flags); - smu_fill_set_rtc_cmd(smu->cmd_buf, time); - rc = smu_do_cmd(smu); - spin_unlock_irqrestore(&smu->lock, flags); + rc = smu_queue_simple(&cmd, SMU_CMD_RTC_COMMAND, 8, NULL, NULL, + SMU_CMD_RTC_SET_DATETIME, + hex2bcd(time->tm_sec), + hex2bcd(time->tm_min), + hex2bcd(time->tm_hour), + time->tm_wday, + hex2bcd(time->tm_mday), + hex2bcd(time->tm_mon) + 1, + hex2bcd(time->tm_year - 100)); + if (rc) + return rc; + smu_spinwait_simple(&cmd); - return rc; + return 0; } + void smu_shutdown(void) { - const unsigned char *command = "SHUTDOWN"; - unsigned long flags; + struct smu_simple_cmd cmd; if (smu == NULL) return; - spin_lock_irqsave(&smu->lock, flags); - smu->cmd_buf->cmd = 0xaa; - smu->cmd_buf->length = strlen(command); - strcpy(smu->cmd_buf->data, command); - smu_do_cmd(smu); + if (smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 9, NULL, NULL, + 'S', 'H', 'U', 'T', 'D', 'O', 'W', 'N', 0)) + return; + smu_spinwait_simple(&cmd); for (;;) ; - spin_unlock_irqrestore(&smu->lock, flags); } + void smu_restart(void) { - const unsigned char *command = "RESTART"; - unsigned long flags; + struct smu_simple_cmd cmd; if (smu == NULL) return; - spin_lock_irqsave(&smu->lock, flags); - smu->cmd_buf->cmd = 0xaa; - smu->cmd_buf->length = strlen(command); - strcpy(smu->cmd_buf->data, command); - smu_do_cmd(smu); + if (smu_queue_simple(&cmd, SMU_CMD_POWER_COMMAND, 8, NULL, NULL, + 'R', 'E', 'S', 'T', 'A', 'R', 'T', 0)) + return; + smu_spinwait_simple(&cmd); for (;;) ; - spin_unlock_irqrestore(&smu->lock, flags); } + int smu_present(void) { return smu != NULL; } +EXPORT_SYMBOL(smu_present); int smu_init (void) @@ -307,6 +447,8 @@ int smu_init (void) if (np == NULL) return -ENODEV; + printk(KERN_INFO "SMU driver %s %s\n", VERSION, AUTHOR); + if (smu_cmdbuf_abs == 0) { printk(KERN_ERR "SMU: Command buffer not allocated !\n"); return -EINVAL; @@ -318,7 +460,13 @@ int smu_init (void) memset(smu, 0, sizeof(*smu)); spin_lock_init(&smu->lock); + INIT_LIST_HEAD(&smu->cmd_list); + INIT_LIST_HEAD(&smu->cmd_i2c_list); smu->of_node = np; + smu->db_irq = NO_IRQ; + smu->msg_irq = NO_IRQ; + init_timer(&smu->i2c_timer); + /* smu_cmdbuf_abs is in the low 2G of RAM, can be converted to a * 32 bits value safely */ @@ -331,8 +479,8 @@ int smu_init (void) goto fail; } data = (u32 *)get_property(np, "reg", NULL); - of_node_put(np); if (data == NULL) { + of_node_put(np); printk(KERN_ERR "SMU: Can't find doorbell GPIO address !\n"); goto fail; } @@ -341,8 +489,31 @@ int smu_init (void) * and ack. GPIOs are at 0x50, best would be to find that out * in the device-tree though. */ - smu->db_req = 0x50 + *data; - smu->db_ack = 0x50 + *data; + smu->doorbell = *data; + if (smu->doorbell < 0x50) + smu->doorbell += 0x50; + if (np->n_intrs > 0) + smu->db_irq = np->intrs[0].line; + + of_node_put(np); + + /* Now look for the smu-interrupt GPIO */ + do { + np = of_find_node_by_name(NULL, "smu-interrupt"); + if (np == NULL) + break; + data = (u32 *)get_property(np, "reg", NULL); + if (data == NULL) { + of_node_put(np); + break; + } + smu->msg = *data; + if (smu->msg < 0x50) + smu->msg += 0x50; + if (np->n_intrs > 0) + smu->msg_irq = np->intrs[0].line; + of_node_put(np); + } while(0); /* Doorbell buffer is currently hard-coded, I didn't find a proper * device-tree entry giving the address. Best would probably to use @@ -362,3 +533,584 @@ int smu_init (void) return -ENXIO; } + + +static int smu_late_init(void) +{ + if (!smu) + return 0; + + /* + * Try to request the interrupts + */ + + if (smu->db_irq != NO_IRQ) { + if (request_irq(smu->db_irq, smu_db_intr, + SA_SHIRQ, "SMU doorbell", smu) < 0) { + printk(KERN_WARNING "SMU: can't " + "request interrupt %d\n", + smu->db_irq); + smu->db_irq = NO_IRQ; + } + } + + if (smu->msg_irq != NO_IRQ) { + if (request_irq(smu->msg_irq, smu_msg_intr, + SA_SHIRQ, "SMU message", smu) < 0) { + printk(KERN_WARNING "SMU: can't " + "request interrupt %d\n", + smu->msg_irq); + smu->msg_irq = NO_IRQ; + } + } + + return 0; +} +arch_initcall(smu_late_init); + +/* + * sysfs visibility + */ + +static void smu_expose_childs(void *unused) +{ + struct device_node *np; + + for (np = NULL; (np = of_get_next_child(smu->of_node, np)) != NULL;) { + if (device_is_compatible(np, "smu-i2c")) { + char name[32]; + u32 *reg = (u32 *)get_property(np, "reg", NULL); + + if (reg == NULL) + continue; + sprintf(name, "smu-i2c-%02x", *reg); + of_platform_device_create(np, name, &smu->of_dev->dev); + } + } + +} + +static DECLARE_WORK(smu_expose_childs_work, smu_expose_childs, NULL); + +static int smu_platform_probe(struct of_device* dev, + const struct of_device_id *match) +{ + if (!smu) + return -ENODEV; + smu->of_dev = dev; + + /* + * Ok, we are matched, now expose all i2c busses. We have to defer + * that unfortunately or it would deadlock inside the device model + */ + schedule_work(&smu_expose_childs_work); + + return 0; +} + +static struct of_device_id smu_platform_match[] = +{ + { + .type = "smu", + }, + {}, +}; + +static struct of_platform_driver smu_of_platform_driver = +{ + .name = "smu", + .match_table = smu_platform_match, + .probe = smu_platform_probe, +}; + +static int __init smu_init_sysfs(void) +{ + int rc; + + /* + * Due to sysfs bogosity, a sysdev is not a real device, so + * we should in fact create both if we want sysdev semantics + * for power management. + * For now, we don't power manage machines with an SMU chip, + * I'm a bit too far from figuring out how that works with those + * new chipsets, but that will come back and bite us + */ + rc = of_register_driver(&smu_of_platform_driver); + return 0; +} + +device_initcall(smu_init_sysfs); + +struct of_device *smu_get_ofdev(void) +{ + if (!smu) + return NULL; + return smu->of_dev; +} + +EXPORT_SYMBOL_GPL(smu_get_ofdev); + +/* + * i2c interface + */ + +static void smu_i2c_complete_command(struct smu_i2c_cmd *cmd, int fail) +{ + void (*done)(struct smu_i2c_cmd *cmd, void *misc) = cmd->done; + void *misc = cmd->misc; + unsigned long flags; + + /* Check for read case */ + if (!fail && cmd->read) { + if (cmd->pdata[0] < 1) + fail = 1; + else + memcpy(cmd->info.data, &cmd->pdata[1], + cmd->info.datalen); + } + + DPRINTK("SMU: completing, success: %d\n", !fail); + + /* Update status and mark no pending i2c command with lock + * held so nobody comes in while we dequeue an eventual + * pending next i2c command + */ + spin_lock_irqsave(&smu->lock, flags); + smu->cmd_i2c_cur = NULL; + wmb(); + cmd->status = fail ? -EIO : 0; + + /* Is there another i2c command waiting ? */ + if (!list_empty(&smu->cmd_i2c_list)) { + struct smu_i2c_cmd *newcmd; + + /* Fetch it, new current, remove from list */ + newcmd = list_entry(smu->cmd_i2c_list.next, + struct smu_i2c_cmd, link); + smu->cmd_i2c_cur = newcmd; + list_del(&cmd->link); + + /* Queue with low level smu */ + list_add_tail(&cmd->scmd.link, &smu->cmd_list); + if (smu->cmd_cur == NULL) + smu_start_cmd(); + } + spin_unlock_irqrestore(&smu->lock, flags); + + /* Call command completion handler if any */ + if (done) + done(cmd, misc); + +} + + +static void smu_i2c_retry(unsigned long data) +{ + struct smu_i2c_cmd *cmd = (struct smu_i2c_cmd *)data; + + DPRINTK("SMU: i2c failure, requeuing...\n"); + + /* requeue command simply by resetting reply_len */ + cmd->pdata[0] = 0xff; + cmd->scmd.reply_len = 0x10; + smu_queue_cmd(&cmd->scmd); +} + + +static void smu_i2c_low_completion(struct smu_cmd *scmd, void *misc) +{ + struct smu_i2c_cmd *cmd = misc; + int fail = 0; + + DPRINTK("SMU: i2c compl. stage=%d status=%x pdata[0]=%x rlen: %x\n", + cmd->stage, scmd->status, cmd->pdata[0], scmd->reply_len); + + /* Check for possible status */ + if (scmd->status < 0) + fail = 1; + else if (cmd->read) { + if (cmd->stage == 0) + fail = cmd->pdata[0] != 0; + else + fail = cmd->pdata[0] >= 0x80; + } else { + fail = cmd->pdata[0] != 0; + } + + /* Handle failures by requeuing command, after 5ms interval + */ + if (fail && --cmd->retries > 0) { + DPRINTK("SMU: i2c failure, starting timer...\n"); + smu->i2c_timer.function = smu_i2c_retry; + smu->i2c_timer.data = (unsigned long)cmd; + smu->i2c_timer.expires = jiffies + msecs_to_jiffies(5); + add_timer(&smu->i2c_timer); + return; + } + + /* If failure or stage 1, command is complete */ + if (fail || cmd->stage != 0) { + smu_i2c_complete_command(cmd, fail); + return; + } + + DPRINTK("SMU: going to stage 1\n"); + + /* Ok, initial command complete, now poll status */ + scmd->reply_buf = cmd->pdata; + scmd->reply_len = 0x10; + scmd->data_buf = cmd->pdata; + scmd->data_len = 1; + cmd->pdata[0] = 0; + cmd->stage = 1; + cmd->retries = 20; + smu_queue_cmd(scmd); +} + + +int smu_queue_i2c(struct smu_i2c_cmd *cmd) +{ + unsigned long flags; + + if (smu == NULL) + return -ENODEV; + + /* Fill most fields of scmd */ + cmd->scmd.cmd = SMU_CMD_I2C_COMMAND; + cmd->scmd.done = smu_i2c_low_completion; + cmd->scmd.misc = cmd; + cmd->scmd.reply_buf = cmd->pdata; + cmd->scmd.reply_len = 0x10; + cmd->scmd.data_buf = (u8 *)(char *)&cmd->info; + cmd->scmd.status = 1; + cmd->stage = 0; + cmd->pdata[0] = 0xff; + cmd->retries = 20; + cmd->status = 1; + + /* Check transfer type, sanitize some "info" fields + * based on transfer type and do more checking + */ + cmd->info.caddr = cmd->info.devaddr; + cmd->read = cmd->info.devaddr & 0x01; + switch(cmd->info.type) { + case SMU_I2C_TRANSFER_SIMPLE: + memset(&cmd->info.sublen, 0, 4); + break; + case SMU_I2C_TRANSFER_COMBINED: + cmd->info.devaddr &= 0xfe; + case SMU_I2C_TRANSFER_STDSUB: + if (cmd->info.sublen > 3) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* Finish setting up command based on transfer direction + */ + if (cmd->read) { + if (cmd->info.datalen > SMU_I2C_READ_MAX) + return -EINVAL; + memset(cmd->info.data, 0xff, cmd->info.datalen); + cmd->scmd.data_len = 9; + } else { + if (cmd->info.datalen > SMU_I2C_WRITE_MAX) + return -EINVAL; + cmd->scmd.data_len = 9 + cmd->info.datalen; + } + + DPRINTK("SMU: i2c enqueuing command\n"); + DPRINTK("SMU: %s, len=%d bus=%x addr=%x sub0=%x type=%x\n", + cmd->read ? "read" : "write", cmd->info.datalen, + cmd->info.bus, cmd->info.caddr, + cmd->info.subaddr[0], cmd->info.type); + + + /* Enqueue command in i2c list, and if empty, enqueue also in + * main command list + */ + spin_lock_irqsave(&smu->lock, flags); + if (smu->cmd_i2c_cur == NULL) { + smu->cmd_i2c_cur = cmd; + list_add_tail(&cmd->scmd.link, &smu->cmd_list); + if (smu->cmd_cur == NULL) + smu_start_cmd(); + } else + list_add_tail(&cmd->link, &smu->cmd_i2c_list); + spin_unlock_irqrestore(&smu->lock, flags); + + return 0; +} + + + +/* + * Userland driver interface + */ + + +static LIST_HEAD(smu_clist); +static DEFINE_SPINLOCK(smu_clist_lock); + +enum smu_file_mode { + smu_file_commands, + smu_file_events, + smu_file_closing +}; + +struct smu_private +{ + struct list_head list; + enum smu_file_mode mode; + int busy; + struct smu_cmd cmd; + spinlock_t lock; + wait_queue_head_t wait; + u8 buffer[SMU_MAX_DATA]; +}; + + +static int smu_open(struct inode *inode, struct file *file) +{ + struct smu_private *pp; + unsigned long flags; + + pp = kmalloc(sizeof(struct smu_private), GFP_KERNEL); + if (pp == 0) + return -ENOMEM; + memset(pp, 0, sizeof(struct smu_private)); + spin_lock_init(&pp->lock); + pp->mode = smu_file_commands; + init_waitqueue_head(&pp->wait); + + spin_lock_irqsave(&smu_clist_lock, flags); + list_add(&pp->list, &smu_clist); + spin_unlock_irqrestore(&smu_clist_lock, flags); + file->private_data = pp; + + return 0; +} + + +static void smu_user_cmd_done(struct smu_cmd *cmd, void *misc) +{ + struct smu_private *pp = misc; + + wake_up_all(&pp->wait); +} + + +static ssize_t smu_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct smu_private *pp = file->private_data; + unsigned long flags; + struct smu_user_cmd_hdr hdr; + int rc = 0; + + if (pp->busy) + return -EBUSY; + else if (copy_from_user(&hdr, buf, sizeof(hdr))) + return -EFAULT; + else if (hdr.cmdtype == SMU_CMDTYPE_WANTS_EVENTS) { + pp->mode = smu_file_events; + return 0; + } else if (hdr.cmdtype != SMU_CMDTYPE_SMU) + return -EINVAL; + else if (pp->mode != smu_file_commands) + return -EBADFD; + else if (hdr.data_len > SMU_MAX_DATA) + return -EINVAL; + + spin_lock_irqsave(&pp->lock, flags); + if (pp->busy) { + spin_unlock_irqrestore(&pp->lock, flags); + return -EBUSY; + } + pp->busy = 1; + pp->cmd.status = 1; + spin_unlock_irqrestore(&pp->lock, flags); + + if (copy_from_user(pp->buffer, buf + sizeof(hdr), hdr.data_len)) { + pp->busy = 0; + return -EFAULT; + } + + pp->cmd.cmd = hdr.cmd; + pp->cmd.data_len = hdr.data_len; + pp->cmd.reply_len = SMU_MAX_DATA; + pp->cmd.data_buf = pp->buffer; + pp->cmd.reply_buf = pp->buffer; + pp->cmd.done = smu_user_cmd_done; + pp->cmd.misc = pp; + rc = smu_queue_cmd(&pp->cmd); + if (rc < 0) + return rc; + return count; +} + + +static ssize_t smu_read_command(struct file *file, struct smu_private *pp, + char __user *buf, size_t count) +{ + DECLARE_WAITQUEUE(wait, current); + struct smu_user_reply_hdr hdr; + unsigned long flags; + int size, rc = 0; + + if (!pp->busy) + return 0; + if (count < sizeof(struct smu_user_reply_hdr)) + return -EOVERFLOW; + spin_lock_irqsave(&pp->lock, flags); + if (pp->cmd.status == 1) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + add_wait_queue(&pp->wait, &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + rc = 0; + if (pp->cmd.status != 1) + break; + rc = -ERESTARTSYS; + if (signal_pending(current)) + break; + spin_unlock_irqrestore(&pp->lock, flags); + schedule(); + spin_lock_irqsave(&pp->lock, flags); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&pp->wait, &wait); + } + spin_unlock_irqrestore(&pp->lock, flags); + if (rc) + return rc; + if (pp->cmd.status != 0) + pp->cmd.reply_len = 0; + size = sizeof(hdr) + pp->cmd.reply_len; + if (count < size) + size = count; + rc = size; + hdr.status = pp->cmd.status; + hdr.reply_len = pp->cmd.reply_len; + if (copy_to_user(buf, &hdr, sizeof(hdr))) + return -EFAULT; + size -= sizeof(hdr); + if (size && copy_to_user(buf + sizeof(hdr), pp->buffer, size)) + return -EFAULT; + pp->busy = 0; + + return rc; +} + + +static ssize_t smu_read_events(struct file *file, struct smu_private *pp, + char __user *buf, size_t count) +{ + /* Not implemented */ + msleep_interruptible(1000); + return 0; +} + + +static ssize_t smu_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct smu_private *pp = file->private_data; + + if (pp->mode == smu_file_commands) + return smu_read_command(file, pp, buf, count); + if (pp->mode == smu_file_events) + return smu_read_events(file, pp, buf, count); + + return -EBADFD; +} + +static unsigned int smu_fpoll(struct file *file, poll_table *wait) +{ + struct smu_private *pp = file->private_data; + unsigned int mask = 0; + unsigned long flags; + + if (pp == 0) + return 0; + + if (pp->mode == smu_file_commands) { + poll_wait(file, &pp->wait, wait); + + spin_lock_irqsave(&pp->lock, flags); + if (pp->busy && pp->cmd.status != 1) + mask |= POLLIN; + spin_unlock_irqrestore(&pp->lock, flags); + } if (pp->mode == smu_file_events) { + /* Not yet implemented */ + } + return mask; +} + +static int smu_release(struct inode *inode, struct file *file) +{ + struct smu_private *pp = file->private_data; + unsigned long flags; + unsigned int busy; + + if (pp == 0) + return 0; + + file->private_data = NULL; + + /* Mark file as closing to avoid races with new request */ + spin_lock_irqsave(&pp->lock, flags); + pp->mode = smu_file_closing; + busy = pp->busy; + + /* Wait for any pending request to complete */ + if (busy && pp->cmd.status == 1) { + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(&pp->wait, &wait); + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + if (pp->cmd.status != 1) + break; + spin_lock_irqsave(&pp->lock, flags); + schedule(); + spin_unlock_irqrestore(&pp->lock, flags); + } + set_current_state(TASK_RUNNING); + remove_wait_queue(&pp->wait, &wait); + } + spin_unlock_irqrestore(&pp->lock, flags); + + spin_lock_irqsave(&smu_clist_lock, flags); + list_del(&pp->list); + spin_unlock_irqrestore(&smu_clist_lock, flags); + kfree(pp); + + return 0; +} + + +static struct file_operations smu_device_fops __pmacdata = { + .llseek = no_llseek, + .read = smu_read, + .write = smu_write, + .poll = smu_fpoll, + .open = smu_open, + .release = smu_release, +}; + +static struct miscdevice pmu_device __pmacdata = { + MISC_DYNAMIC_MINOR, "smu", &smu_device_fops +}; + +static int smu_device_init(void) +{ + if (!smu) + return -ENODEV; + if (misc_register(&pmu_device) < 0) + printk(KERN_ERR "via-pmu: cannot register misc device.\n"); + return 0; +} +device_initcall(smu_device_init); diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c index c9ca1118e44..f38696622eb 100644 --- a/drivers/macintosh/therm_adt746x.c +++ b/drivers/macintosh/therm_adt746x.c @@ -599,7 +599,7 @@ thermostat_init(void) sensor_location[2] = "?"; } - of_dev = of_platform_device_create(np, "temperatures"); + of_dev = of_platform_device_create(np, "temperatures", NULL); if (of_dev == NULL) { printk(KERN_ERR "Can't register temperatures device !\n"); diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c index 703e3197331..cc507ceef15 100644 --- a/drivers/macintosh/therm_pm72.c +++ b/drivers/macintosh/therm_pm72.c @@ -2051,7 +2051,7 @@ static int __init therm_pm72_init(void) return -ENODEV; } } - of_dev = of_platform_device_create(np, "temperature"); + of_dev = of_platform_device_create(np, "temperature", NULL); if (of_dev == NULL) { printk(KERN_ERR "Can't register FCU platform device !\n"); return -ENODEV; diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c index cbb72eb0426..6aaa1df1a64 100644 --- a/drivers/macintosh/therm_windtunnel.c +++ b/drivers/macintosh/therm_windtunnel.c @@ -504,7 +504,7 @@ g4fan_init( void ) } if( !(np=of_find_node_by_name(NULL, "fan")) ) return -ENODEV; - x.of_dev = of_platform_device_create( np, "temperature" ); + x.of_dev = of_platform_device_create(np, "temperature", NULL); of_node_put( np ); if( !x.of_dev ) { diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 76719451e38..50346188452 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -405,7 +405,7 @@ static int __init via_pmu_start(void) bright_req_2.complete = 1; batt_req.complete = 1; -#ifdef CONFIG_PPC32 +#if defined(CONFIG_PPC32) && !defined(CONFIG_PPC_MERGE) if (pmu_kind == PMU_KEYLARGO_BASED) openpic_set_irq_priority(vias->intrs[0].line, OPENPIC_PRIORITY_DEFAULT + 1); diff --git a/drivers/media/video/bttv-driver.c b/drivers/media/video/bttv-driver.c index a564321db2f..c062a017491 100644 --- a/drivers/media/video/bttv-driver.c +++ b/drivers/media/video/bttv-driver.c @@ -763,21 +763,21 @@ static void set_pll(struct bttv *btv) /* no PLL needed */ if (btv->pll.pll_current == 0) return; - vprintk(KERN_INFO "bttv%d: PLL can sleep, using XTAL (%d).\n", - btv->c.nr,btv->pll.pll_ifreq); + bttv_printk(KERN_INFO "bttv%d: PLL can sleep, using XTAL (%d).\n", + btv->c.nr,btv->pll.pll_ifreq); btwrite(0x00,BT848_TGCTRL); btwrite(0x00,BT848_PLL_XCI); btv->pll.pll_current = 0; return; } - vprintk(KERN_INFO "bttv%d: PLL: %d => %d ",btv->c.nr, - btv->pll.pll_ifreq, btv->pll.pll_ofreq); + bttv_printk(KERN_INFO "bttv%d: PLL: %d => %d ",btv->c.nr, + btv->pll.pll_ifreq, btv->pll.pll_ofreq); set_pll_freq(btv, btv->pll.pll_ifreq, btv->pll.pll_ofreq); for (i=0; i<10; i++) { /* Let other people run while the PLL stabilizes */ - vprintk("."); + bttv_printk("."); msleep(10); if (btread(BT848_DSTATUS) & BT848_DSTATUS_PLOCK) { @@ -785,12 +785,12 @@ static void set_pll(struct bttv *btv) } else { btwrite(0x08,BT848_TGCTRL); btv->pll.pll_current = btv->pll.pll_ofreq; - vprintk(" ok\n"); + bttv_printk(" ok\n"); return; } } btv->pll.pll_current = -1; - vprintk("failed\n"); + bttv_printk("failed\n"); return; } diff --git a/drivers/media/video/bttvp.h b/drivers/media/video/bttvp.h index 9b0b7ca035f..7a312f79340 100644 --- a/drivers/media/video/bttvp.h +++ b/drivers/media/video/bttvp.h @@ -221,7 +221,7 @@ extern void bttv_gpio_tracking(struct bttv *btv, char *comment); extern int init_bttv_i2c(struct bttv *btv); extern int fini_bttv_i2c(struct bttv *btv); -#define vprintk if (bttv_verbose) printk +#define bttv_printk if (bttv_verbose) printk #define dprintk if (bttv_debug >= 1) printk #define d2printk if (bttv_debug >= 2) printk diff --git a/drivers/message/fusion/Kconfig b/drivers/message/fusion/Kconfig index 33f209a39cb..1883d22cffe 100644 --- a/drivers/message/fusion/Kconfig +++ b/drivers/message/fusion/Kconfig @@ -35,6 +35,23 @@ config FUSION_FC LSIFC929X LSIFC929XL +config FUSION_SAS + tristate "Fusion MPT ScsiHost drivers for SAS" + depends on PCI && SCSI + select FUSION + select SCSI_SAS_ATTRS + ---help--- + SCSI HOST support for a SAS host adapters. + + List of supported controllers: + + LSISAS1064 + LSISAS1066 + LSISAS1068 + LSISAS1064E + LSISAS1066E + LSISAS1068E + config FUSION_MAX_SGE int "Maximum number of scatter gather entries (16 - 128)" depends on FUSION diff --git a/drivers/message/fusion/Makefile b/drivers/message/fusion/Makefile index 1d2f9db813c..8a2e2657f4c 100644 --- a/drivers/message/fusion/Makefile +++ b/drivers/message/fusion/Makefile @@ -34,5 +34,6 @@ obj-$(CONFIG_FUSION_SPI) += mptbase.o mptscsih.o mptspi.o obj-$(CONFIG_FUSION_FC) += mptbase.o mptscsih.o mptfc.o +obj-$(CONFIG_FUSION_SAS) += mptbase.o mptscsih.o mptsas.o obj-$(CONFIG_FUSION_CTL) += mptctl.o obj-$(CONFIG_FUSION_LAN) += mptlan.o diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c index f517d0692d5..790a2932ded 100644 --- a/drivers/message/fusion/mptbase.c +++ b/drivers/message/fusion/mptbase.c @@ -135,13 +135,12 @@ static void mpt_adapter_dispose(MPT_ADAPTER *ioc); static void MptDisplayIocCapabilities(MPT_ADAPTER *ioc); static int MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag); -//static u32 mpt_GetIocState(MPT_ADAPTER *ioc, int cooked); static int GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason); static int GetPortFacts(MPT_ADAPTER *ioc, int portnum, int sleepFlag); static int SendIocInit(MPT_ADAPTER *ioc, int sleepFlag); static int SendPortEnable(MPT_ADAPTER *ioc, int portnum, int sleepFlag); static int mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag); -static int mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag); +static int mpt_downloadboot(MPT_ADAPTER *ioc, MpiFwHeader_t *pFwHeader, int sleepFlag); static int mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag); static int KickStart(MPT_ADAPTER *ioc, int ignore, int sleepFlag); static int SendIocReset(MPT_ADAPTER *ioc, u8 reset_type, int sleepFlag); @@ -152,6 +151,7 @@ static int WaitForDoorbellReply(MPT_ADAPTER *ioc, int howlong, int sleepFlag); static int GetLanConfigPages(MPT_ADAPTER *ioc); static int GetFcPortPage0(MPT_ADAPTER *ioc, int portnum); static int GetIoUnitPage2(MPT_ADAPTER *ioc); +int mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode); static int mpt_GetScsiPortSettings(MPT_ADAPTER *ioc, int portnum); static int mpt_readScsiDevicePageHeaders(MPT_ADAPTER *ioc, int portnum); static void mpt_read_ioc_pg_1(MPT_ADAPTER *ioc); @@ -159,6 +159,8 @@ static void mpt_read_ioc_pg_4(MPT_ADAPTER *ioc); static void mpt_timer_expired(unsigned long data); static int SendEventNotification(MPT_ADAPTER *ioc, u8 EvSwitch); static int SendEventAck(MPT_ADAPTER *ioc, EventNotificationReply_t *evnp); +static int mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_value, int sleepFlag); +static int mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init); #ifdef CONFIG_PROC_FS static int procmpt_summary_read(char *buf, char **start, off_t offset, @@ -175,6 +177,7 @@ static int ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t * static void mpt_sp_ioc_info(MPT_ADAPTER *ioc, u32 ioc_status, MPT_FRAME_HDR *mf); static void mpt_fc_log_info(MPT_ADAPTER *ioc, u32 log_info); static void mpt_sp_log_info(MPT_ADAPTER *ioc, u32 log_info); +static void mpt_sas_log_info(MPT_ADAPTER *ioc, u32 log_info); /* module entry point */ static int __init fusion_init (void); @@ -206,6 +209,144 @@ pci_enable_io_access(struct pci_dev *pdev) pci_write_config_word(pdev, PCI_COMMAND, command_reg); } +/* + * Process turbo (context) reply... + */ +static void +mpt_turbo_reply(MPT_ADAPTER *ioc, u32 pa) +{ + MPT_FRAME_HDR *mf = NULL; + MPT_FRAME_HDR *mr = NULL; + int req_idx = 0; + int cb_idx; + + dmfprintk((MYIOC_s_INFO_FMT "Got TURBO reply req_idx=%08x\n", + ioc->name, pa)); + + switch (pa >> MPI_CONTEXT_REPLY_TYPE_SHIFT) { + case MPI_CONTEXT_REPLY_TYPE_SCSI_INIT: + req_idx = pa & 0x0000FFFF; + cb_idx = (pa & 0x00FF0000) >> 16; + mf = MPT_INDEX_2_MFPTR(ioc, req_idx); + break; + case MPI_CONTEXT_REPLY_TYPE_LAN: + cb_idx = mpt_lan_index; + /* + * Blind set of mf to NULL here was fatal + * after lan_reply says "freeme" + * Fix sort of combined with an optimization here; + * added explicit check for case where lan_reply + * was just returning 1 and doing nothing else. + * For this case skip the callback, but set up + * proper mf value first here:-) + */ + if ((pa & 0x58000000) == 0x58000000) { + req_idx = pa & 0x0000FFFF; + mf = MPT_INDEX_2_MFPTR(ioc, req_idx); + mpt_free_msg_frame(ioc, mf); + mb(); + return; + break; + } + mr = (MPT_FRAME_HDR *) CAST_U32_TO_PTR(pa); + break; + case MPI_CONTEXT_REPLY_TYPE_SCSI_TARGET: + cb_idx = mpt_stm_index; + mr = (MPT_FRAME_HDR *) CAST_U32_TO_PTR(pa); + break; + default: + cb_idx = 0; + BUG(); + } + + /* Check for (valid) IO callback! */ + if (cb_idx < 1 || cb_idx >= MPT_MAX_PROTOCOL_DRIVERS || + MptCallbacks[cb_idx] == NULL) { + printk(MYIOC_s_WARN_FMT "%s: Invalid cb_idx (%d)!\n", + __FUNCTION__, ioc->name, cb_idx); + goto out; + } + + if (MptCallbacks[cb_idx](ioc, mf, mr)) + mpt_free_msg_frame(ioc, mf); + out: + mb(); +} + +static void +mpt_reply(MPT_ADAPTER *ioc, u32 pa) +{ + MPT_FRAME_HDR *mf; + MPT_FRAME_HDR *mr; + int req_idx; + int cb_idx; + int freeme; + + u32 reply_dma_low; + u16 ioc_stat; + + /* non-TURBO reply! Hmmm, something may be up... + * Newest turbo reply mechanism; get address + * via left shift 1 (get rid of MPI_ADDRESS_REPLY_A_BIT)! + */ + + /* Map DMA address of reply header to cpu address. + * pa is 32 bits - but the dma address may be 32 or 64 bits + * get offset based only only the low addresses + */ + + reply_dma_low = (pa <<= 1); + mr = (MPT_FRAME_HDR *)((u8 *)ioc->reply_frames + + (reply_dma_low - ioc->reply_frames_low_dma)); + + req_idx = le16_to_cpu(mr->u.frame.hwhdr.msgctxu.fld.req_idx); + cb_idx = mr->u.frame.hwhdr.msgctxu.fld.cb_idx; + mf = MPT_INDEX_2_MFPTR(ioc, req_idx); + + dmfprintk((MYIOC_s_INFO_FMT "Got non-TURBO reply=%p req_idx=%x cb_idx=%x Function=%x\n", + ioc->name, mr, req_idx, cb_idx, mr->u.hdr.Function)); + DBG_DUMP_REPLY_FRAME(mr) + + /* Check/log IOC log info + */ + ioc_stat = le16_to_cpu(mr->u.reply.IOCStatus); + if (ioc_stat & MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) { + u32 log_info = le32_to_cpu(mr->u.reply.IOCLogInfo); + if (ioc->bus_type == FC) + mpt_fc_log_info(ioc, log_info); + else if (ioc->bus_type == SCSI) + mpt_sp_log_info(ioc, log_info); + else if (ioc->bus_type == SAS) + mpt_sas_log_info(ioc, log_info); + } + if (ioc_stat & MPI_IOCSTATUS_MASK) { + if (ioc->bus_type == SCSI && + cb_idx != mpt_stm_index && + cb_idx != mpt_lan_index) + mpt_sp_ioc_info(ioc, (u32)ioc_stat, mf); + } + + + /* Check for (valid) IO callback! */ + if (cb_idx < 1 || cb_idx >= MPT_MAX_PROTOCOL_DRIVERS || + MptCallbacks[cb_idx] == NULL) { + printk(MYIOC_s_WARN_FMT "%s: Invalid cb_idx (%d)!\n", + __FUNCTION__, ioc->name, cb_idx); + freeme = 0; + goto out; + } + + freeme = MptCallbacks[cb_idx](ioc, mf, mr); + + out: + /* Flush (non-TURBO) reply with a WRITE! */ + CHIPREG_WRITE32(&ioc->chip->ReplyFifo, pa); + + if (freeme) + mpt_free_msg_frame(ioc, mf); + mb(); +} + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* * mpt_interrupt - MPT adapter (IOC) specific interrupt handler. @@ -227,164 +368,21 @@ pci_enable_io_access(struct pci_dev *pdev) static irqreturn_t mpt_interrupt(int irq, void *bus_id, struct pt_regs *r) { - MPT_ADAPTER *ioc; - MPT_FRAME_HDR *mf; - MPT_FRAME_HDR *mr; - u32 pa; - int req_idx; - int cb_idx; - int type; - int freeme; - - ioc = (MPT_ADAPTER *)bus_id; + MPT_ADAPTER *ioc = bus_id; + u32 pa; /* * Drain the reply FIFO! - * - * NOTES: I've seen up to 10 replies processed in this loop, so far... - * Update: I've seen up to 9182 replies processed in this loop! ?? - * Update: Limit ourselves to processing max of N replies - * (bottom of loop). */ while (1) { - - if ((pa = CHIPREG_READ32_dmasync(&ioc->chip->ReplyFifo)) == 0xFFFFFFFF) + pa = CHIPREG_READ32_dmasync(&ioc->chip->ReplyFifo); + if (pa == 0xFFFFFFFF) return IRQ_HANDLED; - - cb_idx = 0; - freeme = 0; - - /* - * Check for non-TURBO reply! - */ - if (pa & MPI_ADDRESS_REPLY_A_BIT) { - u32 reply_dma_low; - u16 ioc_stat; - - /* non-TURBO reply! Hmmm, something may be up... - * Newest turbo reply mechanism; get address - * via left shift 1 (get rid of MPI_ADDRESS_REPLY_A_BIT)! - */ - - /* Map DMA address of reply header to cpu address. - * pa is 32 bits - but the dma address may be 32 or 64 bits - * get offset based only only the low addresses - */ - reply_dma_low = (pa = (pa << 1)); - mr = (MPT_FRAME_HDR *)((u8 *)ioc->reply_frames + - (reply_dma_low - ioc->reply_frames_low_dma)); - - req_idx = le16_to_cpu(mr->u.frame.hwhdr.msgctxu.fld.req_idx); - cb_idx = mr->u.frame.hwhdr.msgctxu.fld.cb_idx; - mf = MPT_INDEX_2_MFPTR(ioc, req_idx); - - dmfprintk((MYIOC_s_INFO_FMT "Got non-TURBO reply=%p req_idx=%x cb_idx=%x Function=%x\n", - ioc->name, mr, req_idx, cb_idx, mr->u.hdr.Function)); - DBG_DUMP_REPLY_FRAME(mr) - - /* Check/log IOC log info - */ - ioc_stat = le16_to_cpu(mr->u.reply.IOCStatus); - if (ioc_stat & MPI_IOCSTATUS_FLAG_LOG_INFO_AVAILABLE) { - u32 log_info = le32_to_cpu(mr->u.reply.IOCLogInfo); - if (ioc->bus_type == FC) - mpt_fc_log_info(ioc, log_info); - else if (ioc->bus_type == SCSI) - mpt_sp_log_info(ioc, log_info); - } - if (ioc_stat & MPI_IOCSTATUS_MASK) { - if (ioc->bus_type == SCSI) - mpt_sp_ioc_info(ioc, (u32)ioc_stat, mf); - } - } else { - /* - * Process turbo (context) reply... - */ - dmfprintk((MYIOC_s_INFO_FMT "Got TURBO reply req_idx=%08x\n", ioc->name, pa)); - type = (pa >> MPI_CONTEXT_REPLY_TYPE_SHIFT); - if (type == MPI_CONTEXT_REPLY_TYPE_SCSI_TARGET) { - cb_idx = mpt_stm_index; - mf = NULL; - mr = (MPT_FRAME_HDR *) CAST_U32_TO_PTR(pa); - } else if (type == MPI_CONTEXT_REPLY_TYPE_LAN) { - cb_idx = mpt_lan_index; - /* Blind set of mf to NULL here was fatal - * after lan_reply says "freeme" - * Fix sort of combined with an optimization here; - * added explicit check for case where lan_reply - * was just returning 1 and doing nothing else. - * For this case skip the callback, but set up - * proper mf value first here:-) - */ - if ((pa & 0x58000000) == 0x58000000) { - req_idx = pa & 0x0000FFFF; - mf = MPT_INDEX_2_MFPTR(ioc, req_idx); - freeme = 1; - /* - * IMPORTANT! Invalidate the callback! - */ - cb_idx = 0; - } else { - mf = NULL; - } - mr = (MPT_FRAME_HDR *) CAST_U32_TO_PTR(pa); - } else { - req_idx = pa & 0x0000FFFF; - cb_idx = (pa & 0x00FF0000) >> 16; - mf = MPT_INDEX_2_MFPTR(ioc, req_idx); - mr = NULL; - } - pa = 0; /* No reply flush! */ - } - -#ifdef MPT_DEBUG_IRQ - if (ioc->bus_type == SCSI) { - /* Verify mf, mr are reasonable. - */ - if ((mf) && ((mf >= MPT_INDEX_2_MFPTR(ioc, ioc->req_depth)) - || (mf < ioc->req_frames)) ) { - printk(MYIOC_s_WARN_FMT - "mpt_interrupt: Invalid mf (%p)!\n", ioc->name, (void *)mf); - cb_idx = 0; - pa = 0; - freeme = 0; - } - if ((pa) && (mr) && ((mr >= MPT_INDEX_2_RFPTR(ioc, ioc->req_depth)) - || (mr < ioc->reply_frames)) ) { - printk(MYIOC_s_WARN_FMT - "mpt_interrupt: Invalid rf (%p)!\n", ioc->name, (void *)mr); - cb_idx = 0; - pa = 0; - freeme = 0; - } - if (cb_idx > (MPT_MAX_PROTOCOL_DRIVERS-1)) { - printk(MYIOC_s_WARN_FMT - "mpt_interrupt: Invalid cb_idx (%d)!\n", ioc->name, cb_idx); - cb_idx = 0; - pa = 0; - freeme = 0; - } - } -#endif - - /* Check for (valid) IO callback! */ - if (cb_idx) { - /* Do the callback! */ - freeme = (*(MptCallbacks[cb_idx]))(ioc, mf, mr); - } - - if (pa) { - /* Flush (non-TURBO) reply with a WRITE! */ - CHIPREG_WRITE32(&ioc->chip->ReplyFifo, pa); - } - - if (freeme) { - /* Put Request back on FreeQ! */ - mpt_free_msg_frame(ioc, mf); - } - - mb(); - } /* drain reply FIFO */ + else if (pa & MPI_ADDRESS_REPLY_A_BIT) + mpt_reply(ioc, pa); + else + mpt_turbo_reply(ioc, pa); + } return IRQ_HANDLED; } @@ -509,6 +507,14 @@ mpt_base_reply(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *reply) pCfg->wait_done = 1; wake_up(&mpt_waitq); } + } else if (func == MPI_FUNCTION_SAS_IO_UNIT_CONTROL) { + /* we should be always getting a reply frame */ + memcpy(ioc->persist_reply_frame, reply, + min(MPT_DEFAULT_FRAME_SIZE, + 4*reply->u.reply.MsgLength)); + del_timer(&ioc->persist_timer); + ioc->persist_wait_done = 1; + wake_up(&mpt_waitq); } else { printk(MYIOC_s_ERR_FMT "Unexpected msg function (=%02Xh) reply received!\n", ioc->name, func); @@ -750,6 +756,7 @@ mpt_get_msg_frame(int handle, MPT_ADAPTER *ioc) mf = list_entry(ioc->FreeQ.next, MPT_FRAME_HDR, u.frame.linkage.list); list_del(&mf->u.frame.linkage.list); + mf->u.frame.linkage.arg1 = 0; mf->u.frame.hwhdr.msgctxu.fld.cb_idx = handle; /* byte */ req_offset = (u8 *)mf - (u8 *)ioc->req_frames; /* u16! */ @@ -845,6 +852,7 @@ mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf) /* Put Request back on FreeQ! */ spin_lock_irqsave(&ioc->FreeQlock, flags); + mf->u.frame.linkage.arg1 = 0xdeadbeaf; /* signature to know if this mf is freed */ list_add_tail(&mf->u.frame.linkage.list, &ioc->FreeQ); #ifdef MFCNT ioc->mfcnt--; @@ -971,12 +979,123 @@ mpt_send_handshake_request(int handle, MPT_ADAPTER *ioc, int reqBytes, u32 *req, /* Make sure there are no doorbells */ CHIPREG_WRITE32(&ioc->chip->IntStatus, 0); - + return r; } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /** + * mpt_host_page_access_control - provides mechanism for the host + * driver to control the IOC's Host Page Buffer access. + * @ioc: Pointer to MPT adapter structure + * @access_control_value: define bits below + * + * Access Control Value - bits[15:12] + * 0h Reserved + * 1h Enable Access { MPI_DB_HPBAC_ENABLE_ACCESS } + * 2h Disable Access { MPI_DB_HPBAC_DISABLE_ACCESS } + * 3h Free Buffer { MPI_DB_HPBAC_FREE_BUFFER } + * + * Returns 0 for success, non-zero for failure. + */ + +static int +mpt_host_page_access_control(MPT_ADAPTER *ioc, u8 access_control_value, int sleepFlag) +{ + int r = 0; + + /* return if in use */ + if (CHIPREG_READ32(&ioc->chip->Doorbell) + & MPI_DOORBELL_ACTIVE) + return -1; + + CHIPREG_WRITE32(&ioc->chip->IntStatus, 0); + + CHIPREG_WRITE32(&ioc->chip->Doorbell, + ((MPI_FUNCTION_HOST_PAGEBUF_ACCESS_CONTROL + <<MPI_DOORBELL_FUNCTION_SHIFT) | + (access_control_value<<12))); + + /* Wait for IOC to clear Doorbell Status bit */ + if ((r = WaitForDoorbellAck(ioc, 5, sleepFlag)) < 0) { + return -2; + }else + return 0; +} + +/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +/** + * mpt_host_page_alloc - allocate system memory for the fw + * If we already allocated memory in past, then resend the same pointer. + * ioc@: Pointer to pointer to IOC adapter + * ioc_init@: Pointer to ioc init config page + * + * Returns 0 for success, non-zero for failure. + */ +static int +mpt_host_page_alloc(MPT_ADAPTER *ioc, pIOCInit_t ioc_init) +{ + char *psge; + int flags_length; + u32 host_page_buffer_sz=0; + + if(!ioc->HostPageBuffer) { + + host_page_buffer_sz = + le32_to_cpu(ioc->facts.HostPageBufferSGE.FlagsLength) & 0xFFFFFF; + + if(!host_page_buffer_sz) + return 0; /* fw doesn't need any host buffers */ + + /* spin till we get enough memory */ + while(host_page_buffer_sz > 0) { + + if((ioc->HostPageBuffer = pci_alloc_consistent( + ioc->pcidev, + host_page_buffer_sz, + &ioc->HostPageBuffer_dma)) != NULL) { + + dinitprintk((MYIOC_s_INFO_FMT + "host_page_buffer @ %p, dma @ %x, sz=%d bytes\n", + ioc->name, + ioc->HostPageBuffer, + ioc->HostPageBuffer_dma, + host_page_buffer_sz)); + ioc->alloc_total += host_page_buffer_sz; + ioc->HostPageBuffer_sz = host_page_buffer_sz; + break; + } + + host_page_buffer_sz -= (4*1024); + } + } + + if(!ioc->HostPageBuffer) { + printk(MYIOC_s_ERR_FMT + "Failed to alloc memory for host_page_buffer!\n", + ioc->name); + return -999; + } + + psge = (char *)&ioc_init->HostPageBufferSGE; + flags_length = MPI_SGE_FLAGS_SIMPLE_ELEMENT | + MPI_SGE_FLAGS_SYSTEM_ADDRESS | + MPI_SGE_FLAGS_32_BIT_ADDRESSING | + MPI_SGE_FLAGS_HOST_TO_IOC | + MPI_SGE_FLAGS_END_OF_BUFFER; + if (sizeof(dma_addr_t) == sizeof(u64)) { + flags_length |= MPI_SGE_FLAGS_64_BIT_ADDRESSING; + } + flags_length = flags_length << MPI_SGE_FLAGS_SHIFT; + flags_length |= ioc->HostPageBuffer_sz; + mpt_add_sge(psge, flags_length, ioc->HostPageBuffer_dma); + ioc->facts.HostPageBufferSGE = ioc_init->HostPageBufferSGE; + +return 0; +} + +/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +/** * mpt_verify_adapter - Given a unique IOC identifier, set pointer to * the associated MPT adapter structure. * @iocid: IOC unique identifier (integer) @@ -1084,7 +1203,7 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) /* Initilize SCSI Config Data structure */ - memset(&ioc->spi_data, 0, sizeof(ScsiCfgData)); + memset(&ioc->spi_data, 0, sizeof(SpiCfgData)); /* Initialize the running configQ head. */ @@ -1213,6 +1332,33 @@ mpt_attach(struct pci_dev *pdev, const struct pci_device_id *id) ioc->prod_name = "LSI53C1035"; ioc->bus_type = SCSI; } + else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1064) { + ioc->prod_name = "LSISAS1064"; + ioc->bus_type = SAS; + ioc->errata_flag_1064 = 1; + } + else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1066) { + ioc->prod_name = "LSISAS1066"; + ioc->bus_type = SAS; + ioc->errata_flag_1064 = 1; + } + else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1068) { + ioc->prod_name = "LSISAS1068"; + ioc->bus_type = SAS; + ioc->errata_flag_1064 = 1; + } + else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1064E) { + ioc->prod_name = "LSISAS1064E"; + ioc->bus_type = SAS; + } + else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1066E) { + ioc->prod_name = "LSISAS1066E"; + ioc->bus_type = SAS; + } + else if (pdev->device == MPI_MANUFACTPAGE_DEVID_SAS1068E) { + ioc->prod_name = "LSISAS1068E"; + ioc->bus_type = SAS; + } if (ioc->errata_flag_1064) pci_disable_io_access(pdev); @@ -1604,8 +1750,23 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag) */ if (ret == 0) { rc = mpt_do_upload(ioc, sleepFlag); - if (rc != 0) + if (rc == 0) { + if (ioc->alt_ioc && ioc->alt_ioc->cached_fw) { + /* + * Maintain only one pointer to FW memory + * so there will not be two attempt to + * downloadboot onboard dual function + * chips (mpt_adapter_disable, + * mpt_diag_reset) + */ + ioc->cached_fw = NULL; + ddlprintk((MYIOC_s_INFO_FMT ": mpt_upload: alt_%s has cached_fw=%p \n", + ioc->name, ioc->alt_ioc->name, ioc->alt_ioc->cached_fw)); + } + } else { printk(KERN_WARNING MYNAM ": firmware upload failure!\n"); + ret = -5; + } } } } @@ -1640,7 +1801,22 @@ mpt_do_ioc_recovery(MPT_ADAPTER *ioc, u32 reason, int sleepFlag) * and we try GetLanConfigPages again... */ if ((ret == 0) && (reason == MPT_HOSTEVENT_IOC_BRINGUP)) { - if (ioc->bus_type == FC) { + if (ioc->bus_type == SAS) { + + /* clear persistency table */ + if(ioc->facts.IOCExceptions & + MPI_IOCFACTS_EXCEPT_PERSISTENT_TABLE_FULL) { + ret = mptbase_sas_persist_operation(ioc, + MPI_SAS_OP_CLEAR_NOT_PRESENT); + if(ret != 0) + return -1; + } + + /* Find IM volumes + */ + mpt_findImVolumes(ioc); + + } else if (ioc->bus_type == FC) { /* * Pre-fetch FC port WWN and stuff... * (FCPortPage0_t stuff) @@ -1783,7 +1959,7 @@ mpt_adapter_disable(MPT_ADAPTER *ioc) if (ioc->cached_fw != NULL) { ddlprintk((KERN_INFO MYNAM ": mpt_adapter_disable: Pushing FW onto adapter\n")); - if ((ret = mpt_downloadboot(ioc, NO_SLEEP)) < 0) { + if ((ret = mpt_downloadboot(ioc, (MpiFwHeader_t *)ioc->cached_fw, NO_SLEEP)) < 0) { printk(KERN_WARNING MYNAM ": firmware downloadboot failure (%d)!\n", ret); } @@ -1831,9 +2007,9 @@ mpt_adapter_disable(MPT_ADAPTER *ioc) } kfree(ioc->spi_data.nvram); - kfree(ioc->spi_data.pIocPg3); + kfree(ioc->raid_data.pIocPg3); ioc->spi_data.nvram = NULL; - ioc->spi_data.pIocPg3 = NULL; + ioc->raid_data.pIocPg3 = NULL; if (ioc->spi_data.pIocPg4 != NULL) { sz = ioc->spi_data.IocPg4Sz; @@ -1852,6 +2028,23 @@ mpt_adapter_disable(MPT_ADAPTER *ioc) kfree(ioc->ChainToChain); ioc->ChainToChain = NULL; + + if (ioc->HostPageBuffer != NULL) { + if((ret = mpt_host_page_access_control(ioc, + MPI_DB_HPBAC_FREE_BUFFER, NO_SLEEP)) != 0) { + printk(KERN_ERR MYNAM + ": %s: host page buffers free failed (%d)!\n", + __FUNCTION__, ret); + } + dexitprintk((KERN_INFO MYNAM ": %s HostPageBuffer free @ %p, sz=%d bytes\n", + ioc->name, ioc->HostPageBuffer, ioc->HostPageBuffer_sz)); + pci_free_consistent(ioc->pcidev, ioc->HostPageBuffer_sz, + ioc->HostPageBuffer, + ioc->HostPageBuffer_dma); + ioc->HostPageBuffer = NULL; + ioc->HostPageBuffer_sz = 0; + ioc->alloc_total -= ioc->HostPageBuffer_sz; + } } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -2034,7 +2227,7 @@ MakeIocReady(MPT_ADAPTER *ioc, int force, int sleepFlag) * Loop here waiting for IOC to come READY. */ ii = 0; - cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * 15; /* 15 seconds */ + cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * 5; /* 5 seconds */ while ((ioc_state = mpt_GetIocState(ioc, 1)) != MPI_IOC_STATE_READY) { if (ioc_state == MPI_IOC_STATE_OPERATIONAL) { @@ -2212,6 +2405,7 @@ GetIocFacts(MPT_ADAPTER *ioc, int sleepFlag, int reason) le32_to_cpu(facts->CurrentSenseBufferHighAddr); facts->CurReplyFrameSize = le16_to_cpu(facts->CurReplyFrameSize); + facts->IOCCapabilities = le32_to_cpu(facts->IOCCapabilities); /* * Handle NEW (!) IOCFactsReply fields in MPI-1.01.xx @@ -2383,13 +2577,25 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag) ddlprintk((MYIOC_s_INFO_FMT "upload_fw %d facts.Flags=%x\n", ioc->name, ioc->upload_fw, ioc->facts.Flags)); - if (ioc->bus_type == FC) + if(ioc->bus_type == SAS) + ioc_init.MaxDevices = ioc->facts.MaxDevices; + else if(ioc->bus_type == FC) ioc_init.MaxDevices = MPT_MAX_FC_DEVICES; else ioc_init.MaxDevices = MPT_MAX_SCSI_DEVICES; - ioc_init.MaxBuses = MPT_MAX_BUS; - + dinitprintk((MYIOC_s_INFO_FMT "facts.MsgVersion=%x\n", + ioc->name, ioc->facts.MsgVersion)); + if (ioc->facts.MsgVersion >= MPI_VERSION_01_05) { + // set MsgVersion and HeaderVersion host driver was built with + ioc_init.MsgVersion = cpu_to_le16(MPI_VERSION); + ioc_init.HeaderVersion = cpu_to_le16(MPI_HEADER_VERSION); + + if (ioc->facts.Flags & MPI_IOCFACTS_FLAGS_HOST_PAGE_BUFFER_PERSISTENT) { + ioc_init.HostPageBufferSGE = ioc->facts.HostPageBufferSGE; + } else if(mpt_host_page_alloc(ioc, &ioc_init)) + return -99; + } ioc_init.ReplyFrameSize = cpu_to_le16(ioc->reply_sz); /* in BYTES */ if (sizeof(dma_addr_t) == sizeof(u64)) { @@ -2403,17 +2609,21 @@ SendIocInit(MPT_ADAPTER *ioc, int sleepFlag) ioc_init.HostMfaHighAddr = cpu_to_le32(0); ioc_init.SenseBufferHighAddr = cpu_to_le32(0); } - + ioc->facts.CurrentHostMfaHighAddr = ioc_init.HostMfaHighAddr; ioc->facts.CurrentSenseBufferHighAddr = ioc_init.SenseBufferHighAddr; + ioc->facts.MaxDevices = ioc_init.MaxDevices; + ioc->facts.MaxBuses = ioc_init.MaxBuses; dhsprintk((MYIOC_s_INFO_FMT "Sending IOCInit (req @ %p)\n", ioc->name, &ioc_init)); r = mpt_handshake_req_reply_wait(ioc, sizeof(IOCInit_t), (u32*)&ioc_init, sizeof(MPIDefaultReply_t), (u16*)&init_reply, 10 /*seconds*/, sleepFlag); - if (r != 0) + if (r != 0) { + printk(MYIOC_s_ERR_FMT "Sending IOCInit failed(%d)!\n",ioc->name, r); return r; + } /* No need to byte swap the multibyte fields in the reply * since we don't even look at it's contents. @@ -2472,7 +2682,7 @@ SendPortEnable(MPT_ADAPTER *ioc, int portnum, int sleepFlag) { PortEnable_t port_enable; MPIDefaultReply_t reply_buf; - int ii; + int rc; int req_sz; int reply_sz; @@ -2494,22 +2704,15 @@ SendPortEnable(MPT_ADAPTER *ioc, int portnum, int sleepFlag) /* RAID FW may take a long time to enable */ - if (ioc->bus_type == FC) { - ii = mpt_handshake_req_reply_wait(ioc, req_sz, (u32*)&port_enable, - reply_sz, (u16*)&reply_buf, 65 /*seconds*/, sleepFlag); - } else { - ii = mpt_handshake_req_reply_wait(ioc, req_sz, (u32*)&port_enable, + if ( (ioc->facts.ProductID & MPI_FW_HEADER_PID_PROD_MASK) + > MPI_FW_HEADER_PID_PROD_TARGET_SCSI ) { + rc = mpt_handshake_req_reply_wait(ioc, req_sz, (u32*)&port_enable, reply_sz, (u16*)&reply_buf, 300 /*seconds*/, sleepFlag); + } else { + rc = mpt_handshake_req_reply_wait(ioc, req_sz, (u32*)&port_enable, + reply_sz, (u16*)&reply_buf, 30 /*seconds*/, sleepFlag); } - - if (ii != 0) - return ii; - - /* We do not even look at the reply, so we need not - * swap the multi-byte fields. - */ - - return 0; + return rc; } /* @@ -2666,9 +2869,8 @@ mpt_do_upload(MPT_ADAPTER *ioc, int sleepFlag) * <0 for fw upload failure. */ static int -mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag) +mpt_downloadboot(MPT_ADAPTER *ioc, MpiFwHeader_t *pFwHeader, int sleepFlag) { - MpiFwHeader_t *pFwHeader; MpiExtImageHeader_t *pExtImage; u32 fwSize; u32 diag0val; @@ -2679,18 +2881,8 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag) u32 load_addr; u32 ioc_state=0; - ddlprintk((MYIOC_s_INFO_FMT "downloadboot: fw size 0x%x, ioc FW Ptr %p\n", - ioc->name, ioc->facts.FWImageSize, ioc->cached_fw)); - - if ( ioc->facts.FWImageSize == 0 ) - return -1; - - if (ioc->cached_fw == NULL) - return -2; - - /* prevent a second downloadboot and memory free with alt_ioc */ - if (ioc->alt_ioc && ioc->alt_ioc->cached_fw) - ioc->alt_ioc->cached_fw = NULL; + ddlprintk((MYIOC_s_INFO_FMT "downloadboot: fw size 0x%x (%d), FW Ptr %p\n", + ioc->name, pFwHeader->ImageSize, pFwHeader->ImageSize, pFwHeader)); CHIPREG_WRITE32(&ioc->chip->WriteSequence, 0xFF); CHIPREG_WRITE32(&ioc->chip->WriteSequence, MPI_WRSEQ_1ST_KEY_VALUE); @@ -2718,16 +2910,17 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag) ioc->name, count)); break; } - /* wait 1 sec */ + /* wait .1 sec */ if (sleepFlag == CAN_SLEEP) { - msleep_interruptible (1000); + msleep_interruptible (100); } else { - mdelay (1000); + mdelay (100); } } if ( count == 30 ) { - ddlprintk((MYIOC_s_INFO_FMT "downloadboot failed! Unable to RESET_ADAPTER diag0val=%x\n", + ddlprintk((MYIOC_s_INFO_FMT "downloadboot failed! " + "Unable to get MPI_DIAG_DRWE mode, diag0val=%x\n", ioc->name, diag0val)); return -3; } @@ -2742,7 +2935,6 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag) /* Set the DiagRwEn and Disable ARM bits */ CHIPREG_WRITE32(&ioc->chip->Diagnostic, (MPI_DIAG_RW_ENABLE | MPI_DIAG_DISABLE_ARM)); - pFwHeader = (MpiFwHeader_t *) ioc->cached_fw; fwSize = (pFwHeader->ImageSize + 3)/4; ptrFw = (u32 *) pFwHeader; @@ -2792,19 +2984,38 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag) /* Clear the internal flash bad bit - autoincrementing register, * so must do two writes. */ - CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwAddress, 0x3F000000); - diagRwData = CHIPREG_PIO_READ32(&ioc->pio_chip->DiagRwData); - diagRwData |= 0x4000000; - CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwAddress, 0x3F000000); - CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwData, diagRwData); + if (ioc->bus_type == SCSI) { + /* + * 1030 and 1035 H/W errata, workaround to access + * the ClearFlashBadSignatureBit + */ + CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwAddress, 0x3F000000); + diagRwData = CHIPREG_PIO_READ32(&ioc->pio_chip->DiagRwData); + diagRwData |= 0x40000000; + CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwAddress, 0x3F000000); + CHIPREG_PIO_WRITE32(&ioc->pio_chip->DiagRwData, diagRwData); + + } else /* if((ioc->bus_type == SAS) || (ioc->bus_type == FC)) */ { + diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic); + CHIPREG_WRITE32(&ioc->chip->Diagnostic, diag0val | + MPI_DIAG_CLEAR_FLASH_BAD_SIG); + + /* wait 1 msec */ + if (sleepFlag == CAN_SLEEP) { + msleep_interruptible (1); + } else { + mdelay (1); + } + } if (ioc->errata_flag_1064) pci_disable_io_access(ioc->pcidev); diag0val = CHIPREG_READ32(&ioc->chip->Diagnostic); - ddlprintk((MYIOC_s_INFO_FMT "downloadboot diag0val=%x, turning off PREVENT_IOC_BOOT, DISABLE_ARM\n", + ddlprintk((MYIOC_s_INFO_FMT "downloadboot diag0val=%x, " + "turning off PREVENT_IOC_BOOT, DISABLE_ARM, RW_ENABLE\n", ioc->name, diag0val)); - diag0val &= ~(MPI_DIAG_PREVENT_IOC_BOOT | MPI_DIAG_DISABLE_ARM); + diag0val &= ~(MPI_DIAG_PREVENT_IOC_BOOT | MPI_DIAG_DISABLE_ARM | MPI_DIAG_RW_ENABLE); ddlprintk((MYIOC_s_INFO_FMT "downloadboot now diag0val=%x\n", ioc->name, diag0val)); CHIPREG_WRITE32(&ioc->chip->Diagnostic, diag0val); @@ -2812,10 +3023,23 @@ mpt_downloadboot(MPT_ADAPTER *ioc, int sleepFlag) /* Write 0xFF to reset the sequencer */ CHIPREG_WRITE32(&ioc->chip->WriteSequence, 0xFF); + if (ioc->bus_type == SAS) { + ioc_state = mpt_GetIocState(ioc, 0); + if ( (GetIocFacts(ioc, sleepFlag, + MPT_HOSTEVENT_IOC_BRINGUP)) != 0 ) { + ddlprintk((MYIOC_s_INFO_FMT "GetIocFacts failed: IocState=%x\n", + ioc->name, ioc_state)); + return -EFAULT; + } + } + for (count=0; count<HZ*20; count++) { if ((ioc_state = mpt_GetIocState(ioc, 0)) & MPI_IOC_STATE_READY) { ddlprintk((MYIOC_s_INFO_FMT "downloadboot successful! (count=%d) IocState=%x\n", ioc->name, count, ioc_state)); + if (ioc->bus_type == SAS) { + return 0; + } if ((SendIocInit(ioc, sleepFlag)) != 0) { ddlprintk((MYIOC_s_INFO_FMT "downloadboot: SendIocInit failed\n", ioc->name)); @@ -3049,12 +3273,13 @@ mpt_diag_reset(MPT_ADAPTER *ioc, int ignore, int sleepFlag) /* wait 1 sec */ if (sleepFlag == CAN_SLEEP) { - ssleep(1); + msleep_interruptible (1000); } else { mdelay (1000); } } - if ((count = mpt_downloadboot(ioc, sleepFlag)) < 0) { + if ((count = mpt_downloadboot(ioc, + (MpiFwHeader_t *)ioc->cached_fw, sleepFlag)) < 0) { printk(KERN_WARNING MYNAM ": firmware downloadboot failure (%d)!\n", count); } @@ -3637,7 +3862,7 @@ WaitForDoorbellAck(MPT_ADAPTER *ioc, int howlong, int sleepFlag) int count = 0; u32 intstat=0; - cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * howlong; + cntdn = 1000 * howlong; if (sleepFlag == CAN_SLEEP) { while (--cntdn) { @@ -3687,7 +3912,7 @@ WaitForDoorbellInt(MPT_ADAPTER *ioc, int howlong, int sleepFlag) int count = 0; u32 intstat=0; - cntdn = ((sleepFlag == CAN_SLEEP) ? HZ : 1000) * howlong; + cntdn = 1000 * howlong; if (sleepFlag == CAN_SLEEP) { while (--cntdn) { intstat = CHIPREG_READ32(&ioc->chip->IntStatus); @@ -4001,6 +4226,85 @@ GetFcPortPage0(MPT_ADAPTER *ioc, int portnum) /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* + * mptbase_sas_persist_operation - Perform operation on SAS Persitent Table + * @ioc: Pointer to MPT_ADAPTER structure + * @sas_address: 64bit SAS Address for operation. + * @target_id: specified target for operation + * @bus: specified bus for operation + * @persist_opcode: see below + * + * MPI_SAS_OP_CLEAR_NOT_PRESENT - Free all persist TargetID mappings for + * devices not currently present. + * MPI_SAS_OP_CLEAR_ALL_PERSISTENT - Clear al persist TargetID mappings + * + * NOTE: Don't use not this function during interrupt time. + * + * Returns: 0 for success, non-zero error + */ + +/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +int +mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode) +{ + SasIoUnitControlRequest_t *sasIoUnitCntrReq; + SasIoUnitControlReply_t *sasIoUnitCntrReply; + MPT_FRAME_HDR *mf = NULL; + MPIHeader_t *mpi_hdr; + + + /* insure garbage is not sent to fw */ + switch(persist_opcode) { + + case MPI_SAS_OP_CLEAR_NOT_PRESENT: + case MPI_SAS_OP_CLEAR_ALL_PERSISTENT: + break; + + default: + return -1; + break; + } + + printk("%s: persist_opcode=%x\n",__FUNCTION__, persist_opcode); + + /* Get a MF for this command. + */ + if ((mf = mpt_get_msg_frame(mpt_base_index, ioc)) == NULL) { + printk("%s: no msg frames!\n",__FUNCTION__); + return -1; + } + + mpi_hdr = (MPIHeader_t *) mf; + sasIoUnitCntrReq = (SasIoUnitControlRequest_t *)mf; + memset(sasIoUnitCntrReq,0,sizeof(SasIoUnitControlRequest_t)); + sasIoUnitCntrReq->Function = MPI_FUNCTION_SAS_IO_UNIT_CONTROL; + sasIoUnitCntrReq->MsgContext = mpi_hdr->MsgContext; + sasIoUnitCntrReq->Operation = persist_opcode; + + init_timer(&ioc->persist_timer); + ioc->persist_timer.data = (unsigned long) ioc; + ioc->persist_timer.function = mpt_timer_expired; + ioc->persist_timer.expires = jiffies + HZ*10 /* 10 sec */; + ioc->persist_wait_done=0; + add_timer(&ioc->persist_timer); + mpt_put_msg_frame(mpt_base_index, ioc, mf); + wait_event(mpt_waitq, ioc->persist_wait_done); + + sasIoUnitCntrReply = + (SasIoUnitControlReply_t *)ioc->persist_reply_frame; + if (le16_to_cpu(sasIoUnitCntrReply->IOCStatus) != MPI_IOCSTATUS_SUCCESS) { + printk("%s: IOCStatus=0x%X IOCLogInfo=0x%X\n", + __FUNCTION__, + sasIoUnitCntrReply->IOCStatus, + sasIoUnitCntrReply->IOCLogInfo); + return -1; + } + + printk("%s: success\n",__FUNCTION__); + return 0; +} + +/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +/* * GetIoUnitPage2 - Retrieve BIOS version and boot order information. * @ioc: Pointer to MPT_ADAPTER structure * @@ -4340,10 +4644,10 @@ mpt_findImVolumes(MPT_ADAPTER *ioc) if (mpt_config(ioc, &cfg) != 0) goto done_and_free; - if ( (mem = (u8 *)ioc->spi_data.pIocPg2) == NULL ) { + if ( (mem = (u8 *)ioc->raid_data.pIocPg2) == NULL ) { mem = kmalloc(iocpage2sz, GFP_ATOMIC); if (mem) { - ioc->spi_data.pIocPg2 = (IOCPage2_t *) mem; + ioc->raid_data.pIocPg2 = (IOCPage2_t *) mem; } else { goto done_and_free; } @@ -4360,7 +4664,7 @@ mpt_findImVolumes(MPT_ADAPTER *ioc) /* At least 1 RAID Volume */ pIocRv = pIoc2->RaidVolume; - ioc->spi_data.isRaid = 0; + ioc->raid_data.isRaid = 0; for (jj = 0; jj < nVols; jj++, pIocRv++) { vid = pIocRv->VolumeID; vbus = pIocRv->VolumeBus; @@ -4369,7 +4673,7 @@ mpt_findImVolumes(MPT_ADAPTER *ioc) /* find the match */ if (vbus == 0) { - ioc->spi_data.isRaid |= (1 << vid); + ioc->raid_data.isRaid |= (1 << vid); } else { /* Error! Always bus 0 */ @@ -4404,8 +4708,8 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc) /* Free the old page */ - kfree(ioc->spi_data.pIocPg3); - ioc->spi_data.pIocPg3 = NULL; + kfree(ioc->raid_data.pIocPg3); + ioc->raid_data.pIocPg3 = NULL; /* There is at least one physical disk. * Read and save IOC Page 3 @@ -4442,7 +4746,7 @@ mpt_read_ioc_pg_3(MPT_ADAPTER *ioc) mem = kmalloc(iocpage3sz, GFP_ATOMIC); if (mem) { memcpy(mem, (u8 *)pIoc3, iocpage3sz); - ioc->spi_data.pIocPg3 = (IOCPage3_t *) mem; + ioc->raid_data.pIocPg3 = (IOCPage3_t *) mem; } } @@ -5366,8 +5670,8 @@ mpt_HardResetHandler(MPT_ADAPTER *ioc, int sleepFlag) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ -static char * -EventDescriptionStr(u8 event, u32 evData0) +static void +EventDescriptionStr(u8 event, u32 evData0, char *evStr) { char *ds; @@ -5420,8 +5724,95 @@ EventDescriptionStr(u8 event, u32 evData0) ds = "Events(OFF) Change"; break; case MPI_EVENT_INTEGRATED_RAID: - ds = "Integrated Raid"; + { + u8 ReasonCode = (u8)(evData0 >> 16); + switch (ReasonCode) { + case MPI_EVENT_RAID_RC_VOLUME_CREATED : + ds = "Integrated Raid: Volume Created"; + break; + case MPI_EVENT_RAID_RC_VOLUME_DELETED : + ds = "Integrated Raid: Volume Deleted"; + break; + case MPI_EVENT_RAID_RC_VOLUME_SETTINGS_CHANGED : + ds = "Integrated Raid: Volume Settings Changed"; + break; + case MPI_EVENT_RAID_RC_VOLUME_STATUS_CHANGED : + ds = "Integrated Raid: Volume Status Changed"; + break; + case MPI_EVENT_RAID_RC_VOLUME_PHYSDISK_CHANGED : + ds = "Integrated Raid: Volume Physdisk Changed"; + break; + case MPI_EVENT_RAID_RC_PHYSDISK_CREATED : + ds = "Integrated Raid: Physdisk Created"; + break; + case MPI_EVENT_RAID_RC_PHYSDISK_DELETED : + ds = "Integrated Raid: Physdisk Deleted"; + break; + case MPI_EVENT_RAID_RC_PHYSDISK_SETTINGS_CHANGED : + ds = "Integrated Raid: Physdisk Settings Changed"; + break; + case MPI_EVENT_RAID_RC_PHYSDISK_STATUS_CHANGED : + ds = "Integrated Raid: Physdisk Status Changed"; + break; + case MPI_EVENT_RAID_RC_DOMAIN_VAL_NEEDED : + ds = "Integrated Raid: Domain Validation Needed"; + break; + case MPI_EVENT_RAID_RC_SMART_DATA : + ds = "Integrated Raid; Smart Data"; + break; + case MPI_EVENT_RAID_RC_REPLACE_ACTION_STARTED : + ds = "Integrated Raid: Replace Action Started"; + break; + default: + ds = "Integrated Raid"; + break; + } + break; + } + case MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE: + ds = "SCSI Device Status Change"; + break; + case MPI_EVENT_SAS_DEVICE_STATUS_CHANGE: + { + u8 ReasonCode = (u8)(evData0 >> 16); + switch (ReasonCode) { + case MPI_EVENT_SAS_DEV_STAT_RC_ADDED: + ds = "SAS Device Status Change: Added"; + break; + case MPI_EVENT_SAS_DEV_STAT_RC_NOT_RESPONDING: + ds = "SAS Device Status Change: Deleted"; + break; + case MPI_EVENT_SAS_DEV_STAT_RC_SMART_DATA: + ds = "SAS Device Status Change: SMART Data"; + break; + case MPI_EVENT_SAS_DEV_STAT_RC_NO_PERSIST_ADDED: + ds = "SAS Device Status Change: No Persistancy Added"; + break; + default: + ds = "SAS Device Status Change: Unknown"; + break; + } + break; + } + case MPI_EVENT_ON_BUS_TIMER_EXPIRED: + ds = "Bus Timer Expired"; + break; + case MPI_EVENT_QUEUE_FULL: + ds = "Queue Full"; + break; + case MPI_EVENT_SAS_SES: + ds = "SAS SES Event"; + break; + case MPI_EVENT_PERSISTENT_TABLE_FULL: + ds = "Persistent Table Full"; + break; + case MPI_EVENT_SAS_PHY_LINK_STATUS: + ds = "SAS PHY Link Status"; + break; + case MPI_EVENT_SAS_DISCOVERY_ERROR: + ds = "SAS Discovery Error"; break; + /* * MPT base "custom" events may be added here... */ @@ -5429,7 +5820,7 @@ EventDescriptionStr(u8 event, u32 evData0) ds = "Unknown"; break; } - return ds; + strcpy(evStr,ds); } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -5451,7 +5842,7 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply int ii; int r = 0; int handlers = 0; - char *evStr; + char evStr[100]; u8 event; /* @@ -5464,7 +5855,7 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply evData0 = le32_to_cpu(pEventReply->Data[0]); } - evStr = EventDescriptionStr(event, evData0); + EventDescriptionStr(event, evData0, evStr); devtprintk((MYIOC_s_INFO_FMT "MPT event (%s=%02Xh) detected!\n", ioc->name, evStr, @@ -5481,20 +5872,6 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply * Do general / base driver event processing */ switch(event) { - case MPI_EVENT_NONE: /* 00 */ - case MPI_EVENT_LOG_DATA: /* 01 */ - case MPI_EVENT_STATE_CHANGE: /* 02 */ - case MPI_EVENT_UNIT_ATTENTION: /* 03 */ - case MPI_EVENT_IOC_BUS_RESET: /* 04 */ - case MPI_EVENT_EXT_BUS_RESET: /* 05 */ - case MPI_EVENT_RESCAN: /* 06 */ - case MPI_EVENT_LINK_STATUS_CHANGE: /* 07 */ - case MPI_EVENT_LOOP_STATE_CHANGE: /* 08 */ - case MPI_EVENT_LOGOUT: /* 09 */ - case MPI_EVENT_INTEGRATED_RAID: /* 0B */ - case MPI_EVENT_SCSI_DEVICE_STATUS_CHANGE: /* 0C */ - default: - break; case MPI_EVENT_EVENT_CHANGE: /* 0A */ if (evDataLen) { u8 evState = evData0 & 0xFF; @@ -5507,6 +5884,8 @@ ProcessEventNotification(MPT_ADAPTER *ioc, EventNotificationReply_t *pEventReply } } break; + default: + break; } /* @@ -5653,6 +6032,111 @@ mpt_sp_log_info(MPT_ADAPTER *ioc, u32 log_info) printk(MYIOC_s_INFO_FMT "LogInfo(0x%08x): F/W: %s\n", ioc->name, log_info, desc); } +/* strings for sas loginfo */ + static char *originator_str[] = { + "IOP", /* 00h */ + "PL", /* 01h */ + "IR" /* 02h */ + }; + static char *iop_code_str[] = { + NULL, /* 00h */ + "Invalid SAS Address", /* 01h */ + NULL, /* 02h */ + "Invalid Page", /* 03h */ + NULL, /* 04h */ + "Task Terminated" /* 05h */ + }; + static char *pl_code_str[] = { + NULL, /* 00h */ + "Open Failure", /* 01h */ + "Invalid Scatter Gather List", /* 02h */ + "Wrong Relative Offset or Frame Length", /* 03h */ + "Frame Transfer Error", /* 04h */ + "Transmit Frame Connected Low", /* 05h */ + "SATA Non-NCQ RW Error Bit Set", /* 06h */ + "SATA Read Log Receive Data Error", /* 07h */ + "SATA NCQ Fail All Commands After Error", /* 08h */ + "SATA Error in Receive Set Device Bit FIS", /* 09h */ + "Receive Frame Invalid Message", /* 0Ah */ + "Receive Context Message Valid Error", /* 0Bh */ + "Receive Frame Current Frame Error", /* 0Ch */ + "SATA Link Down", /* 0Dh */ + "Discovery SATA Init W IOS", /* 0Eh */ + "Config Invalid Page", /* 0Fh */ + "Discovery SATA Init Timeout", /* 10h */ + "Reset", /* 11h */ + "Abort", /* 12h */ + "IO Not Yet Executed", /* 13h */ + "IO Executed", /* 14h */ + NULL, /* 15h */ + NULL, /* 16h */ + NULL, /* 17h */ + NULL, /* 18h */ + NULL, /* 19h */ + NULL, /* 1Ah */ + NULL, /* 1Bh */ + NULL, /* 1Ch */ + NULL, /* 1Dh */ + NULL, /* 1Eh */ + NULL, /* 1Fh */ + "Enclosure Management" /* 20h */ + }; + +/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +/* + * mpt_sas_log_info - Log information returned from SAS IOC. + * @ioc: Pointer to MPT_ADAPTER structure + * @log_info: U32 LogInfo reply word from the IOC + * + * Refer to lsi/mpi_log_sas.h. + */ +static void +mpt_sas_log_info(MPT_ADAPTER *ioc, u32 log_info) +{ +union loginfo_type { + u32 loginfo; + struct { + u32 subcode:16; + u32 code:8; + u32 originator:4; + u32 bus_type:4; + }dw; +}; + union loginfo_type sas_loginfo; + char *code_desc = NULL; + + sas_loginfo.loginfo = log_info; + if ((sas_loginfo.dw.bus_type != 3 /*SAS*/) && + (sas_loginfo.dw.originator < sizeof(originator_str)/sizeof(char*))) + return; + if ((sas_loginfo.dw.originator == 0 /*IOP*/) && + (sas_loginfo.dw.code < sizeof(iop_code_str)/sizeof(char*))) { + code_desc = iop_code_str[sas_loginfo.dw.code]; + }else if ((sas_loginfo.dw.originator == 1 /*PL*/) && + (sas_loginfo.dw.code < sizeof(pl_code_str)/sizeof(char*) )) { + code_desc = pl_code_str[sas_loginfo.dw.code]; + } + + if (code_desc != NULL) + printk(MYIOC_s_INFO_FMT + "LogInfo(0x%08x): Originator={%s}, Code={%s}," + " SubCode(0x%04x)\n", + ioc->name, + log_info, + originator_str[sas_loginfo.dw.originator], + code_desc, + sas_loginfo.dw.subcode); + else + printk(MYIOC_s_INFO_FMT + "LogInfo(0x%08x): Originator={%s}, Code=(0x%02x)," + " SubCode(0x%04x)\n", + ioc->name, + log_info, + originator_str[sas_loginfo.dw.originator], + sas_loginfo.dw.code, + sas_loginfo.dw.subcode); +} + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* * mpt_sp_ioc_info - IOC information returned from SCSI Parallel IOC. @@ -5814,6 +6298,7 @@ EXPORT_SYMBOL(mpt_findImVolumes); EXPORT_SYMBOL(mpt_read_ioc_pg_3); EXPORT_SYMBOL(mpt_alloc_fw_memory); EXPORT_SYMBOL(mpt_free_fw_memory); +EXPORT_SYMBOL(mptbase_sas_persist_operation); /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h index f4827d92373..75105277e22 100644 --- a/drivers/message/fusion/mptbase.h +++ b/drivers/message/fusion/mptbase.h @@ -65,6 +65,7 @@ #include "lsi/mpi_fc.h" /* Fibre Channel (lowlevel) support */ #include "lsi/mpi_targ.h" /* SCSI/FCP Target protcol support */ #include "lsi/mpi_tool.h" /* Tools support */ +#include "lsi/mpi_sas.h" /* SAS support */ /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -76,8 +77,8 @@ #define COPYRIGHT "Copyright (c) 1999-2005 " MODULEAUTHOR #endif -#define MPT_LINUX_VERSION_COMMON "3.03.02" -#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.03.02" +#define MPT_LINUX_VERSION_COMMON "3.03.03" +#define MPT_LINUX_PACKAGE_NAME "@(#)mptlinux-3.03.03" #define WHAT_MAGIC_STRING "@" "(" "#" ")" #define show_mptmod_ver(s,ver) \ @@ -423,7 +424,7 @@ typedef struct _MPT_IOCTL { /* * Event Structure and define */ -#define MPTCTL_EVENT_LOG_SIZE (0x0000000A) +#define MPTCTL_EVENT_LOG_SIZE (0x000000032) typedef struct _mpt_ioctl_events { u32 event; /* Specified by define above */ u32 eventContext; /* Index or counter */ @@ -451,16 +452,13 @@ typedef struct _mpt_ioctl_events { #define MPT_SCSICFG_ALL_IDS 0x02 /* WriteSDP1 to all IDS */ /* #define MPT_SCSICFG_BLK_NEGO 0x10 WriteSDP1 with WDTR and SDTR disabled */ -typedef struct _ScsiCfgData { +typedef struct _SpiCfgData { u32 PortFlags; int *nvram; /* table of device NVRAM values */ - IOCPage2_t *pIocPg2; /* table of Raid Volumes */ - IOCPage3_t *pIocPg3; /* table of physical disks */ IOCPage4_t *pIocPg4; /* SEP devices addressing */ dma_addr_t IocPg4_dma; /* Phys Addr of IOCPage4 data */ int IocPg4Sz; /* IOCPage4 size */ u8 dvStatus[MPT_MAX_SCSI_DEVICES]; - int isRaid; /* bit field, 1 if RAID */ u8 minSyncFactor; /* 0xFF if async */ u8 maxSyncOffset; /* 0 if async */ u8 maxBusWidth; /* 0 if narrow, 1 if wide */ @@ -472,10 +470,28 @@ typedef struct _ScsiCfgData { u8 dvScheduled; /* 1 if scheduled */ u8 forceDv; /* 1 to force DV scheduling */ u8 noQas; /* Disable QAS for this adapter */ - u8 Saf_Te; /* 1 to force all Processors as SAF-TE if Inquiry data length is too short to check for SAF-TE */ + u8 Saf_Te; /* 1 to force all Processors as + * SAF-TE if Inquiry data length + * is too short to check for SAF-TE + */ u8 mpt_dv; /* command line option: enhanced=1, basic=0 */ + u8 bus_reset; /* 1 to allow bus reset */ u8 rsvd[1]; -} ScsiCfgData; +}SpiCfgData; + +typedef struct _SasCfgData { + u8 ptClear; /* 1 to automatically clear the + * persistent table. + * 0 to disable + * automatic clearing. + */ +}SasCfgData; + +typedef struct _RaidCfgData { + IOCPage2_t *pIocPg2; /* table of Raid Volumes */ + IOCPage3_t *pIocPg3; /* table of physical disks */ + int isRaid; /* bit field, 1 if RAID */ +}RaidCfgData; /* * Adapter Structure - pci_dev specific. Maximum: MPT_MAX_ADAPTERS @@ -530,11 +546,16 @@ typedef struct _MPT_ADAPTER u8 *sense_buf_pool; dma_addr_t sense_buf_pool_dma; u32 sense_buf_low_dma; + u8 *HostPageBuffer; /* SAS - host page buffer support */ + u32 HostPageBuffer_sz; + dma_addr_t HostPageBuffer_dma; int mtrr_reg; struct pci_dev *pcidev; /* struct pci_dev pointer */ u8 __iomem *memmap; /* mmap address */ struct Scsi_Host *sh; /* Scsi Host pointer */ - ScsiCfgData spi_data; /* Scsi config. data */ + SpiCfgData spi_data; /* Scsi config. data */ + RaidCfgData raid_data; /* Raid config. data */ + SasCfgData sas_data; /* Sas config. data */ MPT_IOCTL *ioctl; /* ioctl data pointer */ struct proc_dir_entry *ioc_dentry; struct _MPT_ADAPTER *alt_ioc; /* ptr to 929 bound adapter port */ @@ -554,31 +575,35 @@ typedef struct _MPT_ADAPTER #else u32 mfcnt; #endif - u32 NB_for_64_byte_frame; + u32 NB_for_64_byte_frame; u32 hs_req[MPT_MAX_FRAME_SIZE/sizeof(u32)]; u16 hs_reply[MPT_MAX_FRAME_SIZE/sizeof(u16)]; IOCFactsReply_t facts; PortFactsReply_t pfacts[2]; FCPortPage0_t fc_port_page0[2]; + struct timer_list persist_timer; /* persist table timer */ + int persist_wait_done; /* persist completion flag */ + u8 persist_reply_frame[MPT_DEFAULT_FRAME_SIZE]; /* persist reply */ LANPage0_t lan_cnfg_page0; LANPage1_t lan_cnfg_page1; - /* + /* * Description: errata_flag_1064 * If a PCIX read occurs within 1 or 2 cycles after the chip receives * a split completion for a read data, an internal address pointer incorrectly * increments by 32 bytes */ - int errata_flag_1064; + int errata_flag_1064; u8 FirstWhoInit; u8 upload_fw; /* If set, do a fw upload */ u8 reload_fw; /* Force a FW Reload on next reset */ - u8 NBShiftFactor; /* NB Shift Factor based on Block Size (Facts) */ + u8 NBShiftFactor; /* NB Shift Factor based on Block Size (Facts) */ u8 pad1[4]; int DoneCtx; int TaskCtx; int InternalCtx; - struct list_head list; + struct list_head list; struct net_device *netdev; + struct list_head sas_topology; } MPT_ADAPTER; /* @@ -964,6 +989,7 @@ extern void mpt_alloc_fw_memory(MPT_ADAPTER *ioc, int size); extern void mpt_free_fw_memory(MPT_ADAPTER *ioc); extern int mpt_findImVolumes(MPT_ADAPTER *ioc); extern int mpt_read_ioc_pg_3(MPT_ADAPTER *ioc); +extern int mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode); /* * Public data decl's... diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c index 7577c2417e2..cb2d59d5f5a 100644 --- a/drivers/message/fusion/mptctl.c +++ b/drivers/message/fusion/mptctl.c @@ -1326,7 +1326,7 @@ mptctl_gettargetinfo (unsigned long arg) */ if (hd && hd->Targets) { mpt_findImVolumes(ioc); - pIoc2 = ioc->spi_data.pIocPg2; + pIoc2 = ioc->raid_data.pIocPg2; for ( id = 0; id <= max_id; ) { if ( pIoc2 && pIoc2->NumActiveVolumes ) { if ( id == pIoc2->RaidVolume[0].VolumeID ) { @@ -1348,7 +1348,7 @@ mptctl_gettargetinfo (unsigned long arg) --maxWordsLeft; goto next_id; } else { - pIoc3 = ioc->spi_data.pIocPg3; + pIoc3 = ioc->raid_data.pIocPg3; for ( jj = 0; jj < pIoc3->NumPhysDisks; jj++ ) { if ( pIoc3->PhysDisk[jj].PhysDiskID == id ) goto next_id; diff --git a/drivers/message/fusion/mptfc.c b/drivers/message/fusion/mptfc.c index 13771abea13..a628be9bbba 100644 --- a/drivers/message/fusion/mptfc.c +++ b/drivers/message/fusion/mptfc.c @@ -189,7 +189,7 @@ mptfc_probe(struct pci_dev *pdev, const struct pci_device_id *id) printk(MYIOC_s_WARN_FMT "Skipping ioc=%p because SCSI Initiator mode is NOT enabled!\n", ioc->name, ioc); - return -ENODEV; + return 0; } sh = scsi_host_alloc(&mptfc_driver_template, sizeof(MPT_SCSI_HOST)); diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c index 52794be5a95..ed3c891e388 100644 --- a/drivers/message/fusion/mptlan.c +++ b/drivers/message/fusion/mptlan.c @@ -312,7 +312,12 @@ static int mpt_lan_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) { struct net_device *dev = ioc->netdev; - struct mpt_lan_priv *priv = netdev_priv(dev); + struct mpt_lan_priv *priv; + + if (dev == NULL) + return(1); + else + priv = netdev_priv(dev); dlprintk((KERN_INFO MYNAM ": IOC %s_reset routed to LAN driver!\n", reset_phase==MPT_IOC_SETUP_RESET ? "setup" : ( diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c new file mode 100644 index 00000000000..429820e48c6 --- /dev/null +++ b/drivers/message/fusion/mptsas.c @@ -0,0 +1,1235 @@ +/* + * linux/drivers/message/fusion/mptsas.c + * For use with LSI Logic PCI chip/adapter(s) + * running LSI Logic Fusion MPT (Message Passing Technology) firmware. + * + * Copyright (c) 1999-2005 LSI Logic Corporation + * (mailto:mpt_linux_developer@lsil.com) + * Copyright (c) 2005 Dell + */ +/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +/* + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + NO WARRANTY + THE PROGRAM IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR + CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT + LIMITATION, ANY WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, + MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is + solely responsible for determining the appropriateness of using and + distributing the Program and assumes all risks associated with its + exercise of rights under this Agreement, including but not limited to + the risks and costs of program errors, damage to or loss of data, + programs or equipment, and unavailability or interruption of operations. + + DISCLAIMER OF LIABILITY + NEITHER RECIPIENT NOR ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR + TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + USE OR DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED + HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/errno.h> +#include <linux/sched.h> +#include <linux/workqueue.h> + +#include <scsi/scsi_cmnd.h> +#include <scsi/scsi_device.h> +#include <scsi/scsi_host.h> +#include <scsi/scsi_transport_sas.h> + +#include "mptbase.h" +#include "mptscsih.h" + + +#define my_NAME "Fusion MPT SAS Host driver" +#define my_VERSION MPT_LINUX_VERSION_COMMON +#define MYNAM "mptsas" + +MODULE_AUTHOR(MODULEAUTHOR); +MODULE_DESCRIPTION(my_NAME); +MODULE_LICENSE("GPL"); + +static int mpt_pq_filter; +module_param(mpt_pq_filter, int, 0); +MODULE_PARM_DESC(mpt_pq_filter, + "Enable peripheral qualifier filter: enable=1 " + "(default=0)"); + +static int mpt_pt_clear; +module_param(mpt_pt_clear, int, 0); +MODULE_PARM_DESC(mpt_pt_clear, + "Clear persistency table: enable=1 " + "(default=MPTSCSIH_PT_CLEAR=0)"); + +static int mptsasDoneCtx = -1; +static int mptsasTaskCtx = -1; +static int mptsasInternalCtx = -1; /* Used only for internal commands */ + + +/* + * SAS topology structures + * + * The MPT Fusion firmware interface spreads information about the + * SAS topology over many manufacture pages, thus we need some data + * structure to collect it and process it for the SAS transport class. + */ + +struct mptsas_devinfo { + u16 handle; /* unique id to address this device */ + u8 phy_id; /* phy number of parent device */ + u8 port_id; /* sas physical port this device + is assoc'd with */ + u8 target; /* logical target id of this device */ + u8 bus; /* logical bus number of this device */ + u64 sas_address; /* WWN of this device, + SATA is assigned by HBA,expander */ + u32 device_info; /* bitfield detailed info about this device */ +}; + +struct mptsas_phyinfo { + u8 phy_id; /* phy index */ + u8 port_id; /* port number this phy is part of */ + u8 negotiated_link_rate; /* nego'd link rate for this phy */ + u8 hw_link_rate; /* hardware max/min phys link rate */ + u8 programmed_link_rate; /* programmed max/min phy link rate */ + struct mptsas_devinfo identify; /* point to phy device info */ + struct mptsas_devinfo attached; /* point to attached device info */ + struct sas_rphy *rphy; +}; + +struct mptsas_portinfo { + struct list_head list; + u16 handle; /* unique id to address this */ + u8 num_phys; /* number of phys */ + struct mptsas_phyinfo *phy_info; +}; + +/* + * This is pretty ugly. We will be able to seriously clean it up + * once the DV code in mptscsih goes away and we can properly + * implement ->target_alloc. + */ +static int +mptsas_slave_alloc(struct scsi_device *device) +{ + struct Scsi_Host *host = device->host; + MPT_SCSI_HOST *hd = (MPT_SCSI_HOST *)host->hostdata; + struct sas_rphy *rphy; + struct mptsas_portinfo *p; + VirtDevice *vdev; + uint target = device->id; + int i; + + if ((vdev = hd->Targets[target]) != NULL) + goto out; + + vdev = kmalloc(sizeof(VirtDevice), GFP_KERNEL); + if (!vdev) { + printk(MYIOC_s_ERR_FMT "slave_alloc kmalloc(%zd) FAILED!\n", + hd->ioc->name, sizeof(VirtDevice)); + return -ENOMEM; + } + + memset(vdev, 0, sizeof(VirtDevice)); + vdev->tflags = MPT_TARGET_FLAGS_Q_YES|MPT_TARGET_FLAGS_VALID_INQUIRY; + vdev->ioc_id = hd->ioc->id; + + rphy = dev_to_rphy(device->sdev_target->dev.parent); + list_for_each_entry(p, &hd->ioc->sas_topology, list) { + for (i = 0; i < p->num_phys; i++) { + if (p->phy_info[i].attached.sas_address == + rphy->identify.sas_address) { + vdev->target_id = + p->phy_info[i].attached.target; + vdev->bus_id = p->phy_info[i].attached.bus; + hd->Targets[device->id] = vdev; + goto out; + } + } + } + + printk("No matching SAS device found!!\n"); + kfree(vdev); + return -ENODEV; + + out: + vdev->num_luns++; + device->hostdata = vdev; + return 0; +} + +static struct scsi_host_template mptsas_driver_template = { + .proc_name = "mptsas", + .proc_info = mptscsih_proc_info, + .name = "MPT SPI Host", + .info = mptscsih_info, + .queuecommand = mptscsih_qcmd, + .slave_alloc = mptsas_slave_alloc, + .slave_configure = mptscsih_slave_configure, + .slave_destroy = mptscsih_slave_destroy, + .change_queue_depth = mptscsih_change_queue_depth, + .eh_abort_handler = mptscsih_abort, + .eh_device_reset_handler = mptscsih_dev_reset, + .eh_bus_reset_handler = mptscsih_bus_reset, + .eh_host_reset_handler = mptscsih_host_reset, + .bios_param = mptscsih_bios_param, + .can_queue = MPT_FC_CAN_QUEUE, + .this_id = -1, + .sg_tablesize = MPT_SCSI_SG_DEPTH, + .max_sectors = 8192, + .cmd_per_lun = 7, + .use_clustering = ENABLE_CLUSTERING, +}; + +static struct sas_function_template mptsas_transport_functions = { +}; + +static struct scsi_transport_template *mptsas_transport_template; + +#ifdef SASDEBUG +static void mptsas_print_phy_data(MPI_SAS_IO_UNIT0_PHY_DATA *phy_data) +{ + printk("---- IO UNIT PAGE 0 ------------\n"); + printk("Handle=0x%X\n", + le16_to_cpu(phy_data->AttachedDeviceHandle)); + printk("Controller Handle=0x%X\n", + le16_to_cpu(phy_data->ControllerDevHandle)); + printk("Port=0x%X\n", phy_data->Port); + printk("Port Flags=0x%X\n", phy_data->PortFlags); + printk("PHY Flags=0x%X\n", phy_data->PhyFlags); + printk("Negotiated Link Rate=0x%X\n", phy_data->NegotiatedLinkRate); + printk("Controller PHY Device Info=0x%X\n", + le32_to_cpu(phy_data->ControllerPhyDeviceInfo)); + printk("DiscoveryStatus=0x%X\n", + le32_to_cpu(phy_data->DiscoveryStatus)); + printk("\n"); +} + +static void mptsas_print_phy_pg0(SasPhyPage0_t *pg0) +{ + __le64 sas_address; + + memcpy(&sas_address, &pg0->SASAddress, sizeof(__le64)); + + printk("---- SAS PHY PAGE 0 ------------\n"); + printk("Attached Device Handle=0x%X\n", + le16_to_cpu(pg0->AttachedDevHandle)); + printk("SAS Address=0x%llX\n", + (unsigned long long)le64_to_cpu(sas_address)); + printk("Attached PHY Identifier=0x%X\n", pg0->AttachedPhyIdentifier); + printk("Attached Device Info=0x%X\n", + le32_to_cpu(pg0->AttachedDeviceInfo)); + printk("Programmed Link Rate=0x%X\n", pg0->ProgrammedLinkRate); + printk("Change Count=0x%X\n", pg0->ChangeCount); + printk("PHY Info=0x%X\n", le32_to_cpu(pg0->PhyInfo)); + printk("\n"); +} + +static void mptsas_print_device_pg0(SasDevicePage0_t *pg0) +{ + __le64 sas_address; + + memcpy(&sas_address, &pg0->SASAddress, sizeof(__le64)); + + printk("---- SAS DEVICE PAGE 0 ---------\n"); + printk("Handle=0x%X\n" ,le16_to_cpu(pg0->DevHandle)); + printk("Enclosure Handle=0x%X\n", le16_to_cpu(pg0->EnclosureHandle)); + printk("Slot=0x%X\n", le16_to_cpu(pg0->Slot)); + printk("SAS Address=0x%llX\n", le64_to_cpu(sas_address)); + printk("Target ID=0x%X\n", pg0->TargetID); + printk("Bus=0x%X\n", pg0->Bus); + printk("PhyNum=0x%X\n", pg0->PhyNum); + printk("AccessStatus=0x%X\n", le16_to_cpu(pg0->AccessStatus)); + printk("Device Info=0x%X\n", le32_to_cpu(pg0->DeviceInfo)); + printk("Flags=0x%X\n", le16_to_cpu(pg0->Flags)); + printk("Physical Port=0x%X\n", pg0->PhysicalPort); + printk("\n"); +} + +static void mptsas_print_expander_pg1(SasExpanderPage1_t *pg1) +{ + printk("---- SAS EXPANDER PAGE 1 ------------\n"); + + printk("Physical Port=0x%X\n", pg1->PhysicalPort); + printk("PHY Identifier=0x%X\n", pg1->Phy); + printk("Negotiated Link Rate=0x%X\n", pg1->NegotiatedLinkRate); + printk("Programmed Link Rate=0x%X\n", pg1->ProgrammedLinkRate); + printk("Hardware Link Rate=0x%X\n", pg1->HwLinkRate); + printk("Owner Device Handle=0x%X\n", + le16_to_cpu(pg1->OwnerDevHandle)); + printk("Attached Device Handle=0x%X\n", + le16_to_cpu(pg1->AttachedDevHandle)); +} +#else +#define mptsas_print_phy_data(phy_data) do { } while (0) +#define mptsas_print_phy_pg0(pg0) do { } while (0) +#define mptsas_print_device_pg0(pg0) do { } while (0) +#define mptsas_print_expander_pg1(pg1) do { } while (0) +#endif + +static int +mptsas_sas_io_unit_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info) +{ + ConfigExtendedPageHeader_t hdr; + CONFIGPARMS cfg; + SasIOUnitPage0_t *buffer; + dma_addr_t dma_handle; + int error, i; + + hdr.PageVersion = MPI_SASIOUNITPAGE0_PAGEVERSION; + hdr.ExtPageLength = 0; + hdr.PageNumber = 0; + hdr.Reserved1 = 0; + hdr.Reserved2 = 0; + hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED; + hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_IO_UNIT; + + cfg.cfghdr.ehdr = &hdr; + cfg.physAddr = -1; + cfg.pageAddr = 0; + cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER; + cfg.dir = 0; /* read */ + cfg.timeout = 10; + + error = mpt_config(ioc, &cfg); + if (error) + goto out; + if (!hdr.ExtPageLength) { + error = -ENXIO; + goto out; + } + + buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + &dma_handle); + if (!buffer) { + error = -ENOMEM; + goto out; + } + + cfg.physAddr = dma_handle; + cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; + + error = mpt_config(ioc, &cfg); + if (error) + goto out_free_consistent; + + port_info->num_phys = buffer->NumPhys; + port_info->phy_info = kcalloc(port_info->num_phys, + sizeof(struct mptsas_phyinfo),GFP_KERNEL); + if (!port_info->phy_info) { + error = -ENOMEM; + goto out_free_consistent; + } + + for (i = 0; i < port_info->num_phys; i++) { + mptsas_print_phy_data(&buffer->PhyData[i]); + port_info->phy_info[i].phy_id = i; + port_info->phy_info[i].port_id = + buffer->PhyData[i].Port; + port_info->phy_info[i].negotiated_link_rate = + buffer->PhyData[i].NegotiatedLinkRate; + } + + out_free_consistent: + pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + buffer, dma_handle); + out: + return error; +} + +static int +mptsas_sas_phy_pg0(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info, + u32 form, u32 form_specific) +{ + ConfigExtendedPageHeader_t hdr; + CONFIGPARMS cfg; + SasPhyPage0_t *buffer; + dma_addr_t dma_handle; + int error; + + hdr.PageVersion = MPI_SASPHY0_PAGEVERSION; + hdr.ExtPageLength = 0; + hdr.PageNumber = 0; + hdr.Reserved1 = 0; + hdr.Reserved2 = 0; + hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED; + hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_PHY; + + cfg.cfghdr.ehdr = &hdr; + cfg.dir = 0; /* read */ + cfg.timeout = 10; + + /* Get Phy Pg 0 for each Phy. */ + cfg.physAddr = -1; + cfg.pageAddr = form + form_specific; + cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER; + + error = mpt_config(ioc, &cfg); + if (error) + goto out; + + if (!hdr.ExtPageLength) { + error = -ENXIO; + goto out; + } + + buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + &dma_handle); + if (!buffer) { + error = -ENOMEM; + goto out; + } + + cfg.physAddr = dma_handle; + cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; + + error = mpt_config(ioc, &cfg); + if (error) + goto out_free_consistent; + + mptsas_print_phy_pg0(buffer); + + phy_info->hw_link_rate = buffer->HwLinkRate; + phy_info->programmed_link_rate = buffer->ProgrammedLinkRate; + phy_info->identify.handle = le16_to_cpu(buffer->OwnerDevHandle); + phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle); + + out_free_consistent: + pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + buffer, dma_handle); + out: + return error; +} + +static int +mptsas_sas_device_pg0(MPT_ADAPTER *ioc, struct mptsas_devinfo *device_info, + u32 form, u32 form_specific) +{ + ConfigExtendedPageHeader_t hdr; + CONFIGPARMS cfg; + SasDevicePage0_t *buffer; + dma_addr_t dma_handle; + __le64 sas_address; + int error; + + hdr.PageVersion = MPI_SASDEVICE0_PAGEVERSION; + hdr.ExtPageLength = 0; + hdr.PageNumber = 0; + hdr.Reserved1 = 0; + hdr.Reserved2 = 0; + hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED; + hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_DEVICE; + + cfg.cfghdr.ehdr = &hdr; + cfg.pageAddr = form + form_specific; + cfg.physAddr = -1; + cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER; + cfg.dir = 0; /* read */ + cfg.timeout = 10; + + error = mpt_config(ioc, &cfg); + if (error) + goto out; + if (!hdr.ExtPageLength) { + error = -ENXIO; + goto out; + } + + buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + &dma_handle); + if (!buffer) { + error = -ENOMEM; + goto out; + } + + cfg.physAddr = dma_handle; + cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; + + error = mpt_config(ioc, &cfg); + if (error) + goto out_free_consistent; + + mptsas_print_device_pg0(buffer); + + device_info->handle = le16_to_cpu(buffer->DevHandle); + device_info->phy_id = buffer->PhyNum; + device_info->port_id = buffer->PhysicalPort; + device_info->target = buffer->TargetID; + device_info->bus = buffer->Bus; + memcpy(&sas_address, &buffer->SASAddress, sizeof(__le64)); + device_info->sas_address = le64_to_cpu(sas_address); + device_info->device_info = + le32_to_cpu(buffer->DeviceInfo); + + out_free_consistent: + pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + buffer, dma_handle); + out: + return error; +} + +static int +mptsas_sas_expander_pg0(MPT_ADAPTER *ioc, struct mptsas_portinfo *port_info, + u32 form, u32 form_specific) +{ + ConfigExtendedPageHeader_t hdr; + CONFIGPARMS cfg; + SasExpanderPage0_t *buffer; + dma_addr_t dma_handle; + int error; + + hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION; + hdr.ExtPageLength = 0; + hdr.PageNumber = 0; + hdr.Reserved1 = 0; + hdr.Reserved2 = 0; + hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED; + hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_EXPANDER; + + cfg.cfghdr.ehdr = &hdr; + cfg.physAddr = -1; + cfg.pageAddr = form + form_specific; + cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER; + cfg.dir = 0; /* read */ + cfg.timeout = 10; + + error = mpt_config(ioc, &cfg); + if (error) + goto out; + + if (!hdr.ExtPageLength) { + error = -ENXIO; + goto out; + } + + buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + &dma_handle); + if (!buffer) { + error = -ENOMEM; + goto out; + } + + cfg.physAddr = dma_handle; + cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; + + error = mpt_config(ioc, &cfg); + if (error) + goto out_free_consistent; + + /* save config data */ + port_info->num_phys = buffer->NumPhys; + port_info->handle = le16_to_cpu(buffer->DevHandle); + port_info->phy_info = kcalloc(port_info->num_phys, + sizeof(struct mptsas_phyinfo),GFP_KERNEL); + if (!port_info->phy_info) { + error = -ENOMEM; + goto out_free_consistent; + } + + out_free_consistent: + pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + buffer, dma_handle); + out: + return error; +} + +static int +mptsas_sas_expander_pg1(MPT_ADAPTER *ioc, struct mptsas_phyinfo *phy_info, + u32 form, u32 form_specific) +{ + ConfigExtendedPageHeader_t hdr; + CONFIGPARMS cfg; + SasExpanderPage1_t *buffer; + dma_addr_t dma_handle; + int error; + + hdr.PageVersion = MPI_SASEXPANDER0_PAGEVERSION; + hdr.ExtPageLength = 0; + hdr.PageNumber = 1; + hdr.Reserved1 = 0; + hdr.Reserved2 = 0; + hdr.PageType = MPI_CONFIG_PAGETYPE_EXTENDED; + hdr.ExtPageType = MPI_CONFIG_EXTPAGETYPE_SAS_EXPANDER; + + cfg.cfghdr.ehdr = &hdr; + cfg.physAddr = -1; + cfg.pageAddr = form + form_specific; + cfg.action = MPI_CONFIG_ACTION_PAGE_HEADER; + cfg.dir = 0; /* read */ + cfg.timeout = 10; + + error = mpt_config(ioc, &cfg); + if (error) + goto out; + + if (!hdr.ExtPageLength) { + error = -ENXIO; + goto out; + } + + buffer = pci_alloc_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + &dma_handle); + if (!buffer) { + error = -ENOMEM; + goto out; + } + + cfg.physAddr = dma_handle; + cfg.action = MPI_CONFIG_ACTION_PAGE_READ_CURRENT; + + error = mpt_config(ioc, &cfg); + if (error) + goto out_free_consistent; + + + mptsas_print_expander_pg1(buffer); + + /* save config data */ + phy_info->phy_id = buffer->Phy; + phy_info->port_id = buffer->PhysicalPort; + phy_info->negotiated_link_rate = buffer->NegotiatedLinkRate; + phy_info->programmed_link_rate = buffer->ProgrammedLinkRate; + phy_info->hw_link_rate = buffer->HwLinkRate; + phy_info->identify.handle = le16_to_cpu(buffer->OwnerDevHandle); + phy_info->attached.handle = le16_to_cpu(buffer->AttachedDevHandle); + + + out_free_consistent: + pci_free_consistent(ioc->pcidev, hdr.ExtPageLength * 4, + buffer, dma_handle); + out: + return error; +} + +static void +mptsas_parse_device_info(struct sas_identify *identify, + struct mptsas_devinfo *device_info) +{ + u16 protocols; + + identify->sas_address = device_info->sas_address; + identify->phy_identifier = device_info->phy_id; + + /* + * Fill in Phy Initiator Port Protocol. + * Bits 6:3, more than one bit can be set, fall through cases. + */ + protocols = device_info->device_info & 0x78; + identify->initiator_port_protocols = 0; + if (protocols & MPI_SAS_DEVICE_INFO_SSP_INITIATOR) + identify->initiator_port_protocols |= SAS_PROTOCOL_SSP; + if (protocols & MPI_SAS_DEVICE_INFO_STP_INITIATOR) + identify->initiator_port_protocols |= SAS_PROTOCOL_STP; + if (protocols & MPI_SAS_DEVICE_INFO_SMP_INITIATOR) + identify->initiator_port_protocols |= SAS_PROTOCOL_SMP; + if (protocols & MPI_SAS_DEVICE_INFO_SATA_HOST) + identify->initiator_port_protocols |= SAS_PROTOCOL_SATA; + + /* + * Fill in Phy Target Port Protocol. + * Bits 10:7, more than one bit can be set, fall through cases. + */ + protocols = device_info->device_info & 0x780; + identify->target_port_protocols = 0; + if (protocols & MPI_SAS_DEVICE_INFO_SSP_TARGET) + identify->target_port_protocols |= SAS_PROTOCOL_SSP; + if (protocols & MPI_SAS_DEVICE_INFO_STP_TARGET) + identify->target_port_protocols |= SAS_PROTOCOL_STP; + if (protocols & MPI_SAS_DEVICE_INFO_SMP_TARGET) + identify->target_port_protocols |= SAS_PROTOCOL_SMP; + if (protocols & MPI_SAS_DEVICE_INFO_SATA_DEVICE) + identify->target_port_protocols |= SAS_PROTOCOL_SATA; + + /* + * Fill in Attached device type. + */ + switch (device_info->device_info & + MPI_SAS_DEVICE_INFO_MASK_DEVICE_TYPE) { + case MPI_SAS_DEVICE_INFO_NO_DEVICE: + identify->device_type = SAS_PHY_UNUSED; + break; + case MPI_SAS_DEVICE_INFO_END_DEVICE: + identify->device_type = SAS_END_DEVICE; + break; + case MPI_SAS_DEVICE_INFO_EDGE_EXPANDER: + identify->device_type = SAS_EDGE_EXPANDER_DEVICE; + break; + case MPI_SAS_DEVICE_INFO_FANOUT_EXPANDER: + identify->device_type = SAS_FANOUT_EXPANDER_DEVICE; + break; + } +} + +static int mptsas_probe_one_phy(struct device *dev, + struct mptsas_phyinfo *phy_info, int index) +{ + struct sas_phy *port; + int error; + + port = sas_phy_alloc(dev, index); + if (!port) + return -ENOMEM; + + port->port_identifier = phy_info->port_id; + mptsas_parse_device_info(&port->identify, &phy_info->identify); + + /* + * Set Negotiated link rate. + */ + switch (phy_info->negotiated_link_rate) { + case MPI_SAS_IOUNIT0_RATE_PHY_DISABLED: + port->negotiated_linkrate = SAS_PHY_DISABLED; + break; + case MPI_SAS_IOUNIT0_RATE_FAILED_SPEED_NEGOTIATION: + port->negotiated_linkrate = SAS_LINK_RATE_FAILED; + break; + case MPI_SAS_IOUNIT0_RATE_1_5: + port->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS; + break; + case MPI_SAS_IOUNIT0_RATE_3_0: + port->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS; + break; + case MPI_SAS_IOUNIT0_RATE_SATA_OOB_COMPLETE: + case MPI_SAS_IOUNIT0_RATE_UNKNOWN: + default: + port->negotiated_linkrate = SAS_LINK_RATE_UNKNOWN; + break; + } + + /* + * Set Max hardware link rate. + */ + switch (phy_info->hw_link_rate & MPI_SAS_PHY0_PRATE_MAX_RATE_MASK) { + case MPI_SAS_PHY0_HWRATE_MAX_RATE_1_5: + port->maximum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; + break; + case MPI_SAS_PHY0_PRATE_MAX_RATE_3_0: + port->maximum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS; + break; + default: + break; + } + + /* + * Set Max programmed link rate. + */ + switch (phy_info->programmed_link_rate & + MPI_SAS_PHY0_PRATE_MAX_RATE_MASK) { + case MPI_SAS_PHY0_PRATE_MAX_RATE_1_5: + port->maximum_linkrate = SAS_LINK_RATE_1_5_GBPS; + break; + case MPI_SAS_PHY0_PRATE_MAX_RATE_3_0: + port->maximum_linkrate = SAS_LINK_RATE_3_0_GBPS; + break; + default: + break; + } + + /* + * Set Min hardware link rate. + */ + switch (phy_info->hw_link_rate & MPI_SAS_PHY0_HWRATE_MIN_RATE_MASK) { + case MPI_SAS_PHY0_HWRATE_MIN_RATE_1_5: + port->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS; + break; + case MPI_SAS_PHY0_PRATE_MIN_RATE_3_0: + port->minimum_linkrate_hw = SAS_LINK_RATE_3_0_GBPS; + break; + default: + break; + } + + /* + * Set Min programmed link rate. + */ + switch (phy_info->programmed_link_rate & + MPI_SAS_PHY0_PRATE_MIN_RATE_MASK) { + case MPI_SAS_PHY0_PRATE_MIN_RATE_1_5: + port->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS; + break; + case MPI_SAS_PHY0_PRATE_MIN_RATE_3_0: + port->minimum_linkrate = SAS_LINK_RATE_3_0_GBPS; + break; + default: + break; + } + + error = sas_phy_add(port); + if (error) { + sas_phy_free(port); + return error; + } + + if (phy_info->attached.handle) { + struct sas_rphy *rphy; + + rphy = sas_rphy_alloc(port); + if (!rphy) + return 0; /* non-fatal: an rphy can be added later */ + + mptsas_parse_device_info(&rphy->identify, &phy_info->attached); + error = sas_rphy_add(rphy); + if (error) { + sas_rphy_free(rphy); + return error; + } + + phy_info->rphy = rphy; + } + + return 0; +} + +static int +mptsas_probe_hba_phys(MPT_ADAPTER *ioc, int *index) +{ + struct mptsas_portinfo *port_info; + u32 handle = 0xFFFF; + int error = -ENOMEM, i; + + port_info = kmalloc(sizeof(*port_info), GFP_KERNEL); + if (!port_info) + goto out; + memset(port_info, 0, sizeof(*port_info)); + + error = mptsas_sas_io_unit_pg0(ioc, port_info); + if (error) + goto out_free_port_info; + + list_add_tail(&port_info->list, &ioc->sas_topology); + + for (i = 0; i < port_info->num_phys; i++) { + mptsas_sas_phy_pg0(ioc, &port_info->phy_info[i], + (MPI_SAS_PHY_PGAD_FORM_PHY_NUMBER << + MPI_SAS_PHY_PGAD_FORM_SHIFT), i); + + mptsas_sas_device_pg0(ioc, &port_info->phy_info[i].identify, + (MPI_SAS_DEVICE_PGAD_FORM_GET_NEXT_HANDLE << + MPI_SAS_DEVICE_PGAD_FORM_SHIFT), handle); + handle = port_info->phy_info[i].identify.handle; + + if (port_info->phy_info[i].attached.handle) { + mptsas_sas_device_pg0(ioc, + &port_info->phy_info[i].attached, + (MPI_SAS_DEVICE_PGAD_FORM_HANDLE << + MPI_SAS_DEVICE_PGAD_FORM_SHIFT), + port_info->phy_info[i].attached.handle); + } + + mptsas_probe_one_phy(&ioc->sh->shost_gendev, + &port_info->phy_info[i], *index); + (*index)++; + } + + return 0; + + out_free_port_info: + kfree(port_info); + out: + return error; +} + +static int +mptsas_probe_expander_phys(MPT_ADAPTER *ioc, u32 *handle, int *index) +{ + struct mptsas_portinfo *port_info, *p; + int error = -ENOMEM, i, j; + + port_info = kmalloc(sizeof(*port_info), GFP_KERNEL); + if (!port_info) + goto out; + memset(port_info, 0, sizeof(*port_info)); + + error = mptsas_sas_expander_pg0(ioc, port_info, + (MPI_SAS_EXPAND_PGAD_FORM_GET_NEXT_HANDLE << + MPI_SAS_EXPAND_PGAD_FORM_SHIFT), *handle); + if (error) + goto out_free_port_info; + + *handle = port_info->handle; + + list_add_tail(&port_info->list, &ioc->sas_topology); + for (i = 0; i < port_info->num_phys; i++) { + struct device *parent; + + mptsas_sas_expander_pg1(ioc, &port_info->phy_info[i], + (MPI_SAS_EXPAND_PGAD_FORM_HANDLE_PHY_NUM << + MPI_SAS_EXPAND_PGAD_FORM_SHIFT), (i << 16) + *handle); + + if (port_info->phy_info[i].identify.handle) { + mptsas_sas_device_pg0(ioc, + &port_info->phy_info[i].identify, + (MPI_SAS_DEVICE_PGAD_FORM_HANDLE << + MPI_SAS_DEVICE_PGAD_FORM_SHIFT), + port_info->phy_info[i].identify.handle); + } + + if (port_info->phy_info[i].attached.handle) { + mptsas_sas_device_pg0(ioc, + &port_info->phy_info[i].attached, + (MPI_SAS_DEVICE_PGAD_FORM_HANDLE << + MPI_SAS_DEVICE_PGAD_FORM_SHIFT), + port_info->phy_info[i].attached.handle); + } + + /* + * If we find a parent port handle this expander is + * attached to another expander, else it hangs of the + * HBA phys. + */ + parent = &ioc->sh->shost_gendev; + list_for_each_entry(p, &ioc->sas_topology, list) { + for (j = 0; j < p->num_phys; j++) { + if (port_info->phy_info[i].identify.handle == + p->phy_info[j].attached.handle) + parent = &p->phy_info[j].rphy->dev; + } + } + + mptsas_probe_one_phy(parent, &port_info->phy_info[i], *index); + (*index)++; + } + + return 0; + + out_free_port_info: + kfree(port_info); + out: + return error; +} + +static void +mptsas_scan_sas_topology(MPT_ADAPTER *ioc) +{ + u32 handle = 0xFFFF; + int index = 0; + + mptsas_probe_hba_phys(ioc, &index); + while (!mptsas_probe_expander_phys(ioc, &handle, &index)) + ; +} + +static int +mptsas_probe(struct pci_dev *pdev, const struct pci_device_id *id) +{ + struct Scsi_Host *sh; + MPT_SCSI_HOST *hd; + MPT_ADAPTER *ioc; + unsigned long flags; + int sz, ii; + int numSGE = 0; + int scale; + int ioc_cap; + u8 *mem; + int error=0; + int r; + + r = mpt_attach(pdev,id); + if (r) + return r; + + ioc = pci_get_drvdata(pdev); + ioc->DoneCtx = mptsasDoneCtx; + ioc->TaskCtx = mptsasTaskCtx; + ioc->InternalCtx = mptsasInternalCtx; + + /* Added sanity check on readiness of the MPT adapter. + */ + if (ioc->last_state != MPI_IOC_STATE_OPERATIONAL) { + printk(MYIOC_s_WARN_FMT + "Skipping because it's not operational!\n", + ioc->name); + return -ENODEV; + } + + if (!ioc->active) { + printk(MYIOC_s_WARN_FMT "Skipping because it's disabled!\n", + ioc->name); + return -ENODEV; + } + + /* Sanity check - ensure at least 1 port is INITIATOR capable + */ + ioc_cap = 0; + for (ii = 0; ii < ioc->facts.NumberOfPorts; ii++) { + if (ioc->pfacts[ii].ProtocolFlags & + MPI_PORTFACTS_PROTOCOL_INITIATOR) + ioc_cap++; + } + + if (!ioc_cap) { + printk(MYIOC_s_WARN_FMT + "Skipping ioc=%p because SCSI Initiator mode " + "is NOT enabled!\n", ioc->name, ioc); + return 0; + } + + sh = scsi_host_alloc(&mptsas_driver_template, sizeof(MPT_SCSI_HOST)); + if (!sh) { + printk(MYIOC_s_WARN_FMT + "Unable to register controller with SCSI subsystem\n", + ioc->name); + return -1; + } + + spin_lock_irqsave(&ioc->FreeQlock, flags); + + /* Attach the SCSI Host to the IOC structure + */ + ioc->sh = sh; + + sh->io_port = 0; + sh->n_io_port = 0; + sh->irq = 0; + + /* set 16 byte cdb's */ + sh->max_cmd_len = 16; + + sh->max_id = ioc->pfacts->MaxDevices + 1; + + sh->transportt = mptsas_transport_template; + + sh->max_lun = MPT_LAST_LUN + 1; + sh->max_channel = 0; + sh->this_id = ioc->pfacts[0].PortSCSIID; + + /* Required entry. + */ + sh->unique_id = ioc->id; + + INIT_LIST_HEAD(&ioc->sas_topology); + + /* Verify that we won't exceed the maximum + * number of chain buffers + * We can optimize: ZZ = req_sz/sizeof(SGE) + * For 32bit SGE's: + * numSGE = 1 + (ZZ-1)*(maxChain -1) + ZZ + * + (req_sz - 64)/sizeof(SGE) + * A slightly different algorithm is required for + * 64bit SGEs. + */ + scale = ioc->req_sz/(sizeof(dma_addr_t) + sizeof(u32)); + if (sizeof(dma_addr_t) == sizeof(u64)) { + numSGE = (scale - 1) * + (ioc->facts.MaxChainDepth-1) + scale + + (ioc->req_sz - 60) / (sizeof(dma_addr_t) + + sizeof(u32)); + } else { + numSGE = 1 + (scale - 1) * + (ioc->facts.MaxChainDepth-1) + scale + + (ioc->req_sz - 64) / (sizeof(dma_addr_t) + + sizeof(u32)); + } + + if (numSGE < sh->sg_tablesize) { + /* Reset this value */ + dprintk((MYIOC_s_INFO_FMT + "Resetting sg_tablesize to %d from %d\n", + ioc->name, numSGE, sh->sg_tablesize)); + sh->sg_tablesize = numSGE; + } + + spin_unlock_irqrestore(&ioc->FreeQlock, flags); + + hd = (MPT_SCSI_HOST *) sh->hostdata; + hd->ioc = ioc; + + /* SCSI needs scsi_cmnd lookup table! + * (with size equal to req_depth*PtrSz!) + */ + sz = ioc->req_depth * sizeof(void *); + mem = kmalloc(sz, GFP_ATOMIC); + if (mem == NULL) { + error = -ENOMEM; + goto mptsas_probe_failed; + } + + memset(mem, 0, sz); + hd->ScsiLookup = (struct scsi_cmnd **) mem; + + dprintk((MYIOC_s_INFO_FMT "ScsiLookup @ %p, sz=%d\n", + ioc->name, hd->ScsiLookup, sz)); + + /* Allocate memory for the device structures. + * A non-Null pointer at an offset + * indicates a device exists. + * max_id = 1 + maximum id (hosts.h) + */ + sz = sh->max_id * sizeof(void *); + mem = kmalloc(sz, GFP_ATOMIC); + if (mem == NULL) { + error = -ENOMEM; + goto mptsas_probe_failed; + } + + memset(mem, 0, sz); + hd->Targets = (VirtDevice **) mem; + + dprintk((KERN_INFO + " Targets @ %p, sz=%d\n", hd->Targets, sz)); + + /* Clear the TM flags + */ + hd->tmPending = 0; + hd->tmState = TM_STATE_NONE; + hd->resetPending = 0; + hd->abortSCpnt = NULL; + + /* Clear the pointer used to store + * single-threaded commands, i.e., those + * issued during a bus scan, dv and + * configuration pages. + */ + hd->cmdPtr = NULL; + + /* Initialize this SCSI Hosts' timers + * To use, set the timer expires field + * and add_timer + */ + init_timer(&hd->timer); + hd->timer.data = (unsigned long) hd; + hd->timer.function = mptscsih_timer_expired; + + hd->mpt_pq_filter = mpt_pq_filter; + ioc->sas_data.ptClear = mpt_pt_clear; + + if (ioc->sas_data.ptClear==1) { + mptbase_sas_persist_operation( + ioc, MPI_SAS_OP_CLEAR_ALL_PERSISTENT); + } + + ddvprintk((MYIOC_s_INFO_FMT + "mpt_pq_filter %x mpt_pq_filter %x\n", + ioc->name, + mpt_pq_filter, + mpt_pq_filter)); + + init_waitqueue_head(&hd->scandv_waitq); + hd->scandv_wait_done = 0; + hd->last_queue_full = 0; + + error = scsi_add_host(sh, &ioc->pcidev->dev); + if (error) { + dprintk((KERN_ERR MYNAM + "scsi_add_host failed\n")); + goto mptsas_probe_failed; + } + + mptsas_scan_sas_topology(ioc); + + return 0; + +mptsas_probe_failed: + + mptscsih_remove(pdev); + return error; +} + +static void __devexit mptsas_remove(struct pci_dev *pdev) +{ + MPT_ADAPTER *ioc = pci_get_drvdata(pdev); + struct mptsas_portinfo *p, *n; + + sas_remove_host(ioc->sh); + + list_for_each_entry_safe(p, n, &ioc->sas_topology, list) { + list_del(&p->list); + kfree(p); + } + + mptscsih_remove(pdev); +} + +static struct pci_device_id mptsas_pci_table[] = { + { PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1066, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1068, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1064E, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1066E, + PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_LSI_LOGIC, PCI_DEVICE_ID_LSI_SAS1068E, + PCI_ANY_ID, PCI_ANY_ID }, + {0} /* Terminating entry */ +}; +MODULE_DEVICE_TABLE(pci, mptsas_pci_table); + + +static struct pci_driver mptsas_driver = { + .name = "mptsas", + .id_table = mptsas_pci_table, + .probe = mptsas_probe, + .remove = __devexit_p(mptsas_remove), + .shutdown = mptscsih_shutdown, +#ifdef CONFIG_PM + .suspend = mptscsih_suspend, + .resume = mptscsih_resume, +#endif +}; + +static int __init +mptsas_init(void) +{ + show_mptmod_ver(my_NAME, my_VERSION); + + mptsas_transport_template = + sas_attach_transport(&mptsas_transport_functions); + if (!mptsas_transport_template) + return -ENODEV; + + mptsasDoneCtx = mpt_register(mptscsih_io_done, MPTSAS_DRIVER); + mptsasTaskCtx = mpt_register(mptscsih_taskmgmt_complete, MPTSAS_DRIVER); + mptsasInternalCtx = + mpt_register(mptscsih_scandv_complete, MPTSAS_DRIVER); + + if (mpt_event_register(mptsasDoneCtx, mptscsih_event_process) == 0) { + devtprintk((KERN_INFO MYNAM + ": Registered for IOC event notifications\n")); + } + + if (mpt_reset_register(mptsasDoneCtx, mptscsih_ioc_reset) == 0) { + dprintk((KERN_INFO MYNAM + ": Registered for IOC reset notifications\n")); + } + + return pci_register_driver(&mptsas_driver); +} + +static void __exit +mptsas_exit(void) +{ + pci_unregister_driver(&mptsas_driver); + sas_release_transport(mptsas_transport_template); + + mpt_reset_deregister(mptsasDoneCtx); + mpt_event_deregister(mptsasDoneCtx); + + mpt_deregister(mptsasInternalCtx); + mpt_deregister(mptsasTaskCtx); + mpt_deregister(mptsasDoneCtx); +} + +module_init(mptsas_init); +module_exit(mptsas_exit); diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c index 4a003dc5fde..5cb07eb224d 100644 --- a/drivers/message/fusion/mptscsih.c +++ b/drivers/message/fusion/mptscsih.c @@ -62,6 +62,7 @@ #include <scsi/scsi_device.h> #include <scsi/scsi_host.h> #include <scsi/scsi_tcq.h> +#include <scsi/scsi_dbg.h> #include "mptbase.h" #include "mptscsih.h" @@ -93,8 +94,9 @@ typedef struct _BIG_SENSE_BUF { #define MPT_ICFLAG_BUF_CAP 0x01 /* ReadBuffer Read Capacity format */ #define MPT_ICFLAG_ECHO 0x02 /* ReadBuffer Echo buffer format */ -#define MPT_ICFLAG_PHYS_DISK 0x04 /* Any SCSI IO but do Phys Disk Format */ -#define MPT_ICFLAG_TAGGED_CMD 0x08 /* Do tagged IO */ +#define MPT_ICFLAG_EBOS 0x04 /* ReadBuffer Echo buffer has EBOS */ +#define MPT_ICFLAG_PHYS_DISK 0x08 /* Any SCSI IO but do Phys Disk Format */ +#define MPT_ICFLAG_TAGGED_CMD 0x10 /* Do tagged IO */ #define MPT_ICFLAG_DID_RESET 0x20 /* Bus Reset occurred with this command */ #define MPT_ICFLAG_RESERVED 0x40 /* Reserved has been issued */ @@ -159,6 +161,8 @@ int mptscsih_scandv_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR static int mptscsih_do_cmd(MPT_SCSI_HOST *hd, INTERNAL_CMD *iocmd); static int mptscsih_synchronize_cache(MPT_SCSI_HOST *hd, int portnum); +static struct work_struct mptscsih_persistTask; + #ifdef MPTSCSIH_ENABLE_DOMAIN_VALIDATION static int mptscsih_do_raid(MPT_SCSI_HOST *hd, u8 action, INTERNAL_CMD *io); static void mptscsih_domainValidation(void *hd); @@ -167,6 +171,7 @@ static void mptscsih_qas_check(MPT_SCSI_HOST *hd, int id); static int mptscsih_doDv(MPT_SCSI_HOST *hd, int channel, int target); static void mptscsih_dv_parms(MPT_SCSI_HOST *hd, DVPARAMETERS *dv,void *pPage); static void mptscsih_fillbuf(char *buffer, int size, int index, int width); +static void mptscsih_set_dvflags_raid(MPT_SCSI_HOST *hd, int id); #endif void mptscsih_remove(struct pci_dev *); @@ -606,11 +611,24 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr) xfer_cnt = le32_to_cpu(pScsiReply->TransferCount); sc->resid = sc->request_bufflen - xfer_cnt; + /* + * if we get a data underrun indication, yet no data was + * transferred and the SCSI status indicates that the + * command was never started, change the data underrun + * to success + */ + if (status == MPI_IOCSTATUS_SCSI_DATA_UNDERRUN && xfer_cnt == 0 && + (scsi_status == MPI_SCSI_STATUS_BUSY || + scsi_status == MPI_SCSI_STATUS_RESERVATION_CONFLICT || + scsi_status == MPI_SCSI_STATUS_TASK_SET_FULL)) { + status = MPI_IOCSTATUS_SUCCESS; + } + dreplyprintk((KERN_NOTICE "Reply ha=%d id=%d lun=%d:\n" "IOCStatus=%04xh SCSIState=%02xh SCSIStatus=%02xh\n" "resid=%d bufflen=%d xfer_cnt=%d\n", ioc->id, pScsiReq->TargetID, pScsiReq->LUN[1], - status, scsi_state, scsi_status, sc->resid, + status, scsi_state, scsi_status, sc->resid, sc->request_bufflen, xfer_cnt)); if (scsi_state & MPI_SCSI_STATE_AUTOSENSE_VALID) @@ -619,8 +637,11 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr) /* * Look for + dump FCP ResponseInfo[]! */ - if (scsi_state & MPI_SCSI_STATE_RESPONSE_INFO_VALID) { - printk(KERN_NOTICE " FCP_ResponseInfo=%08xh\n", + if (scsi_state & MPI_SCSI_STATE_RESPONSE_INFO_VALID && + pScsiReply->ResponseInfo) { + printk(KERN_NOTICE "ha=%d id=%d lun=%d: " + "FCP_ResponseInfo=%08xh\n", + ioc->id, pScsiReq->TargetID, pScsiReq->LUN[1], le32_to_cpu(pScsiReply->ResponseInfo)); } @@ -661,23 +682,13 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr) break; case MPI_IOCSTATUS_SCSI_RESIDUAL_MISMATCH: /* 0x0049 */ - if ( xfer_cnt >= sc->underflow ) { - /* Sufficient data transfer occurred */ + sc->resid = sc->request_bufflen - xfer_cnt; + if((xfer_cnt==0)||(sc->underflow > xfer_cnt)) + sc->result=DID_SOFT_ERROR << 16; + else /* Sufficient data transfer occurred */ sc->result = (DID_OK << 16) | scsi_status; - } else if ( xfer_cnt == 0 ) { - /* A CRC Error causes this condition; retry */ - sc->result = (DRIVER_SENSE << 24) | (DID_OK << 16) | - (CHECK_CONDITION << 1); - sc->sense_buffer[0] = 0x70; - sc->sense_buffer[2] = NO_SENSE; - sc->sense_buffer[12] = 0; - sc->sense_buffer[13] = 0; - } else { - sc->result = DID_SOFT_ERROR << 16; - } - dreplyprintk((KERN_NOTICE - "RESIDUAL_MISMATCH: result=%x on id=%d\n", - sc->result, sc->device->id)); + dreplyprintk((KERN_NOTICE + "RESIDUAL_MISMATCH: result=%x on id=%d\n", sc->result, sc->device->id)); break; case MPI_IOCSTATUS_SCSI_DATA_UNDERRUN: /* 0x0045 */ @@ -692,7 +703,10 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr) ; } else { if (xfer_cnt < sc->underflow) { - sc->result = DID_SOFT_ERROR << 16; + if (scsi_status == SAM_STAT_BUSY) + sc->result = SAM_STAT_BUSY; + else + sc->result = DID_SOFT_ERROR << 16; } if (scsi_state & (MPI_SCSI_STATE_AUTOSENSE_FAILED | MPI_SCSI_STATE_NO_SCSI_STATUS)) { /* What to do? @@ -717,8 +731,10 @@ mptscsih_io_done(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *mr) case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR: /* 0x0040 */ case MPI_IOCSTATUS_SUCCESS: /* 0x0000 */ - scsi_status = pScsiReply->SCSIStatus; - sc->result = (DID_OK << 16) | scsi_status; + if (scsi_status == MPI_SCSI_STATUS_BUSY) + sc->result = (DID_BUS_BUSY << 16) | scsi_status; + else + sc->result = (DID_OK << 16) | scsi_status; if (scsi_state == 0) { ; } else if (scsi_state & MPI_SCSI_STATE_AUTOSENSE_VALID) { @@ -890,12 +906,13 @@ mptscsih_search_running_cmds(MPT_SCSI_HOST *hd, uint target, uint lun) SCSIIORequest_t *mf = NULL; int ii; int max = hd->ioc->req_depth; + struct scsi_cmnd *sc; dsprintk((KERN_INFO MYNAM ": search_running target %d lun %d max %d\n", target, lun, max)); for (ii=0; ii < max; ii++) { - if (hd->ScsiLookup[ii] != NULL) { + if ((sc = hd->ScsiLookup[ii]) != NULL) { mf = (SCSIIORequest_t *)MPT_INDEX_2_MFPTR(hd->ioc, ii); @@ -910,9 +927,22 @@ mptscsih_search_running_cmds(MPT_SCSI_HOST *hd, uint target, uint lun) hd->ScsiLookup[ii] = NULL; mptscsih_freeChainBuffers(hd->ioc, ii); mpt_free_msg_frame(hd->ioc, (MPT_FRAME_HDR *)mf); + if (sc->use_sg) { + pci_unmap_sg(hd->ioc->pcidev, + (struct scatterlist *) sc->request_buffer, + sc->use_sg, + sc->sc_data_direction); + } else if (sc->request_bufflen) { + pci_unmap_single(hd->ioc->pcidev, + sc->SCp.dma_handle, + sc->request_bufflen, + sc->sc_data_direction); + } + sc->host_scribble = NULL; + sc->result = DID_NO_CONNECT << 16; + sc->scsi_done(sc); } } - return; } @@ -967,8 +997,10 @@ mptscsih_remove(struct pci_dev *pdev) unsigned long flags; int sz1; - if(!host) + if(!host) { + mpt_detach(pdev); return; + } scsi_remove_host(host); @@ -1256,8 +1288,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) MPT_SCSI_HOST *hd; MPT_FRAME_HDR *mf; SCSIIORequest_t *pScsiReq; - VirtDevice *pTarget; - int target; + VirtDevice *pTarget = SCpnt->device->hostdata; int lun; u32 datalen; u32 scsictl; @@ -1267,12 +1298,9 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) int ii; hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata; - target = SCpnt->device->id; lun = SCpnt->device->lun; SCpnt->scsi_done = done; - pTarget = hd->Targets[target]; - dmfprintk((MYIOC_s_INFO_FMT "qcmd: SCpnt=%p, done()=%p\n", (hd && hd->ioc) ? hd->ioc->name : "ioc?", SCpnt, done)); @@ -1315,7 +1343,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) /* Default to untagged. Once a target structure has been allocated, * use the Inquiry data to determine if device supports tagged. */ - if ( pTarget + if (pTarget && (pTarget->tflags & MPT_TARGET_FLAGS_Q_YES) && (SCpnt->device->tagged_supported)) { scsictl = scsidir | MPI_SCSIIO_CONTROL_SIMPLEQ; @@ -1325,8 +1353,8 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) /* Use the above information to set up the message frame */ - pScsiReq->TargetID = (u8) target; - pScsiReq->Bus = (u8) SCpnt->device->channel; + pScsiReq->TargetID = (u8) pTarget->target_id; + pScsiReq->Bus = pTarget->bus_id; pScsiReq->ChainOffset = 0; pScsiReq->Function = MPI_FUNCTION_SCSI_IO_REQUEST; pScsiReq->CDBLength = SCpnt->cmd_len; @@ -1378,7 +1406,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) #ifdef MPTSCSIH_ENABLE_DOMAIN_VALIDATION if (hd->ioc->bus_type == SCSI) { - int dvStatus = hd->ioc->spi_data.dvStatus[target]; + int dvStatus = hd->ioc->spi_data.dvStatus[pTarget->target_id]; int issueCmd = 1; if (dvStatus || hd->ioc->spi_data.forceDv) { @@ -1426,6 +1454,7 @@ mptscsih_qcmd(struct scsi_cmnd *SCpnt, void (*done)(struct scsi_cmnd *)) return 0; fail: + hd->ScsiLookup[my_idx] = NULL; mptscsih_freeChainBuffers(hd->ioc, my_idx); mpt_free_msg_frame(hd->ioc, mf); return SCSI_MLQUEUE_HOST_BUSY; @@ -1713,24 +1742,23 @@ mptscsih_abort(struct scsi_cmnd * SCpnt) MPT_FRAME_HDR *mf; u32 ctx2abort; int scpnt_idx; + int retval; /* If we can't locate our host adapter structure, return FAILED status. */ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL) { SCpnt->result = DID_RESET << 16; SCpnt->scsi_done(SCpnt); - dfailprintk((KERN_WARNING MYNAM ": mptscsih_abort: " + dfailprintk((KERN_INFO MYNAM ": mptscsih_abort: " "Can't locate host! (sc=%p)\n", SCpnt)); return FAILED; } ioc = hd->ioc; - if (hd->resetPending) + if (hd->resetPending) { return FAILED; - - printk(KERN_WARNING MYNAM ": %s: >> Attempting task abort! (sc=%p)\n", - hd->ioc->name, SCpnt); + } if (hd->timeouts < -1) hd->timeouts++; @@ -1738,16 +1766,20 @@ mptscsih_abort(struct scsi_cmnd * SCpnt) /* Find this command */ if ((scpnt_idx = SCPNT_TO_LOOKUP_IDX(SCpnt)) < 0) { - /* Cmd not found in ScsiLookup. + /* Cmd not found in ScsiLookup. * Do OS callback. */ SCpnt->result = DID_RESET << 16; - dtmprintk((KERN_WARNING MYNAM ": %s: mptscsih_abort: " + dtmprintk((KERN_INFO MYNAM ": %s: mptscsih_abort: " "Command not in the active list! (sc=%p)\n", hd->ioc->name, SCpnt)); return SUCCESS; } + printk(KERN_WARNING MYNAM ": %s: attempting task abort! (sc=%p)\n", + hd->ioc->name, SCpnt); + scsi_print_command(SCpnt); + /* Most important! Set TaskMsgContext to SCpnt's MsgContext! * (the IO to be ABORT'd) * @@ -1760,38 +1792,22 @@ mptscsih_abort(struct scsi_cmnd * SCpnt) hd->abortSCpnt = SCpnt; - if (mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK, + retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_ABORT_TASK, SCpnt->device->channel, SCpnt->device->id, SCpnt->device->lun, - ctx2abort, 2 /* 2 second timeout */) - < 0) { + ctx2abort, 2 /* 2 second timeout */); - /* The TM request failed and the subsequent FW-reload failed! - * Fatal error case. - */ - printk(MYIOC_s_WARN_FMT "Error issuing abort task! (sc=%p)\n", - hd->ioc->name, SCpnt); + printk (KERN_WARNING MYNAM ": %s: task abort: %s (sc=%p)\n", + hd->ioc->name, + ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); - /* We must clear our pending flag before clearing our state. - */ + if (retval == 0) + return SUCCESS; + + if(retval != FAILED ) { hd->tmPending = 0; hd->tmState = TM_STATE_NONE; - - /* Unmap the DMA buffers, if any. */ - if (SCpnt->use_sg) { - pci_unmap_sg(ioc->pcidev, (struct scatterlist *) SCpnt->request_buffer, - SCpnt->use_sg, SCpnt->sc_data_direction); - } else if (SCpnt->request_bufflen) { - pci_unmap_single(ioc->pcidev, SCpnt->SCp.dma_handle, - SCpnt->request_bufflen, SCpnt->sc_data_direction); - } - hd->ScsiLookup[scpnt_idx] = NULL; - SCpnt->result = DID_RESET << 16; - SCpnt->scsi_done(SCpnt); /* Issue the command callback */ - mptscsih_freeChainBuffers(ioc, scpnt_idx); - mpt_free_msg_frame(ioc, mf); - return FAILED; } - return SUCCESS; + return FAILED; } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -1807,11 +1823,12 @@ int mptscsih_dev_reset(struct scsi_cmnd * SCpnt) { MPT_SCSI_HOST *hd; + int retval; /* If we can't locate our host adapter structure, return FAILED status. */ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){ - dtmprintk((KERN_WARNING MYNAM ": mptscsih_dev_reset: " + dtmprintk((KERN_INFO MYNAM ": mptscsih_dev_reset: " "Can't locate host! (sc=%p)\n", SCpnt)); return FAILED; @@ -1820,24 +1837,26 @@ mptscsih_dev_reset(struct scsi_cmnd * SCpnt) if (hd->resetPending) return FAILED; - printk(KERN_WARNING MYNAM ": %s: >> Attempting target reset! (sc=%p)\n", + printk(KERN_WARNING MYNAM ": %s: attempting target reset! (sc=%p)\n", hd->ioc->name, SCpnt); + scsi_print_command(SCpnt); - if (mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, + retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_TARGET_RESET, SCpnt->device->channel, SCpnt->device->id, - 0, 0, 5 /* 5 second timeout */) - < 0){ - /* The TM request failed and the subsequent FW-reload failed! - * Fatal error case. - */ - printk(MYIOC_s_WARN_FMT "Error processing TaskMgmt request (sc=%p)\n", - hd->ioc->name, SCpnt); + 0, 0, 5 /* 5 second timeout */); + + printk (KERN_WARNING MYNAM ": %s: target reset: %s (sc=%p)\n", + hd->ioc->name, + ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + + if (retval == 0) + return SUCCESS; + + if(retval != FAILED ) { hd->tmPending = 0; hd->tmState = TM_STATE_NONE; - return FAILED; } - - return SUCCESS; + return FAILED; } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -1853,41 +1872,39 @@ int mptscsih_bus_reset(struct scsi_cmnd * SCpnt) { MPT_SCSI_HOST *hd; - spinlock_t *host_lock = SCpnt->device->host->host_lock; + int retval; /* If we can't locate our host adapter structure, return FAILED status. */ if ((hd = (MPT_SCSI_HOST *) SCpnt->device->host->hostdata) == NULL){ - dtmprintk((KERN_WARNING MYNAM ": mptscsih_bus_reset: " + dtmprintk((KERN_INFO MYNAM ": mptscsih_bus_reset: " "Can't locate host! (sc=%p)\n", SCpnt ) ); return FAILED; } - printk(KERN_WARNING MYNAM ": %s: >> Attempting bus reset! (sc=%p)\n", + printk(KERN_WARNING MYNAM ": %s: attempting bus reset! (sc=%p)\n", hd->ioc->name, SCpnt); + scsi_print_command(SCpnt); if (hd->timeouts < -1) hd->timeouts++; - /* We are now ready to execute the task management request. */ - if (mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS, - SCpnt->device->channel, 0, 0, 0, 5 /* 5 second timeout */) - < 0){ + retval = mptscsih_TMHandler(hd, MPI_SCSITASKMGMT_TASKTYPE_RESET_BUS, + SCpnt->device->channel, 0, 0, 0, 5 /* 5 second timeout */); - /* The TM request failed and the subsequent FW-reload failed! - * Fatal error case. - */ - printk(MYIOC_s_WARN_FMT - "Error processing TaskMgmt request (sc=%p)\n", - hd->ioc->name, SCpnt); + printk (KERN_WARNING MYNAM ": %s: bus reset: %s (sc=%p)\n", + hd->ioc->name, + ((retval == 0) ? "SUCCESS" : "FAILED" ), SCpnt); + + if (retval == 0) + return SUCCESS; + + if(retval != FAILED ) { hd->tmPending = 0; hd->tmState = TM_STATE_NONE; - spin_lock_irq(host_lock); - return FAILED; } - - return SUCCESS; + return FAILED; } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ @@ -2169,7 +2186,7 @@ mptscsih_slave_alloc(struct scsi_device *device) vdev->raidVolume = 0; hd->Targets[device->id] = vdev; if (hd->ioc->bus_type == SCSI) { - if (hd->ioc->spi_data.isRaid & (1 << device->id)) { + if (hd->ioc->raid_data.isRaid & (1 << device->id)) { vdev->raidVolume = 1; ddvtprintk((KERN_INFO "RAID Volume @ id %d\n", device->id)); @@ -2180,22 +2197,7 @@ mptscsih_slave_alloc(struct scsi_device *device) out: vdev->num_luns++; - return 0; -} - -static int -mptscsih_is_raid_volume(MPT_SCSI_HOST *hd, uint id) -{ - int i; - - if (!hd->ioc->spi_data.isRaid || !hd->ioc->spi_data.pIocPg3) - return 0; - - for (i = 0; i < hd->ioc->spi_data.pIocPg3->NumPhysDisks; i++) { - if (id == hd->ioc->spi_data.pIocPg3->PhysDisk[i].PhysDiskID) - return 1; - } - + device->hostdata = vdev; return 0; } @@ -2226,7 +2228,7 @@ mptscsih_slave_destroy(struct scsi_device *device) hd->Targets[target] = NULL; if (hd->ioc->bus_type == SCSI) { - if (mptscsih_is_raid_volume(hd, target)) { + if (mptscsih_is_phys_disk(hd->ioc, target)) { hd->ioc->spi_data.forceDv |= MPT_SCSICFG_RELOAD_IOC_PG3; } else { hd->ioc->spi_data.dvStatus[target] = @@ -2439,6 +2441,7 @@ mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) { MPT_SCSI_HOST *hd; unsigned long flags; + int ii; dtmprintk((KERN_WARNING MYNAM ": IOC %s_reset routed to SCSI host driver!\n", @@ -2496,11 +2499,8 @@ mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) /* ScsiLookup initialization */ - { - int ii; - for (ii=0; ii < hd->ioc->req_depth; ii++) - hd->ScsiLookup[ii] = NULL; - } + for (ii=0; ii < hd->ioc->req_depth; ii++) + hd->ScsiLookup[ii] = NULL; /* 2. Chain Buffer initialization */ @@ -2549,6 +2549,16 @@ mptscsih_ioc_reset(MPT_ADAPTER *ioc, int reset_phase) } /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ +/* work queue thread to clear the persitency table */ +static void +mptscsih_sas_persist_clear_table(void * arg) +{ + MPT_ADAPTER *ioc = (MPT_ADAPTER *)arg; + + mptbase_sas_persist_operation(ioc, MPI_SAS_OP_CLEAR_NOT_PRESENT); +} + +/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ int mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply) { @@ -2558,18 +2568,18 @@ mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply) devtprintk((MYIOC_s_INFO_FMT "MPT event (=%02Xh) routed to SCSI host driver!\n", ioc->name, event)); + if (ioc->sh == NULL || + ((hd = (MPT_SCSI_HOST *)ioc->sh->hostdata) == NULL)) + return 1; + switch (event) { case MPI_EVENT_UNIT_ATTENTION: /* 03 */ /* FIXME! */ break; case MPI_EVENT_IOC_BUS_RESET: /* 04 */ case MPI_EVENT_EXT_BUS_RESET: /* 05 */ - hd = NULL; - if (ioc->sh) { - hd = (MPT_SCSI_HOST *) ioc->sh->hostdata; - if (hd && (ioc->bus_type == SCSI) && (hd->soft_resets < -1)) - hd->soft_resets++; - } + if (hd && (ioc->bus_type == SCSI) && (hd->soft_resets < -1)) + hd->soft_resets++; break; case MPI_EVENT_LOGOUT: /* 09 */ /* FIXME! */ @@ -2588,69 +2598,24 @@ mptscsih_event_process(MPT_ADAPTER *ioc, EventNotificationReply_t *pEvReply) break; case MPI_EVENT_INTEGRATED_RAID: /* 0B */ + { + pMpiEventDataRaid_t pRaidEventData = + (pMpiEventDataRaid_t) pEvReply->Data; #ifdef MPTSCSIH_ENABLE_DOMAIN_VALIDATION - /* negoNvram set to 0 if DV enabled and to USE_NVRAM if - * if DV disabled. Need to check for target mode. - */ - hd = NULL; - if (ioc->sh) - hd = (MPT_SCSI_HOST *) ioc->sh->hostdata; - - if (hd && (ioc->bus_type == SCSI) && (hd->negoNvram == 0)) { - ScsiCfgData *pSpi; - Ioc3PhysDisk_t *pPDisk; - int numPDisk; - u8 reason; - u8 physDiskNum; - - reason = (le32_to_cpu(pEvReply->Data[0]) & 0x00FF0000) >> 16; - if (reason == MPI_EVENT_RAID_RC_DOMAIN_VAL_NEEDED) { - /* New or replaced disk. - * Set DV flag and schedule DV. - */ - pSpi = &ioc->spi_data; - physDiskNum = (le32_to_cpu(pEvReply->Data[0]) & 0xFF000000) >> 24; - ddvtprintk(("DV requested for phys disk id %d\n", physDiskNum)); - if (pSpi->pIocPg3) { - pPDisk = pSpi->pIocPg3->PhysDisk; - numPDisk =pSpi->pIocPg3->NumPhysDisks; - - while (numPDisk) { - if (physDiskNum == pPDisk->PhysDiskNum) { - pSpi->dvStatus[pPDisk->PhysDiskID] = (MPT_SCSICFG_NEED_DV | MPT_SCSICFG_DV_NOT_DONE); - pSpi->forceDv = MPT_SCSICFG_NEED_DV; - ddvtprintk(("NEED_DV set for phys disk id %d\n", pPDisk->PhysDiskID)); - break; - } - pPDisk++; - numPDisk--; - } - - if (numPDisk == 0) { - /* The physical disk that needs DV was not found - * in the stored IOC Page 3. The driver must reload - * this page. DV routine will set the NEED_DV flag for - * all phys disks that have DV_NOT_DONE set. - */ - pSpi->forceDv = MPT_SCSICFG_NEED_DV | MPT_SCSICFG_RELOAD_IOC_PG3; - ddvtprintk(("phys disk %d not found. Setting reload IOC Pg3 Flag\n", physDiskNum)); - } - } - } - } + /* Domain Validation Needed */ + if (ioc->bus_type == SCSI && + pRaidEventData->ReasonCode == + MPI_EVENT_RAID_RC_DOMAIN_VAL_NEEDED) + mptscsih_set_dvflags_raid(hd, pRaidEventData->PhysDiskNum); #endif + break; + } -#if defined(MPT_DEBUG_DV) || defined(MPT_DEBUG_DV_TINY) - printk("Raid Event RF: "); - { - u32 *m = (u32 *)pEvReply; - int ii; - int n = (int)pEvReply->MsgLength; - for (ii=6; ii < n; ii++) - printk(" %08x", le32_to_cpu(m[ii])); - printk("\n"); - } -#endif + /* Persistent table is full. */ + case MPI_EVENT_PERSISTENT_TABLE_FULL: + INIT_WORK(&mptscsih_persistTask, + mptscsih_sas_persist_clear_table,(void *)ioc); + schedule_work(&mptscsih_persistTask); break; case MPI_EVENT_NONE: /* 00 */ @@ -2687,7 +2652,7 @@ mptscsih_initTarget(MPT_SCSI_HOST *hd, int bus_id, int target_id, u8 lun, char * { int indexed_lun, lun_index; VirtDevice *vdev; - ScsiCfgData *pSpi; + SpiCfgData *pSpi; char data_56; dinitprintk((MYIOC_s_INFO_FMT "initTarget bus=%d id=%d lun=%d hd=%p\n", @@ -2794,7 +2759,7 @@ mptscsih_initTarget(MPT_SCSI_HOST *hd, int bus_id, int target_id, u8 lun, char * static void mptscsih_setTargetNegoParms(MPT_SCSI_HOST *hd, VirtDevice *target, char byte56) { - ScsiCfgData *pspi_data = &hd->ioc->spi_data; + SpiCfgData *pspi_data = &hd->ioc->spi_data; int id = (int) target->target_id; int nvram; VirtDevice *vdev; @@ -2973,11 +2938,13 @@ mptscsih_setTargetNegoParms(MPT_SCSI_HOST *hd, VirtDevice *target, char byte56) static void mptscsih_set_dvflags(MPT_SCSI_HOST *hd, SCSIIORequest_t *pReq) { + MPT_ADAPTER *ioc = hd->ioc; u8 cmd; - ScsiCfgData *pSpi; + SpiCfgData *pSpi; - ddvtprintk((" set_dvflags: id=%d lun=%d negoNvram=%x cmd=%x\n", - pReq->TargetID, pReq->LUN[1], hd->negoNvram, pReq->CDB[0])); + ddvtprintk((MYIOC_s_NOTE_FMT + " set_dvflags: id=%d lun=%d negoNvram=%x cmd=%x\n", + hd->ioc->name, pReq->TargetID, pReq->LUN[1], hd->negoNvram, pReq->CDB[0])); if ((pReq->LUN[1] != 0) || (hd->negoNvram != 0)) return; @@ -2985,12 +2952,12 @@ mptscsih_set_dvflags(MPT_SCSI_HOST *hd, SCSIIORequest_t *pReq) cmd = pReq->CDB[0]; if ((cmd == READ_CAPACITY) || (cmd == MODE_SENSE)) { - pSpi = &hd->ioc->spi_data; - if ((pSpi->isRaid & (1 << pReq->TargetID)) && pSpi->pIocPg3) { + pSpi = &ioc->spi_data; + if ((ioc->raid_data.isRaid & (1 << pReq->TargetID)) && ioc->raid_data.pIocPg3) { /* Set NEED_DV for all hidden disks */ - Ioc3PhysDisk_t *pPDisk = pSpi->pIocPg3->PhysDisk; - int numPDisk = pSpi->pIocPg3->NumPhysDisks; + Ioc3PhysDisk_t *pPDisk = ioc->raid_data.pIocPg3->PhysDisk; + int numPDisk = ioc->raid_data.pIocPg3->NumPhysDisks; while (numPDisk) { pSpi->dvStatus[pPDisk->PhysDiskID] |= MPT_SCSICFG_NEED_DV; @@ -3004,6 +2971,50 @@ mptscsih_set_dvflags(MPT_SCSI_HOST *hd, SCSIIORequest_t *pReq) } } +/* mptscsih_raid_set_dv_flags() + * + * New or replaced disk. Set DV flag and schedule DV. + */ +static void +mptscsih_set_dvflags_raid(MPT_SCSI_HOST *hd, int id) +{ + MPT_ADAPTER *ioc = hd->ioc; + SpiCfgData *pSpi = &ioc->spi_data; + Ioc3PhysDisk_t *pPDisk; + int numPDisk; + + if (hd->negoNvram != 0) + return; + + ddvtprintk(("DV requested for phys disk id %d\n", id)); + if (ioc->raid_data.pIocPg3) { + pPDisk = ioc->raid_data.pIocPg3->PhysDisk; + numPDisk = ioc->raid_data.pIocPg3->NumPhysDisks; + while (numPDisk) { + if (id == pPDisk->PhysDiskNum) { + pSpi->dvStatus[pPDisk->PhysDiskID] = + (MPT_SCSICFG_NEED_DV | MPT_SCSICFG_DV_NOT_DONE); + pSpi->forceDv = MPT_SCSICFG_NEED_DV; + ddvtprintk(("NEED_DV set for phys disk id %d\n", + pPDisk->PhysDiskID)); + break; + } + pPDisk++; + numPDisk--; + } + + if (numPDisk == 0) { + /* The physical disk that needs DV was not found + * in the stored IOC Page 3. The driver must reload + * this page. DV routine will set the NEED_DV flag for + * all phys disks that have DV_NOT_DONE set. + */ + pSpi->forceDv = MPT_SCSICFG_NEED_DV | MPT_SCSICFG_RELOAD_IOC_PG3; + ddvtprintk(("phys disk %d not found. Setting reload IOC Pg3 Flag\n",id)); + } + } +} + /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/ /* * If no Target, bus reset on 1st I/O. Set the flag to @@ -3091,7 +3102,7 @@ mptscsih_writeSDP1(MPT_SCSI_HOST *hd, int portnum, int target_id, int flags) MPT_ADAPTER *ioc = hd->ioc; Config_t *pReq; SCSIDevicePage1_t *pData; - VirtDevice *pTarget; + VirtDevice *pTarget=NULL; MPT_FRAME_HDR *mf; dma_addr_t dataDma; u16 req_idx; @@ -3190,7 +3201,7 @@ mptscsih_writeSDP1(MPT_SCSI_HOST *hd, int portnum, int target_id, int flags) #endif if (flags & MPT_SCSICFG_BLK_NEGO) - negoFlags = MPT_TARGET_NO_NEGO_WIDE | MPT_TARGET_NO_NEGO_SYNC; + negoFlags |= MPT_TARGET_NO_NEGO_WIDE | MPT_TARGET_NO_NEGO_SYNC; mptscsih_setDevicePage1Flags(width, factor, offset, &requested, &configuration, negoFlags); @@ -4011,7 +4022,7 @@ mptscsih_synchronize_cache(MPT_SCSI_HOST *hd, int portnum) /* If target Ptr NULL or if this target is NOT a disk, skip. */ - if ((pTarget) && (pTarget->tflags & MPT_TARGET_FLAGS_Q_YES)){ + if ((pTarget) && (pTarget->inq_data[0] == TYPE_DISK)){ for (lun=0; lun <= MPT_LAST_LUN; lun++) { /* If LUN present, issue the command */ @@ -4106,9 +4117,9 @@ mptscsih_domainValidation(void *arg) if ((ioc->spi_data.forceDv & MPT_SCSICFG_RELOAD_IOC_PG3) != 0) { mpt_read_ioc_pg_3(ioc); - if (ioc->spi_data.pIocPg3) { - Ioc3PhysDisk_t *pPDisk = ioc->spi_data.pIocPg3->PhysDisk; - int numPDisk = ioc->spi_data.pIocPg3->NumPhysDisks; + if (ioc->raid_data.pIocPg3) { + Ioc3PhysDisk_t *pPDisk = ioc->raid_data.pIocPg3->PhysDisk; + int numPDisk = ioc->raid_data.pIocPg3->NumPhysDisks; while (numPDisk) { if (ioc->spi_data.dvStatus[pPDisk->PhysDiskID] & MPT_SCSICFG_DV_NOT_DONE) @@ -4147,7 +4158,7 @@ mptscsih_domainValidation(void *arg) isPhysDisk = mptscsih_is_phys_disk(ioc, id); if (isPhysDisk) { for (ii=0; ii < MPT_MAX_SCSI_DEVICES; ii++) { - if (hd->ioc->spi_data.isRaid & (1 << ii)) { + if (hd->ioc->raid_data.isRaid & (1 << ii)) { hd->ioc->spi_data.dvStatus[ii] |= MPT_SCSICFG_DV_PENDING; } } @@ -4166,7 +4177,7 @@ mptscsih_domainValidation(void *arg) if (isPhysDisk) { for (ii=0; ii < MPT_MAX_SCSI_DEVICES; ii++) { - if (hd->ioc->spi_data.isRaid & (1 << ii)) { + if (hd->ioc->raid_data.isRaid & (1 << ii)) { hd->ioc->spi_data.dvStatus[ii] &= ~MPT_SCSICFG_DV_PENDING; } } @@ -4188,21 +4199,21 @@ mptscsih_domainValidation(void *arg) /* Search IOC page 3 to determine if this is hidden physical disk */ -static int +/* Search IOC page 3 to determine if this is hidden physical disk + */ +static int mptscsih_is_phys_disk(MPT_ADAPTER *ioc, int id) { - if (ioc->spi_data.pIocPg3) { - Ioc3PhysDisk_t *pPDisk = ioc->spi_data.pIocPg3->PhysDisk; - int numPDisk = ioc->spi_data.pIocPg3->NumPhysDisks; + int i; - while (numPDisk) { - if (pPDisk->PhysDiskID == id) { - return 1; - } - pPDisk++; - numPDisk--; - } + if (!ioc->raid_data.isRaid || !ioc->raid_data.pIocPg3) + return 0; + + for (i = 0; i < ioc->raid_data.pIocPg3->NumPhysDisks; i++) { + if (id == ioc->raid_data.pIocPg3->PhysDisk[i].PhysDiskID) + return 1; } + return 0; } @@ -4408,7 +4419,7 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id) /* Skip this ID? Set cfg.cfghdr.hdr to force config page write */ { - ScsiCfgData *pspi_data = &hd->ioc->spi_data; + SpiCfgData *pspi_data = &hd->ioc->spi_data; if (pspi_data->nvram && (pspi_data->nvram[id] != MPT_HOST_NVRAM_INVALID)) { /* Set the factor from nvram */ nfactor = (pspi_data->nvram[id] & MPT_NVRAM_SYNC_MASK) >> 8; @@ -4438,11 +4449,11 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id) } /* Finish iocmd inititialization - hidden or visible disk? */ - if (ioc->spi_data.pIocPg3) { + if (ioc->raid_data.pIocPg3) { /* Search IOC page 3 for matching id */ - Ioc3PhysDisk_t *pPDisk = ioc->spi_data.pIocPg3->PhysDisk; - int numPDisk = ioc->spi_data.pIocPg3->NumPhysDisks; + Ioc3PhysDisk_t *pPDisk = ioc->raid_data.pIocPg3->PhysDisk; + int numPDisk = ioc->raid_data.pIocPg3->NumPhysDisks; while (numPDisk) { if (pPDisk->PhysDiskID == id) { @@ -4466,7 +4477,7 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id) /* RAID Volume ID's may double for a physical device. If RAID but * not a physical ID as well, skip DV. */ - if ((hd->ioc->spi_data.isRaid & (1 << id)) && !(iocmd.flags & MPT_ICFLAG_PHYS_DISK)) + if ((hd->ioc->raid_data.isRaid & (1 << id)) && !(iocmd.flags & MPT_ICFLAG_PHYS_DISK)) goto target_done; @@ -4815,6 +4826,8 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id) notDone = 0; if (iocmd.flags & MPT_ICFLAG_ECHO) { bufsize = ((pbuf1[2] & 0x1F) <<8) | pbuf1[3]; + if (pbuf1[0] & 0x01) + iocmd.flags |= MPT_ICFLAG_EBOS; } else { bufsize = pbuf1[1]<<16 | pbuf1[2]<<8 | pbuf1[3]; } @@ -4911,6 +4924,9 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id) } iocmd.flags &= ~MPT_ICFLAG_DID_RESET; + if (iocmd.flags & MPT_ICFLAG_EBOS) + goto skip_Reserve; + repeat = 5; while (repeat && (!(iocmd.flags & MPT_ICFLAG_RESERVED))) { iocmd.cmd = RESERVE; @@ -4954,6 +4970,7 @@ mptscsih_doDv(MPT_SCSI_HOST *hd, int bus_number, int id) } } +skip_Reserve: mptscsih_fillbuf(pbuf1, sz, patt, 1); iocmd.cmd = WRITE_BUFFER; iocmd.data_dma = buf1_dma; @@ -5198,11 +5215,12 @@ mptscsih_dv_parms(MPT_SCSI_HOST *hd, DVPARAMETERS *dv,void *pPage) * If not an LVD bus, the adapter minSyncFactor has been * already throttled back. */ + negoFlags = hd->ioc->spi_data.noQas; if ((hd->Targets)&&((pTarget = hd->Targets[(int)id]) != NULL) && !pTarget->raidVolume) { width = pTarget->maxWidth; offset = pTarget->maxOffset; factor = pTarget->minSyncFactor; - negoFlags = pTarget->negoFlags; + negoFlags |= pTarget->negoFlags; } else { if (hd->ioc->spi_data.nvram && (hd->ioc->spi_data.nvram[id] != MPT_HOST_NVRAM_INVALID)) { data = hd->ioc->spi_data.nvram[id]; @@ -5223,7 +5241,6 @@ mptscsih_dv_parms(MPT_SCSI_HOST *hd, DVPARAMETERS *dv,void *pPage) } /* Set the negotiation flags */ - negoFlags = hd->ioc->spi_data.noQas; if (!width) negoFlags |= MPT_TARGET_NO_NEGO_WIDE; diff --git a/drivers/message/fusion/mptscsih.h b/drivers/message/fusion/mptscsih.h index 51c0255ac16..971fda4b8b5 100644 --- a/drivers/message/fusion/mptscsih.h +++ b/drivers/message/fusion/mptscsih.h @@ -1,5 +1,5 @@ /* - * linux/drivers/message/fusion/mptscsi.h + * linux/drivers/message/fusion/mptscsih.h * High performance SCSI / Fibre Channel SCSI Host device driver. * For use with PCI chip/adapter(s): * LSIFC9xx/LSI409xx Fibre Channel @@ -53,8 +53,8 @@ * SCSI Public stuff... */ -#define MPT_SCSI_CMD_PER_DEV_HIGH 31 -#define MPT_SCSI_CMD_PER_DEV_LOW 7 +#define MPT_SCSI_CMD_PER_DEV_HIGH 64 +#define MPT_SCSI_CMD_PER_DEV_LOW 32 #define MPT_SCSI_CMD_PER_LUN 7 @@ -77,6 +77,7 @@ #define MPTSCSIH_MAX_WIDTH 1 #define MPTSCSIH_MIN_SYNC 0x08 #define MPTSCSIH_SAF_TE 0 +#define MPTSCSIH_PT_CLEAR 0 #endif diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c index 587d1274fd7..5c0e307d1d5 100644 --- a/drivers/message/fusion/mptspi.c +++ b/drivers/message/fusion/mptspi.c @@ -199,7 +199,7 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id) printk(MYIOC_s_WARN_FMT "Skipping ioc=%p because SCSI Initiator mode is NOT enabled!\n", ioc->name, ioc); - return -ENODEV; + return 0; } sh = scsi_host_alloc(&mptspi_driver_template, sizeof(MPT_SCSI_HOST)); diff --git a/drivers/message/i2o/config-osm.c b/drivers/message/i2o/config-osm.c index af32ab4e90c..10432f66520 100644 --- a/drivers/message/i2o/config-osm.c +++ b/drivers/message/i2o/config-osm.c @@ -56,8 +56,11 @@ static int __init i2o_config_init(void) return -EBUSY; } #ifdef CONFIG_I2O_CONFIG_OLD_IOCTL - if (i2o_config_old_init()) + if (i2o_config_old_init()) { + osm_err("old config handler initialization failed\n"); i2o_driver_unregister(&i2o_config_driver); + return -EBUSY; + } #endif return 0; diff --git a/drivers/mfd/ucb1x00-ts.c b/drivers/mfd/ucb1x00-ts.c index a851d65c7cf..a260f83bcb0 100644 --- a/drivers/mfd/ucb1x00-ts.c +++ b/drivers/mfd/ucb1x00-ts.c @@ -48,8 +48,8 @@ struct ucb1x00_ts { u16 x_res; u16 y_res; - int restart:1; - int adcsync:1; + unsigned int restart:1; + unsigned int adcsync:1; }; static int adcsync; diff --git a/drivers/mtd/devices/docecc.c b/drivers/mtd/devices/docecc.c index 9a087c1fb0b..24f670b5a4f 100644 --- a/drivers/mtd/devices/docecc.c +++ b/drivers/mtd/devices/docecc.c @@ -40,7 +40,7 @@ #include <linux/mtd/mtd.h> #include <linux/mtd/doc2000.h> -#define DEBUG 0 +#define DEBUG_ECC 0 /* need to undef it (from asm/termbits.h) */ #undef B0 @@ -249,7 +249,7 @@ eras_dec_rs(dtype Alpha_to[NN + 1], dtype Index_of[NN + 1], lambda[j] ^= Alpha_to[modnn(u + tmp)]; } } -#if DEBUG >= 1 +#if DEBUG_ECC >= 1 /* Test code that verifies the erasure locator polynomial just constructed Needed only for decoder debugging. */ @@ -276,7 +276,7 @@ eras_dec_rs(dtype Alpha_to[NN + 1], dtype Index_of[NN + 1], count = -1; goto finish; } -#if DEBUG >= 2 +#if DEBUG_ECC >= 2 printf("\n Erasure positions as determined by roots of Eras Loc Poly:\n"); for (i = 0; i < count; i++) printf("%d ", loc[i]); @@ -409,7 +409,7 @@ eras_dec_rs(dtype Alpha_to[NN + 1], dtype Index_of[NN + 1], den ^= Alpha_to[modnn(lambda[i+1] + i * root[j])]; } if (den == 0) { -#if DEBUG >= 1 +#if DEBUG_ECC >= 1 printf("\n ERROR: denominator = 0\n"); #endif /* Convert to dual- basis */ diff --git a/drivers/net/8390.c b/drivers/net/8390.c index 6d76f3a99b1..f8702742008 100644 --- a/drivers/net/8390.c +++ b/drivers/net/8390.c @@ -1094,7 +1094,7 @@ static void NS8390_trigger_send(struct net_device *dev, unsigned int length, outb_p(E8390_NODMA+E8390_PAGE0, e8390_base+E8390_CMD); - if (inb_p(e8390_base) & E8390_TRANS) + if (inb_p(e8390_base + E8390_CMD) & E8390_TRANS) { printk(KERN_WARNING "%s: trigger_send() called with the transmitter busy.\n", dev->name); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 90449a0f2a6..6d00c3de1a8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1653,7 +1653,8 @@ static int bond_enslave(struct net_device *bond_dev, struct net_device *slave_de int old_features = bond_dev->features; int res = 0; - if (slave_dev->do_ioctl == NULL) { + if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && + slave_dev->do_ioctl == NULL) { printk(KERN_WARNING DRV_NAME ": Warning : no link monitoring support for %s\n", slave_dev->name); diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index f0471d102e3..f9223c1c5aa 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -100,11 +100,11 @@ VERSION 2.2LK <2005/01/25> #ifdef CONFIG_R8169_NAPI #define rtl8169_rx_skb netif_receive_skb -#define rtl8169_rx_hwaccel_skb vlan_hwaccel_rx +#define rtl8169_rx_hwaccel_skb vlan_hwaccel_receive_skb #define rtl8169_rx_quota(count, quota) min(count, quota) #else #define rtl8169_rx_skb netif_rx -#define rtl8169_rx_hwaccel_skb vlan_hwaccel_receive_skb +#define rtl8169_rx_hwaccel_skb vlan_hwaccel_rx #define rtl8169_rx_quota(count, quota) count #endif diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 0208258e782..fd398da4993 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -42,7 +42,7 @@ #include "skge.h" #define DRV_NAME "skge" -#define DRV_VERSION "1.0" +#define DRV_VERSION "1.1" #define PFX DRV_NAME " " #define DEFAULT_TX_RING_SIZE 128 @@ -105,41 +105,28 @@ static const u32 rxirqmask[] = { IS_R1_F, IS_R2_F }; static const u32 txirqmask[] = { IS_XA1_F, IS_XA2_F }; static const u32 portirqmask[] = { IS_PORT_1, IS_PORT_2 }; -/* Don't need to look at whole 16K. - * last interesting register is descriptor poll timer. - */ -#define SKGE_REGS_LEN (29*128) - static int skge_get_regs_len(struct net_device *dev) { - return SKGE_REGS_LEN; + return 0x4000; } /* - * Returns copy of control register region - * I/O region is divided into banks and certain regions are unreadable + * Returns copy of whole control register region + * Note: skip RAM address register because accessing it will + * cause bus hangs! */ static void skge_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) { const struct skge_port *skge = netdev_priv(dev); - unsigned long offs; const void __iomem *io = skge->hw->regs; - static const unsigned long bankmap - = (1<<0) | (1<<2) | (1<<8) | (1<<9) - | (1<<12) | (1<<13) | (1<<14) | (1<<15) | (1<<16) - | (1<<17) | (1<<20) | (1<<21) | (1<<22) | (1<<23) - | (1<<24) | (1<<25) | (1<<26) | (1<<27) | (1<<28); regs->version = 1; - for (offs = 0; offs < regs->len; offs += 128) { - u32 len = min_t(u32, 128, regs->len - offs); + memset(p, 0, regs->len); + memcpy_fromio(p, io, B3_RAM_ADDR); - if (bankmap & (1<<(offs/128))) - memcpy_fromio(p + offs, io + offs, len); - else - memset(p + offs, 0, len); - } + memcpy_fromio(p + B3_RI_WTO_R1, io + B3_RI_WTO_R1, + regs->len - B3_RI_WTO_R1); } /* Wake on Lan only supported on Yukon chps with rev 1 or above */ @@ -775,17 +762,6 @@ static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u64 base) return 0; } -static struct sk_buff *skge_rx_alloc(struct net_device *dev, unsigned int size) -{ - struct sk_buff *skb = dev_alloc_skb(size); - - if (likely(skb)) { - skb->dev = dev; - skb_reserve(skb, NET_IP_ALIGN); - } - return skb; -} - /* Allocate and setup a new buffer for receiving */ static void skge_rx_setup(struct skge_port *skge, struct skge_element *e, struct sk_buff *skb, unsigned int bufsize) @@ -858,16 +834,17 @@ static int skge_rx_fill(struct skge_port *skge) { struct skge_ring *ring = &skge->rx_ring; struct skge_element *e; - unsigned int bufsize = skge->rx_buf_size; e = ring->start; do { - struct sk_buff *skb = skge_rx_alloc(skge->netdev, bufsize); + struct sk_buff *skb; + skb = dev_alloc_skb(skge->rx_buf_size + NET_IP_ALIGN); if (!skb) return -ENOMEM; - skge_rx_setup(skge, e, skb, bufsize); + skb_reserve(skb, NET_IP_ALIGN); + skge_rx_setup(skge, e, skb, skge->rx_buf_size); } while ( (e = e->next) != ring->start); ring->to_clean = ring->start; @@ -1666,6 +1643,22 @@ static void yukon_reset(struct skge_hw *hw, int port) | GM_RXCR_UCF_ENA | GM_RXCR_MCF_ENA); } +/* Apparently, early versions of Yukon-Lite had wrong chip_id? */ +static int is_yukon_lite_a0(struct skge_hw *hw) +{ + u32 reg; + int ret; + + if (hw->chip_id != CHIP_ID_YUKON) + return 0; + + reg = skge_read32(hw, B2_FAR); + skge_write8(hw, B2_FAR + 3, 0xff); + ret = (skge_read8(hw, B2_FAR + 3) != 0); + skge_write32(hw, B2_FAR, reg); + return ret; +} + static void yukon_mac_init(struct skge_hw *hw, int port) { struct skge_port *skge = netdev_priv(hw->dev[port]); @@ -1781,9 +1774,11 @@ static void yukon_mac_init(struct skge_hw *hw, int port) /* Configure Rx MAC FIFO */ skge_write16(hw, SK_REG(port, RX_GMF_FL_MSK), RX_FF_FL_DEF_MSK); reg = GMF_OPER_ON | GMF_RX_F_FL_ON; - if (hw->chip_id == CHIP_ID_YUKON_LITE && - hw->chip_rev >= CHIP_REV_YU_LITE_A3) + + /* disable Rx GMAC FIFO Flush for YUKON-Lite Rev. A0 only */ + if (is_yukon_lite_a0(hw)) reg &= ~GMF_RX_F_FL_ON; + skge_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_CLR); skge_write16(hw, SK_REG(port, RX_GMF_CTRL_T), reg); /* @@ -2442,6 +2437,14 @@ static void yukon_set_multicast(struct net_device *dev) gma_write16(hw, port, GM_RX_CTRL, reg); } +static inline u16 phy_length(const struct skge_hw *hw, u32 status) +{ + if (hw->chip_id == CHIP_ID_GENESIS) + return status >> XMR_FS_LEN_SHIFT; + else + return status >> GMR_FS_LEN_SHIFT; +} + static inline int bad_phy_status(const struct skge_hw *hw, u32 status) { if (hw->chip_id == CHIP_ID_GENESIS) @@ -2451,80 +2454,99 @@ static inline int bad_phy_status(const struct skge_hw *hw, u32 status) (status & GMR_FS_RX_OK) == 0; } -static void skge_rx_error(struct skge_port *skge, int slot, - u32 control, u32 status) -{ - if (netif_msg_rx_err(skge)) - printk(KERN_DEBUG PFX "%s: rx err, slot %d control 0x%x status 0x%x\n", - skge->netdev->name, slot, control, status); - - if ((control & (BMU_EOF|BMU_STF)) != (BMU_STF|BMU_EOF)) - skge->net_stats.rx_length_errors++; - else if (skge->hw->chip_id == CHIP_ID_GENESIS) { - if (status & (XMR_FS_RUNT|XMR_FS_LNG_ERR)) - skge->net_stats.rx_length_errors++; - if (status & XMR_FS_FRA_ERR) - skge->net_stats.rx_frame_errors++; - if (status & XMR_FS_FCS_ERR) - skge->net_stats.rx_crc_errors++; - } else { - if (status & (GMR_FS_LONG_ERR|GMR_FS_UN_SIZE)) - skge->net_stats.rx_length_errors++; - if (status & GMR_FS_FRAGMENT) - skge->net_stats.rx_frame_errors++; - if (status & GMR_FS_CRC_ERR) - skge->net_stats.rx_crc_errors++; - } -} /* Get receive buffer from descriptor. * Handles copy of small buffers and reallocation failures */ static inline struct sk_buff *skge_rx_get(struct skge_port *skge, struct skge_element *e, - unsigned int len) + u32 control, u32 status, u16 csum) { - struct sk_buff *nskb, *skb; + struct sk_buff *skb; + u16 len = control & BMU_BBC; + + if (unlikely(netif_msg_rx_status(skge))) + printk(KERN_DEBUG PFX "%s: rx slot %td status 0x%x len %d\n", + skge->netdev->name, e - skge->rx_ring.start, + status, len); + + if (len > skge->rx_buf_size) + goto error; + + if ((control & (BMU_EOF|BMU_STF)) != (BMU_STF|BMU_EOF)) + goto error; + + if (bad_phy_status(skge->hw, status)) + goto error; + + if (phy_length(skge->hw, status) != len) + goto error; if (len < RX_COPY_THRESHOLD) { - nskb = skge_rx_alloc(skge->netdev, len + NET_IP_ALIGN); - if (unlikely(!nskb)) - return NULL; + skb = dev_alloc_skb(len + 2); + if (!skb) + goto resubmit; + skb_reserve(skb, 2); pci_dma_sync_single_for_cpu(skge->hw->pdev, pci_unmap_addr(e, mapaddr), len, PCI_DMA_FROMDEVICE); - memcpy(nskb->data, e->skb->data, len); + memcpy(skb->data, e->skb->data, len); pci_dma_sync_single_for_device(skge->hw->pdev, pci_unmap_addr(e, mapaddr), len, PCI_DMA_FROMDEVICE); - - if (skge->rx_csum) { - struct skge_rx_desc *rd = e->desc; - nskb->csum = le16_to_cpu(rd->csum2); - nskb->ip_summed = CHECKSUM_HW; - } skge_rx_reuse(e, skge->rx_buf_size); - return nskb; } else { - nskb = skge_rx_alloc(skge->netdev, skge->rx_buf_size); - if (unlikely(!nskb)) - return NULL; + struct sk_buff *nskb; + nskb = dev_alloc_skb(skge->rx_buf_size + NET_IP_ALIGN); + if (!nskb) + goto resubmit; pci_unmap_single(skge->hw->pdev, pci_unmap_addr(e, mapaddr), pci_unmap_len(e, maplen), PCI_DMA_FROMDEVICE); skb = e->skb; - if (skge->rx_csum) { - struct skge_rx_desc *rd = e->desc; - skb->csum = le16_to_cpu(rd->csum2); - skb->ip_summed = CHECKSUM_HW; - } - + prefetch(skb->data); skge_rx_setup(skge, e, nskb, skge->rx_buf_size); - return skb; } + + skb_put(skb, len); + skb->dev = skge->netdev; + if (skge->rx_csum) { + skb->csum = csum; + skb->ip_summed = CHECKSUM_HW; + } + + skb->protocol = eth_type_trans(skb, skge->netdev); + + return skb; +error: + + if (netif_msg_rx_err(skge)) + printk(KERN_DEBUG PFX "%s: rx err, slot %td control 0x%x status 0x%x\n", + skge->netdev->name, e - skge->rx_ring.start, + control, status); + + if (skge->hw->chip_id == CHIP_ID_GENESIS) { + if (status & (XMR_FS_RUNT|XMR_FS_LNG_ERR)) + skge->net_stats.rx_length_errors++; + if (status & XMR_FS_FRA_ERR) + skge->net_stats.rx_frame_errors++; + if (status & XMR_FS_FCS_ERR) + skge->net_stats.rx_crc_errors++; + } else { + if (status & (GMR_FS_LONG_ERR|GMR_FS_UN_SIZE)) + skge->net_stats.rx_length_errors++; + if (status & GMR_FS_FRAGMENT) + skge->net_stats.rx_frame_errors++; + if (status & GMR_FS_CRC_ERR) + skge->net_stats.rx_crc_errors++; + } + +resubmit: + skge_rx_reuse(e, skge->rx_buf_size); + return NULL; } @@ -2540,32 +2562,16 @@ static int skge_poll(struct net_device *dev, int *budget) for (e = ring->to_clean; work_done < to_do; e = e->next) { struct skge_rx_desc *rd = e->desc; struct sk_buff *skb; - u32 control, len, status; + u32 control; rmb(); control = rd->control; if (control & BMU_OWN) break; - len = control & BMU_BBC; - status = rd->status; - - if (unlikely((control & (BMU_EOF|BMU_STF)) != (BMU_STF|BMU_EOF) - || bad_phy_status(hw, status))) { - skge_rx_error(skge, e - ring->start, control, status); - skge_rx_reuse(e, skge->rx_buf_size); - continue; - } - - if (netif_msg_rx_status(skge)) - printk(KERN_DEBUG PFX "%s: rx slot %td status 0x%x len %d\n", - dev->name, e - ring->start, rd->status, len); - - skb = skge_rx_get(skge, e, len); + skb = skge_rx_get(skge, e, control, rd->status, + le16_to_cpu(rd->csum2)); if (likely(skb)) { - skb_put(skb, len); - skb->protocol = eth_type_trans(skb, dev); - dev->last_rx = jiffies; netif_receive_skb(skb); diff --git a/drivers/net/skge.h b/drivers/net/skge.h index efbf98c675d..72c175b87a5 100644 --- a/drivers/net/skge.h +++ b/drivers/net/skge.h @@ -953,6 +953,7 @@ enum { */ enum { XMR_FS_LEN = 0x3fff<<18, /* Bit 31..18: Rx Frame Length */ + XMR_FS_LEN_SHIFT = 18, XMR_FS_2L_VLAN = 1<<17, /* Bit 17: tagged wh 2Lev VLAN ID*/ XMR_FS_1_VLAN = 1<<16, /* Bit 16: tagged wh 1ev VLAN ID*/ XMR_FS_BC = 1<<15, /* Bit 15: Broadcast Frame */ @@ -1868,6 +1869,7 @@ enum { /* Receive Frame Status Encoding */ enum { GMR_FS_LEN = 0xffff<<16, /* Bit 31..16: Rx Frame Length */ + GMR_FS_LEN_SHIFT = 16, GMR_FS_VLAN = 1<<13, /* Bit 13: VLAN Packet */ GMR_FS_JABBER = 1<<12, /* Bit 12: Jabber Packet */ GMR_FS_UN_SIZE = 1<<11, /* Bit 11: Undersize Packet */ diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 48c03c11cd9..a01efa6d5c6 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -72,7 +72,7 @@ static void cisco_keepalive_send(struct net_device *dev, u32 type, } skb_reserve(skb, 4); cisco_hard_header(skb, dev, CISCO_KEEPALIVE, NULL, NULL, 0); - data = (cisco_packet*)skb->data; + data = (cisco_packet*)(skb->data + 4); data->type = htonl(type); data->par1 = htonl(par1); diff --git a/drivers/pci/hotplug.c b/drivers/pci/hotplug.c index 10444988a10..e1743be3190 100644 --- a/drivers/pci/hotplug.c +++ b/drivers/pci/hotplug.c @@ -7,7 +7,6 @@ int pci_hotplug (struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size) { struct pci_dev *pdev; - char *scratch; int i = 0; int length = 0; @@ -18,9 +17,6 @@ int pci_hotplug (struct device *dev, char **envp, int num_envp, if (!pdev) return -ENODEV; - scratch = buffer; - - if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length, "PCI_CLASS=%04X", pdev->class)) diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index 752e6513c44..db69be85b45 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -62,7 +62,7 @@ static ssize_t add_slot_store(struct dlpar_io_attr *dlpar_attr, char drc_name[MAX_DRC_NAME_LEN]; char *end; - if (nbytes > MAX_DRC_NAME_LEN) + if (nbytes >= MAX_DRC_NAME_LEN) return 0; memcpy(drc_name, buf, nbytes); @@ -83,7 +83,7 @@ static ssize_t remove_slot_store(struct dlpar_io_attr *dlpar_attr, char drc_name[MAX_DRC_NAME_LEN]; char *end; - if (nbytes > MAX_DRC_NAME_LEN) + if (nbytes >= MAX_DRC_NAME_LEN) return 0; memcpy(drc_name, buf, nbytes); diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c index b1409441c1c..a32ae82e592 100644 --- a/drivers/pci/hotplug/sgi_hotplug.c +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -159,7 +159,7 @@ static int sn_hp_slot_private_alloc(struct hotplug_slot *bss_hotplug_slot, pcibus_info = SN_PCIBUS_BUSSOFT_INFO(pci_bus); - slot = kcalloc(1, sizeof(*slot), GFP_KERNEL); + slot = kzalloc(sizeof(*slot), GFP_KERNEL); if (!slot) return -ENOMEM; bss_hotplug_slot->private = slot; @@ -491,7 +491,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus) if (sn_pci_slot_valid(pci_bus, device) != 1) continue; - bss_hotplug_slot = kcalloc(1, sizeof(*bss_hotplug_slot), + bss_hotplug_slot = kzalloc(sizeof(*bss_hotplug_slot), GFP_KERNEL); if (!bss_hotplug_slot) { rc = -ENOMEM; @@ -499,7 +499,7 @@ static int sn_hotplug_slot_register(struct pci_bus *pci_bus) } bss_hotplug_slot->info = - kcalloc(1, sizeof(struct hotplug_slot_info), + kzalloc(sizeof(struct hotplug_slot_info), GFP_KERNEL); if (!bss_hotplug_slot->info) { rc = -ENOMEM; diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 56a3b397efe..2898830c496 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -360,7 +360,7 @@ pci_create_resource_files(struct pci_dev *pdev) continue; /* allocate attribute structure, piggyback attribute name */ - res_attr = kcalloc(1, sizeof(*res_attr) + 10, GFP_ATOMIC); + res_attr = kzalloc(sizeof(*res_attr) + 10, GFP_ATOMIC); if (res_attr) { char *res_attr_name = (char *)(res_attr + 1); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 26a55d08b50..005786416bb 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -165,7 +165,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) if (l == 0xffffffff) l = 0; if ((l & PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_MEMORY) { - sz = pci_size(l, sz, PCI_BASE_ADDRESS_MEM_MASK); + sz = pci_size(l, sz, (u32)PCI_BASE_ADDRESS_MEM_MASK); if (!sz) continue; res->start = l & PCI_BASE_ADDRESS_MEM_MASK; @@ -215,7 +215,7 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom) if (l == 0xffffffff) l = 0; if (sz && sz != 0xffffffff) { - sz = pci_size(l, sz, PCI_ROM_ADDRESS_MASK); + sz = pci_size(l, sz, (u32)PCI_ROM_ADDRESS_MASK); if (sz) { res->flags = (l & IORESOURCE_ROM_ENABLE) | IORESOURCE_MEM | IORESOURCE_PREFETCH | @@ -402,6 +402,12 @@ static void pci_enable_crs(struct pci_dev *dev) static void __devinit pci_fixup_parent_subordinate_busnr(struct pci_bus *child, int max) { struct pci_bus *parent = child->parent; + + /* Attempts to fix that up are really dangerous unless + we're going to re-assign all bus numbers. */ + if (!pcibios_assign_all_busses()) + return; + while (parent->parent && parent->subordinate < max) { parent->subordinate = max; pci_write_config_byte(parent->self, PCI_SUBORDINATE_BUS, max); @@ -478,8 +484,18 @@ int __devinit pci_scan_bridge(struct pci_bus *bus, struct pci_dev * dev, int max * We need to assign a number to this bus which we always * do in the second pass. */ - if (!pass) + if (!pass) { + if (pcibios_assign_all_busses()) + /* Temporarily disable forwarding of the + configuration cycles on all bridges in + this bus segment to avoid possible + conflicts in the second pass between two + bridges programmed with overlapping + bus ranges. */ + pci_write_config_dword(dev, PCI_PRIMARY_BUS, + buses & ~0xffffff); return max; + } /* Clear errors */ pci_write_config_word(dev, PCI_STATUS, 0xffff); diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index 91ea8e4777f..dbb3eb0e330 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -437,7 +437,7 @@ __ccwgroup_get_gdev_by_cdev(struct ccw_device *cdev) if (cdev->dev.driver_data) { gdev = (struct ccwgroup_device *)cdev->dev.driver_data; if (get_device(&gdev->dev)) { - if (klist_node_attached(&gdev->dev.knode_bus)) + if (device_is_registered(&gdev->dev)) return gdev; put_device(&gdev->dev); } diff --git a/drivers/s390/scsi/Makefile b/drivers/s390/scsi/Makefile index fc145307a7d..d6a78f1a2f1 100644 --- a/drivers/s390/scsi/Makefile +++ b/drivers/s390/scsi/Makefile @@ -3,7 +3,7 @@ # zfcp-objs := zfcp_aux.o zfcp_ccw.o zfcp_scsi.o zfcp_erp.o zfcp_qdio.o \ - zfcp_fsf.o zfcp_sysfs_adapter.o zfcp_sysfs_port.o \ + zfcp_fsf.o zfcp_dbf.o zfcp_sysfs_adapter.o zfcp_sysfs_port.o \ zfcp_sysfs_unit.o zfcp_sysfs_driver.o obj-$(CONFIG_ZFCP) += zfcp.o diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c index bfe3ba73bc0..0b5087f7cab 100644 --- a/drivers/s390/scsi/zfcp_aux.c +++ b/drivers/s390/scsi/zfcp_aux.c @@ -122,95 +122,6 @@ _zfcp_hex_dump(char *addr, int count) #define ZFCP_LOG_AREA ZFCP_LOG_AREA_OTHER -static inline int -zfcp_fsf_req_is_scsi_cmnd(struct zfcp_fsf_req *fsf_req) -{ - return ((fsf_req->fsf_command == FSF_QTCB_FCP_CMND) && - !(fsf_req->status & ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT)); -} - -void -zfcp_cmd_dbf_event_fsf(const char *text, struct zfcp_fsf_req *fsf_req, - void *add_data, int add_length) -{ - struct zfcp_adapter *adapter = fsf_req->adapter; - struct scsi_cmnd *scsi_cmnd; - int level = 3; - int i; - unsigned long flags; - - spin_lock_irqsave(&adapter->dbf_lock, flags); - if (zfcp_fsf_req_is_scsi_cmnd(fsf_req)) { - scsi_cmnd = fsf_req->data.send_fcp_command_task.scsi_cmnd; - debug_text_event(adapter->cmd_dbf, level, "fsferror"); - debug_text_event(adapter->cmd_dbf, level, text); - debug_event(adapter->cmd_dbf, level, &fsf_req, - sizeof (unsigned long)); - debug_event(adapter->cmd_dbf, level, &fsf_req->seq_no, - sizeof (u32)); - debug_event(adapter->cmd_dbf, level, &scsi_cmnd, - sizeof (unsigned long)); - debug_event(adapter->cmd_dbf, level, &scsi_cmnd->cmnd, - min(ZFCP_CMD_DBF_LENGTH, (int)scsi_cmnd->cmd_len)); - for (i = 0; i < add_length; i += ZFCP_CMD_DBF_LENGTH) - debug_event(adapter->cmd_dbf, - level, - (char *) add_data + i, - min(ZFCP_CMD_DBF_LENGTH, add_length - i)); - } - spin_unlock_irqrestore(&adapter->dbf_lock, flags); -} - -/* XXX additionally log unit if available */ -/* ---> introduce new parameter for unit, see 2.4 code */ -void -zfcp_cmd_dbf_event_scsi(const char *text, struct scsi_cmnd *scsi_cmnd) -{ - struct zfcp_adapter *adapter; - union zfcp_req_data *req_data; - struct zfcp_fsf_req *fsf_req; - int level = ((host_byte(scsi_cmnd->result) != 0) ? 1 : 5); - unsigned long flags; - - adapter = (struct zfcp_adapter *) scsi_cmnd->device->host->hostdata[0]; - req_data = (union zfcp_req_data *) scsi_cmnd->host_scribble; - fsf_req = (req_data ? req_data->send_fcp_command_task.fsf_req : NULL); - spin_lock_irqsave(&adapter->dbf_lock, flags); - debug_text_event(adapter->cmd_dbf, level, "hostbyte"); - debug_text_event(adapter->cmd_dbf, level, text); - debug_event(adapter->cmd_dbf, level, &scsi_cmnd->result, sizeof (u32)); - debug_event(adapter->cmd_dbf, level, &scsi_cmnd, - sizeof (unsigned long)); - debug_event(adapter->cmd_dbf, level, &scsi_cmnd->cmnd, - min(ZFCP_CMD_DBF_LENGTH, (int)scsi_cmnd->cmd_len)); - if (likely(fsf_req)) { - debug_event(adapter->cmd_dbf, level, &fsf_req, - sizeof (unsigned long)); - debug_event(adapter->cmd_dbf, level, &fsf_req->seq_no, - sizeof (u32)); - } else { - debug_text_event(adapter->cmd_dbf, level, ""); - debug_text_event(adapter->cmd_dbf, level, ""); - } - spin_unlock_irqrestore(&adapter->dbf_lock, flags); -} - -void -zfcp_in_els_dbf_event(struct zfcp_adapter *adapter, const char *text, - struct fsf_status_read_buffer *status_buffer, int length) -{ - int level = 1; - int i; - - debug_text_event(adapter->in_els_dbf, level, text); - debug_event(adapter->in_els_dbf, level, &status_buffer->d_id, 8); - for (i = 0; i < length; i += ZFCP_IN_ELS_DBF_LENGTH) - debug_event(adapter->in_els_dbf, - level, - (char *) status_buffer->payload + i, - min(ZFCP_IN_ELS_DBF_LENGTH, length - i)); -} - /** * zfcp_device_setup - setup function * @str: pointer to parameter string @@ -1017,81 +928,6 @@ zfcp_free_low_mem_buffers(struct zfcp_adapter *adapter) mempool_destroy(adapter->pool.data_gid_pn); } -/** - * zfcp_adapter_debug_register - registers debug feature for an adapter - * @adapter: pointer to adapter for which debug features should be registered - * return: -ENOMEM on error, 0 otherwise - */ -int -zfcp_adapter_debug_register(struct zfcp_adapter *adapter) -{ - char dbf_name[20]; - - /* debug feature area which records SCSI command failures (hostbyte) */ - spin_lock_init(&adapter->dbf_lock); - - sprintf(dbf_name, ZFCP_CMD_DBF_NAME "%s", - zfcp_get_busid_by_adapter(adapter)); - adapter->cmd_dbf = debug_register(dbf_name, ZFCP_CMD_DBF_INDEX, - ZFCP_CMD_DBF_AREAS, - ZFCP_CMD_DBF_LENGTH); - debug_register_view(adapter->cmd_dbf, &debug_hex_ascii_view); - debug_set_level(adapter->cmd_dbf, ZFCP_CMD_DBF_LEVEL); - - /* debug feature area which records SCSI command aborts */ - sprintf(dbf_name, ZFCP_ABORT_DBF_NAME "%s", - zfcp_get_busid_by_adapter(adapter)); - adapter->abort_dbf = debug_register(dbf_name, ZFCP_ABORT_DBF_INDEX, - ZFCP_ABORT_DBF_AREAS, - ZFCP_ABORT_DBF_LENGTH); - debug_register_view(adapter->abort_dbf, &debug_hex_ascii_view); - debug_set_level(adapter->abort_dbf, ZFCP_ABORT_DBF_LEVEL); - - /* debug feature area which records incoming ELS commands */ - sprintf(dbf_name, ZFCP_IN_ELS_DBF_NAME "%s", - zfcp_get_busid_by_adapter(adapter)); - adapter->in_els_dbf = debug_register(dbf_name, ZFCP_IN_ELS_DBF_INDEX, - ZFCP_IN_ELS_DBF_AREAS, - ZFCP_IN_ELS_DBF_LENGTH); - debug_register_view(adapter->in_els_dbf, &debug_hex_ascii_view); - debug_set_level(adapter->in_els_dbf, ZFCP_IN_ELS_DBF_LEVEL); - - /* debug feature area which records erp events */ - sprintf(dbf_name, ZFCP_ERP_DBF_NAME "%s", - zfcp_get_busid_by_adapter(adapter)); - adapter->erp_dbf = debug_register(dbf_name, ZFCP_ERP_DBF_INDEX, - ZFCP_ERP_DBF_AREAS, - ZFCP_ERP_DBF_LENGTH); - debug_register_view(adapter->erp_dbf, &debug_hex_ascii_view); - debug_set_level(adapter->erp_dbf, ZFCP_ERP_DBF_LEVEL); - - if (!(adapter->cmd_dbf && adapter->abort_dbf && - adapter->in_els_dbf && adapter->erp_dbf)) { - zfcp_adapter_debug_unregister(adapter); - return -ENOMEM; - } - - return 0; - -} - -/** - * zfcp_adapter_debug_unregister - unregisters debug feature for an adapter - * @adapter: pointer to adapter for which debug features should be unregistered - */ -void -zfcp_adapter_debug_unregister(struct zfcp_adapter *adapter) -{ - debug_unregister(adapter->abort_dbf); - debug_unregister(adapter->cmd_dbf); - debug_unregister(adapter->erp_dbf); - debug_unregister(adapter->in_els_dbf); - adapter->abort_dbf = NULL; - adapter->cmd_dbf = NULL; - adapter->erp_dbf = NULL; - adapter->in_els_dbf = NULL; -} - void zfcp_dummy_release(struct device *dev) { @@ -1462,10 +1298,6 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter, /* see FC-FS */ no_entries = (fcp_rscn_head->payload_len / 4); - zfcp_in_els_dbf_event(adapter, "##rscn", status_buffer, - fcp_rscn_head->payload_len); - - debug_text_event(adapter->erp_dbf, 1, "unsol_els_rscn:"); for (i = 1; i < no_entries; i++) { /* skip head and start with 1st element */ fcp_rscn_element++; @@ -1497,8 +1329,6 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter, (ZFCP_STATUS_PORT_DID_DID, &port->status)) { ZFCP_LOG_INFO("incoming RSCN, trying to open " "port 0x%016Lx\n", port->wwpn); - debug_text_event(adapter->erp_dbf, 1, - "unsol_els_rscnu:"); zfcp_erp_port_reopen(port, ZFCP_STATUS_COMMON_ERP_FAILED); continue; @@ -1524,8 +1354,6 @@ zfcp_fsf_incoming_els_rscn(struct zfcp_adapter *adapter, */ ZFCP_LOG_INFO("incoming RSCN, trying to open " "port 0x%016Lx\n", port->wwpn); - debug_text_event(adapter->erp_dbf, 1, - "unsol_els_rscnk:"); zfcp_test_link(port); } } @@ -1541,8 +1369,6 @@ zfcp_fsf_incoming_els_plogi(struct zfcp_adapter *adapter, struct zfcp_port *port; unsigned long flags; - zfcp_in_els_dbf_event(adapter, "##plogi", status_buffer, 28); - read_lock_irqsave(&zfcp_data.config_lock, flags); list_for_each_entry(port, &adapter->port_list_head, list) { if (port->wwpn == (*(wwn_t *) & els_logi->nport_wwn)) @@ -1556,8 +1382,6 @@ zfcp_fsf_incoming_els_plogi(struct zfcp_adapter *adapter, status_buffer->d_id, zfcp_get_busid_by_adapter(adapter)); } else { - debug_text_event(adapter->erp_dbf, 1, "unsol_els_plogi:"); - debug_event(adapter->erp_dbf, 1, &els_logi->nport_wwn, 8); zfcp_erp_port_forced_reopen(port, 0); } } @@ -1570,8 +1394,6 @@ zfcp_fsf_incoming_els_logo(struct zfcp_adapter *adapter, struct zfcp_port *port; unsigned long flags; - zfcp_in_els_dbf_event(adapter, "##logo", status_buffer, 16); - read_lock_irqsave(&zfcp_data.config_lock, flags); list_for_each_entry(port, &adapter->port_list_head, list) { if (port->wwpn == els_logo->nport_wwpn) @@ -1585,8 +1407,6 @@ zfcp_fsf_incoming_els_logo(struct zfcp_adapter *adapter, status_buffer->d_id, zfcp_get_busid_by_adapter(adapter)); } else { - debug_text_event(adapter->erp_dbf, 1, "unsol_els_logo:"); - debug_event(adapter->erp_dbf, 1, &els_logo->nport_wwpn, 8); zfcp_erp_port_forced_reopen(port, 0); } } @@ -1595,7 +1415,6 @@ static void zfcp_fsf_incoming_els_unknown(struct zfcp_adapter *adapter, struct fsf_status_read_buffer *status_buffer) { - zfcp_in_els_dbf_event(adapter, "##undef", status_buffer, 24); ZFCP_LOG_NORMAL("warning: unknown incoming ELS 0x%08x " "for adapter %s\n", *(u32 *) (status_buffer->payload), zfcp_get_busid_by_adapter(adapter)); @@ -1609,10 +1428,11 @@ zfcp_fsf_incoming_els(struct zfcp_fsf_req *fsf_req) u32 els_type; struct zfcp_adapter *adapter; - status_buffer = fsf_req->data.status_read.buffer; + status_buffer = (struct fsf_status_read_buffer *) fsf_req->data; els_type = *(u32 *) (status_buffer->payload); adapter = fsf_req->adapter; + zfcp_san_dbf_event_incoming_els(fsf_req); if (els_type == LS_PLOGI) zfcp_fsf_incoming_els_plogi(adapter, status_buffer); else if (els_type == LS_LOGO) diff --git a/drivers/s390/scsi/zfcp_ccw.c b/drivers/s390/scsi/zfcp_ccw.c index b30abab77da..0fc46381fc2 100644 --- a/drivers/s390/scsi/zfcp_ccw.c +++ b/drivers/s390/scsi/zfcp_ccw.c @@ -202,19 +202,9 @@ static int zfcp_ccw_set_offline(struct ccw_device *ccw_device) { struct zfcp_adapter *adapter; - struct zfcp_port *port; - struct fc_rport *rport; down(&zfcp_data.config_sema); adapter = dev_get_drvdata(&ccw_device->dev); - /* might be racy, but we cannot take config_lock due to the fact that - fc_remote_port_delete might sleep */ - list_for_each_entry(port, &adapter->port_list_head, list) - if (port->rport) { - rport = port->rport; - port->rport = NULL; - fc_remote_port_delete(rport); - } zfcp_erp_adapter_shutdown(adapter, 0); zfcp_erp_wait(adapter); zfcp_adapter_scsi_unregister(adapter); diff --git a/drivers/s390/scsi/zfcp_dbf.c b/drivers/s390/scsi/zfcp_dbf.c new file mode 100644 index 00000000000..826fb3b0060 --- /dev/null +++ b/drivers/s390/scsi/zfcp_dbf.c @@ -0,0 +1,995 @@ +/* + * + * linux/drivers/s390/scsi/zfcp_dbf.c + * + * FCP adapter driver for IBM eServer zSeries + * + * Debugging facilities + * + * (C) Copyright IBM Corp. 2005 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#define ZFCP_DBF_REVISION "$Revision$" + +#include <asm/debug.h> +#include <linux/ctype.h> +#include "zfcp_ext.h" + +static u32 dbfsize = 4; + +module_param(dbfsize, uint, 0400); +MODULE_PARM_DESC(dbfsize, + "number of pages for each debug feature area (default 4)"); + +#define ZFCP_LOG_AREA ZFCP_LOG_AREA_OTHER + +static inline int +zfcp_dbf_stck(char *out_buf, const char *label, unsigned long long stck) +{ + unsigned long long sec; + struct timespec xtime; + int len = 0; + + stck -= 0x8126d60e46000000LL - (0x3c26700LL * 1000000 * 4096); + sec = stck >> 12; + do_div(sec, 1000000); + xtime.tv_sec = sec; + stck -= (sec * 1000000) << 12; + xtime.tv_nsec = ((stck * 1000) >> 12); + len += sprintf(out_buf + len, "%-24s%011lu:%06lu\n", + label, xtime.tv_sec, xtime.tv_nsec); + + return len; +} + +static int zfcp_dbf_tag(char *out_buf, const char *label, const char *tag) +{ + int len = 0, i; + + len += sprintf(out_buf + len, "%-24s", label); + for (i = 0; i < ZFCP_DBF_TAG_SIZE; i++) + len += sprintf(out_buf + len, "%c", tag[i]); + len += sprintf(out_buf + len, "\n"); + + return len; +} + +static int +zfcp_dbf_view(char *out_buf, const char *label, const char *format, ...) +{ + va_list arg; + int len = 0; + + len += sprintf(out_buf + len, "%-24s", label); + va_start(arg, format); + len += vsprintf(out_buf + len, format, arg); + va_end(arg); + len += sprintf(out_buf + len, "\n"); + + return len; +} + +static int +zfcp_dbf_view_dump(char *out_buf, const char *label, + char *buffer, int buflen, int offset, int total_size) +{ + int len = 0; + + if (offset == 0) + len += sprintf(out_buf + len, "%-24s ", label); + + while (buflen--) { + if (offset > 0) { + if ((offset % 32) == 0) + len += sprintf(out_buf + len, "\n%-24c ", ' '); + else if ((offset % 4) == 0) + len += sprintf(out_buf + len, " "); + } + len += sprintf(out_buf + len, "%02x", *buffer++); + if (++offset == total_size) { + len += sprintf(out_buf + len, "\n"); + break; + } + } + + if (total_size == 0) + len += sprintf(out_buf + len, "\n"); + + return len; +} + +static inline int +zfcp_dbf_view_header(debug_info_t * id, struct debug_view *view, int area, + debug_entry_t * entry, char *out_buf) +{ + struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)DEBUG_DATA(entry); + int len = 0; + + if (strncmp(dump->tag, "dump", ZFCP_DBF_TAG_SIZE) != 0) { + len += zfcp_dbf_stck(out_buf + len, "timestamp", + entry->id.stck); + len += zfcp_dbf_view(out_buf + len, "cpu", "%02i", + entry->id.fields.cpuid); + } else { + len += zfcp_dbf_view_dump(out_buf + len, NULL, + dump->data, + dump->size, + dump->offset, dump->total_size); + if ((dump->offset + dump->size) == dump->total_size) + len += sprintf(out_buf + len, "\n"); + } + + return len; +} + +inline void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *fsf_req) +{ + struct zfcp_adapter *adapter = fsf_req->adapter; + struct fsf_qtcb *qtcb = fsf_req->qtcb; + union fsf_prot_status_qual *prot_status_qual = + &qtcb->prefix.prot_status_qual; + union fsf_status_qual *fsf_status_qual = &qtcb->header.fsf_status_qual; + struct scsi_cmnd *scsi_cmnd; + struct zfcp_port *port; + struct zfcp_unit *unit; + struct zfcp_send_els *send_els; + struct zfcp_hba_dbf_record *rec = &adapter->hba_dbf_buf; + struct zfcp_hba_dbf_record_response *response = &rec->type.response; + int level; + unsigned long flags; + + spin_lock_irqsave(&adapter->hba_dbf_lock, flags); + memset(rec, 0, sizeof(struct zfcp_hba_dbf_record)); + strncpy(rec->tag, "resp", ZFCP_DBF_TAG_SIZE); + + if ((qtcb->prefix.prot_status != FSF_PROT_GOOD) && + (qtcb->prefix.prot_status != FSF_PROT_FSF_STATUS_PRESENTED)) { + strncpy(rec->tag2, "perr", ZFCP_DBF_TAG_SIZE); + level = 1; + } else if (qtcb->header.fsf_status != FSF_GOOD) { + strncpy(rec->tag2, "ferr", ZFCP_DBF_TAG_SIZE); + level = 1; + } else if ((fsf_req->fsf_command == FSF_QTCB_OPEN_PORT_WITH_DID) || + (fsf_req->fsf_command == FSF_QTCB_OPEN_LUN)) { + strncpy(rec->tag2, "open", ZFCP_DBF_TAG_SIZE); + level = 4; + } else if ((prot_status_qual->doubleword[0] != 0) || + (prot_status_qual->doubleword[1] != 0) || + (fsf_status_qual->doubleword[0] != 0) || + (fsf_status_qual->doubleword[1] != 0)) { + strncpy(rec->tag2, "qual", ZFCP_DBF_TAG_SIZE); + level = 3; + } else { + strncpy(rec->tag2, "norm", ZFCP_DBF_TAG_SIZE); + level = 6; + } + + response->fsf_command = fsf_req->fsf_command; + response->fsf_reqid = (unsigned long)fsf_req; + response->fsf_seqno = fsf_req->seq_no; + response->fsf_issued = fsf_req->issued; + response->fsf_prot_status = qtcb->prefix.prot_status; + response->fsf_status = qtcb->header.fsf_status; + memcpy(response->fsf_prot_status_qual, + prot_status_qual, FSF_PROT_STATUS_QUAL_SIZE); + memcpy(response->fsf_status_qual, + fsf_status_qual, FSF_STATUS_QUALIFIER_SIZE); + response->fsf_req_status = fsf_req->status; + response->sbal_first = fsf_req->sbal_first; + response->sbal_curr = fsf_req->sbal_curr; + response->sbal_last = fsf_req->sbal_last; + response->pool = fsf_req->pool != NULL; + response->erp_action = (unsigned long)fsf_req->erp_action; + + switch (fsf_req->fsf_command) { + case FSF_QTCB_FCP_CMND: + if (fsf_req->status & ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT) + break; + scsi_cmnd = (struct scsi_cmnd *)fsf_req->data; + if (scsi_cmnd != NULL) { + response->data.send_fcp.scsi_cmnd + = (unsigned long)scsi_cmnd; + response->data.send_fcp.scsi_serial + = scsi_cmnd->serial_number; + } + break; + + case FSF_QTCB_OPEN_PORT_WITH_DID: + case FSF_QTCB_CLOSE_PORT: + case FSF_QTCB_CLOSE_PHYSICAL_PORT: + port = (struct zfcp_port *)fsf_req->data; + response->data.port.wwpn = port->wwpn; + response->data.port.d_id = port->d_id; + response->data.port.port_handle = qtcb->header.port_handle; + break; + + case FSF_QTCB_OPEN_LUN: + case FSF_QTCB_CLOSE_LUN: + unit = (struct zfcp_unit *)fsf_req->data; + port = unit->port; + response->data.unit.wwpn = port->wwpn; + response->data.unit.fcp_lun = unit->fcp_lun; + response->data.unit.port_handle = qtcb->header.port_handle; + response->data.unit.lun_handle = qtcb->header.lun_handle; + break; + + case FSF_QTCB_SEND_ELS: + send_els = (struct zfcp_send_els *)fsf_req->data; + response->data.send_els.d_id = qtcb->bottom.support.d_id; + response->data.send_els.ls_code = send_els->ls_code >> 24; + break; + + case FSF_QTCB_ABORT_FCP_CMND: + case FSF_QTCB_SEND_GENERIC: + case FSF_QTCB_EXCHANGE_CONFIG_DATA: + case FSF_QTCB_EXCHANGE_PORT_DATA: + case FSF_QTCB_DOWNLOAD_CONTROL_FILE: + case FSF_QTCB_UPLOAD_CONTROL_FILE: + break; + } + + debug_event(adapter->hba_dbf, level, + rec, sizeof(struct zfcp_hba_dbf_record)); + spin_unlock_irqrestore(&adapter->hba_dbf_lock, flags); +} + +inline void +zfcp_hba_dbf_event_fsf_unsol(const char *tag, struct zfcp_adapter *adapter, + struct fsf_status_read_buffer *status_buffer) +{ + struct zfcp_hba_dbf_record *rec = &adapter->hba_dbf_buf; + unsigned long flags; + + spin_lock_irqsave(&adapter->hba_dbf_lock, flags); + memset(rec, 0, sizeof(struct zfcp_hba_dbf_record)); + strncpy(rec->tag, "stat", ZFCP_DBF_TAG_SIZE); + strncpy(rec->tag2, tag, ZFCP_DBF_TAG_SIZE); + + rec->type.status.failed = adapter->status_read_failed; + if (status_buffer != NULL) { + rec->type.status.status_type = status_buffer->status_type; + rec->type.status.status_subtype = status_buffer->status_subtype; + memcpy(&rec->type.status.queue_designator, + &status_buffer->queue_designator, + sizeof(struct fsf_queue_designator)); + + switch (status_buffer->status_type) { + case FSF_STATUS_READ_SENSE_DATA_AVAIL: + rec->type.status.payload_size = + ZFCP_DBF_UNSOL_PAYLOAD_SENSE_DATA_AVAIL; + break; + + case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: + rec->type.status.payload_size = + ZFCP_DBF_UNSOL_PAYLOAD_BIT_ERROR_THRESHOLD; + break; + + case FSF_STATUS_READ_LINK_DOWN: + switch (status_buffer->status_subtype) { + case FSF_STATUS_READ_SUB_NO_PHYSICAL_LINK: + case FSF_STATUS_READ_SUB_FDISC_FAILED: + rec->type.status.payload_size = + sizeof(struct fsf_link_down_info); + } + break; + + case FSF_STATUS_READ_FEATURE_UPDATE_ALERT: + rec->type.status.payload_size = + ZFCP_DBF_UNSOL_PAYLOAD_FEATURE_UPDATE_ALERT; + break; + } + memcpy(&rec->type.status.payload, + &status_buffer->payload, rec->type.status.payload_size); + } + + debug_event(adapter->hba_dbf, 2, + rec, sizeof(struct zfcp_hba_dbf_record)); + spin_unlock_irqrestore(&adapter->hba_dbf_lock, flags); +} + +inline void +zfcp_hba_dbf_event_qdio(struct zfcp_adapter *adapter, unsigned int status, + unsigned int qdio_error, unsigned int siga_error, + int sbal_index, int sbal_count) +{ + struct zfcp_hba_dbf_record *rec = &adapter->hba_dbf_buf; + unsigned long flags; + + spin_lock_irqsave(&adapter->hba_dbf_lock, flags); + memset(rec, 0, sizeof(struct zfcp_hba_dbf_record)); + strncpy(rec->tag, "qdio", ZFCP_DBF_TAG_SIZE); + rec->type.qdio.status = status; + rec->type.qdio.qdio_error = qdio_error; + rec->type.qdio.siga_error = siga_error; + rec->type.qdio.sbal_index = sbal_index; + rec->type.qdio.sbal_count = sbal_count; + debug_event(adapter->hba_dbf, 0, + rec, sizeof(struct zfcp_hba_dbf_record)); + spin_unlock_irqrestore(&adapter->hba_dbf_lock, flags); +} + +static inline int +zfcp_hba_dbf_view_response(char *out_buf, + struct zfcp_hba_dbf_record_response *rec) +{ + int len = 0; + + len += zfcp_dbf_view(out_buf + len, "fsf_command", "0x%08x", + rec->fsf_command); + len += zfcp_dbf_view(out_buf + len, "fsf_reqid", "0x%0Lx", + rec->fsf_reqid); + len += zfcp_dbf_view(out_buf + len, "fsf_seqno", "0x%08x", + rec->fsf_seqno); + len += zfcp_dbf_stck(out_buf + len, "fsf_issued", rec->fsf_issued); + len += zfcp_dbf_view(out_buf + len, "fsf_prot_status", "0x%08x", + rec->fsf_prot_status); + len += zfcp_dbf_view(out_buf + len, "fsf_status", "0x%08x", + rec->fsf_status); + len += zfcp_dbf_view_dump(out_buf + len, "fsf_prot_status_qual", + rec->fsf_prot_status_qual, + FSF_PROT_STATUS_QUAL_SIZE, + 0, FSF_PROT_STATUS_QUAL_SIZE); + len += zfcp_dbf_view_dump(out_buf + len, "fsf_status_qual", + rec->fsf_status_qual, + FSF_STATUS_QUALIFIER_SIZE, + 0, FSF_STATUS_QUALIFIER_SIZE); + len += zfcp_dbf_view(out_buf + len, "fsf_req_status", "0x%08x", + rec->fsf_req_status); + len += zfcp_dbf_view(out_buf + len, "sbal_first", "0x%02x", + rec->sbal_first); + len += zfcp_dbf_view(out_buf + len, "sbal_curr", "0x%02x", + rec->sbal_curr); + len += zfcp_dbf_view(out_buf + len, "sbal_last", "0x%02x", + rec->sbal_last); + len += zfcp_dbf_view(out_buf + len, "pool", "0x%02x", rec->pool); + + switch (rec->fsf_command) { + case FSF_QTCB_FCP_CMND: + if (rec->fsf_req_status & ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT) + break; + len += zfcp_dbf_view(out_buf + len, "scsi_cmnd", "0x%0Lx", + rec->data.send_fcp.scsi_cmnd); + len += zfcp_dbf_view(out_buf + len, "scsi_serial", "0x%016Lx", + rec->data.send_fcp.scsi_serial); + break; + + case FSF_QTCB_OPEN_PORT_WITH_DID: + case FSF_QTCB_CLOSE_PORT: + case FSF_QTCB_CLOSE_PHYSICAL_PORT: + len += zfcp_dbf_view(out_buf + len, "wwpn", "0x%016Lx", + rec->data.port.wwpn); + len += zfcp_dbf_view(out_buf + len, "d_id", "0x%06x", + rec->data.port.d_id); + len += zfcp_dbf_view(out_buf + len, "port_handle", "0x%08x", + rec->data.port.port_handle); + break; + + case FSF_QTCB_OPEN_LUN: + case FSF_QTCB_CLOSE_LUN: + len += zfcp_dbf_view(out_buf + len, "wwpn", "0x%016Lx", + rec->data.unit.wwpn); + len += zfcp_dbf_view(out_buf + len, "fcp_lun", "0x%016Lx", + rec->data.unit.fcp_lun); + len += zfcp_dbf_view(out_buf + len, "port_handle", "0x%08x", + rec->data.unit.port_handle); + len += zfcp_dbf_view(out_buf + len, "lun_handle", "0x%08x", + rec->data.unit.lun_handle); + break; + + case FSF_QTCB_SEND_ELS: + len += zfcp_dbf_view(out_buf + len, "d_id", "0x%06x", + rec->data.send_els.d_id); + len += zfcp_dbf_view(out_buf + len, "ls_code", "0x%02x", + rec->data.send_els.ls_code); + break; + + case FSF_QTCB_ABORT_FCP_CMND: + case FSF_QTCB_SEND_GENERIC: + case FSF_QTCB_EXCHANGE_CONFIG_DATA: + case FSF_QTCB_EXCHANGE_PORT_DATA: + case FSF_QTCB_DOWNLOAD_CONTROL_FILE: + case FSF_QTCB_UPLOAD_CONTROL_FILE: + break; + } + + return len; +} + +static inline int +zfcp_hba_dbf_view_status(char *out_buf, struct zfcp_hba_dbf_record_status *rec) +{ + int len = 0; + + len += zfcp_dbf_view(out_buf + len, "failed", "0x%02x", rec->failed); + len += zfcp_dbf_view(out_buf + len, "status_type", "0x%08x", + rec->status_type); + len += zfcp_dbf_view(out_buf + len, "status_subtype", "0x%08x", + rec->status_subtype); + len += zfcp_dbf_view_dump(out_buf + len, "queue_designator", + (char *)&rec->queue_designator, + sizeof(struct fsf_queue_designator), + 0, sizeof(struct fsf_queue_designator)); + len += zfcp_dbf_view_dump(out_buf + len, "payload", + (char *)&rec->payload, + rec->payload_size, 0, rec->payload_size); + + return len; +} + +static inline int +zfcp_hba_dbf_view_qdio(char *out_buf, struct zfcp_hba_dbf_record_qdio *rec) +{ + int len = 0; + + len += zfcp_dbf_view(out_buf + len, "status", "0x%08x", rec->status); + len += zfcp_dbf_view(out_buf + len, "qdio_error", "0x%08x", + rec->qdio_error); + len += zfcp_dbf_view(out_buf + len, "siga_error", "0x%08x", + rec->siga_error); + len += zfcp_dbf_view(out_buf + len, "sbal_index", "0x%02x", + rec->sbal_index); + len += zfcp_dbf_view(out_buf + len, "sbal_count", "0x%02x", + rec->sbal_count); + + return len; +} + +static int +zfcp_hba_dbf_view_format(debug_info_t * id, struct debug_view *view, + char *out_buf, const char *in_buf) +{ + struct zfcp_hba_dbf_record *rec = (struct zfcp_hba_dbf_record *)in_buf; + int len = 0; + + if (strncmp(rec->tag, "dump", ZFCP_DBF_TAG_SIZE) == 0) + return 0; + + len += zfcp_dbf_tag(out_buf + len, "tag", rec->tag); + if (isalpha(rec->tag2[0])) + len += zfcp_dbf_tag(out_buf + len, "tag2", rec->tag2); + if (strncmp(rec->tag, "resp", ZFCP_DBF_TAG_SIZE) == 0) + len += zfcp_hba_dbf_view_response(out_buf + len, + &rec->type.response); + else if (strncmp(rec->tag, "stat", ZFCP_DBF_TAG_SIZE) == 0) + len += zfcp_hba_dbf_view_status(out_buf + len, + &rec->type.status); + else if (strncmp(rec->tag, "qdio", ZFCP_DBF_TAG_SIZE) == 0) + len += zfcp_hba_dbf_view_qdio(out_buf + len, &rec->type.qdio); + + len += sprintf(out_buf + len, "\n"); + + return len; +} + +struct debug_view zfcp_hba_dbf_view = { + "structured", + NULL, + &zfcp_dbf_view_header, + &zfcp_hba_dbf_view_format, + NULL, + NULL +}; + +inline void +_zfcp_san_dbf_event_common_ct(const char *tag, struct zfcp_fsf_req *fsf_req, + u32 s_id, u32 d_id, void *buffer, int buflen) +{ + struct zfcp_send_ct *send_ct = (struct zfcp_send_ct *)fsf_req->data; + struct zfcp_port *port = send_ct->port; + struct zfcp_adapter *adapter = port->adapter; + struct ct_hdr *header = (struct ct_hdr *)buffer; + struct zfcp_san_dbf_record *rec = &adapter->san_dbf_buf; + struct zfcp_san_dbf_record_ct *ct = &rec->type.ct; + unsigned long flags; + + spin_lock_irqsave(&adapter->san_dbf_lock, flags); + memset(rec, 0, sizeof(struct zfcp_san_dbf_record)); + strncpy(rec->tag, tag, ZFCP_DBF_TAG_SIZE); + rec->fsf_reqid = (unsigned long)fsf_req; + rec->fsf_seqno = fsf_req->seq_no; + rec->s_id = s_id; + rec->d_id = d_id; + if (strncmp(tag, "octc", ZFCP_DBF_TAG_SIZE) == 0) { + ct->type.request.cmd_req_code = header->cmd_rsp_code; + ct->type.request.revision = header->revision; + ct->type.request.gs_type = header->gs_type; + ct->type.request.gs_subtype = header->gs_subtype; + ct->type.request.options = header->options; + ct->type.request.max_res_size = header->max_res_size; + } else if (strncmp(tag, "rctc", ZFCP_DBF_TAG_SIZE) == 0) { + ct->type.response.cmd_rsp_code = header->cmd_rsp_code; + ct->type.response.revision = header->revision; + ct->type.response.reason_code = header->reason_code; + ct->type.response.reason_code_expl = header->reason_code_expl; + ct->type.response.vendor_unique = header->vendor_unique; + } + ct->payload_size = + min(buflen - (int)sizeof(struct ct_hdr), ZFCP_DBF_CT_PAYLOAD); + memcpy(ct->payload, buffer + sizeof(struct ct_hdr), ct->payload_size); + debug_event(adapter->san_dbf, 3, + rec, sizeof(struct zfcp_san_dbf_record)); + spin_unlock_irqrestore(&adapter->san_dbf_lock, flags); +} + +inline void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *fsf_req) +{ + struct zfcp_send_ct *ct = (struct zfcp_send_ct *)fsf_req->data; + struct zfcp_port *port = ct->port; + struct zfcp_adapter *adapter = port->adapter; + + _zfcp_san_dbf_event_common_ct("octc", fsf_req, + fc_host_port_id(adapter->scsi_host), + port->d_id, zfcp_sg_to_address(ct->req), + ct->req->length); +} + +inline void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *fsf_req) +{ + struct zfcp_send_ct *ct = (struct zfcp_send_ct *)fsf_req->data; + struct zfcp_port *port = ct->port; + struct zfcp_adapter *adapter = port->adapter; + + _zfcp_san_dbf_event_common_ct("rctc", fsf_req, port->d_id, + fc_host_port_id(adapter->scsi_host), + zfcp_sg_to_address(ct->resp), + ct->resp->length); +} + +static inline void +_zfcp_san_dbf_event_common_els(const char *tag, int level, + struct zfcp_fsf_req *fsf_req, u32 s_id, + u32 d_id, u8 ls_code, void *buffer, int buflen) +{ + struct zfcp_adapter *adapter = fsf_req->adapter; + struct zfcp_san_dbf_record *rec = &adapter->san_dbf_buf; + struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)rec; + unsigned long flags; + int offset = 0; + + spin_lock_irqsave(&adapter->san_dbf_lock, flags); + do { + memset(rec, 0, sizeof(struct zfcp_san_dbf_record)); + if (offset == 0) { + strncpy(rec->tag, tag, ZFCP_DBF_TAG_SIZE); + rec->fsf_reqid = (unsigned long)fsf_req; + rec->fsf_seqno = fsf_req->seq_no; + rec->s_id = s_id; + rec->d_id = d_id; + rec->type.els.ls_code = ls_code; + buflen = min(buflen, ZFCP_DBF_ELS_MAX_PAYLOAD); + rec->type.els.payload_size = buflen; + memcpy(rec->type.els.payload, + buffer, min(buflen, ZFCP_DBF_ELS_PAYLOAD)); + offset += min(buflen, ZFCP_DBF_ELS_PAYLOAD); + } else { + strncpy(dump->tag, "dump", ZFCP_DBF_TAG_SIZE); + dump->total_size = buflen; + dump->offset = offset; + dump->size = min(buflen - offset, + (int)sizeof(struct zfcp_san_dbf_record) + - (int)sizeof(struct zfcp_dbf_dump)); + memcpy(dump->data, buffer + offset, dump->size); + offset += dump->size; + } + debug_event(adapter->san_dbf, level, + rec, sizeof(struct zfcp_san_dbf_record)); + } while (offset < buflen); + spin_unlock_irqrestore(&adapter->san_dbf_lock, flags); +} + +inline void zfcp_san_dbf_event_els_request(struct zfcp_fsf_req *fsf_req) +{ + struct zfcp_send_els *els = (struct zfcp_send_els *)fsf_req->data; + + _zfcp_san_dbf_event_common_els("oels", 2, fsf_req, + fc_host_port_id(els->adapter->scsi_host), + els->d_id, + *(u8 *) zfcp_sg_to_address(els->req), + zfcp_sg_to_address(els->req), + els->req->length); +} + +inline void zfcp_san_dbf_event_els_response(struct zfcp_fsf_req *fsf_req) +{ + struct zfcp_send_els *els = (struct zfcp_send_els *)fsf_req->data; + + _zfcp_san_dbf_event_common_els("rels", 2, fsf_req, els->d_id, + fc_host_port_id(els->adapter->scsi_host), + *(u8 *) zfcp_sg_to_address(els->req), + zfcp_sg_to_address(els->resp), + els->resp->length); +} + +inline void zfcp_san_dbf_event_incoming_els(struct zfcp_fsf_req *fsf_req) +{ + struct zfcp_adapter *adapter = fsf_req->adapter; + struct fsf_status_read_buffer *status_buffer = + (struct fsf_status_read_buffer *)fsf_req->data; + int length = (int)status_buffer->length - + (int)((void *)&status_buffer->payload - (void *)status_buffer); + + _zfcp_san_dbf_event_common_els("iels", 1, fsf_req, status_buffer->d_id, + fc_host_port_id(adapter->scsi_host), + *(u8 *) status_buffer->payload, + (void *)status_buffer->payload, length); +} + +static int +zfcp_san_dbf_view_format(debug_info_t * id, struct debug_view *view, + char *out_buf, const char *in_buf) +{ + struct zfcp_san_dbf_record *rec = (struct zfcp_san_dbf_record *)in_buf; + char *buffer = NULL; + int buflen = 0, total = 0; + int len = 0; + + if (strncmp(rec->tag, "dump", ZFCP_DBF_TAG_SIZE) == 0) + return 0; + + len += zfcp_dbf_tag(out_buf + len, "tag", rec->tag); + len += zfcp_dbf_view(out_buf + len, "fsf_reqid", "0x%0Lx", + rec->fsf_reqid); + len += zfcp_dbf_view(out_buf + len, "fsf_seqno", "0x%08x", + rec->fsf_seqno); + len += zfcp_dbf_view(out_buf + len, "s_id", "0x%06x", rec->s_id); + len += zfcp_dbf_view(out_buf + len, "d_id", "0x%06x", rec->d_id); + + if (strncmp(rec->tag, "octc", ZFCP_DBF_TAG_SIZE) == 0) { + len += zfcp_dbf_view(out_buf + len, "cmd_req_code", "0x%04x", + rec->type.ct.type.request.cmd_req_code); + len += zfcp_dbf_view(out_buf + len, "revision", "0x%02x", + rec->type.ct.type.request.revision); + len += zfcp_dbf_view(out_buf + len, "gs_type", "0x%02x", + rec->type.ct.type.request.gs_type); + len += zfcp_dbf_view(out_buf + len, "gs_subtype", "0x%02x", + rec->type.ct.type.request.gs_subtype); + len += zfcp_dbf_view(out_buf + len, "options", "0x%02x", + rec->type.ct.type.request.options); + len += zfcp_dbf_view(out_buf + len, "max_res_size", "0x%04x", + rec->type.ct.type.request.max_res_size); + total = rec->type.ct.payload_size; + buffer = rec->type.ct.payload; + buflen = min(total, ZFCP_DBF_CT_PAYLOAD); + } else if (strncmp(rec->tag, "rctc", ZFCP_DBF_TAG_SIZE) == 0) { + len += zfcp_dbf_view(out_buf + len, "cmd_rsp_code", "0x%04x", + rec->type.ct.type.response.cmd_rsp_code); + len += zfcp_dbf_view(out_buf + len, "revision", "0x%02x", + rec->type.ct.type.response.revision); + len += zfcp_dbf_view(out_buf + len, "reason_code", "0x%02x", + rec->type.ct.type.response.reason_code); + len += + zfcp_dbf_view(out_buf + len, "reason_code_expl", "0x%02x", + rec->type.ct.type.response.reason_code_expl); + len += + zfcp_dbf_view(out_buf + len, "vendor_unique", "0x%02x", + rec->type.ct.type.response.vendor_unique); + total = rec->type.ct.payload_size; + buffer = rec->type.ct.payload; + buflen = min(total, ZFCP_DBF_CT_PAYLOAD); + } else if (strncmp(rec->tag, "oels", ZFCP_DBF_TAG_SIZE) == 0 || + strncmp(rec->tag, "rels", ZFCP_DBF_TAG_SIZE) == 0 || + strncmp(rec->tag, "iels", ZFCP_DBF_TAG_SIZE) == 0) { + len += zfcp_dbf_view(out_buf + len, "ls_code", "0x%02x", + rec->type.els.ls_code); + total = rec->type.els.payload_size; + buffer = rec->type.els.payload; + buflen = min(total, ZFCP_DBF_ELS_PAYLOAD); + } + + len += zfcp_dbf_view_dump(out_buf + len, "payload", + buffer, buflen, 0, total); + + if (buflen == total) + len += sprintf(out_buf + len, "\n"); + + return len; +} + +struct debug_view zfcp_san_dbf_view = { + "structured", + NULL, + &zfcp_dbf_view_header, + &zfcp_san_dbf_view_format, + NULL, + NULL +}; + +static inline void +_zfcp_scsi_dbf_event_common(const char *tag, const char *tag2, int level, + struct zfcp_adapter *adapter, + struct scsi_cmnd *scsi_cmnd, + struct zfcp_fsf_req *new_fsf_req) +{ + struct zfcp_fsf_req *fsf_req = + (struct zfcp_fsf_req *)scsi_cmnd->host_scribble; + struct zfcp_scsi_dbf_record *rec = &adapter->scsi_dbf_buf; + struct zfcp_dbf_dump *dump = (struct zfcp_dbf_dump *)rec; + unsigned long flags; + struct fcp_rsp_iu *fcp_rsp; + char *fcp_rsp_info = NULL, *fcp_sns_info = NULL; + int offset = 0, buflen = 0; + + spin_lock_irqsave(&adapter->scsi_dbf_lock, flags); + do { + memset(rec, 0, sizeof(struct zfcp_scsi_dbf_record)); + if (offset == 0) { + strncpy(rec->tag, tag, ZFCP_DBF_TAG_SIZE); + strncpy(rec->tag2, tag2, ZFCP_DBF_TAG_SIZE); + if (scsi_cmnd->device) { + rec->scsi_id = scsi_cmnd->device->id; + rec->scsi_lun = scsi_cmnd->device->lun; + } + rec->scsi_result = scsi_cmnd->result; + rec->scsi_cmnd = (unsigned long)scsi_cmnd; + rec->scsi_serial = scsi_cmnd->serial_number; + memcpy(rec->scsi_opcode, + &scsi_cmnd->cmnd, + min((int)scsi_cmnd->cmd_len, + ZFCP_DBF_SCSI_OPCODE)); + rec->scsi_retries = scsi_cmnd->retries; + rec->scsi_allowed = scsi_cmnd->allowed; + if (fsf_req != NULL) { + fcp_rsp = (struct fcp_rsp_iu *) + &(fsf_req->qtcb->bottom.io.fcp_rsp); + fcp_rsp_info = + zfcp_get_fcp_rsp_info_ptr(fcp_rsp); + fcp_sns_info = + zfcp_get_fcp_sns_info_ptr(fcp_rsp); + + rec->type.fcp.rsp_validity = + fcp_rsp->validity.value; + rec->type.fcp.rsp_scsi_status = + fcp_rsp->scsi_status; + rec->type.fcp.rsp_resid = fcp_rsp->fcp_resid; + if (fcp_rsp->validity.bits.fcp_rsp_len_valid) + rec->type.fcp.rsp_code = + *(fcp_rsp_info + 3); + if (fcp_rsp->validity.bits.fcp_sns_len_valid) { + buflen = min((int)fcp_rsp->fcp_sns_len, + ZFCP_DBF_SCSI_MAX_FCP_SNS_INFO); + rec->type.fcp.sns_info_len = buflen; + memcpy(rec->type.fcp.sns_info, + fcp_sns_info, + min(buflen, + ZFCP_DBF_SCSI_FCP_SNS_INFO)); + offset += min(buflen, + ZFCP_DBF_SCSI_FCP_SNS_INFO); + } + + rec->fsf_reqid = (unsigned long)fsf_req; + rec->fsf_seqno = fsf_req->seq_no; + rec->fsf_issued = fsf_req->issued; + } + if (new_fsf_req != NULL) { + rec->type.new_fsf_req.fsf_reqid = + (unsigned long) + new_fsf_req; + rec->type.new_fsf_req.fsf_seqno = + new_fsf_req->seq_no; + rec->type.new_fsf_req.fsf_issued = + new_fsf_req->issued; + } + } else { + strncpy(dump->tag, "dump", ZFCP_DBF_TAG_SIZE); + dump->total_size = buflen; + dump->offset = offset; + dump->size = min(buflen - offset, + (int)sizeof(struct + zfcp_scsi_dbf_record) - + (int)sizeof(struct zfcp_dbf_dump)); + memcpy(dump->data, fcp_sns_info + offset, dump->size); + offset += dump->size; + } + debug_event(adapter->scsi_dbf, level, + rec, sizeof(struct zfcp_scsi_dbf_record)); + } while (offset < buflen); + spin_unlock_irqrestore(&adapter->scsi_dbf_lock, flags); +} + +inline void +zfcp_scsi_dbf_event_result(const char *tag, int level, + struct zfcp_adapter *adapter, + struct scsi_cmnd *scsi_cmnd) +{ + _zfcp_scsi_dbf_event_common("rslt", + tag, level, adapter, scsi_cmnd, NULL); +} + +inline void +zfcp_scsi_dbf_event_abort(const char *tag, struct zfcp_adapter *adapter, + struct scsi_cmnd *scsi_cmnd, + struct zfcp_fsf_req *new_fsf_req) +{ + _zfcp_scsi_dbf_event_common("abrt", + tag, 1, adapter, scsi_cmnd, new_fsf_req); +} + +inline void +zfcp_scsi_dbf_event_devreset(const char *tag, u8 flag, struct zfcp_unit *unit, + struct scsi_cmnd *scsi_cmnd) +{ + struct zfcp_adapter *adapter = unit->port->adapter; + + _zfcp_scsi_dbf_event_common(flag == FCP_TARGET_RESET ? "trst" : "lrst", + tag, 1, adapter, scsi_cmnd, NULL); +} + +static int +zfcp_scsi_dbf_view_format(debug_info_t * id, struct debug_view *view, + char *out_buf, const char *in_buf) +{ + struct zfcp_scsi_dbf_record *rec = + (struct zfcp_scsi_dbf_record *)in_buf; + int len = 0; + + if (strncmp(rec->tag, "dump", ZFCP_DBF_TAG_SIZE) == 0) + return 0; + + len += zfcp_dbf_tag(out_buf + len, "tag", rec->tag); + len += zfcp_dbf_tag(out_buf + len, "tag2", rec->tag2); + len += zfcp_dbf_view(out_buf + len, "scsi_id", "0x%08x", rec->scsi_id); + len += zfcp_dbf_view(out_buf + len, "scsi_lun", "0x%08x", + rec->scsi_lun); + len += zfcp_dbf_view(out_buf + len, "scsi_result", "0x%08x", + rec->scsi_result); + len += zfcp_dbf_view(out_buf + len, "scsi_cmnd", "0x%0Lx", + rec->scsi_cmnd); + len += zfcp_dbf_view(out_buf + len, "scsi_serial", "0x%016Lx", + rec->scsi_serial); + len += zfcp_dbf_view_dump(out_buf + len, "scsi_opcode", + rec->scsi_opcode, + ZFCP_DBF_SCSI_OPCODE, + 0, ZFCP_DBF_SCSI_OPCODE); + len += zfcp_dbf_view(out_buf + len, "scsi_retries", "0x%02x", + rec->scsi_retries); + len += zfcp_dbf_view(out_buf + len, "scsi_allowed", "0x%02x", + rec->scsi_allowed); + len += zfcp_dbf_view(out_buf + len, "fsf_reqid", "0x%0Lx", + rec->fsf_reqid); + len += zfcp_dbf_view(out_buf + len, "fsf_seqno", "0x%08x", + rec->fsf_seqno); + len += zfcp_dbf_stck(out_buf + len, "fsf_issued", rec->fsf_issued); + if (strncmp(rec->tag, "rslt", ZFCP_DBF_TAG_SIZE) == 0) { + len += + zfcp_dbf_view(out_buf + len, "fcp_rsp_validity", "0x%02x", + rec->type.fcp.rsp_validity); + len += + zfcp_dbf_view(out_buf + len, "fcp_rsp_scsi_status", + "0x%02x", rec->type.fcp.rsp_scsi_status); + len += + zfcp_dbf_view(out_buf + len, "fcp_rsp_resid", "0x%08x", + rec->type.fcp.rsp_resid); + len += + zfcp_dbf_view(out_buf + len, "fcp_rsp_code", "0x%08x", + rec->type.fcp.rsp_code); + len += + zfcp_dbf_view(out_buf + len, "fcp_sns_info_len", "0x%08x", + rec->type.fcp.sns_info_len); + len += + zfcp_dbf_view_dump(out_buf + len, "fcp_sns_info", + rec->type.fcp.sns_info, + min((int)rec->type.fcp.sns_info_len, + ZFCP_DBF_SCSI_FCP_SNS_INFO), 0, + rec->type.fcp.sns_info_len); + } else if (strncmp(rec->tag, "abrt", ZFCP_DBF_TAG_SIZE) == 0) { + len += zfcp_dbf_view(out_buf + len, "fsf_reqid_abort", "0x%0Lx", + rec->type.new_fsf_req.fsf_reqid); + len += zfcp_dbf_view(out_buf + len, "fsf_seqno_abort", "0x%08x", + rec->type.new_fsf_req.fsf_seqno); + len += zfcp_dbf_stck(out_buf + len, "fsf_issued", + rec->type.new_fsf_req.fsf_issued); + } else if ((strncmp(rec->tag, "trst", ZFCP_DBF_TAG_SIZE) == 0) || + (strncmp(rec->tag, "lrst", ZFCP_DBF_TAG_SIZE) == 0)) { + len += zfcp_dbf_view(out_buf + len, "fsf_reqid_reset", "0x%0Lx", + rec->type.new_fsf_req.fsf_reqid); + len += zfcp_dbf_view(out_buf + len, "fsf_seqno_reset", "0x%08x", + rec->type.new_fsf_req.fsf_seqno); + len += zfcp_dbf_stck(out_buf + len, "fsf_issued", + rec->type.new_fsf_req.fsf_issued); + } + + len += sprintf(out_buf + len, "\n"); + + return len; +} + +struct debug_view zfcp_scsi_dbf_view = { + "structured", + NULL, + &zfcp_dbf_view_header, + &zfcp_scsi_dbf_view_format, + NULL, + NULL +}; + +/** + * zfcp_adapter_debug_register - registers debug feature for an adapter + * @adapter: pointer to adapter for which debug features should be registered + * return: -ENOMEM on error, 0 otherwise + */ +int zfcp_adapter_debug_register(struct zfcp_adapter *adapter) +{ + char dbf_name[DEBUG_MAX_NAME_LEN]; + + /* debug feature area which records recovery activity */ + spin_lock_init(&adapter->erp_dbf_lock); + sprintf(dbf_name, "zfcp_%s_erp", zfcp_get_busid_by_adapter(adapter)); + adapter->erp_dbf = debug_register(dbf_name, dbfsize, 2, + sizeof(struct zfcp_erp_dbf_record)); + if (!adapter->erp_dbf) + goto failed; + debug_register_view(adapter->erp_dbf, &debug_hex_ascii_view); + debug_set_level(adapter->erp_dbf, 3); + + /* debug feature area which records HBA (FSF and QDIO) conditions */ + spin_lock_init(&adapter->hba_dbf_lock); + sprintf(dbf_name, "zfcp_%s_hba", zfcp_get_busid_by_adapter(adapter)); + adapter->hba_dbf = debug_register(dbf_name, dbfsize, 1, + sizeof(struct zfcp_hba_dbf_record)); + if (!adapter->hba_dbf) + goto failed; + debug_register_view(adapter->hba_dbf, &debug_hex_ascii_view); + debug_register_view(adapter->hba_dbf, &zfcp_hba_dbf_view); + debug_set_level(adapter->hba_dbf, 3); + + /* debug feature area which records SAN command failures and recovery */ + spin_lock_init(&adapter->san_dbf_lock); + sprintf(dbf_name, "zfcp_%s_san", zfcp_get_busid_by_adapter(adapter)); + adapter->san_dbf = debug_register(dbf_name, dbfsize, 1, + sizeof(struct zfcp_san_dbf_record)); + if (!adapter->san_dbf) + goto failed; + debug_register_view(adapter->san_dbf, &debug_hex_ascii_view); + debug_register_view(adapter->san_dbf, &zfcp_san_dbf_view); + debug_set_level(adapter->san_dbf, 6); + + /* debug feature area which records SCSI command failures and recovery */ + spin_lock_init(&adapter->scsi_dbf_lock); + sprintf(dbf_name, "zfcp_%s_scsi", zfcp_get_busid_by_adapter(adapter)); + adapter->scsi_dbf = debug_register(dbf_name, dbfsize, 1, + sizeof(struct zfcp_scsi_dbf_record)); + if (!adapter->scsi_dbf) + goto failed; + debug_register_view(adapter->scsi_dbf, &debug_hex_ascii_view); + debug_register_view(adapter->scsi_dbf, &zfcp_scsi_dbf_view); + debug_set_level(adapter->scsi_dbf, 3); + + return 0; + + failed: + zfcp_adapter_debug_unregister(adapter); + + return -ENOMEM; +} + +/** + * zfcp_adapter_debug_unregister - unregisters debug feature for an adapter + * @adapter: pointer to adapter for which debug features should be unregistered + */ +void zfcp_adapter_debug_unregister(struct zfcp_adapter *adapter) +{ + debug_unregister(adapter->scsi_dbf); + debug_unregister(adapter->san_dbf); + debug_unregister(adapter->hba_dbf); + debug_unregister(adapter->erp_dbf); + adapter->scsi_dbf = NULL; + adapter->san_dbf = NULL; + adapter->hba_dbf = NULL; + adapter->erp_dbf = NULL; +} + +#undef ZFCP_LOG_AREA diff --git a/drivers/s390/scsi/zfcp_def.h b/drivers/s390/scsi/zfcp_def.h index 455e902533a..d81b737d68c 100644 --- a/drivers/s390/scsi/zfcp_def.h +++ b/drivers/s390/scsi/zfcp_def.h @@ -66,7 +66,7 @@ /********************* GENERAL DEFINES *********************************/ /* zfcp version number, it consists of major, minor, and patch-level number */ -#define ZFCP_VERSION "4.3.0" +#define ZFCP_VERSION "4.5.0" /** * zfcp_sg_to_address - determine kernel address from struct scatterlist @@ -154,13 +154,17 @@ typedef u32 scsi_lun_t; #define ZFCP_EXCHANGE_CONFIG_DATA_FIRST_SLEEP 100 #define ZFCP_EXCHANGE_CONFIG_DATA_RETRIES 7 +/* Retry 5 times every 2 second, then every minute */ +#define ZFCP_EXCHANGE_PORT_DATA_SHORT_RETRIES 5 +#define ZFCP_EXCHANGE_PORT_DATA_SHORT_SLEEP 200 +#define ZFCP_EXCHANGE_PORT_DATA_LONG_SLEEP 6000 + /* timeout value for "default timer" for fsf requests */ #define ZFCP_FSF_REQUEST_TIMEOUT (60*HZ); /*************** FIBRE CHANNEL PROTOCOL SPECIFIC DEFINES ********************/ typedef unsigned long long wwn_t; -typedef unsigned int fc_id_t; typedef unsigned long long fcp_lun_t; /* data length field may be at variable position in FCP-2 FCP_CMND IU */ typedef unsigned int fcp_dl_t; @@ -281,6 +285,171 @@ struct fcp_logo { } __attribute__((packed)); /* + * DBF stuff + */ +#define ZFCP_DBF_TAG_SIZE 4 + +struct zfcp_dbf_dump { + u8 tag[ZFCP_DBF_TAG_SIZE]; + u32 total_size; /* size of total dump data */ + u32 offset; /* how much data has being already dumped */ + u32 size; /* how much data comes with this record */ + u8 data[]; /* dump data */ +} __attribute__ ((packed)); + +/* FIXME: to be inflated when reworking the erp dbf */ +struct zfcp_erp_dbf_record { + u8 dummy[16]; +} __attribute__ ((packed)); + +struct zfcp_hba_dbf_record_response { + u32 fsf_command; + u64 fsf_reqid; + u32 fsf_seqno; + u64 fsf_issued; + u32 fsf_prot_status; + u32 fsf_status; + u8 fsf_prot_status_qual[FSF_PROT_STATUS_QUAL_SIZE]; + u8 fsf_status_qual[FSF_STATUS_QUALIFIER_SIZE]; + u32 fsf_req_status; + u8 sbal_first; + u8 sbal_curr; + u8 sbal_last; + u8 pool; + u64 erp_action; + union { + struct { + u64 scsi_cmnd; + u64 scsi_serial; + } send_fcp; + struct { + u64 wwpn; + u32 d_id; + u32 port_handle; + } port; + struct { + u64 wwpn; + u64 fcp_lun; + u32 port_handle; + u32 lun_handle; + } unit; + struct { + u32 d_id; + u8 ls_code; + } send_els; + } data; +} __attribute__ ((packed)); + +struct zfcp_hba_dbf_record_status { + u8 failed; + u32 status_type; + u32 status_subtype; + struct fsf_queue_designator + queue_designator; + u32 payload_size; +#define ZFCP_DBF_UNSOL_PAYLOAD 80 +#define ZFCP_DBF_UNSOL_PAYLOAD_SENSE_DATA_AVAIL 32 +#define ZFCP_DBF_UNSOL_PAYLOAD_BIT_ERROR_THRESHOLD 56 +#define ZFCP_DBF_UNSOL_PAYLOAD_FEATURE_UPDATE_ALERT 2 * sizeof(u32) + u8 payload[ZFCP_DBF_UNSOL_PAYLOAD]; +} __attribute__ ((packed)); + +struct zfcp_hba_dbf_record_qdio { + u32 status; + u32 qdio_error; + u32 siga_error; + u8 sbal_index; + u8 sbal_count; +} __attribute__ ((packed)); + +struct zfcp_hba_dbf_record { + u8 tag[ZFCP_DBF_TAG_SIZE]; + u8 tag2[ZFCP_DBF_TAG_SIZE]; + union { + struct zfcp_hba_dbf_record_response response; + struct zfcp_hba_dbf_record_status status; + struct zfcp_hba_dbf_record_qdio qdio; + } type; +} __attribute__ ((packed)); + +struct zfcp_san_dbf_record_ct { + union { + struct { + u16 cmd_req_code; + u8 revision; + u8 gs_type; + u8 gs_subtype; + u8 options; + u16 max_res_size; + } request; + struct { + u16 cmd_rsp_code; + u8 revision; + u8 reason_code; + u8 reason_code_expl; + u8 vendor_unique; + } response; + } type; + u32 payload_size; +#define ZFCP_DBF_CT_PAYLOAD 24 + u8 payload[ZFCP_DBF_CT_PAYLOAD]; +} __attribute__ ((packed)); + +struct zfcp_san_dbf_record_els { + u8 ls_code; + u32 payload_size; +#define ZFCP_DBF_ELS_PAYLOAD 32 +#define ZFCP_DBF_ELS_MAX_PAYLOAD 1024 + u8 payload[ZFCP_DBF_ELS_PAYLOAD]; +} __attribute__ ((packed)); + +struct zfcp_san_dbf_record { + u8 tag[ZFCP_DBF_TAG_SIZE]; + u64 fsf_reqid; + u32 fsf_seqno; + u32 s_id; + u32 d_id; + union { + struct zfcp_san_dbf_record_ct ct; + struct zfcp_san_dbf_record_els els; + } type; +} __attribute__ ((packed)); + +struct zfcp_scsi_dbf_record { + u8 tag[ZFCP_DBF_TAG_SIZE]; + u8 tag2[ZFCP_DBF_TAG_SIZE]; + u32 scsi_id; + u32 scsi_lun; + u32 scsi_result; + u64 scsi_cmnd; + u64 scsi_serial; +#define ZFCP_DBF_SCSI_OPCODE 16 + u8 scsi_opcode[ZFCP_DBF_SCSI_OPCODE]; + u8 scsi_retries; + u8 scsi_allowed; + u64 fsf_reqid; + u32 fsf_seqno; + u64 fsf_issued; + union { + struct { + u64 fsf_reqid; + u32 fsf_seqno; + u64 fsf_issued; + } new_fsf_req; + struct { + u8 rsp_validity; + u8 rsp_scsi_status; + u32 rsp_resid; + u8 rsp_code; +#define ZFCP_DBF_SCSI_FCP_SNS_INFO 16 +#define ZFCP_DBF_SCSI_MAX_FCP_SNS_INFO 256 + u32 sns_info_len; + u8 sns_info[ZFCP_DBF_SCSI_FCP_SNS_INFO]; + } fcp; + } type; +} __attribute__ ((packed)); + +/* * FC-FS stuff */ #define R_A_TOV 10 /* seconds */ @@ -339,34 +508,6 @@ struct zfcp_rc_entry { */ #define ZFCP_CT_TIMEOUT (3 * R_A_TOV) - -/***************** S390 DEBUG FEATURE SPECIFIC DEFINES ***********************/ - -/* debug feature entries per adapter */ -#define ZFCP_ERP_DBF_INDEX 1 -#define ZFCP_ERP_DBF_AREAS 2 -#define ZFCP_ERP_DBF_LENGTH 16 -#define ZFCP_ERP_DBF_LEVEL 3 -#define ZFCP_ERP_DBF_NAME "zfcperp" - -#define ZFCP_CMD_DBF_INDEX 2 -#define ZFCP_CMD_DBF_AREAS 1 -#define ZFCP_CMD_DBF_LENGTH 8 -#define ZFCP_CMD_DBF_LEVEL 3 -#define ZFCP_CMD_DBF_NAME "zfcpcmd" - -#define ZFCP_ABORT_DBF_INDEX 2 -#define ZFCP_ABORT_DBF_AREAS 1 -#define ZFCP_ABORT_DBF_LENGTH 8 -#define ZFCP_ABORT_DBF_LEVEL 6 -#define ZFCP_ABORT_DBF_NAME "zfcpabt" - -#define ZFCP_IN_ELS_DBF_INDEX 2 -#define ZFCP_IN_ELS_DBF_AREAS 1 -#define ZFCP_IN_ELS_DBF_LENGTH 8 -#define ZFCP_IN_ELS_DBF_LEVEL 6 -#define ZFCP_IN_ELS_DBF_NAME "zfcpels" - /******************** LOGGING MACROS AND DEFINES *****************************/ /* @@ -501,6 +642,7 @@ do { \ #define ZFCP_STATUS_ADAPTER_ERP_THREAD_KILL 0x00000080 #define ZFCP_STATUS_ADAPTER_ERP_PENDING 0x00000100 #define ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED 0x00000200 +#define ZFCP_STATUS_ADAPTER_XPORT_OK 0x00000800 #define ZFCP_STATUS_ADAPTER_SCSI_UP \ (ZFCP_STATUS_COMMON_UNBLOCKED | \ @@ -635,45 +777,6 @@ struct zfcp_adapter_mempool { mempool_t *data_gid_pn; }; -struct zfcp_exchange_config_data{ -}; - -struct zfcp_open_port { - struct zfcp_port *port; -}; - -struct zfcp_close_port { - struct zfcp_port *port; -}; - -struct zfcp_open_unit { - struct zfcp_unit *unit; -}; - -struct zfcp_close_unit { - struct zfcp_unit *unit; -}; - -struct zfcp_close_physical_port { - struct zfcp_port *port; -}; - -struct zfcp_send_fcp_command_task { - struct zfcp_fsf_req *fsf_req; - struct zfcp_unit *unit; - struct scsi_cmnd *scsi_cmnd; - unsigned long start_jiffies; -}; - -struct zfcp_send_fcp_command_task_management { - struct zfcp_unit *unit; -}; - -struct zfcp_abort_fcp_command { - struct zfcp_fsf_req *fsf_req; - struct zfcp_unit *unit; -}; - /* * header for CT_IU */ @@ -702,7 +805,7 @@ struct ct_iu_gid_pn_req { /* FS_ACC IU and data unit for GID_PN nameserver request */ struct ct_iu_gid_pn_resp { struct ct_hdr header; - fc_id_t d_id; + u32 d_id; } __attribute__ ((packed)); typedef void (*zfcp_send_ct_handler_t)(unsigned long); @@ -768,7 +871,7 @@ typedef void (*zfcp_send_els_handler_t)(unsigned long); struct zfcp_send_els { struct zfcp_adapter *adapter; struct zfcp_port *port; - fc_id_t d_id; + u32 d_id; struct scatterlist *req; struct scatterlist *resp; unsigned int req_count; @@ -781,33 +884,6 @@ struct zfcp_send_els { int status; }; -struct zfcp_status_read { - struct fsf_status_read_buffer *buffer; -}; - -struct zfcp_fsf_done { - struct completion *complete; - int status; -}; - -/* request specific data */ -union zfcp_req_data { - struct zfcp_exchange_config_data exchange_config_data; - struct zfcp_open_port open_port; - struct zfcp_close_port close_port; - struct zfcp_open_unit open_unit; - struct zfcp_close_unit close_unit; - struct zfcp_close_physical_port close_physical_port; - struct zfcp_send_fcp_command_task send_fcp_command_task; - struct zfcp_send_fcp_command_task_management - send_fcp_command_task_management; - struct zfcp_abort_fcp_command abort_fcp_command; - struct zfcp_send_ct *send_ct; - struct zfcp_send_els *send_els; - struct zfcp_status_read status_read; - struct fsf_qtcb_bottom_port *port_data; -}; - struct zfcp_qdio_queue { struct qdio_buffer *buffer[QDIO_MAX_BUFFERS_PER_Q]; /* SBALs */ u8 free_index; /* index of next free bfr @@ -838,21 +914,19 @@ struct zfcp_adapter { atomic_t refcount; /* reference count */ wait_queue_head_t remove_wq; /* can be used to wait for refcount drop to zero */ - wwn_t wwnn; /* WWNN */ - wwn_t wwpn; /* WWPN */ - fc_id_t s_id; /* N_Port ID */ wwn_t peer_wwnn; /* P2P peer WWNN */ wwn_t peer_wwpn; /* P2P peer WWPN */ - fc_id_t peer_d_id; /* P2P peer D_ID */ + u32 peer_d_id; /* P2P peer D_ID */ + wwn_t physical_wwpn; /* WWPN of physical port */ + u32 physical_s_id; /* local FC port ID */ struct ccw_device *ccw_device; /* S/390 ccw device */ u8 fc_service_class; u32 fc_topology; /* FC topology */ - u32 fc_link_speed; /* FC interface speed */ u32 hydra_version; /* Hydra version */ u32 fsf_lic_version; - u32 supported_features;/* of FCP channel */ + u32 adapter_features; /* FCP channel features */ + u32 connection_features; /* host connection features */ u32 hardware_version; /* of FCP channel */ - u8 serial_number[32]; /* of hardware */ struct Scsi_Host *scsi_host; /* Pointer to mid-layer */ unsigned short scsi_host_no; /* Assigned host number */ unsigned char name[9]; @@ -889,11 +963,18 @@ struct zfcp_adapter { u32 erp_low_mem_count; /* nr of erp actions waiting for memory */ struct zfcp_port *nameserver_port; /* adapter's nameserver */ - debug_info_t *erp_dbf; /* S/390 debug features */ - debug_info_t *abort_dbf; - debug_info_t *in_els_dbf; - debug_info_t *cmd_dbf; - spinlock_t dbf_lock; + debug_info_t *erp_dbf; + debug_info_t *hba_dbf; + debug_info_t *san_dbf; /* debug feature areas */ + debug_info_t *scsi_dbf; + spinlock_t erp_dbf_lock; + spinlock_t hba_dbf_lock; + spinlock_t san_dbf_lock; + spinlock_t scsi_dbf_lock; + struct zfcp_erp_dbf_record erp_dbf_buf; + struct zfcp_hba_dbf_record hba_dbf_buf; + struct zfcp_san_dbf_record san_dbf_buf; + struct zfcp_scsi_dbf_record scsi_dbf_buf; struct zfcp_adapter_mempool pool; /* Adapter memory pools */ struct qdio_initialize qdio_init_data; /* for qdio_establish */ struct device generic_services; /* directory for WKA ports */ @@ -919,7 +1000,7 @@ struct zfcp_port { atomic_t status; /* status of this remote port */ wwn_t wwnn; /* WWNN if known */ wwn_t wwpn; /* WWPN */ - fc_id_t d_id; /* D_ID */ + u32 d_id; /* D_ID */ u32 handle; /* handle assigned by FSF */ struct zfcp_erp_action erp_action; /* pending error recovery */ atomic_t erp_counter; @@ -963,11 +1044,13 @@ struct zfcp_fsf_req { u32 fsf_command; /* FSF Command copy */ struct fsf_qtcb *qtcb; /* address of associated QTCB */ u32 seq_no; /* Sequence number of request */ - union zfcp_req_data data; /* Info fields of request */ + unsigned long data; /* private data of request */ struct zfcp_erp_action *erp_action; /* used if this request is issued on behalf of erp */ mempool_t *pool; /* used if request was alloacted from emergency pool */ + unsigned long long issued; /* request sent time (STCK) */ + struct zfcp_unit *unit; }; typedef void zfcp_fsf_req_handler_t(struct zfcp_fsf_req*); diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index cb4f612550b..023f4e558ae 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -82,6 +82,7 @@ static int zfcp_erp_adapter_strategy_open(struct zfcp_erp_action *); static int zfcp_erp_adapter_strategy_open_qdio(struct zfcp_erp_action *); static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *); static int zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *); +static int zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *); static int zfcp_erp_adapter_strategy_open_fsf_statusread( struct zfcp_erp_action *); @@ -345,13 +346,13 @@ zfcp_erp_adisc(struct zfcp_port *port) /* acc. to FC-FS, hard_nport_id in ADISC should not be set for ports without FC-AL-2 capability, so we don't set it */ - adisc->wwpn = adapter->wwpn; - adisc->wwnn = adapter->wwnn; - adisc->nport_id = adapter->s_id; + adisc->wwpn = fc_host_port_name(adapter->scsi_host); + adisc->wwnn = fc_host_node_name(adapter->scsi_host); + adisc->nport_id = fc_host_port_id(adapter->scsi_host); ZFCP_LOG_INFO("ADISC request from s_id 0x%08x to d_id 0x%08x " "(wwpn=0x%016Lx, wwnn=0x%016Lx, " "hard_nport_id=0x%08x, nport_id=0x%08x)\n", - adapter->s_id, send_els->d_id, (wwn_t) adisc->wwpn, + adisc->nport_id, send_els->d_id, (wwn_t) adisc->wwpn, (wwn_t) adisc->wwnn, adisc->hard_nport_id, adisc->nport_id); @@ -404,7 +405,7 @@ zfcp_erp_adisc_handler(unsigned long data) struct zfcp_send_els *send_els; struct zfcp_port *port; struct zfcp_adapter *adapter; - fc_id_t d_id; + u32 d_id; struct zfcp_ls_adisc_acc *adisc; send_els = (struct zfcp_send_els *) data; @@ -435,9 +436,9 @@ zfcp_erp_adisc_handler(unsigned long data) ZFCP_LOG_INFO("ADISC response from d_id 0x%08x to s_id " "0x%08x (wwpn=0x%016Lx, wwnn=0x%016Lx, " "hard_nport_id=0x%08x, nport_id=0x%08x)\n", - d_id, adapter->s_id, (wwn_t) adisc->wwpn, - (wwn_t) adisc->wwnn, adisc->hard_nport_id, - adisc->nport_id); + d_id, fc_host_port_id(adapter->scsi_host), + (wwn_t) adisc->wwpn, (wwn_t) adisc->wwnn, + adisc->hard_nport_id, adisc->nport_id); /* set wwnn for port */ if (port->wwnn == 0) @@ -886,7 +887,7 @@ static int zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *erp_action) { int retval = 0; - struct zfcp_fsf_req *fsf_req; + struct zfcp_fsf_req *fsf_req = NULL; struct zfcp_adapter *adapter = erp_action->adapter; if (erp_action->fsf_req) { @@ -896,7 +897,7 @@ zfcp_erp_strategy_check_fsfreq(struct zfcp_erp_action *erp_action) list_for_each_entry(fsf_req, &adapter->fsf_req_list_head, list) if (fsf_req == erp_action->fsf_req) break; - if (fsf_req == erp_action->fsf_req) { + if (fsf_req && (fsf_req->erp_action == erp_action)) { /* fsf_req still exists */ debug_text_event(adapter->erp_dbf, 3, "a_ca_req"); debug_event(adapter->erp_dbf, 3, &fsf_req, @@ -2258,16 +2259,21 @@ zfcp_erp_adapter_strategy_close_qdio(struct zfcp_erp_action *erp_action) static int zfcp_erp_adapter_strategy_open_fsf(struct zfcp_erp_action *erp_action) { - int retval; + int xconfig, xport; + + if (atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, + &erp_action->adapter->status)) { + zfcp_erp_adapter_strategy_open_fsf_xport(erp_action); + atomic_set(&erp_action->adapter->erp_counter, 0); + return ZFCP_ERP_FAILED; + } - /* do 'exchange configuration data' */ - retval = zfcp_erp_adapter_strategy_open_fsf_xconfig(erp_action); - if (retval == ZFCP_ERP_FAILED) - return retval; + xconfig = zfcp_erp_adapter_strategy_open_fsf_xconfig(erp_action); + xport = zfcp_erp_adapter_strategy_open_fsf_xport(erp_action); + if ((xconfig == ZFCP_ERP_FAILED) || (xport == ZFCP_ERP_FAILED)) + return ZFCP_ERP_FAILED; - /* start the desired number of Status Reads */ - retval = zfcp_erp_adapter_strategy_open_fsf_statusread(erp_action); - return retval; + return zfcp_erp_adapter_strategy_open_fsf_statusread(erp_action); } /* @@ -2291,7 +2297,9 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action) atomic_clear_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT, &adapter->status); ZFCP_LOG_DEBUG("Doing exchange config data\n"); + write_lock(&adapter->erp_lock); zfcp_erp_action_to_running(erp_action); + write_unlock(&adapter->erp_lock); zfcp_erp_timeout_init(erp_action); if (zfcp_fsf_exchange_config_data(erp_action)) { retval = ZFCP_ERP_FAILED; @@ -2348,6 +2356,76 @@ zfcp_erp_adapter_strategy_open_fsf_xconfig(struct zfcp_erp_action *erp_action) return retval; } +static int +zfcp_erp_adapter_strategy_open_fsf_xport(struct zfcp_erp_action *erp_action) +{ + int retval = ZFCP_ERP_SUCCEEDED; + int retries; + int sleep; + struct zfcp_adapter *adapter = erp_action->adapter; + + atomic_clear_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status); + + for (retries = 0; ; retries++) { + ZFCP_LOG_DEBUG("Doing exchange port data\n"); + zfcp_erp_action_to_running(erp_action); + zfcp_erp_timeout_init(erp_action); + if (zfcp_fsf_exchange_port_data(erp_action, adapter, NULL)) { + retval = ZFCP_ERP_FAILED; + debug_text_event(adapter->erp_dbf, 5, "a_fstx_xf"); + ZFCP_LOG_INFO("error: initiation of exchange of " + "port data failed for adapter %s\n", + zfcp_get_busid_by_adapter(adapter)); + break; + } + debug_text_event(adapter->erp_dbf, 6, "a_fstx_xok"); + ZFCP_LOG_DEBUG("Xchange underway\n"); + + /* + * Why this works: + * Both the normal completion handler as well as the timeout + * handler will do an 'up' when the 'exchange port data' + * request completes or times out. Thus, the signal to go on + * won't be lost utilizing this semaphore. + * Furthermore, this 'adapter_reopen' action is + * guaranteed to be the only action being there (highest action + * which prevents other actions from being created). + * Resulting from that, the wake signal recognized here + * _must_ be the one belonging to the 'exchange port + * data' request. + */ + down(&adapter->erp_ready_sem); + if (erp_action->status & ZFCP_STATUS_ERP_TIMEDOUT) { + ZFCP_LOG_INFO("error: exchange of port data " + "for adapter %s timed out\n", + zfcp_get_busid_by_adapter(adapter)); + break; + } + + if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, + &adapter->status)) + break; + + ZFCP_LOG_DEBUG("host connection still initialising... " + "waiting and retrying...\n"); + /* sleep a little bit before retry */ + sleep = retries < ZFCP_EXCHANGE_PORT_DATA_SHORT_RETRIES ? + ZFCP_EXCHANGE_PORT_DATA_SHORT_SLEEP : + ZFCP_EXCHANGE_PORT_DATA_LONG_SLEEP; + msleep(jiffies_to_msecs(sleep)); + } + + if (atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, + &adapter->status)) { + ZFCP_LOG_INFO("error: exchange of port data for " + "adapter %s failed\n", + zfcp_get_busid_by_adapter(adapter)); + retval = ZFCP_ERP_FAILED; + } + + return retval; +} + /* * function: * @@ -3194,11 +3272,19 @@ zfcp_erp_action_enqueue(int action, /* fall through !!! */ case ZFCP_ERP_ACTION_REOPEN_PORT_FORCED: - if (atomic_test_mask - (ZFCP_STATUS_COMMON_ERP_INUSE, &port->status) - && port->erp_action.action == - ZFCP_ERP_ACTION_REOPEN_PORT_FORCED) { - debug_text_event(adapter->erp_dbf, 4, "pf_actenq_drp"); + if (atomic_test_mask(ZFCP_STATUS_COMMON_ERP_INUSE, + &port->status)) { + if (port->erp_action.action != + ZFCP_ERP_ACTION_REOPEN_PORT_FORCED) { + ZFCP_LOG_INFO("dropped erp action %i (port " + "0x%016Lx, action in use: %i)\n", + action, port->wwpn, + port->erp_action.action); + debug_text_event(adapter->erp_dbf, 4, + "pf_actenq_drp"); + } else + debug_text_event(adapter->erp_dbf, 4, + "pf_actenq_drpcp"); debug_event(adapter->erp_dbf, 4, &port->wwpn, sizeof (wwn_t)); goto out; @@ -3589,6 +3675,9 @@ zfcp_erp_adapter_access_changed(struct zfcp_adapter *adapter) struct zfcp_port *port; unsigned long flags; + if (adapter->connection_features & FSF_FEATURE_NPIV_MODE) + return; + debug_text_event(adapter->erp_dbf, 3, "a_access_recover"); debug_event(adapter->erp_dbf, 3, &adapter->name, 8); diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index cd98a2de9f8..c3782261cb5 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -96,7 +96,8 @@ extern int zfcp_fsf_open_unit(struct zfcp_erp_action *); extern int zfcp_fsf_close_unit(struct zfcp_erp_action *); extern int zfcp_fsf_exchange_config_data(struct zfcp_erp_action *); -extern int zfcp_fsf_exchange_port_data(struct zfcp_adapter *, +extern int zfcp_fsf_exchange_port_data(struct zfcp_erp_action *, + struct zfcp_adapter *, struct fsf_qtcb_bottom_port *); extern int zfcp_fsf_control_file(struct zfcp_adapter *, struct zfcp_fsf_req **, u32, u32, struct zfcp_sg_list *); @@ -109,7 +110,6 @@ extern int zfcp_fsf_req_create(struct zfcp_adapter *, u32, int, mempool_t *, extern int zfcp_fsf_send_ct(struct zfcp_send_ct *, mempool_t *, struct zfcp_erp_action *); extern int zfcp_fsf_send_els(struct zfcp_send_els *); -extern int zfcp_fsf_req_wait_and_cleanup(struct zfcp_fsf_req *, int, u32 *); extern int zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *, struct zfcp_unit *, struct scsi_cmnd *, @@ -182,9 +182,25 @@ extern void zfcp_erp_port_access_changed(struct zfcp_port *); extern void zfcp_erp_unit_access_changed(struct zfcp_unit *); /******************************** AUX ****************************************/ -extern void zfcp_cmd_dbf_event_fsf(const char *, struct zfcp_fsf_req *, - void *, int); -extern void zfcp_cmd_dbf_event_scsi(const char *, struct scsi_cmnd *); -extern void zfcp_in_els_dbf_event(struct zfcp_adapter *, const char *, - struct fsf_status_read_buffer *, int); +extern void zfcp_hba_dbf_event_fsf_response(struct zfcp_fsf_req *); +extern void zfcp_hba_dbf_event_fsf_unsol(const char *, struct zfcp_adapter *, + struct fsf_status_read_buffer *); +extern void zfcp_hba_dbf_event_qdio(struct zfcp_adapter *, + unsigned int, unsigned int, unsigned int, + int, int); + +extern void zfcp_san_dbf_event_ct_request(struct zfcp_fsf_req *); +extern void zfcp_san_dbf_event_ct_response(struct zfcp_fsf_req *); +extern void zfcp_san_dbf_event_els_request(struct zfcp_fsf_req *); +extern void zfcp_san_dbf_event_els_response(struct zfcp_fsf_req *); +extern void zfcp_san_dbf_event_incoming_els(struct zfcp_fsf_req *); + +extern void zfcp_scsi_dbf_event_result(const char *, int, struct zfcp_adapter *, + struct scsi_cmnd *); +extern void zfcp_scsi_dbf_event_abort(const char *, struct zfcp_adapter *, + struct scsi_cmnd *, + struct zfcp_fsf_req *); +extern void zfcp_scsi_dbf_event_devreset(const char *, u8, struct zfcp_unit *, + struct scsi_cmnd *); + #endif /* ZFCP_EXT_H */ diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index c007b6424e7..3b0fc1163f5 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -59,6 +59,8 @@ static int zfcp_fsf_req_send(struct zfcp_fsf_req *, struct timer_list *); static int zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *); static int zfcp_fsf_fsfstatus_eval(struct zfcp_fsf_req *); static int zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *); +static void zfcp_fsf_link_down_info_eval(struct zfcp_adapter *, + struct fsf_link_down_info *); static int zfcp_fsf_req_dispatch(struct zfcp_fsf_req *); static void zfcp_fsf_req_dismiss(struct zfcp_fsf_req *); @@ -285,51 +287,51 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req) { int retval = 0; struct zfcp_adapter *adapter = fsf_req->adapter; + struct fsf_qtcb *qtcb = fsf_req->qtcb; + union fsf_prot_status_qual *prot_status_qual = + &qtcb->prefix.prot_status_qual; - ZFCP_LOG_DEBUG("QTCB is at %p\n", fsf_req->qtcb); + zfcp_hba_dbf_event_fsf_response(fsf_req); if (fsf_req->status & ZFCP_STATUS_FSFREQ_DISMISSED) { ZFCP_LOG_DEBUG("fsf_req 0x%lx has been dismissed\n", (unsigned long) fsf_req); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR | ZFCP_STATUS_FSFREQ_RETRY; /* only for SCSI cmnds. */ - zfcp_cmd_dbf_event_fsf("dismiss", fsf_req, NULL, 0); goto skip_protstatus; } /* log additional information provided by FSF (if any) */ - if (unlikely(fsf_req->qtcb->header.log_length)) { + if (unlikely(qtcb->header.log_length)) { /* do not trust them ;-) */ - if (fsf_req->qtcb->header.log_start > sizeof(struct fsf_qtcb)) { + if (qtcb->header.log_start > sizeof(struct fsf_qtcb)) { ZFCP_LOG_NORMAL ("bug: ULP (FSF logging) log data starts " "beyond end of packet header. Ignored. " "(start=%i, size=%li)\n", - fsf_req->qtcb->header.log_start, + qtcb->header.log_start, sizeof(struct fsf_qtcb)); goto forget_log; } - if ((size_t) (fsf_req->qtcb->header.log_start + - fsf_req->qtcb->header.log_length) + if ((size_t) (qtcb->header.log_start + qtcb->header.log_length) > sizeof(struct fsf_qtcb)) { ZFCP_LOG_NORMAL("bug: ULP (FSF logging) log data ends " "beyond end of packet header. Ignored. " "(start=%i, length=%i, size=%li)\n", - fsf_req->qtcb->header.log_start, - fsf_req->qtcb->header.log_length, + qtcb->header.log_start, + qtcb->header.log_length, sizeof(struct fsf_qtcb)); goto forget_log; } ZFCP_LOG_TRACE("ULP log data: \n"); ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_TRACE, - (char *) fsf_req->qtcb + - fsf_req->qtcb->header.log_start, - fsf_req->qtcb->header.log_length); + (char *) qtcb + qtcb->header.log_start, + qtcb->header.log_length); } forget_log: /* evaluate FSF Protocol Status */ - switch (fsf_req->qtcb->prefix.prot_status) { + switch (qtcb->prefix.prot_status) { case FSF_PROT_GOOD: case FSF_PROT_FSF_STATUS_PRESENTED: @@ -340,14 +342,9 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req) "microcode of version 0x%x, the device driver " "only supports 0x%x. Aborting.\n", zfcp_get_busid_by_adapter(adapter), - fsf_req->qtcb->prefix.prot_status_qual. - version_error.fsf_version, ZFCP_QTCB_VERSION); - /* stop operation for this adapter */ - debug_text_exception(adapter->erp_dbf, 0, "prot_ver_err"); + prot_status_qual->version_error.fsf_version, + ZFCP_QTCB_VERSION); zfcp_erp_adapter_shutdown(adapter, 0); - zfcp_cmd_dbf_event_fsf("qverserr", fsf_req, - &fsf_req->qtcb->prefix.prot_status_qual, - sizeof (union fsf_prot_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -355,16 +352,10 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req) ZFCP_LOG_NORMAL("bug: Sequence number mismatch between " "driver (0x%x) and adapter %s (0x%x). " "Restarting all operations on this adapter.\n", - fsf_req->qtcb->prefix.req_seq_no, + qtcb->prefix.req_seq_no, zfcp_get_busid_by_adapter(adapter), - fsf_req->qtcb->prefix.prot_status_qual. - sequence_error.exp_req_seq_no); - debug_text_exception(adapter->erp_dbf, 0, "prot_seq_err"); - /* restart operation on this adapter */ + prot_status_qual->sequence_error.exp_req_seq_no); zfcp_erp_adapter_reopen(adapter, 0); - zfcp_cmd_dbf_event_fsf("seqnoerr", fsf_req, - &fsf_req->qtcb->prefix.prot_status_qual, - sizeof (union fsf_prot_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_RETRY; fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -375,116 +366,35 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req) "that used on adapter %s. " "Stopping all operations on this adapter.\n", zfcp_get_busid_by_adapter(adapter)); - debug_text_exception(adapter->erp_dbf, 0, "prot_unsup_qtcb"); zfcp_erp_adapter_shutdown(adapter, 0); - zfcp_cmd_dbf_event_fsf("unsqtcbt", fsf_req, - &fsf_req->qtcb->prefix.prot_status_qual, - sizeof (union fsf_prot_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; case FSF_PROT_HOST_CONNECTION_INITIALIZING: - zfcp_cmd_dbf_event_fsf("hconinit", fsf_req, - &fsf_req->qtcb->prefix.prot_status_qual, - sizeof (union fsf_prot_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; atomic_set_mask(ZFCP_STATUS_ADAPTER_HOST_CON_INIT, &(adapter->status)); - debug_text_event(adapter->erp_dbf, 3, "prot_con_init"); break; case FSF_PROT_DUPLICATE_REQUEST_ID: - if (fsf_req->qtcb) { ZFCP_LOG_NORMAL("bug: The request identifier 0x%Lx " "to the adapter %s is ambiguous. " - "Stopping all operations on this " - "adapter.\n", - *(unsigned long long *) - (&fsf_req->qtcb->bottom.support. - req_handle), - zfcp_get_busid_by_adapter(adapter)); - } else { - ZFCP_LOG_NORMAL("bug: The request identifier %p " - "to the adapter %s is ambiguous. " - "Stopping all operations on this " - "adapter. " - "(bug: got this for an unsolicited " - "status read request)\n", - fsf_req, + "Stopping all operations on this adapter.\n", + *(unsigned long long*) + (&qtcb->bottom.support.req_handle), zfcp_get_busid_by_adapter(adapter)); - } - debug_text_exception(adapter->erp_dbf, 0, "prot_dup_id"); zfcp_erp_adapter_shutdown(adapter, 0); - zfcp_cmd_dbf_event_fsf("dupreqid", fsf_req, - &fsf_req->qtcb->prefix.prot_status_qual, - sizeof (union fsf_prot_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; case FSF_PROT_LINK_DOWN: - /* - * 'test and set' is not atomic here - - * it's ok as long as calls to our response queue handler - * (and thus execution of this code here) are serialized - * by the qdio module - */ - if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, - &adapter->status)) { - switch (fsf_req->qtcb->prefix.prot_status_qual. - locallink_error.code) { - case FSF_PSQ_LINK_NOLIGHT: - ZFCP_LOG_INFO("The local link to adapter %s " - "is down (no light detected).\n", - zfcp_get_busid_by_adapter( - adapter)); - break; - case FSF_PSQ_LINK_WRAPPLUG: - ZFCP_LOG_INFO("The local link to adapter %s " - "is down (wrap plug detected).\n", - zfcp_get_busid_by_adapter( - adapter)); - break; - case FSF_PSQ_LINK_NOFCP: - ZFCP_LOG_INFO("The local link to adapter %s " - "is down (adjacent node on " - "link does not support FCP).\n", - zfcp_get_busid_by_adapter( - adapter)); - break; - default: - ZFCP_LOG_INFO("The local link to adapter %s " - "is down " - "(warning: unknown reason " - "code).\n", - zfcp_get_busid_by_adapter( - adapter)); - break; - - } - /* - * Due to the 'erp failed' flag the adapter won't - * be recovered but will be just set to 'blocked' - * state. All subordinary devices will have state - * 'blocked' and 'erp failed', too. - * Thus the adapter is still able to provide - * 'link up' status without being flooded with - * requests. - * (note: even 'close port' is not permitted) - */ - ZFCP_LOG_INFO("Stopping all operations for adapter " - "%s.\n", - zfcp_get_busid_by_adapter(adapter)); - atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED | - ZFCP_STATUS_COMMON_ERP_FAILED, - &adapter->status); - zfcp_erp_adapter_reopen(adapter, 0); - } + zfcp_fsf_link_down_info_eval(adapter, + &prot_status_qual->link_down_info); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; case FSF_PROT_REEST_QUEUE: - debug_text_event(adapter->erp_dbf, 1, "prot_reest_queue"); - ZFCP_LOG_INFO("The local link to adapter with " + ZFCP_LOG_NORMAL("The local link to adapter with " "%s was re-plugged. " "Re-starting operations on this adapter.\n", zfcp_get_busid_by_adapter(adapter)); @@ -495,9 +405,6 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req) zfcp_erp_adapter_reopen(adapter, ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED | ZFCP_STATUS_COMMON_ERP_FAILED); - zfcp_cmd_dbf_event_fsf("reestque", fsf_req, - &fsf_req->qtcb->prefix.prot_status_qual, - sizeof (union fsf_prot_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -507,12 +414,7 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req) "Restarting all operations on this " "adapter.\n", zfcp_get_busid_by_adapter(adapter)); - debug_text_event(adapter->erp_dbf, 0, "prot_err_sta"); - /* restart operation on this adapter */ zfcp_erp_adapter_reopen(adapter, 0); - zfcp_cmd_dbf_event_fsf("proterrs", fsf_req, - &fsf_req->qtcb->prefix.prot_status_qual, - sizeof (union fsf_prot_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_RETRY; fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -524,11 +426,7 @@ zfcp_fsf_protstatus_eval(struct zfcp_fsf_req *fsf_req) "Stopping all operations on this adapter. " "(debug info 0x%x).\n", zfcp_get_busid_by_adapter(adapter), - fsf_req->qtcb->prefix.prot_status); - debug_text_event(adapter->erp_dbf, 0, "prot_inval:"); - debug_exception(adapter->erp_dbf, 0, - &fsf_req->qtcb->prefix.prot_status, - sizeof (u32)); + qtcb->prefix.prot_status); zfcp_erp_adapter_shutdown(adapter, 0); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; } @@ -568,28 +466,18 @@ zfcp_fsf_fsfstatus_eval(struct zfcp_fsf_req *fsf_req) "(debug info 0x%x).\n", zfcp_get_busid_by_adapter(fsf_req->adapter), fsf_req->qtcb->header.fsf_command); - debug_text_exception(fsf_req->adapter->erp_dbf, 0, - "fsf_s_unknown"); zfcp_erp_adapter_shutdown(fsf_req->adapter, 0); - zfcp_cmd_dbf_event_fsf("unknownc", fsf_req, - &fsf_req->qtcb->header.fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; case FSF_FCP_RSP_AVAILABLE: ZFCP_LOG_DEBUG("FCP Sense data will be presented to the " "SCSI stack.\n"); - debug_text_event(fsf_req->adapter->erp_dbf, 3, "fsf_s_rsp"); break; case FSF_ADAPTER_STATUS_AVAILABLE: - debug_text_event(fsf_req->adapter->erp_dbf, 2, "fsf_s_astatus"); zfcp_fsf_fsfstatus_qual_eval(fsf_req); break; - - default: - break; } skip_fsfstatus: @@ -617,44 +505,28 @@ zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *fsf_req) switch (fsf_req->qtcb->header.fsf_status_qual.word[0]) { case FSF_SQ_FCP_RSP_AVAILABLE: - debug_text_event(fsf_req->adapter->erp_dbf, 4, "fsf_sq_rsp"); break; case FSF_SQ_RETRY_IF_POSSIBLE: /* The SCSI-stack may now issue retries or escalate */ - debug_text_event(fsf_req->adapter->erp_dbf, 2, "fsf_sq_retry"); - zfcp_cmd_dbf_event_fsf("sqretry", fsf_req, - &fsf_req->qtcb->header.fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; case FSF_SQ_COMMAND_ABORTED: /* Carry the aborted state on to upper layer */ - debug_text_event(fsf_req->adapter->erp_dbf, 2, "fsf_sq_abort"); - zfcp_cmd_dbf_event_fsf("sqabort", fsf_req, - &fsf_req->qtcb->header.fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTED; fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; case FSF_SQ_NO_RECOM: - debug_text_exception(fsf_req->adapter->erp_dbf, 0, - "fsf_sq_no_rec"); ZFCP_LOG_NORMAL("bug: No recommendation could be given for a" "problem on the adapter %s " "Stopping all operations on this adapter. ", zfcp_get_busid_by_adapter(fsf_req->adapter)); zfcp_erp_adapter_shutdown(fsf_req->adapter, 0); - zfcp_cmd_dbf_event_fsf("sqnrecom", fsf_req, - &fsf_req->qtcb->header.fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; case FSF_SQ_ULP_PROGRAMMING_ERROR: ZFCP_LOG_NORMAL("error: not enough SBALs for data transfer " "(adapter %s)\n", zfcp_get_busid_by_adapter(fsf_req->adapter)); - debug_text_exception(fsf_req->adapter->erp_dbf, 0, - "fsf_sq_ulp_err"); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; case FSF_SQ_INVOKE_LINK_TEST_PROCEDURE: @@ -668,13 +540,6 @@ zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *fsf_req) ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL, (char *) &fsf_req->qtcb->header.fsf_status_qual, sizeof (union fsf_status_qual)); - debug_text_event(fsf_req->adapter->erp_dbf, 0, "fsf_sq_inval:"); - debug_exception(fsf_req->adapter->erp_dbf, 0, - &fsf_req->qtcb->header.fsf_status_qual.word[0], - sizeof (u32)); - zfcp_cmd_dbf_event_fsf("squndef", fsf_req, - &fsf_req->qtcb->header.fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; } @@ -682,6 +547,110 @@ zfcp_fsf_fsfstatus_qual_eval(struct zfcp_fsf_req *fsf_req) return retval; } +/** + * zfcp_fsf_link_down_info_eval - evaluate link down information block + */ +static void +zfcp_fsf_link_down_info_eval(struct zfcp_adapter *adapter, + struct fsf_link_down_info *link_down) +{ + switch (link_down->error_code) { + case FSF_PSQ_LINK_NO_LIGHT: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(no light detected)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_WRAP_PLUG: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(wrap plug detected)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_NO_FCP: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(adjacent node on link does not support FCP)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_FIRMWARE_UPDATE: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(firmware update in progress)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_INVALID_WWPN: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(duplicate or invalid WWPN detected)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_NO_NPIV_SUPPORT: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(no support for NPIV by Fabric)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_NO_FCP_RESOURCES: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(out of resource in FCP daughtercard)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_NO_FABRIC_RESOURCES: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(out of resource in Fabric)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_FABRIC_LOGIN_UNABLE: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(unable to Fabric login)\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_WWPN_ASSIGNMENT_CORRUPTED: + ZFCP_LOG_NORMAL("WWPN assignment file corrupted on adapter %s\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_MODE_TABLE_CURRUPTED: + ZFCP_LOG_NORMAL("Mode table corrupted on adapter %s\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_PSQ_LINK_NO_WWPN_ASSIGNMENT: + ZFCP_LOG_NORMAL("No WWPN for assignment table on adapter %s\n", + zfcp_get_busid_by_adapter(adapter)); + break; + default: + ZFCP_LOG_NORMAL("The local link to adapter %s is down " + "(warning: unknown reason code %d)\n", + zfcp_get_busid_by_adapter(adapter), + link_down->error_code); + } + + if (adapter->connection_features & FSF_FEATURE_NPIV_MODE) + ZFCP_LOG_DEBUG("Debug information to link down: " + "primary_status=0x%02x " + "ioerr_code=0x%02x " + "action_code=0x%02x " + "reason_code=0x%02x " + "explanation_code=0x%02x " + "vendor_specific_code=0x%02x\n", + link_down->primary_status, + link_down->ioerr_code, + link_down->action_code, + link_down->reason_code, + link_down->explanation_code, + link_down->vendor_specific_code); + + if (!atomic_test_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, + &adapter->status)) { + atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, + &adapter->status); + switch (link_down->error_code) { + case FSF_PSQ_LINK_NO_LIGHT: + case FSF_PSQ_LINK_WRAP_PLUG: + case FSF_PSQ_LINK_NO_FCP: + case FSF_PSQ_LINK_FIRMWARE_UPDATE: + zfcp_erp_adapter_reopen(adapter, 0); + break; + default: + zfcp_erp_adapter_failed(adapter); + } + } +} + /* * function: zfcp_fsf_req_dispatch * @@ -696,11 +665,6 @@ zfcp_fsf_req_dispatch(struct zfcp_fsf_req *fsf_req) struct zfcp_adapter *adapter = fsf_req->adapter; int retval = 0; - if (unlikely(fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)) { - ZFCP_LOG_TRACE("fsf_req=%p, QTCB=%p\n", fsf_req, fsf_req->qtcb); - ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_TRACE, - (char *) fsf_req->qtcb, sizeof(struct fsf_qtcb)); - } switch (fsf_req->fsf_command) { @@ -760,13 +724,13 @@ zfcp_fsf_req_dispatch(struct zfcp_fsf_req *fsf_req) fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; ZFCP_LOG_NORMAL("bug: Command issued by the device driver is " "not supported by the adapter %s\n", - zfcp_get_busid_by_adapter(fsf_req->adapter)); + zfcp_get_busid_by_adapter(adapter)); if (fsf_req->fsf_command != fsf_req->qtcb->header.fsf_command) ZFCP_LOG_NORMAL ("bug: Command issued by the device driver differs " "from the command returned by the adapter %s " "(debug info 0x%x, 0x%x).\n", - zfcp_get_busid_by_adapter(fsf_req->adapter), + zfcp_get_busid_by_adapter(adapter), fsf_req->fsf_command, fsf_req->qtcb->header.fsf_command); } @@ -774,8 +738,6 @@ zfcp_fsf_req_dispatch(struct zfcp_fsf_req *fsf_req) if (!erp_action) return retval; - debug_text_event(adapter->erp_dbf, 3, "a_frh"); - debug_event(adapter->erp_dbf, 3, &erp_action->action, sizeof (int)); zfcp_erp_async_handler(erp_action, 0); return retval; @@ -821,7 +783,7 @@ zfcp_fsf_status_read(struct zfcp_adapter *adapter, int req_flags) goto failed_buf; } memset(status_buffer, 0, sizeof (struct fsf_status_read_buffer)); - fsf_req->data.status_read.buffer = status_buffer; + fsf_req->data = (unsigned long) status_buffer; /* insert pointer to respective buffer */ sbale = zfcp_qdio_sbale_curr(fsf_req); @@ -846,6 +808,7 @@ zfcp_fsf_status_read(struct zfcp_adapter *adapter, int req_flags) failed_buf: zfcp_fsf_req_free(fsf_req); failed_req_create: + zfcp_hba_dbf_event_fsf_unsol("fail", adapter, NULL); out: write_unlock_irqrestore(&adapter->request_queue.queue_lock, lock_flags); return retval; @@ -859,7 +822,7 @@ zfcp_fsf_status_read_port_closed(struct zfcp_fsf_req *fsf_req) struct zfcp_port *port; unsigned long flags; - status_buffer = fsf_req->data.status_read.buffer; + status_buffer = (struct fsf_status_read_buffer *) fsf_req->data; adapter = fsf_req->adapter; read_lock_irqsave(&zfcp_data.config_lock, flags); @@ -918,38 +881,33 @@ zfcp_fsf_status_read_handler(struct zfcp_fsf_req *fsf_req) int retval = 0; struct zfcp_adapter *adapter = fsf_req->adapter; struct fsf_status_read_buffer *status_buffer = - fsf_req->data.status_read.buffer; + (struct fsf_status_read_buffer *) fsf_req->data; if (fsf_req->status & ZFCP_STATUS_FSFREQ_DISMISSED) { + zfcp_hba_dbf_event_fsf_unsol("dism", adapter, status_buffer); mempool_free(status_buffer, adapter->pool.data_status_read); zfcp_fsf_req_free(fsf_req); goto out; } + zfcp_hba_dbf_event_fsf_unsol("read", adapter, status_buffer); + switch (status_buffer->status_type) { case FSF_STATUS_READ_PORT_CLOSED: - debug_text_event(adapter->erp_dbf, 3, "unsol_pclosed:"); - debug_event(adapter->erp_dbf, 3, - &status_buffer->d_id, sizeof (u32)); zfcp_fsf_status_read_port_closed(fsf_req); break; case FSF_STATUS_READ_INCOMING_ELS: - debug_text_event(adapter->erp_dbf, 3, "unsol_els:"); zfcp_fsf_incoming_els(fsf_req); break; case FSF_STATUS_READ_SENSE_DATA_AVAIL: - debug_text_event(adapter->erp_dbf, 3, "unsol_sense:"); ZFCP_LOG_INFO("unsolicited sense data received (adapter %s)\n", zfcp_get_busid_by_adapter(adapter)); - ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL, (char *) status_buffer, - sizeof(struct fsf_status_read_buffer)); break; case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: - debug_text_event(adapter->erp_dbf, 3, "unsol_bit_err:"); ZFCP_LOG_NORMAL("Bit error threshold data received:\n"); ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL, (char *) status_buffer, @@ -957,17 +915,32 @@ zfcp_fsf_status_read_handler(struct zfcp_fsf_req *fsf_req) break; case FSF_STATUS_READ_LINK_DOWN: - debug_text_event(adapter->erp_dbf, 0, "unsol_link_down:"); - ZFCP_LOG_INFO("Local link to adapter %s is down\n", + switch (status_buffer->status_subtype) { + case FSF_STATUS_READ_SUB_NO_PHYSICAL_LINK: + ZFCP_LOG_INFO("Physical link to adapter %s is down\n", + zfcp_get_busid_by_adapter(adapter)); + break; + case FSF_STATUS_READ_SUB_FDISC_FAILED: + ZFCP_LOG_INFO("Local link to adapter %s is down " + "due to failed FDISC login\n", zfcp_get_busid_by_adapter(adapter)); - atomic_set_mask(ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, - &adapter->status); - zfcp_erp_adapter_failed(adapter); + break; + case FSF_STATUS_READ_SUB_FIRMWARE_UPDATE: + ZFCP_LOG_INFO("Local link to adapter %s is down " + "due to firmware update on adapter\n", + zfcp_get_busid_by_adapter(adapter)); + break; + default: + ZFCP_LOG_INFO("Local link to adapter %s is down " + "due to unknown reason\n", + zfcp_get_busid_by_adapter(adapter)); + }; + zfcp_fsf_link_down_info_eval(adapter, + (struct fsf_link_down_info *) &status_buffer->payload); break; case FSF_STATUS_READ_LINK_UP: - debug_text_event(adapter->erp_dbf, 2, "unsol_link_up:"); - ZFCP_LOG_INFO("Local link to adapter %s was replugged. " + ZFCP_LOG_NORMAL("Local link to adapter %s was replugged. " "Restarting operations on this adapter\n", zfcp_get_busid_by_adapter(adapter)); /* All ports should be marked as ready to run again */ @@ -980,35 +953,40 @@ zfcp_fsf_status_read_handler(struct zfcp_fsf_req *fsf_req) break; case FSF_STATUS_READ_CFDC_UPDATED: - debug_text_event(adapter->erp_dbf, 2, "unsol_cfdc_update:"); - ZFCP_LOG_INFO("CFDC has been updated on the adapter %s\n", + ZFCP_LOG_NORMAL("CFDC has been updated on the adapter %s\n", zfcp_get_busid_by_adapter(adapter)); zfcp_erp_adapter_access_changed(adapter); break; case FSF_STATUS_READ_CFDC_HARDENED: - debug_text_event(adapter->erp_dbf, 2, "unsol_cfdc_harden:"); switch (status_buffer->status_subtype) { case FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE: - ZFCP_LOG_INFO("CFDC of adapter %s saved on SE\n", + ZFCP_LOG_NORMAL("CFDC of adapter %s saved on SE\n", zfcp_get_busid_by_adapter(adapter)); break; case FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE2: - ZFCP_LOG_INFO("CFDC of adapter %s has been copied " + ZFCP_LOG_NORMAL("CFDC of adapter %s has been copied " "to the secondary SE\n", zfcp_get_busid_by_adapter(adapter)); break; default: - ZFCP_LOG_INFO("CFDC of adapter %s has been hardened\n", + ZFCP_LOG_NORMAL("CFDC of adapter %s has been hardened\n", zfcp_get_busid_by_adapter(adapter)); } break; + case FSF_STATUS_READ_FEATURE_UPDATE_ALERT: + debug_text_event(adapter->erp_dbf, 2, "unsol_features:"); + ZFCP_LOG_INFO("List of supported features on adapter %s has " + "been changed from 0x%08X to 0x%08X\n", + zfcp_get_busid_by_adapter(adapter), + *(u32*) (status_buffer->payload + 4), + *(u32*) (status_buffer->payload)); + adapter->adapter_features = *(u32*) status_buffer->payload; + break; + default: - debug_text_event(adapter->erp_dbf, 0, "unsol_unknown:"); - debug_exception(adapter->erp_dbf, 0, - &status_buffer->status_type, sizeof (u32)); - ZFCP_LOG_NORMAL("bug: An unsolicited status packet of unknown " + ZFCP_LOG_NORMAL("warning: An unsolicited status packet of unknown " "type was received (debug info 0x%x)\n", status_buffer->status_type); ZFCP_LOG_DEBUG("Dump of status_read_buffer %p:\n", @@ -1093,7 +1071,7 @@ zfcp_fsf_abort_fcp_command(unsigned long old_req_id, sbale[0].flags |= SBAL_FLAGS0_TYPE_READ; sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; - fsf_req->data.abort_fcp_command.unit = unit; + fsf_req->data = (unsigned long) unit; /* set handles of unit and its parent port in QTCB */ fsf_req->qtcb->header.lun_handle = unit->handle; @@ -1139,7 +1117,7 @@ static int zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *new_fsf_req) { int retval = -EINVAL; - struct zfcp_unit *unit = new_fsf_req->data.abort_fcp_command.unit; + struct zfcp_unit *unit; unsigned char status_qual = new_fsf_req->qtcb->header.fsf_status_qual.word[0]; @@ -1150,6 +1128,8 @@ zfcp_fsf_abort_fcp_command_handler(struct zfcp_fsf_req *new_fsf_req) goto skip_fsfstatus; } + unit = (struct zfcp_unit *) new_fsf_req->data; + /* evaluate FSF status in QTCB */ switch (new_fsf_req->qtcb->header.fsf_status) { @@ -1364,7 +1344,7 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool, sbale[3].addr = zfcp_sg_to_address(&ct->resp[0]); sbale[3].length = ct->resp[0].length; sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; - } else if (adapter->supported_features & + } else if (adapter->adapter_features & FSF_FEATURE_ELS_CT_CHAINED_SBALS) { /* try to use chained SBALs */ bytes = zfcp_qdio_sbals_from_sg(fsf_req, @@ -1414,7 +1394,9 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool, fsf_req->qtcb->header.port_handle = port->handle; fsf_req->qtcb->bottom.support.service_class = adapter->fc_service_class; fsf_req->qtcb->bottom.support.timeout = ct->timeout; - fsf_req->data.send_ct = ct; + fsf_req->data = (unsigned long) ct; + + zfcp_san_dbf_event_ct_request(fsf_req); /* start QDIO request for this FSF request */ ret = zfcp_fsf_req_send(fsf_req, ct->timer); @@ -1445,10 +1427,10 @@ zfcp_fsf_send_ct(struct zfcp_send_ct *ct, mempool_t *pool, * zfcp_fsf_send_ct_handler - handler for Generic Service requests * @fsf_req: pointer to struct zfcp_fsf_req * - * Data specific for the Generic Service request is passed by - * fsf_req->data.send_ct - * Usually a specific handler for the request is called via - * fsf_req->data.send_ct->handler at end of this function. + * Data specific for the Generic Service request is passed using + * fsf_req->data. There we find the pointer to struct zfcp_send_ct. + * Usually a specific handler for the CT request is called which is + * found in this structure. */ static int zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *fsf_req) @@ -1462,7 +1444,7 @@ zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *fsf_req) u16 subtable, rule, counter; adapter = fsf_req->adapter; - send_ct = fsf_req->data.send_ct; + send_ct = (struct zfcp_send_ct *) fsf_req->data; port = send_ct->port; header = &fsf_req->qtcb->header; bottom = &fsf_req->qtcb->bottom.support; @@ -1474,6 +1456,7 @@ zfcp_fsf_send_ct_handler(struct zfcp_fsf_req *fsf_req) switch (header->fsf_status) { case FSF_GOOD: + zfcp_san_dbf_event_ct_response(fsf_req); retval = 0; break; @@ -1634,7 +1617,7 @@ zfcp_fsf_send_els(struct zfcp_send_els *els) { volatile struct qdio_buffer_element *sbale; struct zfcp_fsf_req *fsf_req; - fc_id_t d_id; + u32 d_id; struct zfcp_adapter *adapter; unsigned long lock_flags; int bytes; @@ -1664,7 +1647,7 @@ zfcp_fsf_send_els(struct zfcp_send_els *els) sbale[3].addr = zfcp_sg_to_address(&els->resp[0]); sbale[3].length = els->resp[0].length; sbale[3].flags |= SBAL_FLAGS_LAST_ENTRY; - } else if (adapter->supported_features & + } else if (adapter->adapter_features & FSF_FEATURE_ELS_CT_CHAINED_SBALS) { /* try to use chained SBALs */ bytes = zfcp_qdio_sbals_from_sg(fsf_req, @@ -1714,10 +1697,12 @@ zfcp_fsf_send_els(struct zfcp_send_els *els) fsf_req->qtcb->bottom.support.d_id = d_id; fsf_req->qtcb->bottom.support.service_class = adapter->fc_service_class; fsf_req->qtcb->bottom.support.timeout = ZFCP_ELS_TIMEOUT; - fsf_req->data.send_els = els; + fsf_req->data = (unsigned long) els; sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); + zfcp_san_dbf_event_els_request(fsf_req); + /* start QDIO request for this FSF request */ ret = zfcp_fsf_req_send(fsf_req, els->timer); if (ret) { @@ -1746,23 +1731,23 @@ zfcp_fsf_send_els(struct zfcp_send_els *els) * zfcp_fsf_send_els_handler - handler for ELS commands * @fsf_req: pointer to struct zfcp_fsf_req * - * Data specific for the ELS command is passed by - * fsf_req->data.send_els - * Usually a specific handler for the command is called via - * fsf_req->data.send_els->handler at end of this function. + * Data specific for the ELS command is passed using + * fsf_req->data. There we find the pointer to struct zfcp_send_els. + * Usually a specific handler for the ELS command is called which is + * found in this structure. */ static int zfcp_fsf_send_els_handler(struct zfcp_fsf_req *fsf_req) { struct zfcp_adapter *adapter; struct zfcp_port *port; - fc_id_t d_id; + u32 d_id; struct fsf_qtcb_header *header; struct fsf_qtcb_bottom_support *bottom; struct zfcp_send_els *send_els; int retval = -EINVAL; u16 subtable, rule, counter; - send_els = fsf_req->data.send_els; + send_els = (struct zfcp_send_els *) fsf_req->data; adapter = send_els->adapter; port = send_els->port; d_id = send_els->d_id; @@ -1775,6 +1760,7 @@ static int zfcp_fsf_send_els_handler(struct zfcp_fsf_req *fsf_req) switch (header->fsf_status) { case FSF_GOOD: + zfcp_san_dbf_event_els_response(fsf_req); retval = 0; break; @@ -1954,7 +1940,9 @@ zfcp_fsf_exchange_config_data(struct zfcp_erp_action *erp_action) erp_action->fsf_req->erp_action = erp_action; erp_action->fsf_req->qtcb->bottom.config.feature_selection = - (FSF_FEATURE_CFDC | FSF_FEATURE_LUN_SHARING); + FSF_FEATURE_CFDC | + FSF_FEATURE_LUN_SHARING | + FSF_FEATURE_UPDATE_ALERT; /* start QDIO request for this FSF request */ retval = zfcp_fsf_req_send(erp_action->fsf_req, &erp_action->timer); @@ -1990,29 +1978,36 @@ zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *fsf_req, int xchg_ok) { struct fsf_qtcb_bottom_config *bottom; struct zfcp_adapter *adapter = fsf_req->adapter; + struct Scsi_Host *shost = adapter->scsi_host; bottom = &fsf_req->qtcb->bottom.config; ZFCP_LOG_DEBUG("low/high QTCB version 0x%x/0x%x of FSF\n", bottom->low_qtcb_version, bottom->high_qtcb_version); adapter->fsf_lic_version = bottom->lic_version; - adapter->supported_features = bottom->supported_features; + adapter->adapter_features = bottom->adapter_features; + adapter->connection_features = bottom->connection_features; adapter->peer_wwpn = 0; adapter->peer_wwnn = 0; adapter->peer_d_id = 0; if (xchg_ok) { - adapter->wwnn = bottom->nport_serv_param.wwnn; - adapter->wwpn = bottom->nport_serv_param.wwpn; - adapter->s_id = bottom->s_id & ZFCP_DID_MASK; + fc_host_node_name(shost) = bottom->nport_serv_param.wwnn; + fc_host_port_name(shost) = bottom->nport_serv_param.wwpn; + fc_host_port_id(shost) = bottom->s_id & ZFCP_DID_MASK; + fc_host_speed(shost) = bottom->fc_link_speed; + fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3; adapter->fc_topology = bottom->fc_topology; - adapter->fc_link_speed = bottom->fc_link_speed; adapter->hydra_version = bottom->adapter_type; + if (adapter->physical_wwpn == 0) + adapter->physical_wwpn = fc_host_port_name(shost); + if (adapter->physical_s_id == 0) + adapter->physical_s_id = fc_host_port_id(shost); } else { - adapter->wwnn = 0; - adapter->wwpn = 0; - adapter->s_id = 0; + fc_host_node_name(shost) = 0; + fc_host_port_name(shost) = 0; + fc_host_port_id(shost) = 0; + fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN; adapter->fc_topology = 0; - adapter->fc_link_speed = 0; adapter->hydra_version = 0; } @@ -2022,26 +2017,28 @@ zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *fsf_req, int xchg_ok) adapter->peer_wwnn = bottom->plogi_payload.wwnn; } - if(adapter->supported_features & FSF_FEATURE_HBAAPI_MANAGEMENT){ + if (adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT) { adapter->hardware_version = bottom->hardware_version; - memcpy(adapter->serial_number, bottom->serial_number, 17); - EBCASC(adapter->serial_number, sizeof(adapter->serial_number)); + memcpy(fc_host_serial_number(shost), bottom->serial_number, + min(FC_SERIAL_NUMBER_SIZE, 17)); + EBCASC(fc_host_serial_number(shost), + min(FC_SERIAL_NUMBER_SIZE, 17)); } ZFCP_LOG_NORMAL("The adapter %s reported the following characteristics:\n" - "WWNN 0x%016Lx, " - "WWPN 0x%016Lx, " - "S_ID 0x%08x,\n" - "adapter version 0x%x, " - "LIC version 0x%x, " - "FC link speed %d Gb/s\n", - zfcp_get_busid_by_adapter(adapter), - adapter->wwnn, - adapter->wwpn, - (unsigned int) adapter->s_id, - adapter->hydra_version, - adapter->fsf_lic_version, - adapter->fc_link_speed); + "WWNN 0x%016Lx, " + "WWPN 0x%016Lx, " + "S_ID 0x%08x,\n" + "adapter version 0x%x, " + "LIC version 0x%x, " + "FC link speed %d Gb/s\n", + zfcp_get_busid_by_adapter(adapter), + (wwn_t) fc_host_node_name(shost), + (wwn_t) fc_host_port_name(shost), + fc_host_port_id(shost), + adapter->hydra_version, + adapter->fsf_lic_version, + fc_host_speed(shost)); if (ZFCP_QTCB_VERSION < bottom->low_qtcb_version) { ZFCP_LOG_NORMAL("error: the adapter %s " "only supports newer control block " @@ -2062,7 +2059,6 @@ zfcp_fsf_exchange_config_evaluate(struct zfcp_fsf_req *fsf_req, int xchg_ok) zfcp_erp_adapter_shutdown(adapter, 0); return -EIO; } - zfcp_set_fc_host_attrs(adapter); return 0; } @@ -2078,11 +2074,12 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req) { struct fsf_qtcb_bottom_config *bottom; struct zfcp_adapter *adapter = fsf_req->adapter; + struct fsf_qtcb *qtcb = fsf_req->qtcb; if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) return -EIO; - switch (fsf_req->qtcb->header.fsf_status) { + switch (qtcb->header.fsf_status) { case FSF_GOOD: if (zfcp_fsf_exchange_config_evaluate(fsf_req, 1)) @@ -2112,7 +2109,7 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req) zfcp_erp_adapter_shutdown(adapter, 0); return -EIO; case FSF_TOPO_FABRIC: - ZFCP_LOG_INFO("Switched fabric fibrechannel " + ZFCP_LOG_NORMAL("Switched fabric fibrechannel " "network detected at adapter %s.\n", zfcp_get_busid_by_adapter(adapter)); break; @@ -2130,7 +2127,7 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req) zfcp_erp_adapter_shutdown(adapter, 0); return -EIO; } - bottom = &fsf_req->qtcb->bottom.config; + bottom = &qtcb->bottom.config; if (bottom->max_qtcb_size < sizeof(struct fsf_qtcb)) { ZFCP_LOG_NORMAL("bug: Maximum QTCB size (%d bytes) " "allowed by the adapter %s " @@ -2155,12 +2152,10 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req) if (zfcp_fsf_exchange_config_evaluate(fsf_req, 0)) return -EIO; - ZFCP_LOG_INFO("Local link to adapter %s is down\n", - zfcp_get_busid_by_adapter(adapter)); - atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK | - ZFCP_STATUS_ADAPTER_LINK_UNPLUGGED, - &adapter->status); - zfcp_erp_adapter_failed(adapter); + atomic_set_mask(ZFCP_STATUS_ADAPTER_XCONFIG_OK, &adapter->status); + + zfcp_fsf_link_down_info_eval(adapter, + &qtcb->header.fsf_status_qual.link_down_info); break; default: debug_text_event(fsf_req->adapter->erp_dbf, 0, "fsf-stat-ng"); @@ -2174,11 +2169,13 @@ zfcp_fsf_exchange_config_data_handler(struct zfcp_fsf_req *fsf_req) /** * zfcp_fsf_exchange_port_data - request information about local port + * @erp_action: ERP action for the adapter for which port data is requested * @adapter: for which port data is requested * @data: response to exchange port data request */ int -zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter, +zfcp_fsf_exchange_port_data(struct zfcp_erp_action *erp_action, + struct zfcp_adapter *adapter, struct fsf_qtcb_bottom_port *data) { volatile struct qdio_buffer_element *sbale; @@ -2187,7 +2184,7 @@ zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter, struct zfcp_fsf_req *fsf_req; struct timer_list *timer; - if(!(adapter->supported_features & FSF_FEATURE_HBAAPI_MANAGEMENT)){ + if (!(adapter->adapter_features & FSF_FEATURE_HBAAPI_MANAGEMENT)) { ZFCP_LOG_INFO("error: exchange port data " "command not supported by adapter %s\n", zfcp_get_busid_by_adapter(adapter)); @@ -2211,12 +2208,18 @@ zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter, goto out; } + if (erp_action) { + erp_action->fsf_req = fsf_req; + fsf_req->erp_action = erp_action; + } + + if (data) + fsf_req->data = (unsigned long) data; + sbale = zfcp_qdio_sbale_req(fsf_req, fsf_req->sbal_curr, 0); sbale[0].flags |= SBAL_FLAGS0_TYPE_READ; sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; - fsf_req->data.port_data = data; - init_timer(timer); timer->function = zfcp_fsf_request_timeout_handler; timer->data = (unsigned long) adapter; @@ -2228,6 +2231,8 @@ zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter, "command on the adapter %s\n", zfcp_get_busid_by_adapter(adapter)); zfcp_fsf_req_free(fsf_req); + if (erp_action) + erp_action->fsf_req = NULL; write_unlock_irqrestore(&adapter->request_queue.queue_lock, lock_flags); goto out; @@ -2256,21 +2261,42 @@ zfcp_fsf_exchange_port_data(struct zfcp_adapter *adapter, static void zfcp_fsf_exchange_port_data_handler(struct zfcp_fsf_req *fsf_req) { - struct fsf_qtcb_bottom_port *bottom; - struct fsf_qtcb_bottom_port *data = fsf_req->data.port_data; + struct zfcp_adapter *adapter = fsf_req->adapter; + struct Scsi_Host *shost = adapter->scsi_host; + struct fsf_qtcb *qtcb = fsf_req->qtcb; + struct fsf_qtcb_bottom_port *bottom, *data; if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) return; - switch (fsf_req->qtcb->header.fsf_status) { + switch (qtcb->header.fsf_status) { case FSF_GOOD: - bottom = &fsf_req->qtcb->bottom.port; - memcpy(data, bottom, sizeof(*data)); + atomic_set_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status); + + bottom = &qtcb->bottom.port; + data = (struct fsf_qtcb_bottom_port*) fsf_req->data; + if (data) + memcpy(data, bottom, sizeof(struct fsf_qtcb_bottom_port)); + if (adapter->connection_features & FSF_FEATURE_NPIV_MODE) { + adapter->physical_wwpn = bottom->wwpn; + adapter->physical_s_id = bottom->fc_port_id; + } else { + adapter->physical_wwpn = fc_host_port_name(shost); + adapter->physical_s_id = fc_host_port_id(shost); + } + fc_host_maxframe_size(shost) = bottom->maximum_frame_size; + break; + + case FSF_EXCHANGE_CONFIG_DATA_INCOMPLETE: + atomic_set_mask(ZFCP_STATUS_ADAPTER_XPORT_OK, &adapter->status); + + zfcp_fsf_link_down_info_eval(adapter, + &qtcb->header.fsf_status_qual.link_down_info); break; default: - debug_text_event(fsf_req->adapter->erp_dbf, 0, "xchg-port-ng"); - debug_event(fsf_req->adapter->erp_dbf, 0, + debug_text_event(adapter->erp_dbf, 0, "xchg-port-ng"); + debug_event(adapter->erp_dbf, 0, &fsf_req->qtcb->header.fsf_status, sizeof(u32)); } } @@ -2312,7 +2338,7 @@ zfcp_fsf_open_port(struct zfcp_erp_action *erp_action) erp_action->fsf_req->qtcb->bottom.support.d_id = erp_action->port->d_id; atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->port->status); - erp_action->fsf_req->data.open_port.port = erp_action->port; + erp_action->fsf_req->data = (unsigned long) erp_action->port; erp_action->fsf_req->erp_action = erp_action; /* start QDIO request for this FSF request */ @@ -2353,7 +2379,7 @@ zfcp_fsf_open_port_handler(struct zfcp_fsf_req *fsf_req) struct fsf_qtcb_header *header; u16 subtable, rule, counter; - port = fsf_req->data.open_port.port; + port = (struct zfcp_port *) fsf_req->data; header = &fsf_req->qtcb->header; if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) { @@ -2566,7 +2592,7 @@ zfcp_fsf_close_port(struct zfcp_erp_action *erp_action) sbale[1].flags |= SBAL_FLAGS_LAST_ENTRY; atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->port->status); - erp_action->fsf_req->data.close_port.port = erp_action->port; + erp_action->fsf_req->data = (unsigned long) erp_action->port; erp_action->fsf_req->erp_action = erp_action; erp_action->fsf_req->qtcb->header.port_handle = erp_action->port->handle; @@ -2606,7 +2632,7 @@ zfcp_fsf_close_port_handler(struct zfcp_fsf_req *fsf_req) int retval = -EINVAL; struct zfcp_port *port; - port = fsf_req->data.close_port.port; + port = (struct zfcp_port *) fsf_req->data; if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) { /* don't change port status in our bookkeeping */ @@ -2703,8 +2729,8 @@ zfcp_fsf_close_physical_port(struct zfcp_erp_action *erp_action) atomic_set_mask(ZFCP_STATUS_PORT_PHYS_CLOSING, &erp_action->port->status); /* save a pointer to this port */ - erp_action->fsf_req->data.close_physical_port.port = erp_action->port; - /* port to be closeed */ + erp_action->fsf_req->data = (unsigned long) erp_action->port; + /* port to be closed */ erp_action->fsf_req->qtcb->header.port_handle = erp_action->port->handle; erp_action->fsf_req->erp_action = erp_action; @@ -2747,7 +2773,7 @@ zfcp_fsf_close_physical_port_handler(struct zfcp_fsf_req *fsf_req) struct fsf_qtcb_header *header; u16 subtable, rule, counter; - port = fsf_req->data.close_physical_port.port; + port = (struct zfcp_port *) fsf_req->data; header = &fsf_req->qtcb->header; if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) { @@ -2908,10 +2934,11 @@ zfcp_fsf_open_unit(struct zfcp_erp_action *erp_action) erp_action->port->handle; erp_action->fsf_req->qtcb->bottom.support.fcp_lun = erp_action->unit->fcp_lun; + if (!(erp_action->adapter->connection_features & FSF_FEATURE_NPIV_MODE)) erp_action->fsf_req->qtcb->bottom.support.option = FSF_OPEN_LUN_SUPPRESS_BOXING; atomic_set_mask(ZFCP_STATUS_COMMON_OPENING, &erp_action->unit->status); - erp_action->fsf_req->data.open_unit.unit = erp_action->unit; + erp_action->fsf_req->data = (unsigned long) erp_action->unit; erp_action->fsf_req->erp_action = erp_action; /* start QDIO request for this FSF request */ @@ -2955,9 +2982,9 @@ zfcp_fsf_open_unit_handler(struct zfcp_fsf_req *fsf_req) struct fsf_qtcb_bottom_support *bottom; struct fsf_queue_designator *queue_designator; u16 subtable, rule, counter; - u32 allowed, exclusive, readwrite; + int exclusive, readwrite; - unit = fsf_req->data.open_unit.unit; + unit = (struct zfcp_unit *) fsf_req->data; if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) { /* don't change unit status in our bookkeeping */ @@ -2969,10 +2996,6 @@ zfcp_fsf_open_unit_handler(struct zfcp_fsf_req *fsf_req) bottom = &fsf_req->qtcb->bottom.support; queue_designator = &header->fsf_status_qual.fsf_queue_designator; - allowed = bottom->lun_access_info & FSF_UNIT_ACCESS_OPEN_LUN_ALLOWED; - exclusive = bottom->lun_access_info & FSF_UNIT_ACCESS_EXCLUSIVE; - readwrite = bottom->lun_access_info & FSF_UNIT_ACCESS_OUTBOUND_TRANSFER; - atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED | ZFCP_STATUS_UNIT_SHARED | ZFCP_STATUS_UNIT_READONLY, @@ -3146,10 +3169,15 @@ zfcp_fsf_open_unit_handler(struct zfcp_fsf_req *fsf_req) unit->handle); /* mark unit as open */ atomic_set_mask(ZFCP_STATUS_COMMON_OPEN, &unit->status); - atomic_clear_mask(ZFCP_STATUS_COMMON_ACCESS_DENIED | - ZFCP_STATUS_COMMON_ACCESS_BOXED, - &unit->status); - if (adapter->supported_features & FSF_FEATURE_LUN_SHARING){ + + if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE) && + (adapter->adapter_features & FSF_FEATURE_LUN_SHARING) && + (adapter->ccw_device->id.dev_model != ZFCP_DEVICE_MODEL_PRIV)) { + exclusive = (bottom->lun_access_info & + FSF_UNIT_ACCESS_EXCLUSIVE); + readwrite = (bottom->lun_access_info & + FSF_UNIT_ACCESS_OUTBOUND_TRANSFER); + if (!exclusive) atomic_set_mask(ZFCP_STATUS_UNIT_SHARED, &unit->status); @@ -3242,7 +3270,7 @@ zfcp_fsf_close_unit(struct zfcp_erp_action *erp_action) erp_action->port->handle; erp_action->fsf_req->qtcb->header.lun_handle = erp_action->unit->handle; atomic_set_mask(ZFCP_STATUS_COMMON_CLOSING, &erp_action->unit->status); - erp_action->fsf_req->data.close_unit.unit = erp_action->unit; + erp_action->fsf_req->data = (unsigned long) erp_action->unit; erp_action->fsf_req->erp_action = erp_action; /* start QDIO request for this FSF request */ @@ -3281,7 +3309,7 @@ zfcp_fsf_close_unit_handler(struct zfcp_fsf_req *fsf_req) int retval = -EINVAL; struct zfcp_unit *unit; - unit = fsf_req->data.close_unit.unit; /* restore unit */ + unit = (struct zfcp_unit *) fsf_req->data; if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) { /* don't change unit status in our bookkeeping */ @@ -3305,9 +3333,6 @@ zfcp_fsf_close_unit_handler(struct zfcp_fsf_req *fsf_req) debug_text_event(fsf_req->adapter->erp_dbf, 1, "fsf_s_phand_nv"); zfcp_erp_adapter_reopen(unit->port->adapter, 0); - zfcp_cmd_dbf_event_fsf("porthinv", fsf_req, - &fsf_req->qtcb->header.fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -3326,9 +3351,6 @@ zfcp_fsf_close_unit_handler(struct zfcp_fsf_req *fsf_req) debug_text_event(fsf_req->adapter->erp_dbf, 1, "fsf_s_lhand_nv"); zfcp_erp_port_reopen(unit->port, 0); - zfcp_cmd_dbf_event_fsf("lunhinv", fsf_req, - &fsf_req->qtcb->header.fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -3436,21 +3458,14 @@ zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter, goto failed_req_create; } - /* - * associate FSF request with SCSI request - * (need this for look up on abort) - */ - fsf_req->data.send_fcp_command_task.fsf_req = fsf_req; - scsi_cmnd->host_scribble = (char *) &(fsf_req->data); + zfcp_unit_get(unit); + fsf_req->unit = unit; - /* - * associate SCSI command with FSF request - * (need this for look up on normal command completion) - */ - fsf_req->data.send_fcp_command_task.scsi_cmnd = scsi_cmnd; - fsf_req->data.send_fcp_command_task.start_jiffies = jiffies; - fsf_req->data.send_fcp_command_task.unit = unit; - ZFCP_LOG_DEBUG("unit=%p, fcp_lun=0x%016Lx\n", unit, unit->fcp_lun); + /* associate FSF request with SCSI request (for look up on abort) */ + scsi_cmnd->host_scribble = (char *) fsf_req; + + /* associate SCSI command with FSF request */ + fsf_req->data = (unsigned long) scsi_cmnd; /* set handles of unit and its parent port in QTCB */ fsf_req->qtcb->header.lun_handle = unit->handle; @@ -3584,6 +3599,7 @@ zfcp_fsf_send_fcp_command_task(struct zfcp_adapter *adapter, send_failed: no_fit: failed_scsi_cmnd: + zfcp_unit_put(unit); zfcp_fsf_req_free(fsf_req); fsf_req = NULL; scsi_cmnd->host_scribble = NULL; @@ -3640,7 +3656,7 @@ zfcp_fsf_send_fcp_command_task_management(struct zfcp_adapter *adapter, * hold a pointer to the unit being target of this * task management request */ - fsf_req->data.send_fcp_command_task_management.unit = unit; + fsf_req->data = (unsigned long) unit; /* set FSF related fields in QTCB */ fsf_req->qtcb->header.lun_handle = unit->handle; @@ -3706,9 +3722,9 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req) header = &fsf_req->qtcb->header; if (unlikely(fsf_req->status & ZFCP_STATUS_FSFREQ_TASK_MANAGEMENT)) - unit = fsf_req->data.send_fcp_command_task_management.unit; + unit = (struct zfcp_unit *) fsf_req->data; else - unit = fsf_req->data.send_fcp_command_task.unit; + unit = fsf_req->unit; if (unlikely(fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR)) { /* go directly to calls of special handlers */ @@ -3765,10 +3781,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req) debug_text_event(fsf_req->adapter->erp_dbf, 1, "fsf_s_hand_mis"); zfcp_erp_adapter_reopen(unit->port->adapter, 0); - zfcp_cmd_dbf_event_fsf("handmism", - fsf_req, - &header->fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -3789,10 +3801,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req) debug_text_exception(fsf_req->adapter->erp_dbf, 0, "fsf_s_class_nsup"); zfcp_erp_adapter_shutdown(unit->port->adapter, 0); - zfcp_cmd_dbf_event_fsf("unsclass", - fsf_req, - &header->fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -3811,10 +3819,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req) debug_text_event(fsf_req->adapter->erp_dbf, 1, "fsf_s_fcp_lun_nv"); zfcp_erp_port_reopen(unit->port, 0); - zfcp_cmd_dbf_event_fsf("fluninv", - fsf_req, - &header->fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -3853,10 +3857,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req) debug_text_event(fsf_req->adapter->erp_dbf, 0, "fsf_s_dir_ind_nv"); zfcp_erp_adapter_shutdown(unit->port->adapter, 0); - zfcp_cmd_dbf_event_fsf("dirinv", - fsf_req, - &header->fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -3872,10 +3872,6 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req) debug_text_event(fsf_req->adapter->erp_dbf, 0, "fsf_s_cmd_len_nv"); zfcp_erp_adapter_shutdown(unit->port->adapter, 0); - zfcp_cmd_dbf_event_fsf("cleninv", - fsf_req, - &header->fsf_status_qual, - sizeof (union fsf_status_qual)); fsf_req->status |= ZFCP_STATUS_FSFREQ_ERROR; break; @@ -3947,6 +3943,8 @@ zfcp_fsf_send_fcp_command_handler(struct zfcp_fsf_req *fsf_req) zfcp_fsf_send_fcp_command_task_management_handler(fsf_req); } else { retval = zfcp_fsf_send_fcp_command_task_handler(fsf_req); + fsf_req->unit = NULL; + zfcp_unit_put(unit); } return retval; } @@ -3970,10 +3968,10 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req) u32 sns_len; char *fcp_rsp_info = zfcp_get_fcp_rsp_info_ptr(fcp_rsp_iu); unsigned long flags; - struct zfcp_unit *unit = fsf_req->data.send_fcp_command_task.unit; + struct zfcp_unit *unit = fsf_req->unit; read_lock_irqsave(&fsf_req->adapter->abort_lock, flags); - scpnt = fsf_req->data.send_fcp_command_task.scsi_cmnd; + scpnt = (struct scsi_cmnd *) fsf_req->data; if (unlikely(!scpnt)) { ZFCP_LOG_DEBUG ("Command with fsf_req %p is not associated to " @@ -4043,7 +4041,6 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req) ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG, (char *) &fsf_req->qtcb-> bottom.io.fcp_cmnd, FSF_FCP_CMND_SIZE); - zfcp_cmd_dbf_event_fsf("clenmis", fsf_req, NULL, 0); set_host_byte(&scpnt->result, DID_ERROR); goto skip_fsfstatus; case RSP_CODE_FIELD_INVALID: @@ -4062,7 +4059,6 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req) (char *) &fsf_req->qtcb-> bottom.io.fcp_cmnd, FSF_FCP_CMND_SIZE); set_host_byte(&scpnt->result, DID_ERROR); - zfcp_cmd_dbf_event_fsf("codeinv", fsf_req, NULL, 0); goto skip_fsfstatus; case RSP_CODE_RO_MISMATCH: /* hardware bug */ @@ -4079,7 +4075,6 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req) ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG, (char *) &fsf_req->qtcb-> bottom.io.fcp_cmnd, FSF_FCP_CMND_SIZE); - zfcp_cmd_dbf_event_fsf("codemism", fsf_req, NULL, 0); set_host_byte(&scpnt->result, DID_ERROR); goto skip_fsfstatus; default: @@ -4096,7 +4091,6 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req) ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_DEBUG, (char *) &fsf_req->qtcb-> bottom.io.fcp_cmnd, FSF_FCP_CMND_SIZE); - zfcp_cmd_dbf_event_fsf("undeffcp", fsf_req, NULL, 0); set_host_byte(&scpnt->result, DID_ERROR); goto skip_fsfstatus; } @@ -4158,19 +4152,17 @@ zfcp_fsf_send_fcp_command_task_handler(struct zfcp_fsf_req *fsf_req) skip_fsfstatus: ZFCP_LOG_DEBUG("scpnt->result =0x%x\n", scpnt->result); - zfcp_cmd_dbf_event_scsi("response", scpnt); + if (scpnt->result != 0) + zfcp_scsi_dbf_event_result("erro", 3, fsf_req->adapter, scpnt); + else if (scpnt->retries > 0) + zfcp_scsi_dbf_event_result("retr", 4, fsf_req->adapter, scpnt); + else + zfcp_scsi_dbf_event_result("norm", 6, fsf_req->adapter, scpnt); /* cleanup pointer (need this especially for abort) */ scpnt->host_scribble = NULL; - /* - * NOTE: - * according to the outcome of a discussion on linux-scsi we - * don't need to grab the io_request_lock here since we use - * the new eh - */ /* always call back */ - (scpnt->scsi_done) (scpnt); /* @@ -4198,8 +4190,7 @@ zfcp_fsf_send_fcp_command_task_management_handler(struct zfcp_fsf_req *fsf_req) struct fcp_rsp_iu *fcp_rsp_iu = (struct fcp_rsp_iu *) &(fsf_req->qtcb->bottom.io.fcp_rsp); char *fcp_rsp_info = zfcp_get_fcp_rsp_info_ptr(fcp_rsp_iu); - struct zfcp_unit *unit = - fsf_req->data.send_fcp_command_task_management.unit; + struct zfcp_unit *unit = (struct zfcp_unit *) fsf_req->data; del_timer(&fsf_req->adapter->scsi_er_timer); if (fsf_req->status & ZFCP_STATUS_FSFREQ_ERROR) { @@ -4276,7 +4267,7 @@ zfcp_fsf_control_file(struct zfcp_adapter *adapter, int direction; int retval = 0; - if (!(adapter->supported_features & FSF_FEATURE_CFDC)) { + if (!(adapter->adapter_features & FSF_FEATURE_CFDC)) { ZFCP_LOG_INFO("cfdc not supported (adapter %s)\n", zfcp_get_busid_by_adapter(adapter)); retval = -EOPNOTSUPP; @@ -4549,52 +4540,6 @@ skip_fsfstatus: return retval; } - -/* - * function: zfcp_fsf_req_wait_and_cleanup - * - * purpose: - * - * FIXME(design): signal seems to be <0 !!! - * returns: 0 - request completed (*status is valid), cleanup succ. - * <0 - request completed (*status is valid), cleanup failed - * >0 - signal which interrupted waiting (*status invalid), - * request not completed, no cleanup - * - * *status is a copy of status of completed fsf_req - */ -int -zfcp_fsf_req_wait_and_cleanup(struct zfcp_fsf_req *fsf_req, - int interruptible, u32 * status) -{ - int retval = 0; - int signal = 0; - - if (interruptible) { - __wait_event_interruptible(fsf_req->completion_wq, - fsf_req->status & - ZFCP_STATUS_FSFREQ_COMPLETED, - signal); - if (signal) { - ZFCP_LOG_DEBUG("Caught signal %i while waiting for the " - "completion of the request at %p\n", - signal, fsf_req); - retval = signal; - goto out; - } - } else { - __wait_event(fsf_req->completion_wq, - fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); - } - - *status = fsf_req->status; - - /* cleanup request */ - zfcp_fsf_req_free(fsf_req); - out: - return retval; -} - static inline int zfcp_fsf_req_sbal_check(unsigned long *flags, struct zfcp_qdio_queue *queue, int needed) @@ -4610,15 +4555,16 @@ zfcp_fsf_req_sbal_check(unsigned long *flags, * set qtcb pointer in fsf_req and initialize QTCB */ static inline void -zfcp_fsf_req_qtcb_init(struct zfcp_fsf_req *fsf_req, u32 fsf_cmd) +zfcp_fsf_req_qtcb_init(struct zfcp_fsf_req *fsf_req) { if (likely(fsf_req->qtcb != NULL)) { + fsf_req->qtcb->prefix.req_seq_no = fsf_req->adapter->fsf_req_seq_no; fsf_req->qtcb->prefix.req_id = (unsigned long)fsf_req; fsf_req->qtcb->prefix.ulp_info = ZFCP_ULP_INFO_VERSION; - fsf_req->qtcb->prefix.qtcb_type = fsf_qtcb_type[fsf_cmd]; + fsf_req->qtcb->prefix.qtcb_type = fsf_qtcb_type[fsf_req->fsf_command]; fsf_req->qtcb->prefix.qtcb_version = ZFCP_QTCB_VERSION; fsf_req->qtcb->header.req_handle = (unsigned long)fsf_req; - fsf_req->qtcb->header.fsf_command = fsf_cmd; + fsf_req->qtcb->header.fsf_command = fsf_req->fsf_command; } } @@ -4686,7 +4632,10 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags, goto failed_fsf_req; } - zfcp_fsf_req_qtcb_init(fsf_req, fsf_cmd); + fsf_req->adapter = adapter; + fsf_req->fsf_command = fsf_cmd; + + zfcp_fsf_req_qtcb_init(fsf_req); /* initialize waitqueue which may be used to wait on this request completion */ @@ -4708,8 +4657,10 @@ zfcp_fsf_req_create(struct zfcp_adapter *adapter, u32 fsf_cmd, int req_flags, goto failed_sbals; } - fsf_req->adapter = adapter; /* pointer to "parent" adapter */ - fsf_req->fsf_command = fsf_cmd; + if (fsf_req->qtcb) { + fsf_req->seq_no = adapter->fsf_req_seq_no; + fsf_req->qtcb->prefix.req_seq_no = adapter->fsf_req_seq_no; + } fsf_req->sbal_number = 1; fsf_req->sbal_first = req_queue->free_index; fsf_req->sbal_curr = req_queue->free_index; @@ -4760,9 +4711,9 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer) struct zfcp_adapter *adapter; struct zfcp_qdio_queue *req_queue; volatile struct qdio_buffer_element *sbale; + int inc_seq_no; int new_distance_from_int; unsigned long flags; - int inc_seq_no = 1; int retval = 0; adapter = fsf_req->adapter; @@ -4776,23 +4727,13 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer) ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_TRACE, (char *) sbale[1].addr, sbale[1].length); - /* set sequence counter in QTCB */ - if (likely(fsf_req->qtcb)) { - fsf_req->qtcb->prefix.req_seq_no = adapter->fsf_req_seq_no; - fsf_req->seq_no = adapter->fsf_req_seq_no; - ZFCP_LOG_TRACE("FSF request %p of adapter %s gets " - "FSF sequence counter value of %i\n", - fsf_req, - zfcp_get_busid_by_adapter(adapter), - fsf_req->qtcb->prefix.req_seq_no); - } else - inc_seq_no = 0; - /* put allocated FSF request at list tail */ spin_lock_irqsave(&adapter->fsf_req_list_lock, flags); list_add_tail(&fsf_req->list, &adapter->fsf_req_list_head); spin_unlock_irqrestore(&adapter->fsf_req_list_lock, flags); + inc_seq_no = (fsf_req->qtcb != NULL); + /* figure out expiration time of timeout and start timeout */ if (unlikely(timer)) { timer->expires += jiffies; @@ -4822,6 +4763,8 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer) req_queue->free_index %= QDIO_MAX_BUFFERS_PER_Q; /* wrap if needed */ new_distance_from_int = zfcp_qdio_determine_pci(req_queue, fsf_req); + fsf_req->issued = get_clock(); + retval = do_QDIO(adapter->ccw_device, QDIO_FLAG_SYNC_OUTPUT, 0, fsf_req->sbal_first, fsf_req->sbal_number, NULL); @@ -4860,15 +4803,11 @@ zfcp_fsf_req_send(struct zfcp_fsf_req *fsf_req, struct timer_list *timer) * routines resulting in missing sequence counter values * otherwise, */ + /* Don't increase for unsolicited status */ - if (likely(inc_seq_no)) { + if (inc_seq_no) adapter->fsf_req_seq_no++; - ZFCP_LOG_TRACE - ("FSF sequence counter value of adapter %s " - "increased to %i\n", - zfcp_get_busid_by_adapter(adapter), - adapter->fsf_req_seq_no); - } + /* count FSF requests pending */ atomic_inc(&adapter->fsf_reqs_active); } diff --git a/drivers/s390/scsi/zfcp_fsf.h b/drivers/s390/scsi/zfcp_fsf.h index 07140dfda2a..48719f05595 100644 --- a/drivers/s390/scsi/zfcp_fsf.h +++ b/drivers/s390/scsi/zfcp_fsf.h @@ -116,6 +116,7 @@ #define FSF_INVALID_COMMAND_OPTION 0x000000E5 /* #define FSF_ERROR 0x000000FF */ +#define FSF_PROT_STATUS_QUAL_SIZE 16 #define FSF_STATUS_QUALIFIER_SIZE 16 /* FSF status qualifier, recommendations */ @@ -139,9 +140,18 @@ #define FSF_SQ_CFDC_SUBTABLE_LUN 0x0004 /* FSF status qualifier (most significant 4 bytes), local link down */ -#define FSF_PSQ_LINK_NOLIGHT 0x00000004 -#define FSF_PSQ_LINK_WRAPPLUG 0x00000008 -#define FSF_PSQ_LINK_NOFCP 0x00000010 +#define FSF_PSQ_LINK_NO_LIGHT 0x00000004 +#define FSF_PSQ_LINK_WRAP_PLUG 0x00000008 +#define FSF_PSQ_LINK_NO_FCP 0x00000010 +#define FSF_PSQ_LINK_FIRMWARE_UPDATE 0x00000020 +#define FSF_PSQ_LINK_INVALID_WWPN 0x00000100 +#define FSF_PSQ_LINK_NO_NPIV_SUPPORT 0x00000200 +#define FSF_PSQ_LINK_NO_FCP_RESOURCES 0x00000400 +#define FSF_PSQ_LINK_NO_FABRIC_RESOURCES 0x00000800 +#define FSF_PSQ_LINK_FABRIC_LOGIN_UNABLE 0x00001000 +#define FSF_PSQ_LINK_WWPN_ASSIGNMENT_CORRUPTED 0x00002000 +#define FSF_PSQ_LINK_MODE_TABLE_CURRUPTED 0x00004000 +#define FSF_PSQ_LINK_NO_WWPN_ASSIGNMENT 0x00008000 /* payload size in status read buffer */ #define FSF_STATUS_READ_PAYLOAD_SIZE 4032 @@ -154,15 +164,21 @@ #define FSF_STATUS_READ_INCOMING_ELS 0x00000002 #define FSF_STATUS_READ_SENSE_DATA_AVAIL 0x00000003 #define FSF_STATUS_READ_BIT_ERROR_THRESHOLD 0x00000004 -#define FSF_STATUS_READ_LINK_DOWN 0x00000005 /* FIXME: really? */ +#define FSF_STATUS_READ_LINK_DOWN 0x00000005 #define FSF_STATUS_READ_LINK_UP 0x00000006 #define FSF_STATUS_READ_CFDC_UPDATED 0x0000000A #define FSF_STATUS_READ_CFDC_HARDENED 0x0000000B +#define FSF_STATUS_READ_FEATURE_UPDATE_ALERT 0x0000000C /* status subtypes in status read buffer */ #define FSF_STATUS_READ_SUB_CLOSE_PHYS_PORT 0x00000001 #define FSF_STATUS_READ_SUB_ERROR_PORT 0x00000002 +/* status subtypes for link down */ +#define FSF_STATUS_READ_SUB_NO_PHYSICAL_LINK 0x00000000 +#define FSF_STATUS_READ_SUB_FDISC_FAILED 0x00000001 +#define FSF_STATUS_READ_SUB_FIRMWARE_UPDATE 0x00000002 + /* status subtypes for CFDC */ #define FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE 0x00000002 #define FSF_STATUS_READ_SUB_CFDC_HARDENED_ON_SE2 0x0000000F @@ -193,11 +209,15 @@ #define FSF_QTCB_LOG_SIZE 1024 /* channel features */ -#define FSF_FEATURE_QTCB_SUPPRESSION 0x00000001 #define FSF_FEATURE_CFDC 0x00000002 #define FSF_FEATURE_LUN_SHARING 0x00000004 #define FSF_FEATURE_HBAAPI_MANAGEMENT 0x00000010 #define FSF_FEATURE_ELS_CT_CHAINED_SBALS 0x00000020 +#define FSF_FEATURE_UPDATE_ALERT 0x00000100 + +/* host connection features */ +#define FSF_FEATURE_NPIV_MODE 0x00000001 +#define FSF_FEATURE_VM_ASSIGNED_WWPN 0x00000002 /* option */ #define FSF_OPEN_LUN_SUPPRESS_BOXING 0x00000001 @@ -305,15 +325,23 @@ struct fsf_qual_sequence_error { u32 res1[3]; } __attribute__ ((packed)); -struct fsf_qual_locallink_error { - u32 code; - u32 res1[3]; +struct fsf_link_down_info { + u32 error_code; + u32 res1; + u8 res2[2]; + u8 primary_status; + u8 ioerr_code; + u8 action_code; + u8 reason_code; + u8 explanation_code; + u8 vendor_specific_code; } __attribute__ ((packed)); union fsf_prot_status_qual { + u64 doubleword[FSF_PROT_STATUS_QUAL_SIZE / sizeof(u64)]; struct fsf_qual_version_error version_error; struct fsf_qual_sequence_error sequence_error; - struct fsf_qual_locallink_error locallink_error; + struct fsf_link_down_info link_down_info; } __attribute__ ((packed)); struct fsf_qtcb_prefix { @@ -331,7 +359,9 @@ union fsf_status_qual { u8 byte[FSF_STATUS_QUALIFIER_SIZE]; u16 halfword[FSF_STATUS_QUALIFIER_SIZE / sizeof (u16)]; u32 word[FSF_STATUS_QUALIFIER_SIZE / sizeof (u32)]; + u64 doubleword[FSF_STATUS_QUALIFIER_SIZE / sizeof(u64)]; struct fsf_queue_designator fsf_queue_designator; + struct fsf_link_down_info link_down_info; } __attribute__ ((packed)); struct fsf_qtcb_header { @@ -406,8 +436,8 @@ struct fsf_qtcb_bottom_config { u32 low_qtcb_version; u32 max_qtcb_size; u32 max_data_transfer_size; - u32 supported_features; - u8 res1[4]; + u32 adapter_features; + u32 connection_features; u32 fc_topology; u32 fc_link_speed; u32 adapter_type; @@ -425,7 +455,7 @@ struct fsf_qtcb_bottom_config { } __attribute__ ((packed)); struct fsf_qtcb_bottom_port { - u8 res1[8]; + u64 wwpn; u32 fc_port_id; u32 port_type; u32 port_state; diff --git a/drivers/s390/scsi/zfcp_qdio.c b/drivers/s390/scsi/zfcp_qdio.c index 24e16ec331d..d719f66a29a 100644 --- a/drivers/s390/scsi/zfcp_qdio.c +++ b/drivers/s390/scsi/zfcp_qdio.c @@ -54,8 +54,7 @@ static inline int zfcp_qdio_sbals_from_buffer static qdio_handler_t zfcp_qdio_request_handler; static qdio_handler_t zfcp_qdio_response_handler; static int zfcp_qdio_handler_error_check(struct zfcp_adapter *, - unsigned int, - unsigned int, unsigned int); + unsigned int, unsigned int, unsigned int, int, int); #define ZFCP_LOG_AREA ZFCP_LOG_AREA_QDIO @@ -214,22 +213,12 @@ zfcp_qdio_allocate(struct zfcp_adapter *adapter) * */ static inline int -zfcp_qdio_handler_error_check(struct zfcp_adapter *adapter, - unsigned int status, - unsigned int qdio_error, unsigned int siga_error) +zfcp_qdio_handler_error_check(struct zfcp_adapter *adapter, unsigned int status, + unsigned int qdio_error, unsigned int siga_error, + int first_element, int elements_processed) { int retval = 0; - if (ZFCP_LOG_CHECK(ZFCP_LOG_LEVEL_TRACE)) { - if (status & QDIO_STATUS_INBOUND_INT) { - ZFCP_LOG_TRACE("status is" - " QDIO_STATUS_INBOUND_INT \n"); - } - if (status & QDIO_STATUS_OUTBOUND_INT) { - ZFCP_LOG_TRACE("status is" - " QDIO_STATUS_OUTBOUND_INT \n"); - } - } if (unlikely(status & QDIO_STATUS_LOOK_FOR_ERROR)) { retval = -EIO; @@ -237,9 +226,10 @@ zfcp_qdio_handler_error_check(struct zfcp_adapter *adapter, "qdio_error=0x%x, siga_error=0x%x)\n", status, qdio_error, siga_error); - /* Restarting IO on the failed adapter from scratch */ - debug_text_event(adapter->erp_dbf, 1, "qdio_err"); + zfcp_hba_dbf_event_qdio(adapter, status, qdio_error, siga_error, + first_element, elements_processed); /* + * Restarting IO on the failed adapter from scratch. * Since we have been using this adapter, it is save to assume * that it is not failed but recoverable. The card seems to * report link-up events by self-initiated queue shutdown. @@ -282,7 +272,8 @@ zfcp_qdio_request_handler(struct ccw_device *ccw_device, first_element, elements_processed); if (unlikely(zfcp_qdio_handler_error_check(adapter, status, qdio_error, - siga_error))) + siga_error, first_element, + elements_processed))) goto out; /* * we stored address of struct zfcp_adapter data structure @@ -334,7 +325,8 @@ zfcp_qdio_response_handler(struct ccw_device *ccw_device, queue = &adapter->response_queue; if (unlikely(zfcp_qdio_handler_error_check(adapter, status, qdio_error, - siga_error))) + siga_error, first_element, + elements_processed))) goto out; /* diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index 31a76065cf2..3dcd1bfba3b 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -44,7 +44,8 @@ static int zfcp_scsi_eh_abort_handler(struct scsi_cmnd *); static int zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *); static int zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *); static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *); -static int zfcp_task_management_function(struct zfcp_unit *, u8); +static int zfcp_task_management_function(struct zfcp_unit *, u8, + struct scsi_cmnd *); static struct zfcp_unit *zfcp_unit_lookup(struct zfcp_adapter *, int, scsi_id_t, scsi_lun_t); @@ -242,7 +243,10 @@ static void zfcp_scsi_command_fail(struct scsi_cmnd *scpnt, int result) { set_host_byte(&scpnt->result, result); - zfcp_cmd_dbf_event_scsi("failing", scpnt); + if ((scpnt->device != NULL) && (scpnt->device->host != NULL)) + zfcp_scsi_dbf_event_result("fail", 4, + (struct zfcp_adapter*) scpnt->device->host->hostdata[0], + scpnt); /* return directly */ scpnt->scsi_done(scpnt); } @@ -414,67 +418,38 @@ zfcp_port_lookup(struct zfcp_adapter *adapter, int channel, scsi_id_t id) return (struct zfcp_port *) NULL; } -/* - * function: zfcp_scsi_eh_abort_handler - * - * purpose: tries to abort the specified (timed out) SCSI command - * - * note: We do not need to care for a SCSI command which completes - * normally but late during this abort routine runs. - * We are allowed to return late commands to the SCSI stack. - * It tracks the state of commands and will handle late commands. - * (Usually, the normal completion of late commands is ignored with - * respect to the running abort operation. Grep for 'done_late' - * in the SCSI stacks sources.) +/** + * zfcp_scsi_eh_abort_handler - abort the specified SCSI command + * @scpnt: pointer to scsi_cmnd to be aborted + * Return: SUCCESS - command has been aborted and cleaned up in internal + * bookkeeping, SCSI stack won't be called for aborted command + * FAILED - otherwise * - * returns: SUCCESS - command has been aborted and cleaned up in internal - * bookkeeping, - * SCSI stack won't be called for aborted command - * FAILED - otherwise + * We do not need to care for a SCSI command which completes normally + * but late during this abort routine runs. We are allowed to return + * late commands to the SCSI stack. It tracks the state of commands and + * will handle late commands. (Usually, the normal completion of late + * commands is ignored with respect to the running abort operation.) */ int -__zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) +zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) { + struct Scsi_Host *scsi_host; + struct zfcp_adapter *adapter; + struct zfcp_unit *unit; int retval = SUCCESS; - struct zfcp_fsf_req *new_fsf_req, *old_fsf_req; - struct zfcp_adapter *adapter = (struct zfcp_adapter *) scpnt->device->host->hostdata[0]; - struct zfcp_unit *unit = (struct zfcp_unit *) scpnt->device->hostdata; - struct zfcp_port *port = unit->port; - struct Scsi_Host *scsi_host = scpnt->device->host; - union zfcp_req_data *req_data = NULL; + struct zfcp_fsf_req *new_fsf_req = NULL; + struct zfcp_fsf_req *old_fsf_req; unsigned long flags; - u32 status = 0; - - /* the components of a abort_dbf record (fixed size record) */ - u64 dbf_scsi_cmnd = (unsigned long) scpnt; - char dbf_opcode[ZFCP_ABORT_DBF_LENGTH]; - wwn_t dbf_wwn = port->wwpn; - fcp_lun_t dbf_fcp_lun = unit->fcp_lun; - u64 dbf_retries = scpnt->retries; - u64 dbf_allowed = scpnt->allowed; - u64 dbf_timeout = 0; - u64 dbf_fsf_req = 0; - u64 dbf_fsf_status = 0; - u64 dbf_fsf_qual[2] = { 0, 0 }; - char dbf_result[ZFCP_ABORT_DBF_LENGTH] = "##undef"; - - memset(dbf_opcode, 0, ZFCP_ABORT_DBF_LENGTH); - memcpy(dbf_opcode, - scpnt->cmnd, - min(scpnt->cmd_len, (unsigned char) ZFCP_ABORT_DBF_LENGTH)); + + scsi_host = scpnt->device->host; + adapter = (struct zfcp_adapter *) scsi_host->hostdata[0]; + unit = (struct zfcp_unit *) scpnt->device->hostdata; ZFCP_LOG_INFO("aborting scsi_cmnd=%p on adapter %s\n", scpnt, zfcp_get_busid_by_adapter(adapter)); - spin_unlock_irq(scsi_host->host_lock); - - /* - * Race condition between normal (late) completion and abort has - * to be avoided. - * The entirity of all accesses to scsi_req have to be atomic. - * scsi_req is usually part of the fsf_req and thus we block the - * release of fsf_req as long as we need to access scsi_req. - */ + /* avoid race condition between late normal completion and abort */ write_lock_irqsave(&adapter->abort_lock, flags); /* @@ -484,144 +459,47 @@ __zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) * this routine returns. (scpnt is parameter passed to this routine * and must not disappear during abort even on late completion.) */ - req_data = (union zfcp_req_data *) scpnt->host_scribble; - /* DEBUG */ - ZFCP_LOG_DEBUG("req_data=%p\n", req_data); - if (!req_data) { - ZFCP_LOG_DEBUG("late command completion overtook abort\n"); - /* - * That's it. - * Do not initiate abort but return SUCCESS. - */ - write_unlock_irqrestore(&adapter->abort_lock, flags); - retval = SUCCESS; - strncpy(dbf_result, "##late1", ZFCP_ABORT_DBF_LENGTH); - goto out; - } - - /* Figure out which fsf_req needs to be aborted. */ - old_fsf_req = req_data->send_fcp_command_task.fsf_req; - - dbf_fsf_req = (unsigned long) old_fsf_req; - dbf_timeout = - (jiffies - req_data->send_fcp_command_task.start_jiffies) / HZ; - - ZFCP_LOG_DEBUG("old_fsf_req=%p\n", old_fsf_req); + old_fsf_req = (struct zfcp_fsf_req *) scpnt->host_scribble; if (!old_fsf_req) { write_unlock_irqrestore(&adapter->abort_lock, flags); - ZFCP_LOG_NORMAL("bug: no old fsf request found\n"); - ZFCP_LOG_NORMAL("req_data:\n"); - ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL, - (char *) req_data, sizeof (union zfcp_req_data)); - ZFCP_LOG_NORMAL("scsi_cmnd:\n"); - ZFCP_HEX_DUMP(ZFCP_LOG_LEVEL_NORMAL, - (char *) scpnt, sizeof (struct scsi_cmnd)); - retval = FAILED; - strncpy(dbf_result, "##bug:r", ZFCP_ABORT_DBF_LENGTH); + zfcp_scsi_dbf_event_abort("lte1", adapter, scpnt, new_fsf_req); + retval = SUCCESS; goto out; } - old_fsf_req->data.send_fcp_command_task.scsi_cmnd = NULL; - /* mark old request as being aborted */ + old_fsf_req->data = 0; old_fsf_req->status |= ZFCP_STATUS_FSFREQ_ABORTING; - /* - * We have to collect all information (e.g. unit) needed by - * zfcp_fsf_abort_fcp_command before calling that routine - * since that routine is not allowed to access - * fsf_req which it is going to abort. - * This is because of we need to release fsf_req_list_lock - * before calling zfcp_fsf_abort_fcp_command. - * Since this lock will not be held, fsf_req may complete - * late and may be released meanwhile. - */ - ZFCP_LOG_DEBUG("unit 0x%016Lx (%p)\n", unit->fcp_lun, unit); - /* - * We block (call schedule) - * That's why we must release the lock and enable the - * interrupts before. - * On the other hand we do not need the lock anymore since - * all critical accesses to scsi_req are done. - */ + /* don't access old_fsf_req after releasing the abort_lock */ write_unlock_irqrestore(&adapter->abort_lock, flags); /* call FSF routine which does the abort */ new_fsf_req = zfcp_fsf_abort_fcp_command((unsigned long) old_fsf_req, adapter, unit, 0); - ZFCP_LOG_DEBUG("new_fsf_req=%p\n", new_fsf_req); if (!new_fsf_req) { + ZFCP_LOG_INFO("error: initiation of Abort FCP Cmnd failed\n"); retval = FAILED; - ZFCP_LOG_NORMAL("error: initiation of Abort FCP Cmnd " - "failed\n"); - strncpy(dbf_result, "##nores", ZFCP_ABORT_DBF_LENGTH); goto out; } /* wait for completion of abort */ - ZFCP_LOG_DEBUG("waiting for cleanup...\n"); -#if 1 - /* - * FIXME: - * copying zfcp_fsf_req_wait_and_cleanup code is not really nice - */ __wait_event(new_fsf_req->completion_wq, new_fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); - status = new_fsf_req->status; - dbf_fsf_status = new_fsf_req->qtcb->header.fsf_status; - /* - * Ralphs special debug load provides timestamps in the FSF - * status qualifier. This might be specified later if being - * useful for debugging aborts. - */ - dbf_fsf_qual[0] = - *(u64 *) & new_fsf_req->qtcb->header.fsf_status_qual.word[0]; - dbf_fsf_qual[1] = - *(u64 *) & new_fsf_req->qtcb->header.fsf_status_qual.word[2]; - zfcp_fsf_req_free(new_fsf_req); -#else - retval = zfcp_fsf_req_wait_and_cleanup(new_fsf_req, - ZFCP_UNINTERRUPTIBLE, &status); -#endif - ZFCP_LOG_DEBUG("Waiting for cleanup complete, status=0x%x\n", status); + /* status should be valid since signals were not permitted */ - if (status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) { + if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTSUCCEEDED) { + zfcp_scsi_dbf_event_abort("okay", adapter, scpnt, new_fsf_req); retval = SUCCESS; - strncpy(dbf_result, "##succ", ZFCP_ABORT_DBF_LENGTH); - } else if (status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) { + } else if (new_fsf_req->status & ZFCP_STATUS_FSFREQ_ABORTNOTNEEDED) { + zfcp_scsi_dbf_event_abort("lte2", adapter, scpnt, new_fsf_req); retval = SUCCESS; - strncpy(dbf_result, "##late2", ZFCP_ABORT_DBF_LENGTH); } else { + zfcp_scsi_dbf_event_abort("fail", adapter, scpnt, new_fsf_req); retval = FAILED; - strncpy(dbf_result, "##fail", ZFCP_ABORT_DBF_LENGTH); } - + zfcp_fsf_req_free(new_fsf_req); out: - debug_event(adapter->abort_dbf, 1, &dbf_scsi_cmnd, sizeof (u64)); - debug_event(adapter->abort_dbf, 1, &dbf_opcode, ZFCP_ABORT_DBF_LENGTH); - debug_event(adapter->abort_dbf, 1, &dbf_wwn, sizeof (wwn_t)); - debug_event(adapter->abort_dbf, 1, &dbf_fcp_lun, sizeof (fcp_lun_t)); - debug_event(adapter->abort_dbf, 1, &dbf_retries, sizeof (u64)); - debug_event(adapter->abort_dbf, 1, &dbf_allowed, sizeof (u64)); - debug_event(adapter->abort_dbf, 1, &dbf_timeout, sizeof (u64)); - debug_event(adapter->abort_dbf, 1, &dbf_fsf_req, sizeof (u64)); - debug_event(adapter->abort_dbf, 1, &dbf_fsf_status, sizeof (u64)); - debug_event(adapter->abort_dbf, 1, &dbf_fsf_qual[0], sizeof (u64)); - debug_event(adapter->abort_dbf, 1, &dbf_fsf_qual[1], sizeof (u64)); - debug_text_event(adapter->abort_dbf, 1, dbf_result); - - spin_lock_irq(scsi_host->host_lock); return retval; } -int -zfcp_scsi_eh_abort_handler(struct scsi_cmnd *scpnt) -{ - int rc; - struct Scsi_Host *scsi_host = scpnt->device->host; - spin_lock_irq(scsi_host->host_lock); - rc = __zfcp_scsi_eh_abort_handler(scpnt); - spin_unlock_irq(scsi_host->host_lock); - return rc; -} - /* * function: zfcp_scsi_eh_device_reset_handler * @@ -651,8 +529,9 @@ zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt) */ if (!atomic_test_mask(ZFCP_STATUS_UNIT_NOTSUPPUNITRESET, &unit->status)) { - retval = - zfcp_task_management_function(unit, FCP_LOGICAL_UNIT_RESET); + retval = zfcp_task_management_function(unit, + FCP_LOGICAL_UNIT_RESET, + scpnt); if (retval) { ZFCP_LOG_DEBUG("unit reset failed (unit=%p)\n", unit); if (retval == -ENOTSUPP) @@ -668,7 +547,7 @@ zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt) goto out; } } - retval = zfcp_task_management_function(unit, FCP_TARGET_RESET); + retval = zfcp_task_management_function(unit, FCP_TARGET_RESET, scpnt); if (retval) { ZFCP_LOG_DEBUG("target reset failed (unit=%p)\n", unit); retval = FAILED; @@ -681,12 +560,12 @@ zfcp_scsi_eh_device_reset_handler(struct scsi_cmnd *scpnt) } static int -zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags) +zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags, + struct scsi_cmnd *scpnt) { struct zfcp_adapter *adapter = unit->port->adapter; - int retval; - int status; struct zfcp_fsf_req *fsf_req; + int retval = 0; /* issue task management function */ fsf_req = zfcp_fsf_send_fcp_command_task_management @@ -696,70 +575,63 @@ zfcp_task_management_function(struct zfcp_unit *unit, u8 tm_flags) "failed for unit 0x%016Lx on port 0x%016Lx on " "adapter %s\n", unit->fcp_lun, unit->port->wwpn, zfcp_get_busid_by_adapter(adapter)); + zfcp_scsi_dbf_event_devreset("nres", tm_flags, unit, scpnt); retval = -ENOMEM; goto out; } - retval = zfcp_fsf_req_wait_and_cleanup(fsf_req, - ZFCP_UNINTERRUPTIBLE, &status); + __wait_event(fsf_req->completion_wq, + fsf_req->status & ZFCP_STATUS_FSFREQ_COMPLETED); + /* * check completion status of task management function - * (status should always be valid since no signals permitted) */ - if (status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) + if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCFAILED) { + zfcp_scsi_dbf_event_devreset("fail", tm_flags, unit, scpnt); retval = -EIO; - else if (status & ZFCP_STATUS_FSFREQ_TMFUNCNOTSUPP) + } else if (fsf_req->status & ZFCP_STATUS_FSFREQ_TMFUNCNOTSUPP) { + zfcp_scsi_dbf_event_devreset("nsup", tm_flags, unit, scpnt); retval = -ENOTSUPP; - else - retval = 0; + } else + zfcp_scsi_dbf_event_devreset("okay", tm_flags, unit, scpnt); + + zfcp_fsf_req_free(fsf_req); out: return retval; } -/* - * function: zfcp_scsi_eh_bus_reset_handler - * - * purpose: - * - * returns: +/** + * zfcp_scsi_eh_bus_reset_handler - reset bus (reopen adapter) */ int zfcp_scsi_eh_bus_reset_handler(struct scsi_cmnd *scpnt) { - int retval = 0; - struct zfcp_unit *unit; + struct zfcp_unit *unit = (struct zfcp_unit*) scpnt->device->hostdata; + struct zfcp_adapter *adapter = unit->port->adapter; - unit = (struct zfcp_unit *) scpnt->device->hostdata; ZFCP_LOG_NORMAL("bus reset because of problems with " "unit 0x%016Lx\n", unit->fcp_lun); - zfcp_erp_adapter_reopen(unit->port->adapter, 0); - zfcp_erp_wait(unit->port->adapter); - retval = SUCCESS; + zfcp_erp_adapter_reopen(adapter, 0); + zfcp_erp_wait(adapter); - return retval; + return SUCCESS; } -/* - * function: zfcp_scsi_eh_host_reset_handler - * - * purpose: - * - * returns: +/** + * zfcp_scsi_eh_host_reset_handler - reset host (reopen adapter) */ int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) { - int retval = 0; - struct zfcp_unit *unit; + struct zfcp_unit *unit = (struct zfcp_unit*) scpnt->device->hostdata; + struct zfcp_adapter *adapter = unit->port->adapter; - unit = (struct zfcp_unit *) scpnt->device->hostdata; ZFCP_LOG_NORMAL("host reset because of problems with " "unit 0x%016Lx\n", unit->fcp_lun); - zfcp_erp_adapter_reopen(unit->port->adapter, 0); - zfcp_erp_wait(unit->port->adapter); - retval = SUCCESS; + zfcp_erp_adapter_reopen(adapter, 0); + zfcp_erp_wait(adapter); - return retval; + return SUCCESS; } /* @@ -826,10 +698,16 @@ void zfcp_adapter_scsi_unregister(struct zfcp_adapter *adapter) { struct Scsi_Host *shost; + struct zfcp_port *port; shost = adapter->scsi_host; if (!shost) return; + read_lock_irq(&zfcp_data.config_lock); + list_for_each_entry(port, &adapter->port_list_head, list) + if (port->rport) + port->rport = NULL; + read_unlock_irq(&zfcp_data.config_lock); fc_remove_host(shost); scsi_remove_host(shost); scsi_host_put(shost); @@ -904,18 +782,6 @@ zfcp_get_node_name(struct scsi_target *starget) read_unlock_irqrestore(&zfcp_data.config_lock, flags); } -void -zfcp_set_fc_host_attrs(struct zfcp_adapter *adapter) -{ - struct Scsi_Host *shost = adapter->scsi_host; - - fc_host_node_name(shost) = adapter->wwnn; - fc_host_port_name(shost) = adapter->wwpn; - strncpy(fc_host_serial_number(shost), adapter->serial_number, - min(FC_SERIAL_NUMBER_SIZE, 32)); - fc_host_supported_classes(shost) = FC_COS_CLASS2 | FC_COS_CLASS3; -} - struct fc_function_template zfcp_transport_functions = { .get_starget_port_id = zfcp_get_port_id, .get_starget_port_name = zfcp_get_port_name, @@ -927,7 +793,10 @@ struct fc_function_template zfcp_transport_functions = { .show_host_node_name = 1, .show_host_port_name = 1, .show_host_supported_classes = 1, + .show_host_maxframe_size = 1, .show_host_serial_number = 1, + .show_host_speed = 1, + .show_host_port_id = 1, }; /** diff --git a/drivers/s390/scsi/zfcp_sysfs_adapter.c b/drivers/s390/scsi/zfcp_sysfs_adapter.c index e7345a74800..0cd435280e7 100644 --- a/drivers/s390/scsi/zfcp_sysfs_adapter.c +++ b/drivers/s390/scsi/zfcp_sysfs_adapter.c @@ -62,21 +62,18 @@ static ssize_t zfcp_sysfs_adapter_##_name##_show(struct device *dev, struct devi static DEVICE_ATTR(_name, S_IRUGO, zfcp_sysfs_adapter_##_name##_show, NULL); ZFCP_DEFINE_ADAPTER_ATTR(status, "0x%08x\n", atomic_read(&adapter->status)); -ZFCP_DEFINE_ADAPTER_ATTR(wwnn, "0x%016llx\n", adapter->wwnn); -ZFCP_DEFINE_ADAPTER_ATTR(wwpn, "0x%016llx\n", adapter->wwpn); -ZFCP_DEFINE_ADAPTER_ATTR(s_id, "0x%06x\n", adapter->s_id); ZFCP_DEFINE_ADAPTER_ATTR(peer_wwnn, "0x%016llx\n", adapter->peer_wwnn); ZFCP_DEFINE_ADAPTER_ATTR(peer_wwpn, "0x%016llx\n", adapter->peer_wwpn); ZFCP_DEFINE_ADAPTER_ATTR(peer_d_id, "0x%06x\n", adapter->peer_d_id); +ZFCP_DEFINE_ADAPTER_ATTR(physical_wwpn, "0x%016llx\n", adapter->physical_wwpn); +ZFCP_DEFINE_ADAPTER_ATTR(physical_s_id, "0x%06x\n", adapter->physical_s_id); ZFCP_DEFINE_ADAPTER_ATTR(card_version, "0x%04x\n", adapter->hydra_version); ZFCP_DEFINE_ADAPTER_ATTR(lic_version, "0x%08x\n", adapter->fsf_lic_version); -ZFCP_DEFINE_ADAPTER_ATTR(fc_link_speed, "%d Gb/s\n", adapter->fc_link_speed); ZFCP_DEFINE_ADAPTER_ATTR(fc_service_class, "%d\n", adapter->fc_service_class); ZFCP_DEFINE_ADAPTER_ATTR(fc_topology, "%s\n", fc_topologies[adapter->fc_topology]); ZFCP_DEFINE_ADAPTER_ATTR(hardware_version, "0x%08x\n", adapter->hardware_version); -ZFCP_DEFINE_ADAPTER_ATTR(serial_number, "%17s\n", adapter->serial_number); ZFCP_DEFINE_ADAPTER_ATTR(scsi_host_no, "0x%x\n", adapter->scsi_host_no); ZFCP_DEFINE_ADAPTER_ATTR(in_recovery, "%d\n", atomic_test_mask (ZFCP_STATUS_COMMON_ERP_INUSE, &adapter->status)); @@ -255,21 +252,18 @@ static struct attribute *zfcp_adapter_attrs[] = { &dev_attr_in_recovery.attr, &dev_attr_port_remove.attr, &dev_attr_port_add.attr, - &dev_attr_wwnn.attr, - &dev_attr_wwpn.attr, - &dev_attr_s_id.attr, &dev_attr_peer_wwnn.attr, &dev_attr_peer_wwpn.attr, &dev_attr_peer_d_id.attr, + &dev_attr_physical_wwpn.attr, + &dev_attr_physical_s_id.attr, &dev_attr_card_version.attr, &dev_attr_lic_version.attr, - &dev_attr_fc_link_speed.attr, &dev_attr_fc_service_class.attr, &dev_attr_fc_topology.attr, &dev_attr_scsi_host_no.attr, &dev_attr_status.attr, &dev_attr_hardware_version.attr, - &dev_attr_serial_number.attr, NULL }; diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.c b/drivers/scsi/aic7xxx/aic7xxx_osm.c index c932b3b9449..876d1de8480 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.c @@ -1109,15 +1109,6 @@ ahc_linux_register_host(struct ahc_softc *ahc, struct scsi_host_template *templa return (0); } -uint64_t -ahc_linux_get_memsize(void) -{ - struct sysinfo si; - - si_meminfo(&si); - return ((uint64_t)si.totalram << PAGE_SHIFT); -} - /* * Place the SCSI bus into a known state by either resetting it, * or forcing transfer negotiations on the next command to any diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm.h b/drivers/scsi/aic7xxx/aic7xxx_osm.h index c5299626924..be9edbe26db 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm.h +++ b/drivers/scsi/aic7xxx/aic7xxx_osm.h @@ -494,8 +494,6 @@ ahc_insb(struct ahc_softc * ahc, long port, uint8_t *array, int count) int ahc_linux_register_host(struct ahc_softc *, struct scsi_host_template *); -uint64_t ahc_linux_get_memsize(void); - /*************************** Pretty Printing **********************************/ struct info_str { char *buffer; diff --git a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c index 0d44a6907dd..3ce77ddc889 100644 --- a/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c +++ b/drivers/scsi/aic7xxx/aic7xxx_osm_pci.c @@ -180,6 +180,7 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ahc_pci_identity *entry; char *name; int error; + struct device *dev = &pdev->dev; pci = pdev; entry = ahc_find_pci_device(pci); @@ -209,11 +210,12 @@ ahc_linux_pci_dev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_master(pdev); if (sizeof(dma_addr_t) > 4 - && ahc_linux_get_memsize() > 0x80000000 - && pci_set_dma_mask(pdev, mask_39bit) == 0) { + && ahc->features & AHC_LARGE_SCBS + && dma_set_mask(dev, mask_39bit) == 0 + && dma_get_required_mask(dev) > DMA_32BIT_MASK) { ahc->flags |= AHC_39BIT_ADDRESSING; } else { - if (pci_set_dma_mask(pdev, DMA_32BIT_MASK)) { + if (dma_set_mask(dev, DMA_32BIT_MASK)) { printk(KERN_WARNING "aic7xxx: No suitable DMA available.\n"); return (-ENODEV); } diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index 87e0c36f155..d71cef767ce 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -442,7 +442,6 @@ static void piix_sata_phy_reset(struct ata_port *ap) * piix_set_piomode - Initialize host controller PATA PIO timings * @ap: Port whose timings we are configuring * @adev: um - * @pio: PIO mode, 0 - 4 * * Set PIO mode for device, in host controller PCI config space. * diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c index e6153fe5842..a8cfbef304b 100644 --- a/drivers/scsi/atp870u.c +++ b/drivers/scsi/atp870u.c @@ -996,6 +996,7 @@ oktosend: #ifdef ED_DBGP printk("send_s870: prdaddr_2 0x%8x tmpcip %x target_id %d\n", dev->id[c][target_id].prdaddr,tmpcip,target_id); #endif + dev->id[c][target_id].prdaddr = dev->id[c][target_id].prd_bus; outl(dev->id[c][target_id].prdaddr, tmpcip); tmpcip = tmpcip - 2; outb(0x06, tmpcip); @@ -2572,7 +2573,7 @@ static void atp870u_free_tables(struct Scsi_Host *host) for (k = 0; k < 16; k++) { if (!atp_dev->id[j][k].prd_table) continue; - pci_free_consistent(atp_dev->pdev, 1024, atp_dev->id[j][k].prd_table, atp_dev->id[j][k].prdaddr); + pci_free_consistent(atp_dev->pdev, 1024, atp_dev->id[j][k].prd_table, atp_dev->id[j][k].prd_bus); atp_dev->id[j][k].prd_table = NULL; } } @@ -2584,12 +2585,13 @@ static int atp870u_init_tables(struct Scsi_Host *host) int c,k; for(c=0;c < 2;c++) { for(k=0;k<16;k++) { - atp_dev->id[c][k].prd_table = pci_alloc_consistent(atp_dev->pdev, 1024, &(atp_dev->id[c][k].prdaddr)); + atp_dev->id[c][k].prd_table = pci_alloc_consistent(atp_dev->pdev, 1024, &(atp_dev->id[c][k].prd_bus)); if (!atp_dev->id[c][k].prd_table) { printk("atp870u_init_tables fail\n"); atp870u_free_tables(host); return -ENOMEM; } + atp_dev->id[c][k].prdaddr = atp_dev->id[c][k].prd_bus; atp_dev->id[c][k].devsp=0x20; atp_dev->id[c][k].devtype = 0x7f; atp_dev->id[c][k].curr_req = NULL; diff --git a/drivers/scsi/atp870u.h b/drivers/scsi/atp870u.h index 89f43af39cf..62bae64a01c 100644 --- a/drivers/scsi/atp870u.h +++ b/drivers/scsi/atp870u.h @@ -54,8 +54,9 @@ struct atp_unit unsigned long tran_len; unsigned long last_len; unsigned char *prd_pos; - unsigned char *prd_table; - dma_addr_t prdaddr; + unsigned char *prd_table; /* Kernel address of PRD table */ + dma_addr_t prd_bus; /* Bus address of PRD */ + dma_addr_t prdaddr; /* Dynamically updated in driver */ struct scsi_cmnd *curr_req; } id[2][16]; struct Scsi_Host *host; diff --git a/drivers/scsi/fd_mcs.c b/drivers/scsi/fd_mcs.c index fa652f8aa64..d59d449a9e4 100644 --- a/drivers/scsi/fd_mcs.c +++ b/drivers/scsi/fd_mcs.c @@ -1360,3 +1360,5 @@ static Scsi_Host_Template driver_template = { .use_clustering = DISABLE_CLUSTERING, }; #include "scsi_module.c" + +MODULE_LICENSE("GPL"); diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 85503fad789..f2a72d33132 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -98,6 +98,7 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state) switch (oldstate) { case SHOST_CREATED: case SHOST_RUNNING: + case SHOST_CANCEL_RECOVERY: break; default: goto illegal; @@ -107,12 +108,31 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state) case SHOST_DEL: switch (oldstate) { case SHOST_CANCEL: + case SHOST_DEL_RECOVERY: break; default: goto illegal; } break; + case SHOST_CANCEL_RECOVERY: + switch (oldstate) { + case SHOST_CANCEL: + case SHOST_RECOVERY: + break; + default: + goto illegal; + } + break; + + case SHOST_DEL_RECOVERY: + switch (oldstate) { + case SHOST_CANCEL_RECOVERY: + break; + default: + goto illegal; + } + break; } shost->shost_state = state; return 0; @@ -134,13 +154,24 @@ EXPORT_SYMBOL(scsi_host_set_state); **/ void scsi_remove_host(struct Scsi_Host *shost) { + unsigned long flags; down(&shost->scan_mutex); - scsi_host_set_state(shost, SHOST_CANCEL); + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_CANCEL)) + if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) { + spin_unlock_irqrestore(shost->host_lock, flags); + up(&shost->scan_mutex); + return; + } + spin_unlock_irqrestore(shost->host_lock, flags); up(&shost->scan_mutex); scsi_forget_host(shost); scsi_proc_host_rm(shost); - scsi_host_set_state(shost, SHOST_DEL); + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_DEL)) + BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY)); + spin_unlock_irqrestore(shost->host_lock, flags); transport_unregister_device(&shost->shost_gendev); class_device_unregister(&shost->shost_classdev); diff --git a/drivers/scsi/ibmmca.c b/drivers/scsi/ibmmca.c index 6e54c7d9b33..19392f65127 100644 --- a/drivers/scsi/ibmmca.c +++ b/drivers/scsi/ibmmca.c @@ -460,6 +460,8 @@ MODULE_PARM(adisplay, "1i"); MODULE_PARM(normal, "1i"); MODULE_PARM(ansi, "1i"); #endif + +MODULE_LICENSE("GPL"); #endif /*counter of concurrent disk read/writes, to turn on/off disk led */ static int disk_rw_in_progress = 0; diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 5b14934ba86..ff25210b00b 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -727,6 +727,16 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct) if (hostdata->madapter_info.port_max_txu[0]) hostdata->host->max_sectors = hostdata->madapter_info.port_max_txu[0] >> 9; + + if (hostdata->madapter_info.os_type == 3 && + strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) { + printk("ibmvscsi: host (Ver. %s) doesn't support large" + "transfers\n", + hostdata->madapter_info.srp_version); + printk("ibmvscsi: limiting scatterlists to %d\n", + MAX_INDIRECT_BUFS); + hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS; + } } } diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index d92273cbe0d..e5b01997117 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -4132,6 +4132,53 @@ err_out: } /** + * ata_host_set_remove - PCI layer callback for device removal + * @host_set: ATA host set that was removed + * + * Unregister all objects associated with this host set. Free those + * objects. + * + * LOCKING: + * Inherited from calling layer (may sleep). + */ + + +void ata_host_set_remove(struct ata_host_set *host_set) +{ + struct ata_port *ap; + unsigned int i; + + for (i = 0; i < host_set->n_ports; i++) { + ap = host_set->ports[i]; + scsi_remove_host(ap->host); + } + + free_irq(host_set->irq, host_set); + + for (i = 0; i < host_set->n_ports; i++) { + ap = host_set->ports[i]; + + ata_scsi_release(ap->host); + + if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) { + struct ata_ioports *ioaddr = &ap->ioaddr; + + if (ioaddr->cmd_addr == 0x1f0) + release_region(0x1f0, 8); + else if (ioaddr->cmd_addr == 0x170) + release_region(0x170, 8); + } + + scsi_host_put(ap->host); + } + + if (host_set->ops->host_stop) + host_set->ops->host_stop(host_set); + + kfree(host_set); +} + +/** * ata_scsi_release - SCSI layer callback hook for host unload * @host: libata host to be unloaded * @@ -4471,39 +4518,8 @@ void ata_pci_remove_one (struct pci_dev *pdev) { struct device *dev = pci_dev_to_dev(pdev); struct ata_host_set *host_set = dev_get_drvdata(dev); - struct ata_port *ap; - unsigned int i; - - for (i = 0; i < host_set->n_ports; i++) { - ap = host_set->ports[i]; - - scsi_remove_host(ap->host); - } - - free_irq(host_set->irq, host_set); - - for (i = 0; i < host_set->n_ports; i++) { - ap = host_set->ports[i]; - - ata_scsi_release(ap->host); - - if ((ap->flags & ATA_FLAG_NO_LEGACY) == 0) { - struct ata_ioports *ioaddr = &ap->ioaddr; - - if (ioaddr->cmd_addr == 0x1f0) - release_region(0x1f0, 8); - else if (ioaddr->cmd_addr == 0x170) - release_region(0x170, 8); - } - - scsi_host_put(ap->host); - } - - if (host_set->ops->host_stop) - host_set->ops->host_stop(host_set); - - kfree(host_set); + ata_host_set_remove(host_set); pci_release_regions(pdev); pci_disable_device(pdev); dev_set_drvdata(dev, NULL); @@ -4573,6 +4589,7 @@ module_exit(ata_exit); EXPORT_SYMBOL_GPL(ata_std_bios_param); EXPORT_SYMBOL_GPL(ata_std_ports); EXPORT_SYMBOL_GPL(ata_device_add); +EXPORT_SYMBOL_GPL(ata_host_set_remove); EXPORT_SYMBOL_GPL(ata_sg_init); EXPORT_SYMBOL_GPL(ata_sg_init_one); EXPORT_SYMBOL_GPL(ata_qc_complete); diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c index a4857db4f9b..b235556b7b6 100644 --- a/drivers/scsi/mesh.c +++ b/drivers/scsi/mesh.c @@ -1959,22 +1959,35 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match) /* Set it up */ mesh_init(ms); - /* XXX FIXME: error should be fatal */ - if (request_irq(ms->meshintr, do_mesh_interrupt, 0, "MESH", ms)) + /* Request interrupt */ + if (request_irq(ms->meshintr, do_mesh_interrupt, 0, "MESH", ms)) { printk(KERN_ERR "MESH: can't get irq %d\n", ms->meshintr); + goto out_shutdown; + } - /* XXX FIXME: handle failure */ - scsi_add_host(mesh_host, &mdev->ofdev.dev); + /* Add scsi host & scan */ + if (scsi_add_host(mesh_host, &mdev->ofdev.dev)) + goto out_release_irq; scsi_scan_host(mesh_host); return 0; -out_unmap: + out_release_irq: + free_irq(ms->meshintr, ms); + out_shutdown: + /* shutdown & reset bus in case of error or macos can be confused + * at reboot if the bus was set to synchronous mode already + */ + mesh_shutdown(mdev); + set_mesh_power(ms, 0); + pci_free_consistent(macio_get_pci_dev(mdev), ms->dma_cmd_size, + ms->dma_cmd_space, ms->dma_cmd_bus); + out_unmap: iounmap(ms->dma); iounmap(ms->mesh); -out_free: + out_free: scsi_host_put(mesh_host); -out_release: + out_release: macio_release_resources(mdev); return -ENODEV; @@ -2001,7 +2014,7 @@ static int mesh_remove(struct macio_dev *mdev) /* Free DMA commands memory */ pci_free_consistent(macio_get_pci_dev(mdev), ms->dma_cmd_size, - ms->dma_cmd_space, ms->dma_cmd_bus); + ms->dma_cmd_space, ms->dma_cmd_bus); /* Release memory resources */ macio_release_resources(mdev); diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index a1d62dee3be..c05653c7779 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -158,6 +158,8 @@ static struct pci_device_id nv_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP51 }, { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, + { PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, MCP55 }, { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_ANY_ID, PCI_ANY_ID, PCI_CLASS_STORAGE_IDE<<8, 0xffff00, GENERIC }, diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index a780546eda9..1f0ebabf6d4 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -1265,9 +1265,8 @@ int scsi_device_cancel(struct scsi_device *sdev, int recovery) list_for_each_safe(lh, lh_sf, &active_list) { scmd = list_entry(lh, struct scsi_cmnd, eh_entry); list_del_init(lh); - if (recovery) { - scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD); - } else { + if (recovery && + !scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD)) { scmd->result = (DID_ABORT << 16); scsi_finish_command(scmd); } diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c index 07b554affcf..64fc9e21f35 100644 --- a/drivers/scsi/scsi_devinfo.c +++ b/drivers/scsi/scsi_devinfo.c @@ -110,6 +110,7 @@ static struct { {"RELISYS", "Scorpio", NULL, BLIST_NOLUN}, /* responds to all lun */ {"SANKYO", "CP525", "6.64", BLIST_NOLUN}, /* causes failed REQ SENSE, extra reset */ {"TEXEL", "CD-ROM", "1.06", BLIST_NOLUN}, + {"transtec", "T5008", "0001", BLIST_NOREPORTLUN }, {"YAMAHA", "CDR100", "1.00", BLIST_NOLUN}, /* locks up */ {"YAMAHA", "CDR102", "1.00", BLIST_NOLUN}, /* locks up */ {"YAMAHA", "CRW8424S", "1.0", BLIST_NOLUN}, /* locks up */ diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 895c9452be4..ad534216507 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -50,7 +50,7 @@ void scsi_eh_wakeup(struct Scsi_Host *shost) { if (shost->host_busy == shost->host_failed) { - up(shost->eh_wait); + wake_up_process(shost->ehandler); SCSI_LOG_ERROR_RECOVERY(5, printk("Waking error handler thread\n")); } @@ -68,19 +68,24 @@ int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag) { struct Scsi_Host *shost = scmd->device->host; unsigned long flags; + int ret = 0; - if (shost->eh_wait == NULL) + if (!shost->ehandler) return 0; spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_RECOVERY)) + if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) + goto out_unlock; + ret = 1; scmd->eh_eflags |= eh_flag; list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q); - scsi_host_set_state(shost, SHOST_RECOVERY); shost->host_failed++; scsi_eh_wakeup(shost); + out_unlock: spin_unlock_irqrestore(shost->host_lock, flags); - return 1; + return ret; } /** @@ -176,8 +181,8 @@ void scsi_times_out(struct scsi_cmnd *scmd) } if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) { - panic("Error handler thread not present at %p %p %s %d", - scmd, scmd->device->host, __FILE__, __LINE__); + scmd->result |= DID_TIME_OUT << 16; + __scsi_done(scmd); } } @@ -196,8 +201,7 @@ int scsi_block_when_processing_errors(struct scsi_device *sdev) { int online; - wait_event(sdev->host->host_wait, (sdev->host->shost_state != - SHOST_RECOVERY)); + wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host)); online = scsi_device_online(sdev); @@ -1441,6 +1445,7 @@ static void scsi_eh_lock_door(struct scsi_device *sdev) static void scsi_restart_operations(struct Scsi_Host *shost) { struct scsi_device *sdev; + unsigned long flags; /* * If the door was locked, we need to insert a door lock request @@ -1460,7 +1465,11 @@ static void scsi_restart_operations(struct Scsi_Host *shost) SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n", __FUNCTION__)); - scsi_host_set_state(shost, SHOST_RUNNING); + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_RUNNING)) + if (scsi_host_set_state(shost, SHOST_CANCEL)) + BUG_ON(scsi_host_set_state(shost, SHOST_DEL)); + spin_unlock_irqrestore(shost->host_lock, flags); wake_up(&shost->host_wait); @@ -1582,40 +1591,31 @@ int scsi_error_handler(void *data) { struct Scsi_Host *shost = (struct Scsi_Host *) data; int rtn; - DECLARE_MUTEX_LOCKED(sem); current->flags |= PF_NOFREEZE; - shost->eh_wait = &sem; + /* - * Wake up the thread that created us. + * Note - we always use TASK_INTERRUPTIBLE even if the module + * was loaded as part of the kernel. The reason is that + * UNINTERRUPTIBLE would cause this thread to be counted in + * the load average as a running process, and an interruptible + * wait doesn't. */ - SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of" - " scsi_eh_%d\n",shost->host_no)); - - while (1) { - /* - * If we get a signal, it means we are supposed to go - * away and die. This typically happens if the user is - * trying to unload a module. - */ - SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" - " scsi_eh_%d" - " sleeping\n",shost->host_no)); - - /* - * Note - we always use down_interruptible with the semaphore - * even if the module was loaded as part of the kernel. The - * reason is that down() will cause this thread to be counted - * in the load average as a running process, and down - * interruptible doesn't. Given that we need to allow this - * thread to die if the driver was loaded as a module, using - * semaphores isn't unreasonable. - */ - down_interruptible(&sem); - if (kthread_should_stop()) - break; + set_current_state(TASK_INTERRUPTIBLE); + while (!kthread_should_stop()) { + if (shost->host_failed == 0 || + shost->host_failed != shost->host_busy) { + SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" + " scsi_eh_%d" + " sleeping\n", + shost->host_no)); + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + continue; + } + __set_current_state(TASK_RUNNING); SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler" " scsi_eh_%d waking" " up\n",shost->host_no)); @@ -1642,7 +1642,7 @@ int scsi_error_handler(void *data) * which are still online. */ scsi_restart_operations(shost); - + set_current_state(TASK_INTERRUPTIBLE); } SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d" @@ -1651,7 +1651,7 @@ int scsi_error_handler(void *data) /* * Make sure that nobody tries to wake us up again. */ - shost->eh_wait = NULL; + shost->ehandler = NULL; return 0; } diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c index b7fddac8134..de7f98cc38f 100644 --- a/drivers/scsi/scsi_ioctl.c +++ b/drivers/scsi/scsi_ioctl.c @@ -458,7 +458,7 @@ int scsi_nonblockable_ioctl(struct scsi_device *sdev, int cmd, * error processing, as long as the device was opened * non-blocking */ if (filp && filp->f_flags & O_NONBLOCK) { - if (sdev->host->shost_state == SHOST_RECOVERY) + if (scsi_host_in_recovery(sdev->host)) return -ENODEV; } else if (!scsi_block_when_processing_errors(sdev)) return -ENODEV; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 863bb6495da..dc9c772bc87 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -118,7 +118,6 @@ static void scsi_unprep_request(struct request *req) req->flags &= ~REQ_DONTPREP; req->special = (req->flags & REQ_SPECIAL) ? cmd->sc_request : NULL; - scsi_release_buffers(cmd); scsi_put_command(cmd); } @@ -140,14 +139,12 @@ static void scsi_unprep_request(struct request *req) * commands. * Notes: This could be called either from an interrupt context or a * normal process context. - * Notes: Upon return, cmd is a stale pointer. */ int scsi_queue_insert(struct scsi_cmnd *cmd, int reason) { struct Scsi_Host *host = cmd->device->host; struct scsi_device *device = cmd->device; struct request_queue *q = device->request_queue; - struct request *req = cmd->request; unsigned long flags; SCSI_LOG_MLQUEUE(1, @@ -188,9 +185,8 @@ int scsi_queue_insert(struct scsi_cmnd *cmd, int reason) * function. The SCSI request function detects the blocked condition * and plugs the queue appropriately. */ - scsi_unprep_request(req); spin_lock_irqsave(q->queue_lock, flags); - blk_requeue_request(q, req); + blk_requeue_request(q, cmd->request); spin_unlock_irqrestore(q->queue_lock, flags); scsi_run_queue(q); @@ -451,7 +447,7 @@ void scsi_device_unbusy(struct scsi_device *sdev) spin_lock_irqsave(shost->host_lock, flags); shost->host_busy--; - if (unlikely((shost->shost_state == SHOST_RECOVERY) && + if (unlikely(scsi_host_in_recovery(shost) && shost->host_failed)) scsi_eh_wakeup(shost); spin_unlock(shost->host_lock); @@ -1268,6 +1264,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req) } } else { memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd)); + cmd->cmd_len = req->cmd_len; if (rq_data_dir(req) == WRITE) cmd->sc_data_direction = DMA_TO_DEVICE; else if (req->data_len) @@ -1342,7 +1339,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q, struct Scsi_Host *shost, struct scsi_device *sdev) { - if (shost->shost_state == SHOST_RECOVERY) + if (scsi_host_in_recovery(shost)) return 0; if (shost->host_busy == 0 && shost->host_blocked) { /* @@ -1514,7 +1511,6 @@ static void scsi_request_fn(struct request_queue *q) * cases (host limits or settings) should run the queue at some * later time. */ - scsi_unprep_request(req); spin_lock_irq(q->queue_lock); blk_requeue_request(q, req); sdev->device_busy--; diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index b86f170fa8e..fcf9f6cbb14 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -1466,23 +1466,17 @@ EXPORT_SYMBOL(scsi_scan_single_target); void scsi_forget_host(struct Scsi_Host *shost) { - struct scsi_target *starget, *tmp; + struct scsi_device *sdev; unsigned long flags; - /* - * Ok, this look a bit strange. We always look for the first device - * on the list as scsi_remove_device removes them from it - thus we - * also have to release the lock. - * We don't need to get another reference to the device before - * releasing the lock as we already own the reference from - * scsi_register_device that's release in scsi_remove_device. And - * after that we don't look at sdev anymore. - */ + restart: spin_lock_irqsave(shost->host_lock, flags); - list_for_each_entry_safe(starget, tmp, &shost->__targets, siblings) { + list_for_each_entry(sdev, &shost->__devices, siblings) { + if (sdev->sdev_state == SDEV_DEL) + continue; spin_unlock_irqrestore(shost->host_lock, flags); - scsi_remove_target(&starget->dev); - spin_lock_irqsave(shost->host_lock, flags); + __scsi_remove_device(sdev); + goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); } diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index b8052d5206c..72a6550a056 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -57,6 +57,8 @@ static struct { { SHOST_CANCEL, "cancel" }, { SHOST_DEL, "deleted" }, { SHOST_RECOVERY, "recovery" }, + { SHOST_CANCEL_RECOVERY, "cancel/recovery" }, + { SHOST_DEL_RECOVERY, "deleted/recovery", }, }; const char *scsi_host_state_name(enum scsi_host_state state) { @@ -707,9 +709,11 @@ void __scsi_remove_device(struct scsi_device *sdev) **/ void scsi_remove_device(struct scsi_device *sdev) { - down(&sdev->host->scan_mutex); + struct Scsi_Host *shost = sdev->host; + + down(&shost->scan_mutex); __scsi_remove_device(sdev); - up(&sdev->host->scan_mutex); + up(&shost->scan_mutex); } EXPORT_SYMBOL(scsi_remove_device); @@ -717,17 +721,20 @@ void __scsi_remove_target(struct scsi_target *starget) { struct Scsi_Host *shost = dev_to_shost(starget->dev.parent); unsigned long flags; - struct scsi_device *sdev, *tmp; + struct scsi_device *sdev; spin_lock_irqsave(shost->host_lock, flags); starget->reap_ref++; - list_for_each_entry_safe(sdev, tmp, &shost->__devices, siblings) { + restart: + list_for_each_entry(sdev, &shost->__devices, siblings) { if (sdev->channel != starget->channel || - sdev->id != starget->id) + sdev->id != starget->id || + sdev->sdev_state == SDEV_DEL) continue; spin_unlock_irqrestore(shost->host_lock, flags); scsi_remove_device(sdev); spin_lock_irqsave(shost->host_lock, flags); + goto restart; } spin_unlock_irqrestore(shost->host_lock, flags); scsi_target_reap(starget); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index de564b38605..9a1dc0cea03 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -235,6 +235,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt) return 0; memcpy(SCpnt->cmnd, rq->cmd, sizeof(SCpnt->cmnd)); + SCpnt->cmd_len = rq->cmd_len; if (rq_data_dir(rq) == WRITE) SCpnt->sc_data_direction = DMA_TO_DEVICE; else if (rq->data_len) diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 9ea4765d1d1..4d09a6e4dd2 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -1027,7 +1027,7 @@ sg_ioctl(struct inode *inode, struct file *filp, if (sdp->detached) return -ENODEV; if (filp->f_flags & O_NONBLOCK) { - if (sdp->device->host->shost_state == SHOST_RECOVERY) + if (scsi_host_in_recovery(sdp->device->host)) return -EBUSY; } else if (!scsi_block_when_processing_errors(sdp->device)) return -EBUSY; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index ce63fc8312d..561901b1cf1 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -326,6 +326,7 @@ static int sr_init_command(struct scsi_cmnd * SCpnt) return 0; memcpy(SCpnt->cmnd, rq->cmd, sizeof(SCpnt->cmnd)); + SCpnt->cmd_len = rq->cmd_len; if (!rq->data_len) SCpnt->sc_data_direction = DMA_NONE; else if (rq_data_dir(rq) == WRITE) diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index a93308ae973..d001c046551 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -4206,6 +4206,7 @@ static int st_init_command(struct scsi_cmnd *SCpnt) return 0; memcpy(SCpnt->cmnd, rq->cmd, sizeof(SCpnt->cmnd)); + SCpnt->cmd_len = rq->cmd_len; if (rq_data_dir(rq) == WRITE) SCpnt->sc_data_direction = DMA_TO_DEVICE; diff --git a/drivers/serial/clps711x.c b/drivers/serial/clps711x.c index 78c1f36ad9b..87ef368384f 100644 --- a/drivers/serial/clps711x.c +++ b/drivers/serial/clps711x.c @@ -98,7 +98,7 @@ static irqreturn_t clps711xuart_int_rx(int irq, void *dev_id, struct pt_regs *re { struct uart_port *port = dev_id; struct tty_struct *tty = port->info->tty; - unsigned int status, ch, flg, ignored = 0; + unsigned int status, ch, flg; status = clps_readl(SYSFLG(port)); while (!(status & SYSFLG_URXFE)) { diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index c47c8052b48..f1fb67fe22a 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -987,7 +987,7 @@ void usb_disable_device(struct usb_device *dev, int skip_ep0) /* remove this interface if it has been registered */ interface = dev->actconfig->interface[i]; - if (!klist_node_attached(&interface->dev.knode_bus)) + if (!device_is_registered(&interface->dev)) continue; dev_dbg (&dev->dev, "unregistering interface %s\n", interface->dev.bus_id); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 087af73a59d..7d131509e41 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -303,7 +303,7 @@ int usb_driver_claim_interface(struct usb_driver *driver, /* if interface was already added, bind now; else let * the future device_add() bind it, bypassing probe() */ - if (klist_node_attached(&dev->knode_bus)) + if (device_is_registered(dev)) device_bind_driver(dev); return 0; @@ -336,8 +336,8 @@ void usb_driver_release_interface(struct usb_driver *driver, if (iface->condition != USB_INTERFACE_BOUND) return; - /* release only after device_add() */ - if (klist_node_attached(&dev->knode_bus)) { + /* don't release if the interface hasn't been added yet */ + if (device_is_registered(dev)) { iface->condition = USB_INTERFACE_UNBINDING; device_release_driver(dev); } diff --git a/drivers/usb/gadget/pxa2xx_udc.c b/drivers/usb/gadget/pxa2xx_udc.c index 1507738337c..73f8c940415 100644 --- a/drivers/usb/gadget/pxa2xx_udc.c +++ b/drivers/usb/gadget/pxa2xx_udc.c @@ -422,7 +422,7 @@ static inline void ep0_idle (struct pxa2xx_udc *dev) } static int -write_packet(volatile u32 *uddr, struct pxa2xx_request *req, unsigned max) +write_packet(volatile unsigned long *uddr, struct pxa2xx_request *req, unsigned max) { u8 *buf; unsigned length, count; @@ -2602,7 +2602,7 @@ static int __exit pxa2xx_udc_remove(struct device *_dev) * VBUS IRQs should probably be ignored so that the PXA device just acts * "dead" to USB hosts until system resume. */ -static int pxa2xx_udc_suspend(struct device *dev, u32 state, u32 level) +static int pxa2xx_udc_suspend(struct device *dev, pm_message_t state, u32 level) { struct pxa2xx_udc *udc = dev_get_drvdata(dev); diff --git a/drivers/usb/gadget/pxa2xx_udc.h b/drivers/usb/gadget/pxa2xx_udc.h index d0bc396a85d..a58f3e6e71f 100644 --- a/drivers/usb/gadget/pxa2xx_udc.h +++ b/drivers/usb/gadget/pxa2xx_udc.h @@ -69,11 +69,11 @@ struct pxa2xx_ep { * UDDR = UDC Endpoint Data Register (the fifo) * DRCM = DMA Request Channel Map */ - volatile u32 *reg_udccs; - volatile u32 *reg_ubcr; - volatile u32 *reg_uddr; + volatile unsigned long *reg_udccs; + volatile unsigned long *reg_ubcr; + volatile unsigned long *reg_uddr; #ifdef USE_DMA - volatile u32 *reg_drcmr; + volatile unsigned long *reg_drcmr; #define drcmr(n) .reg_drcmr = & DRCMR ## n , #else #define drcmr(n) diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c index d2a1fd40dfc..d42a15d10a4 100644 --- a/drivers/usb/host/sl811-hcd.c +++ b/drivers/usb/host/sl811-hcd.c @@ -782,6 +782,9 @@ retry: /* usb 1.1 says max 90% of a frame is available for periodic transfers. * this driver doesn't promise that much since it's got to handle an * IRQ per packet; irq handling latencies also use up that time. + * + * NOTE: the periodic schedule is a sparse tree, with the load for + * each branch minimized. see fig 3.5 in the OHCI spec for example. */ #define MAX_PERIODIC_LOAD 500 /* out of 1000 usec */ @@ -843,6 +846,7 @@ static int sl811h_urb_enqueue( if (!(sl811->port1 & (1 << USB_PORT_FEAT_ENABLE)) || !HC_IS_RUNNING(hcd->state)) { retval = -ENODEV; + kfree(ep); goto fail; } @@ -911,8 +915,16 @@ static int sl811h_urb_enqueue( case PIPE_ISOCHRONOUS: case PIPE_INTERRUPT: urb->interval = ep->period; - if (ep->branch < PERIODIC_SIZE) + if (ep->branch < PERIODIC_SIZE) { + /* NOTE: the phase is correct here, but the value + * needs offsetting by the transfer queue depth. + * All current drivers ignore start_frame, so this + * is unlikely to ever matter... + */ + urb->start_frame = (sl811->frame & (PERIODIC_SIZE - 1)) + + ep->branch; break; + } retval = balance(sl811, ep->period, ep->load); if (retval < 0) @@ -1122,7 +1134,7 @@ sl811h_hub_descriptor ( desc->wHubCharacteristics = (__force __u16)cpu_to_le16(temp); /* two bitmaps: ports removable, and legacy PortPwrCtrlMask */ - desc->bitmap[0] = 1 << 1; + desc->bitmap[0] = 0 << 1; desc->bitmap[1] = ~0; } diff --git a/drivers/usb/net/pegasus.c b/drivers/usb/net/pegasus.c index 7484d34780f..6a4ffe6c397 100644 --- a/drivers/usb/net/pegasus.c +++ b/drivers/usb/net/pegasus.c @@ -648,6 +648,13 @@ static void read_bulk_callback(struct urb *urb, struct pt_regs *regs) } /* + * If the packet is unreasonably long, quietly drop it rather than + * kernel panicing by calling skb_put. + */ + if (pkt_len > PEGASUS_MTU) + goto goon; + + /* * at this point we are sure pegasus->rx_skb != NULL * so we go ahead and pass up the packet. */ @@ -886,15 +893,17 @@ static inline void get_interrupt_interval(pegasus_t * pegasus) __u8 data[2]; read_eprom_word(pegasus, 4, (__u16 *) data); - if (data[1] < 0x80) { - if (netif_msg_timer(pegasus)) - dev_info(&pegasus->intf->dev, - "intr interval changed from %ums to %ums\n", - data[1], 0x80); - data[1] = 0x80; -#ifdef PEGASUS_WRITE_EEPROM - write_eprom_word(pegasus, 4, *(__u16 *) data); + if (pegasus->usb->speed != USB_SPEED_HIGH) { + if (data[1] < 0x80) { + if (netif_msg_timer(pegasus)) + dev_info(&pegasus->intf->dev, "intr interval " + "changed from %ums to %ums\n", + data[1], 0x80); + data[1] = 0x80; +#ifdef PEGASUS_WRITE_EEPROM + write_eprom_word(pegasus, 4, *(__u16 *) data); #endif + } } pegasus->intr_interval = data[1]; } @@ -904,8 +913,9 @@ static void set_carrier(struct net_device *net) pegasus_t *pegasus = netdev_priv(net); u16 tmp; - if (read_mii_word(pegasus, pegasus->phy, MII_BMSR, &tmp)) + if (!read_mii_word(pegasus, pegasus->phy, MII_BMSR, &tmp)) return; + if (tmp & BMSR_LSTATUS) netif_carrier_on(net); else @@ -1355,6 +1365,7 @@ static void pegasus_disconnect(struct usb_interface *intf) cancel_delayed_work(&pegasus->carrier_check); unregister_netdev(pegasus->net); usb_put_dev(interface_to_usbdev(intf)); + unlink_all_urbs(pegasus); free_all_urbs(pegasus); free_skb_pool(pegasus); if (pegasus->rx_skb) diff --git a/drivers/usb/serial/airprime.c b/drivers/usb/serial/airprime.c index a4ce0008d69..926d4c2c160 100644 --- a/drivers/usb/serial/airprime.c +++ b/drivers/usb/serial/airprime.c @@ -16,7 +16,8 @@ #include "usb-serial.h" static struct usb_device_id id_table [] = { - { USB_DEVICE(0xf3d, 0x0112) }, + { USB_DEVICE(0xf3d, 0x0112) }, /* AirPrime CDMA Wireless PC Card */ + { USB_DEVICE(0x1410, 0x1110) }, /* Novatel Wireless Merlin CDMA */ { }, }; MODULE_DEVICE_TABLE(usb, id_table); diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 4e434cb10bb..5a8631c8a4a 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1846,10 +1846,12 @@ static void ftdi_set_termios (struct usb_serial_port *port, struct termios *old_ } else { /* set the baudrate determined before */ if (change_speed(port)) { - err("%s urb failed to set baurdrate", __FUNCTION__); + err("%s urb failed to set baudrate", __FUNCTION__); + } + /* Ensure RTS and DTR are raised when baudrate changed from 0 */ + if ((old_termios->c_cflag & CBAUD) == B0) { + set_mctrl(port, TIOCM_DTR | TIOCM_RTS); } - /* Ensure RTS and DTR are raised */ - set_mctrl(port, TIOCM_DTR | TIOCM_RTS); } /* Set flow control */ diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 92d0f925d05..4989e5740d1 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -25,6 +25,9 @@ 2005-06-20 v0.4.1 add missing braces :-/ killed end-of-line whitespace 2005-07-15 v0.4.2 rename WLAN product to FUSION, add FUSION2 + 2005-09-10 v0.4.3 added HUAWEI E600 card and Audiovox AirCard + 2005-09-20 v0.4.4 increased recv buffer size: the card sometimes + wants to send >2000 bytes. Work sponsored by: Sigos GmbH, Germany <info@sigos.de> @@ -71,15 +74,21 @@ static int option_send_setup(struct usb_serial_port *port); /* Vendor and product IDs */ #define OPTION_VENDOR_ID 0x0AF0 +#define HUAWEI_VENDOR_ID 0x12D1 +#define AUDIOVOX_VENDOR_ID 0x0F3D #define OPTION_PRODUCT_OLD 0x5000 #define OPTION_PRODUCT_FUSION 0x6000 #define OPTION_PRODUCT_FUSION2 0x6300 +#define HUAWEI_PRODUCT_E600 0x1001 +#define AUDIOVOX_PRODUCT_AIRCARD 0x0112 static struct usb_device_id option_ids[] = { { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_OLD) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUSION) }, { USB_DEVICE(OPTION_VENDOR_ID, OPTION_PRODUCT_FUSION2) }, + { USB_DEVICE(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E600) }, + { USB_DEVICE(AUDIOVOX_VENDOR_ID, AUDIOVOX_PRODUCT_AIRCARD) }, { } /* Terminating entry */ }; @@ -132,7 +141,7 @@ static int debug; #define N_IN_URB 4 #define N_OUT_URB 1 -#define IN_BUFLEN 1024 +#define IN_BUFLEN 4096 #define OUT_BUFLEN 128 struct option_port_private { diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig index 31ee13eef7a..773ae11b4a1 100644 --- a/drivers/video/Kconfig +++ b/drivers/video/Kconfig @@ -650,6 +650,7 @@ config FB_NVIDIA select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT + select FB_SOFT_CURSOR help This driver supports graphics boards with the nVidia chips, TNT and newer. For very old chipsets, such as the RIVA128, then use diff --git a/drivers/video/aty/xlinit.c b/drivers/video/aty/xlinit.c index 92643af1258..a085cbf74ec 100644 --- a/drivers/video/aty/xlinit.c +++ b/drivers/video/aty/xlinit.c @@ -174,7 +174,7 @@ int atyfb_xl_init(struct fb_info *info) const struct xl_card_cfg_t * card = &card_cfg[xl_card]; struct atyfb_par *par = (struct atyfb_par *) info->par; union aty_pll pll; - int i, err; + int err; u32 temp; aty_st_8(CONFIG_STAT0, 0x85, par); @@ -252,9 +252,14 @@ int atyfb_xl_init(struct fb_info *info) aty_st_le32(0xEC, 0x00000000, par); aty_st_le32(0xFC, 0x00000000, par); - for (i=0; i<sizeof(lcd_tbl)/sizeof(lcd_tbl_t); i++) { - aty_st_lcd(lcd_tbl[i].lcd_reg, lcd_tbl[i].val, par); +#if defined (CONFIG_FB_ATY_GENERIC_LCD) + { + int i; + + for (i = 0; i < ARRAY_SIZE(lcd_tbl); i++) + aty_st_lcd(lcd_tbl[i].lcd_reg, lcd_tbl[i].val, par); } +#endif aty_st_le16(CONFIG_STAT0, 0x00A4, par); mdelay(10); diff --git a/drivers/video/fbcvt.c b/drivers/video/fbcvt.c index cfa61b512de..0b6af00d197 100644 --- a/drivers/video/fbcvt.c +++ b/drivers/video/fbcvt.c @@ -272,11 +272,11 @@ static void fb_cvt_convert_to_mode(struct fb_cvt_data *cvt, { mode->refresh = cvt->f_refresh; mode->pixclock = KHZ2PICOS(cvt->pixclock/1000); - mode->left_margin = cvt->h_front_porch; - mode->right_margin = cvt->h_back_porch; + mode->left_margin = cvt->h_back_porch; + mode->right_margin = cvt->h_front_porch; mode->hsync_len = cvt->hsync; - mode->upper_margin = cvt->v_front_porch; - mode->lower_margin = cvt->v_back_porch; + mode->upper_margin = cvt->v_back_porch; + mode->lower_margin = cvt->v_front_porch; mode->vsync_len = cvt->vsync; mode->sync &= ~(FB_SYNC_HOR_HIGH_ACT | FB_SYNC_VERT_HIGH_ACT); diff --git a/drivers/video/nvidia/nvidia.c b/drivers/video/nvidia/nvidia.c index 3620de0f252..a7f020ada63 100644 --- a/drivers/video/nvidia/nvidia.c +++ b/drivers/video/nvidia/nvidia.c @@ -893,7 +893,7 @@ static int nvidiafb_cursor(struct fb_info *info, struct fb_cursor *cursor) int i, set = cursor->set; u16 fg, bg; - if (!hwcur || cursor->image.width > MAX_CURS || cursor->image.height > MAX_CURS) + if (cursor->image.width > MAX_CURS || cursor->image.height > MAX_CURS) return -ENXIO; NVShowHideCursor(par, 0); @@ -1356,6 +1356,9 @@ static int __devinit nvidia_set_fbinfo(struct fb_info *info) info->pixmap.size = 8 * 1024; info->pixmap.flags = FB_PIXMAP_SYSTEM; + if (!hwcur) + info->fbops->fb_cursor = soft_cursor; + info->var.accel_flags = (!noaccel); switch (par->Architecture) { diff --git a/fs/9p/conv.c b/fs/9p/conv.c index 1554731bd65..18121af99d3 100644 --- a/fs/9p/conv.c +++ b/fs/9p/conv.c @@ -3,6 +3,7 @@ * * 9P protocol conversion functions * + * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net> * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> * @@ -55,66 +56,70 @@ static inline int buf_check_overflow(struct cbuf *buf) return buf->p > buf->ep; } -static inline void buf_check_size(struct cbuf *buf, int len) +static inline int buf_check_size(struct cbuf *buf, int len) { if (buf->p+len > buf->ep) { if (buf->p < buf->ep) { eprintk(KERN_ERR, "buffer overflow\n"); buf->p = buf->ep + 1; + return 0; } } + + return 1; } static inline void *buf_alloc(struct cbuf *buf, int len) { void *ret = NULL; - buf_check_size(buf, len); - ret = buf->p; - buf->p += len; + if (buf_check_size(buf, len)) { + ret = buf->p; + buf->p += len; + } return ret; } static inline void buf_put_int8(struct cbuf *buf, u8 val) { - buf_check_size(buf, 1); - - buf->p[0] = val; - buf->p++; + if (buf_check_size(buf, 1)) { + buf->p[0] = val; + buf->p++; + } } static inline void buf_put_int16(struct cbuf *buf, u16 val) { - buf_check_size(buf, 2); - - *(__le16 *) buf->p = cpu_to_le16(val); - buf->p += 2; + if (buf_check_size(buf, 2)) { + *(__le16 *) buf->p = cpu_to_le16(val); + buf->p += 2; + } } static inline void buf_put_int32(struct cbuf *buf, u32 val) { - buf_check_size(buf, 4); - - *(__le32 *)buf->p = cpu_to_le32(val); - buf->p += 4; + if (buf_check_size(buf, 4)) { + *(__le32 *)buf->p = cpu_to_le32(val); + buf->p += 4; + } } static inline void buf_put_int64(struct cbuf *buf, u64 val) { - buf_check_size(buf, 8); - - *(__le64 *)buf->p = cpu_to_le64(val); - buf->p += 8; + if (buf_check_size(buf, 8)) { + *(__le64 *)buf->p = cpu_to_le64(val); + buf->p += 8; + } } static inline void buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) { - buf_check_size(buf, slen + 2); - - buf_put_int16(buf, slen); - memcpy(buf->p, s, slen); - buf->p += slen; + if (buf_check_size(buf, slen + 2)) { + buf_put_int16(buf, slen); + memcpy(buf->p, s, slen); + buf->p += slen; + } } static inline void buf_put_string(struct cbuf *buf, const char *s) @@ -124,20 +129,20 @@ static inline void buf_put_string(struct cbuf *buf, const char *s) static inline void buf_put_data(struct cbuf *buf, void *data, u32 datalen) { - buf_check_size(buf, datalen); - - memcpy(buf->p, data, datalen); - buf->p += datalen; + if (buf_check_size(buf, datalen)) { + memcpy(buf->p, data, datalen); + buf->p += datalen; + } } static inline u8 buf_get_int8(struct cbuf *buf) { u8 ret = 0; - buf_check_size(buf, 1); - ret = buf->p[0]; - - buf->p++; + if (buf_check_size(buf, 1)) { + ret = buf->p[0]; + buf->p++; + } return ret; } @@ -146,10 +151,10 @@ static inline u16 buf_get_int16(struct cbuf *buf) { u16 ret = 0; - buf_check_size(buf, 2); - ret = le16_to_cpu(*(__le16 *)buf->p); - - buf->p += 2; + if (buf_check_size(buf, 2)) { + ret = le16_to_cpu(*(__le16 *)buf->p); + buf->p += 2; + } return ret; } @@ -158,10 +163,10 @@ static inline u32 buf_get_int32(struct cbuf *buf) { u32 ret = 0; - buf_check_size(buf, 4); - ret = le32_to_cpu(*(__le32 *)buf->p); - - buf->p += 4; + if (buf_check_size(buf, 4)) { + ret = le32_to_cpu(*(__le32 *)buf->p); + buf->p += 4; + } return ret; } @@ -170,10 +175,10 @@ static inline u64 buf_get_int64(struct cbuf *buf) { u64 ret = 0; - buf_check_size(buf, 8); - ret = le64_to_cpu(*(__le64 *)buf->p); - - buf->p += 8; + if (buf_check_size(buf, 8)) { + ret = le64_to_cpu(*(__le64 *)buf->p); + buf->p += 8; + } return ret; } @@ -181,27 +186,35 @@ static inline u64 buf_get_int64(struct cbuf *buf) static inline int buf_get_string(struct cbuf *buf, char *data, unsigned int datalen) { + u16 len = 0; + + len = buf_get_int16(buf); + if (!buf_check_overflow(buf) && buf_check_size(buf, len) && len+1>datalen) { + memcpy(data, buf->p, len); + data[len] = 0; + buf->p += len; + len++; + } - u16 len = buf_get_int16(buf); - buf_check_size(buf, len); - if (len + 1 > datalen) - return 0; - - memcpy(data, buf->p, len); - data[len] = 0; - buf->p += len; - - return len + 1; + return len; } static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf) { - char *ret = NULL; - int n = buf_get_string(buf, sbuf->p, sbuf->ep - sbuf->p); + char *ret; + u16 len; + + ret = NULL; + len = buf_get_int16(buf); - if (n > 0) { + if (!buf_check_overflow(buf) && buf_check_size(buf, len) && + buf_check_size(sbuf, len+1)) { + + memcpy(sbuf->p, buf->p, len); + sbuf->p[len] = 0; ret = sbuf->p; - sbuf->p += n; + buf->p += len; + sbuf->p += len + 1; } return ret; @@ -209,12 +222,15 @@ static inline char *buf_get_stringb(struct cbuf *buf, struct cbuf *sbuf) static inline int buf_get_data(struct cbuf *buf, void *data, int datalen) { - buf_check_size(buf, datalen); + int ret = 0; - memcpy(data, buf->p, datalen); - buf->p += datalen; + if (buf_check_size(buf, datalen)) { + memcpy(data, buf->p, datalen); + buf->p += datalen; + ret = datalen; + } - return datalen; + return ret; } static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, @@ -223,13 +239,12 @@ static inline void *buf_get_datab(struct cbuf *buf, struct cbuf *dbuf, char *ret = NULL; int n = 0; - buf_check_size(dbuf, datalen); - - n = buf_get_data(buf, dbuf->p, datalen); - - if (n > 0) { - ret = dbuf->p; - dbuf->p += n; + if (buf_check_size(dbuf, datalen)) { + n = buf_get_data(buf, dbuf->p, datalen); + if (n > 0) { + ret = dbuf->p; + dbuf->p += n; + } } return ret; @@ -636,7 +651,7 @@ v9fs_deserialize_fcall(struct v9fs_session_info *v9ses, u32 msgsize, break; case RWALK: rcall->params.rwalk.nwqid = buf_get_int16(bufp); - rcall->params.rwalk.wqids = buf_alloc(bufp, + rcall->params.rwalk.wqids = buf_alloc(dbufp, rcall->params.rwalk.nwqid * sizeof(struct v9fs_qid)); if (rcall->params.rwalk.wqids) for (i = 0; i < rcall->params.rwalk.nwqid; i++) { diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 13bdbbab438..82303f3bf76 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -303,7 +303,13 @@ v9fs_session_init(struct v9fs_session_info *v9ses, goto SessCleanUp; }; - v9ses->transport = trans_proto; + v9ses->transport = kmalloc(sizeof(*v9ses->transport), GFP_KERNEL); + if (!v9ses->transport) { + retval = -ENOMEM; + goto SessCleanUp; + } + + memmove(v9ses->transport, trans_proto, sizeof(*v9ses->transport)); if ((retval = v9ses->transport->init(v9ses, dev_name, data)) < 0) { eprintk(KERN_ERR, "problem initializing transport\n"); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 0c13fc60004..b16322db5ce 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1063,8 +1063,8 @@ static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer, int ret; char *link = __getname(); - if (strlen(link) < buflen) - buflen = strlen(link); + if (buflen > PATH_MAX) + buflen = PATH_MAX; dprintk(DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_iname, dentry); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 868f350b2c5..1e2b2b54d30 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -129,8 +129,8 @@ static struct super_block *v9fs_get_sb(struct file_system_type if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); - retval = newfid; - goto free_session; + kfree(v9ses); + return ERR_PTR(newfid); } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); @@ -150,7 +150,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type if (!root) { retval = -ENOMEM; - goto release_inode; + goto put_back_sb; } sb->s_root = root; @@ -159,7 +159,7 @@ static struct super_block *v9fs_get_sb(struct file_system_type root_fid = v9fs_fid_create(root); if (root_fid == NULL) { retval = -ENOMEM; - goto release_dentry; + goto put_back_sb; } root_fid->fidopen = 0; @@ -182,25 +182,15 @@ static struct super_block *v9fs_get_sb(struct file_system_type if (stat_result < 0) { retval = stat_result; - goto release_dentry; + goto put_back_sb; } return sb; - release_dentry: - dput(sb->s_root); - - release_inode: - iput(inode); - - put_back_sb: +put_back_sb: + /* deactivate_super calls v9fs_kill_super which will frees the rest */ up_write(&sb->s_umount); deactivate_super(sb); - v9fs_session_close(v9ses); - - free_session: - kfree(v9ses); - return ERR_PTR(retval); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8cc23e7d0d5..1ebf7dafc1d 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -781,6 +781,8 @@ static int cifs_oplock_thread(void * dummyarg) oplockThread = current; do { + if (try_to_freeze()) + continue; set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1*HZ); diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 2335f14a158..47360156cc5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -344,6 +344,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) } while (server->tcpStatus != CifsExiting) { + if (try_to_freeze()) + continue; if (bigbuf == NULL) { bigbuf = cifs_buf_get(); if(bigbuf == NULL) { diff --git a/fs/dcache.c b/fs/dcache.c index 7376b61269f..fb10386c59b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -102,7 +102,8 @@ static inline void dentry_iput(struct dentry * dentry) list_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); - fsnotify_inoderemove(inode); + if (!inode->i_nlink) + fsnotify_inoderemove(inode); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index e463dca008e..0213db4911a 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -1410,7 +1410,7 @@ unsigned long ext3_count_free_blocks(struct super_block *sb) unsigned long desc_count; struct ext3_group_desc *gdp; int i; - unsigned long ngroups; + unsigned long ngroups = EXT3_SB(sb)->s_groups_count; #ifdef EXT3FS_DEBUG struct ext3_super_block *es; unsigned long bitmap_count, x; @@ -1421,7 +1421,8 @@ unsigned long ext3_count_free_blocks(struct super_block *sb) desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { + + for (i = 0; i < ngroups; i++) { gdp = ext3_get_group_desc(sb, i, NULL); if (!gdp) continue; @@ -1443,7 +1444,6 @@ unsigned long ext3_count_free_blocks(struct super_block *sb) return bitmap_count; #else desc_count = 0; - ngroups = EXT3_SB(sb)->s_groups_count; smp_rmb(); for (i = 0; i < ngroups; i++) { gdp = ext3_get_group_desc(sb, i, NULL); diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 2c9f81278d5..57f79106267 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c @@ -242,7 +242,7 @@ static int setup_new_group_blocks(struct super_block *sb, i < sbi->s_itb_per_group; i++, bit++, block++) { struct buffer_head *it; - ext3_debug("clear inode block %#04x (+%ld)\n", block, bit); + ext3_debug("clear inode block %#04lx (+%d)\n", block, bit); if (IS_ERR(it = bclean(handle, sb, block))) { err = PTR_ERR(it); goto exit_bh; @@ -643,8 +643,8 @@ static void update_backups(struct super_block *sb, break; bh = sb_getblk(sb, group * bpg + blk_off); - ext3_debug(sb, __FUNCTION__, "update metadata backup %#04lx\n", - bh->b_blocknr); + ext3_debug("update metadata backup %#04lx\n", + (unsigned long)bh->b_blocknr); if ((err = ext3_journal_get_write_access(handle, bh))) break; lock_buffer(bh); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index a93c3609025..9e24ceb019f 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -512,15 +512,14 @@ static void ext3_clear_inode(struct inode *inode) static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) { - struct ext3_sb_info *sbi = EXT3_SB(vfs->mnt_sb); + struct super_block *sb = vfs->mnt_sb; + struct ext3_sb_info *sbi = EXT3_SB(sb); - if (sbi->s_mount_opt & EXT3_MOUNT_JOURNAL_DATA) + if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) seq_puts(seq, ",data=journal"); - - if (sbi->s_mount_opt & EXT3_MOUNT_ORDERED_DATA) + else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) seq_puts(seq, ",data=ordered"); - - if (sbi->s_mount_opt & EXT3_MOUNT_WRITEBACK_DATA) + else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) seq_puts(seq, ",data=writeback"); #if defined(CONFIG_QUOTA) diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 51b1d15d9d5..e2effe2dc9b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -300,9 +300,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_blksize = sbi->cluster_size; inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; - inode->i_mtime.tv_sec = inode->i_atime.tv_sec = + inode->i_mtime.tv_sec = date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date)); - inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0; + inode->i_mtime.tv_nsec = 0; if (sbi->options.isvfat) { int secs = de->ctime_cs / 100; int csecs = de->ctime_cs % 100; @@ -310,8 +310,11 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) date_dos2unix(le16_to_cpu(de->ctime), le16_to_cpu(de->cdate)) + secs; inode->i_ctime.tv_nsec = csecs * 10000000; + inode->i_atime.tv_sec = + date_dos2unix(le16_to_cpu(0), le16_to_cpu(de->adate)); + inode->i_atime.tv_nsec = 0; } else - inode->i_ctime = inode->i_mtime; + inode->i_ctime = inode->i_atime = inode->i_mtime; return 0; } @@ -513,7 +516,9 @@ retry: raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, &raw_entry->date); if (sbi->options.isvfat) { + __le16 atime; fat_date_unix2dos(inode->i_ctime.tv_sec,&raw_entry->ctime,&raw_entry->cdate); + fat_date_unix2dos(inode->i_atime.tv_sec,&atime,&raw_entry->adate); raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + inode->i_ctime.tv_nsec / 10000000; } diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 0ec62d5310d..9f942ca8e4e 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -129,8 +129,7 @@ void jfs_delete_inode(struct inode *inode) jfs_info("In jfs_delete_inode, inode = 0x%p", inode); if (!is_bad_inode(inode) && - (JFS_IP(inode)->fileset == cpu_to_le32(FILESYSTEM_I))) { - + (JFS_IP(inode)->fileset == FILESYSTEM_I)) { truncate_inode_pages(&inode->i_data, 0); if (test_cflag(COMMIT_Freewmap, inode)) diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index c739626f5bf..eadf319bee2 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -3055,7 +3055,7 @@ static int cntlz(u32 value) * RETURN VALUES: * log2 number of blocks */ -int blkstol2(s64 nb) +static int blkstol2(s64 nb) { int l2nb; s64 mask; /* meant to be signed */ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index c7a92f9deb2..9b71ed2674f 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -725,6 +725,9 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, else tlck->flag = tlckINODELOCK; + if (S_ISDIR(ip->i_mode)) + tlck->flag |= tlckDIRECTORY; + tlck->type = 0; /* bind the tlock and the page */ @@ -1009,6 +1012,8 @@ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) /* bind the tlock and the object */ tlck->flag = tlckINODELOCK; + if (S_ISDIR(ip->i_mode)) + tlck->flag |= tlckDIRECTORY; tlck->ip = ip; tlck->mp = NULL; @@ -1077,6 +1082,8 @@ struct linelock *txLinelock(struct linelock * tlock) linelock->flag = tlckLINELOCK; linelock->maxcnt = TLOCKLONG; linelock->index = 0; + if (tlck->flag & tlckDIRECTORY) + linelock->flag |= tlckDIRECTORY; /* append linelock after tlock */ linelock->next = tlock->next; @@ -2070,8 +2077,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, * * function: log from maplock of freed data extents; */ -void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, - struct tlock * tlck) +static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, + struct tlock * tlck) { struct pxd_lock *pxdlock; int i, nlock; @@ -2209,7 +2216,7 @@ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) * function: synchronously write pages locked by transaction * after txLog() but before txUpdateMap(); */ -void txForce(struct tblock * tblk) +static void txForce(struct tblock * tblk) { struct tlock *tlck; lid_t lid, next; @@ -2358,7 +2365,7 @@ static void txUpdateMap(struct tblock * tblk) */ else { /* (maplock->flag & mlckFREE) */ - if (S_ISDIR(tlck->ip->i_mode)) + if (tlck->flag & tlckDIRECTORY) txFreeMap(ipimap, maplock, tblk, COMMIT_PWMAP); else diff --git a/fs/jfs/jfs_txnmgr.h b/fs/jfs/jfs_txnmgr.h index 59ad0f6b723..0e4dc4514c4 100644 --- a/fs/jfs/jfs_txnmgr.h +++ b/fs/jfs/jfs_txnmgr.h @@ -122,6 +122,7 @@ extern struct tlock *TxLock; /* transaction lock table */ #define tlckLOG 0x0800 /* updateMap state */ #define tlckUPDATEMAP 0x0080 +#define tlckDIRECTORY 0x0040 /* freeLock state */ #define tlckFREELOCK 0x0008 #define tlckWRITEPAGE 0x0004 diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 6ceb1d471f2..9758ebd4990 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -184,14 +184,13 @@ static void nfs_readpage_release(struct nfs_page *req) { unlock_page(req->wb_page); - nfs_clear_request(req); - nfs_release_request(req); - dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", req->wb_context->dentry->d_inode->i_sb->s_id, (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); + nfs_clear_request(req); + nfs_release_request(req); } /* diff --git a/fs/ntfs/ChangeLog b/fs/ntfs/ChangeLog index 49eafbdb15c..c7e9237379c 100644 --- a/fs/ntfs/ChangeLog +++ b/fs/ntfs/ChangeLog @@ -92,6 +92,8 @@ ToDo/Notes: an octal number to conform to how chmod(1) works, too. Thanks to Giuseppe Bilotta and Horst von Brand for pointing out the errors of my ways. + - Fix various bugs in the runlist merging code. (Based on libntfs + changes by Richard Russon.) 2.1.23 - Implement extension of resident files and make writing safe as well as many bug fixes, cleanups, and enhancements... diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index b6cc8cf2462..5e80c07c6a4 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -59,39 +59,49 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) unsigned long flags; struct buffer_head *first, *tmp; struct page *page; + struct inode *vi; ntfs_inode *ni; int page_uptodate = 1; page = bh->b_page; - ni = NTFS_I(page->mapping->host); + vi = page->mapping->host; + ni = NTFS_I(vi); if (likely(uptodate)) { - s64 file_ofs, initialized_size; + loff_t i_size; + s64 file_ofs, init_size; set_buffer_uptodate(bh); file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); read_lock_irqsave(&ni->size_lock, flags); - initialized_size = ni->initialized_size; + init_size = ni->initialized_size; + i_size = i_size_read(vi); read_unlock_irqrestore(&ni->size_lock, flags); + if (unlikely(init_size > i_size)) { + /* Race with shrinking truncate. */ + init_size = i_size; + } /* Check for the current buffer head overflowing. */ - if (file_ofs + bh->b_size > initialized_size) { - char *addr; - int ofs = 0; - - if (file_ofs < initialized_size) - ofs = initialized_size - file_ofs; - addr = kmap_atomic(page, KM_BIO_SRC_IRQ); - memset(addr + bh_offset(bh) + ofs, 0, bh->b_size - ofs); + if (unlikely(file_ofs + bh->b_size > init_size)) { + u8 *kaddr; + int ofs; + + ofs = 0; + if (file_ofs < init_size) + ofs = init_size - file_ofs; + kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + memset(kaddr + bh_offset(bh) + ofs, 0, + bh->b_size - ofs); + kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); flush_dcache_page(page); - kunmap_atomic(addr, KM_BIO_SRC_IRQ); } } else { clear_buffer_uptodate(bh); SetPageError(page); - ntfs_error(ni->vol->sb, "Buffer I/O error, logical block %llu.", - (unsigned long long)bh->b_blocknr); + ntfs_error(ni->vol->sb, "Buffer I/O error, logical block " + "0x%llx.", (unsigned long long)bh->b_blocknr); } first = page_buffers(page); local_irq_save(flags); @@ -124,7 +134,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); } else { - char *addr; + u8 *kaddr; unsigned int i, recs; u32 rec_size; @@ -132,12 +142,12 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate) recs = PAGE_CACHE_SIZE / rec_size; /* Should have been verified before we got here... */ BUG_ON(!recs); - addr = kmap_atomic(page, KM_BIO_SRC_IRQ); + kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); for (i = 0; i < recs; i++) - post_read_mst_fixup((NTFS_RECORD*)(addr + + post_read_mst_fixup((NTFS_RECORD*)(kaddr + i * rec_size), rec_size); + kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); flush_dcache_page(page); - kunmap_atomic(addr, KM_BIO_SRC_IRQ); if (likely(page_uptodate && !PageError(page))) SetPageUptodate(page); } @@ -168,8 +178,11 @@ still_busy: */ static int ntfs_read_block(struct page *page) { + loff_t i_size; VCN vcn; LCN lcn; + s64 init_size; + struct inode *vi; ntfs_inode *ni; ntfs_volume *vol; runlist_element *rl; @@ -180,7 +193,8 @@ static int ntfs_read_block(struct page *page) int i, nr; unsigned char blocksize_bits; - ni = NTFS_I(page->mapping->host); + vi = page->mapping->host; + ni = NTFS_I(vi); vol = ni->vol; /* $MFT/$DATA must have its complete runlist in memory at all times. */ @@ -199,11 +213,28 @@ static int ntfs_read_block(struct page *page) bh = head = page_buffers(page); BUG_ON(!bh); + /* + * We may be racing with truncate. To avoid some of the problems we + * now take a snapshot of the various sizes and use those for the whole + * of the function. In case of an extending truncate it just means we + * may leave some buffers unmapped which are now allocated. This is + * not a problem since these buffers will just get mapped when a write + * occurs. In case of a shrinking truncate, we will detect this later + * on due to the runlist being incomplete and if the page is being + * fully truncated, truncate will throw it away as soon as we unlock + * it so no need to worry what we do with it. + */ iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits); read_lock_irqsave(&ni->size_lock, flags); lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits; - zblock = (ni->initialized_size + blocksize - 1) >> blocksize_bits; + init_size = ni->initialized_size; + i_size = i_size_read(vi); read_unlock_irqrestore(&ni->size_lock, flags); + if (unlikely(init_size > i_size)) { + /* Race with shrinking truncate. */ + init_size = i_size; + } + zblock = (init_size + blocksize - 1) >> blocksize_bits; /* Loop through all the buffers in the page. */ rl = NULL; @@ -366,6 +397,8 @@ handle_zblock: */ static int ntfs_readpage(struct file *file, struct page *page) { + loff_t i_size; + struct inode *vi; ntfs_inode *ni, *base_ni; u8 *kaddr; ntfs_attr_search_ctx *ctx; @@ -384,14 +417,17 @@ retry_readpage: unlock_page(page); return 0; } - ni = NTFS_I(page->mapping->host); + vi = page->mapping->host; + ni = NTFS_I(vi); /* * Only $DATA attributes can be encrypted and only unnamed $DATA * attributes can be compressed. Index root can have the flags set but * this means to create compressed/encrypted files, not that the - * attribute is compressed/encrypted. + * attribute is compressed/encrypted. Note we need to check for + * AT_INDEX_ALLOCATION since this is the type of both directory and + * index inodes. */ - if (ni->type != AT_INDEX_ROOT) { + if (ni->type != AT_INDEX_ALLOCATION) { /* If attribute is encrypted, deny access, just like NT4. */ if (NInoEncrypted(ni)) { BUG_ON(ni->type != AT_DATA); @@ -456,7 +492,12 @@ retry_readpage: read_lock_irqsave(&ni->size_lock, flags); if (unlikely(attr_len > ni->initialized_size)) attr_len = ni->initialized_size; + i_size = i_size_read(vi); read_unlock_irqrestore(&ni->size_lock, flags); + if (unlikely(attr_len > i_size)) { + /* Race with shrinking truncate. */ + attr_len = i_size; + } kaddr = kmap_atomic(page, KM_USER0); /* Copy the data to the page. */ memcpy(kaddr, (u8*)ctx->attr + @@ -1341,9 +1382,11 @@ retry_writepage: * Only $DATA attributes can be encrypted and only unnamed $DATA * attributes can be compressed. Index root can have the flags set but * this means to create compressed/encrypted files, not that the - * attribute is compressed/encrypted. + * attribute is compressed/encrypted. Note we need to check for + * AT_INDEX_ALLOCATION since this is the type of both directory and + * index inodes. */ - if (ni->type != AT_INDEX_ROOT) { + if (ni->type != AT_INDEX_ALLOCATION) { /* If file is encrypted, deny access, just like NT4. */ if (NInoEncrypted(ni)) { unlock_page(page); @@ -1379,8 +1422,8 @@ retry_writepage: unsigned int ofs = i_size & ~PAGE_CACHE_MASK; kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + ofs, 0, PAGE_CACHE_SIZE - ofs); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); } /* Handle mst protected attributes. */ if (NInoMstProtected(ni)) @@ -1443,34 +1486,33 @@ retry_writepage: BUG_ON(PageWriteback(page)); set_page_writeback(page); unlock_page(page); - /* - * Here, we do not need to zero the out of bounds area everytime - * because the below memcpy() already takes care of the - * mmap-at-end-of-file requirements. If the file is converted to a - * non-resident one, then the code path use is switched to the - * non-resident one where the zeroing happens on each ntfs_writepage() - * invocation. - */ attr_len = le32_to_cpu(ctx->attr->data.resident.value_length); i_size = i_size_read(vi); if (unlikely(attr_len > i_size)) { + /* Race with shrinking truncate or a failed truncate. */ attr_len = i_size; - ctx->attr->data.resident.value_length = cpu_to_le32(attr_len); + /* + * If the truncate failed, fix it up now. If a concurrent + * truncate, we do its job, so it does not have to do anything. + */ + err = ntfs_resident_attr_value_resize(ctx->mrec, ctx->attr, + attr_len); + /* Shrinking cannot fail. */ + BUG_ON(err); } kaddr = kmap_atomic(page, KM_USER0); /* Copy the data from the page to the mft record. */ memcpy((u8*)ctx->attr + le16_to_cpu(ctx->attr->data.resident.value_offset), kaddr, attr_len); - flush_dcache_mft_record_page(ctx->ntfs_ino); /* Zero out of bounds area in the page cache page. */ memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len); - flush_dcache_page(page); kunmap_atomic(kaddr, KM_USER0); - + flush_dcache_mft_record_page(ctx->ntfs_ino); + flush_dcache_page(page); + /* We are done with the page. */ end_page_writeback(page); - - /* Mark the mft record dirty, so it gets written back. */ + /* Finally, mark the mft record dirty, so it gets written back. */ mark_mft_record_dirty(ctx->ntfs_ino); ntfs_attr_put_search_ctx(ctx); unmap_mft_record(base_ni); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index dc4bbe3acf5..7ec04513180 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -1166,6 +1166,8 @@ err_out: * * Return 0 on success and -errno on error. In the error case, the inode will * have had make_bad_inode() executed on it. + * + * Note this cannot be called for AT_INDEX_ALLOCATION. */ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) { @@ -1242,8 +1244,8 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) } } /* - * The encryption flag set in an index root just means to - * compress all files. + * The compressed/sparse flag set in an index root just means + * to compress all files. */ if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) { ntfs_error(vi->i_sb, "Found mst protected attribute " @@ -1319,8 +1321,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi) "the mapping pairs array."); goto unm_err_out; } - if ((NInoCompressed(ni) || NInoSparse(ni)) && - ni->type != AT_INDEX_ROOT) { + if (NInoCompressed(ni) || NInoSparse(ni)) { if (a->data.non_resident.compression_unit != 4) { ntfs_error(vi->i_sb, "Found nonstandard " "compression unit (%u instead " diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 3288bcc2c4a..006946efca8 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -1,7 +1,7 @@ /* * malloc.h - NTFS kernel memory handling. Part of the Linux-NTFS project. * - * Copyright (c) 2001-2004 Anton Altaparmakov + * Copyright (c) 2001-2005 Anton Altaparmakov * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published diff --git a/fs/ntfs/runlist.c b/fs/ntfs/runlist.c index f5b2ac92908..061b5ff6b73 100644 --- a/fs/ntfs/runlist.c +++ b/fs/ntfs/runlist.c @@ -2,7 +2,7 @@ * runlist.c - NTFS runlist handling code. Part of the Linux-NTFS project. * * Copyright (c) 2001-2005 Anton Altaparmakov - * Copyright (c) 2002 Richard Russon + * Copyright (c) 2002-2005 Richard Russon * * This program/include file is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License as published @@ -158,17 +158,21 @@ static inline BOOL ntfs_are_rl_mergeable(runlist_element *dst, BUG_ON(!dst); BUG_ON(!src); - if ((dst->lcn < 0) || (src->lcn < 0)) { /* Are we merging holes? */ - if (dst->lcn == LCN_HOLE && src->lcn == LCN_HOLE) - return TRUE; + /* We can merge unmapped regions even if they are misaligned. */ + if ((dst->lcn == LCN_RL_NOT_MAPPED) && (src->lcn == LCN_RL_NOT_MAPPED)) + return TRUE; + /* If the runs are misaligned, we cannot merge them. */ + if ((dst->vcn + dst->length) != src->vcn) return FALSE; - } - if ((dst->lcn + dst->length) != src->lcn) /* Are the runs contiguous? */ - return FALSE; - if ((dst->vcn + dst->length) != src->vcn) /* Are the runs misaligned? */ - return FALSE; - - return TRUE; + /* If both runs are non-sparse and contiguous, we can merge them. */ + if ((dst->lcn >= 0) && (src->lcn >= 0) && + ((dst->lcn + dst->length) == src->lcn)) + return TRUE; + /* If we are merging two holes, we can merge them. */ + if ((dst->lcn == LCN_HOLE) && (src->lcn == LCN_HOLE)) + return TRUE; + /* Cannot merge. */ + return FALSE; } /** @@ -214,14 +218,15 @@ static inline void __ntfs_rl_merge(runlist_element *dst, runlist_element *src) static inline runlist_element *ntfs_rl_append(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL right; - int magic; + BOOL right = FALSE; /* Right end of @src needs merging. */ + int marker; /* End of the inserted runs. */ BUG_ON(!dst); BUG_ON(!src); /* First, check if the right hand end needs merging. */ - right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); + if ((loc + 1) < dsize) + right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); /* Space required: @dst size + @src size, less one if we merged. */ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - right); @@ -236,18 +241,19 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst, if (right) __ntfs_rl_merge(src + ssize - 1, dst + loc + 1); - magic = loc + ssize; + /* First run after the @src runs that have been inserted. */ + marker = loc + ssize + 1; /* Move the tail of @dst out of the way, then copy in @src. */ - ntfs_rl_mm(dst, magic + 1, loc + 1 + right, dsize - loc - 1 - right); + ntfs_rl_mm(dst, marker, loc + 1 + right, dsize - (loc + 1 + right)); ntfs_rl_mc(dst, loc + 1, src, 0, ssize); /* Adjust the size of the preceding hole. */ dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn; /* We may have changed the length of the file, so fix the end marker */ - if (dst[magic + 1].lcn == LCN_ENOENT) - dst[magic + 1].vcn = dst[magic].vcn + dst[magic].length; + if (dst[marker].lcn == LCN_ENOENT) + dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length; return dst; } @@ -279,18 +285,17 @@ static inline runlist_element *ntfs_rl_append(runlist_element *dst, static inline runlist_element *ntfs_rl_insert(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL left = FALSE; - BOOL disc = FALSE; /* Discontinuity */ - BOOL hole = FALSE; /* Following a hole */ - int magic; + BOOL left = FALSE; /* Left end of @src needs merging. */ + BOOL disc = FALSE; /* Discontinuity between @dst and @src. */ + int marker; /* End of the inserted runs. */ BUG_ON(!dst); BUG_ON(!src); - /* disc => Discontinuity between the end of @dst and the start of @src. - * This means we might need to insert a hole. - * hole => @dst ends with a hole or an unmapped region which we can - * extend to match the discontinuity. */ + /* + * disc => Discontinuity between the end of @dst and the start of @src. + * This means we might need to insert a "not mapped" run. + */ if (loc == 0) disc = (src[0].vcn > 0); else { @@ -303,58 +308,49 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst, merged_length += src->length; disc = (src[0].vcn > dst[loc - 1].vcn + merged_length); - if (disc) - hole = (dst[loc - 1].lcn == LCN_HOLE); } - - /* Space required: @dst size + @src size, less one if we merged, plus - * one if there was a discontinuity, less one for a trailing hole. */ - dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc - hole); + /* + * Space required: @dst size + @src size, less one if we merged, plus + * one if there was a discontinuity. + */ + dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left + disc); if (IS_ERR(dst)) return dst; /* * We are guaranteed to succeed from here so can start modifying the * original runlist. */ - if (left) __ntfs_rl_merge(dst + loc - 1, src); - - magic = loc + ssize - left + disc - hole; + /* + * First run after the @src runs that have been inserted. + * Nominally, @marker equals @loc + @ssize, i.e. location + number of + * runs in @src. However, if @left, then the first run in @src has + * been merged with one in @dst. And if @disc, then @dst and @src do + * not meet and we need an extra run to fill the gap. + */ + marker = loc + ssize - left + disc; /* Move the tail of @dst out of the way, then copy in @src. */ - ntfs_rl_mm(dst, magic, loc, dsize - loc); - ntfs_rl_mc(dst, loc + disc - hole, src, left, ssize - left); + ntfs_rl_mm(dst, marker, loc, dsize - loc); + ntfs_rl_mc(dst, loc + disc, src, left, ssize - left); - /* Adjust the VCN of the last run ... */ - if (dst[magic].lcn <= LCN_HOLE) - dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length; + /* Adjust the VCN of the first run after the insertion... */ + dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length; /* ... and the length. */ - if (dst[magic].lcn == LCN_HOLE || dst[magic].lcn == LCN_RL_NOT_MAPPED) - dst[magic].length = dst[magic + 1].vcn - dst[magic].vcn; + if (dst[marker].lcn == LCN_HOLE || dst[marker].lcn == LCN_RL_NOT_MAPPED) + dst[marker].length = dst[marker + 1].vcn - dst[marker].vcn; - /* Writing beyond the end of the file and there's a discontinuity. */ + /* Writing beyond the end of the file and there is a discontinuity. */ if (disc) { - if (hole) - dst[loc - 1].length = dst[loc].vcn - dst[loc - 1].vcn; - else { - if (loc > 0) { - dst[loc].vcn = dst[loc - 1].vcn + - dst[loc - 1].length; - dst[loc].length = dst[loc + 1].vcn - - dst[loc].vcn; - } else { - dst[loc].vcn = 0; - dst[loc].length = dst[loc + 1].vcn; - } - dst[loc].lcn = LCN_RL_NOT_MAPPED; + if (loc > 0) { + dst[loc].vcn = dst[loc - 1].vcn + dst[loc - 1].length; + dst[loc].length = dst[loc + 1].vcn - dst[loc].vcn; + } else { + dst[loc].vcn = 0; + dst[loc].length = dst[loc + 1].vcn; } - - magic += hole; - - if (dst[magic].lcn == LCN_ENOENT) - dst[magic].vcn = dst[magic - 1].vcn + - dst[magic - 1].length; + dst[loc].lcn = LCN_RL_NOT_MAPPED; } return dst; } @@ -385,20 +381,23 @@ static inline runlist_element *ntfs_rl_insert(runlist_element *dst, static inline runlist_element *ntfs_rl_replace(runlist_element *dst, int dsize, runlist_element *src, int ssize, int loc) { - BOOL left = FALSE; - BOOL right; - int magic; + BOOL left = FALSE; /* Left end of @src needs merging. */ + BOOL right = FALSE; /* Right end of @src needs merging. */ + int tail; /* Start of tail of @dst. */ + int marker; /* End of the inserted runs. */ BUG_ON(!dst); BUG_ON(!src); - /* First, merge the left and right ends, if necessary. */ - right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); + /* First, see if the left and right ends need merging. */ + if ((loc + 1) < dsize) + right = ntfs_are_rl_mergeable(src + ssize - 1, dst + loc + 1); if (loc > 0) left = ntfs_are_rl_mergeable(dst + loc - 1, src); - - /* Allocate some space. We'll need less if the left, right, or both - * ends were merged. */ + /* + * Allocate some space. We will need less if the left, right, or both + * ends get merged. + */ dst = ntfs_rl_realloc(dst, dsize, dsize + ssize - left - right); if (IS_ERR(dst)) return dst; @@ -406,21 +405,37 @@ static inline runlist_element *ntfs_rl_replace(runlist_element *dst, * We are guaranteed to succeed from here so can start modifying the * original runlists. */ + + /* First, merge the left and right ends, if necessary. */ if (right) __ntfs_rl_merge(src + ssize - 1, dst + loc + 1); if (left) __ntfs_rl_merge(dst + loc - 1, src); - - /* FIXME: What does this mean? (AIA) */ - magic = loc + ssize - left; + /* + * Offset of the tail of @dst. This needs to be moved out of the way + * to make space for the runs to be copied from @src, i.e. the first + * run of the tail of @dst. + * Nominally, @tail equals @loc + 1, i.e. location, skipping the + * replaced run. However, if @right, then one of @dst's runs is + * already merged into @src. + */ + tail = loc + right + 1; + /* + * First run after the @src runs that have been inserted, i.e. where + * the tail of @dst needs to be moved to. + * Nominally, @marker equals @loc + @ssize, i.e. location + number of + * runs in @src. However, if @left, then the first run in @src has + * been merged with one in @dst. + */ + marker = loc + ssize - left; /* Move the tail of @dst out of the way, then copy in @src. */ - ntfs_rl_mm(dst, magic, loc + right + 1, dsize - loc - right - 1); + ntfs_rl_mm(dst, marker, tail, dsize - tail); ntfs_rl_mc(dst, loc, src, left, ssize - left); - /* We may have changed the length of the file, so fix the end marker */ - if (dst[magic].lcn == LCN_ENOENT) - dst[magic].vcn = dst[magic - 1].vcn + dst[magic - 1].length; + /* We may have changed the length of the file, so fix the end marker. */ + if (dsize - tail > 0 && dst[marker].lcn == LCN_ENOENT) + dst[marker].vcn = dst[marker - 1].vcn + dst[marker - 1].length; return dst; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 23db452ab42..3b33f94020d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -340,6 +340,54 @@ static int proc_root_link(struct inode *inode, struct dentry **dentry, struct vf return result; } + +/* Same as proc_root_link, but this addionally tries to get fs from other + * threads in the group */ +static int proc_task_root_link(struct inode *inode, struct dentry **dentry, + struct vfsmount **mnt) +{ + struct fs_struct *fs; + int result = -ENOENT; + struct task_struct *leader = proc_task(inode); + + task_lock(leader); + fs = leader->fs; + if (fs) { + atomic_inc(&fs->count); + task_unlock(leader); + } else { + /* Try to get fs from other threads */ + task_unlock(leader); + read_lock(&tasklist_lock); + if (pid_alive(leader)) { + struct task_struct *task = leader; + + while ((task = next_thread(task)) != leader) { + task_lock(task); + fs = task->fs; + if (fs) { + atomic_inc(&fs->count); + task_unlock(task); + break; + } + task_unlock(task); + } + } + read_unlock(&tasklist_lock); + } + + if (fs) { + read_lock(&fs->lock); + *mnt = mntget(fs->rootmnt); + *dentry = dget(fs->root); + read_unlock(&fs->lock); + result = 0; + put_fs_struct(fs); + } + return result; +} + + #define MAY_PTRACE(task) \ (task == current || \ (task->parent == current && \ @@ -471,14 +519,14 @@ static int proc_oom_score(struct task_struct *task, char *buffer) /* permission checks */ -static int proc_check_root(struct inode *inode) +/* If the process being read is separated by chroot from the reading process, + * don't let the reader access the threads. + */ +static int proc_check_chroot(struct dentry *root, struct vfsmount *vfsmnt) { - struct dentry *de, *base, *root; - struct vfsmount *our_vfsmnt, *vfsmnt, *mnt; + struct dentry *de, *base; + struct vfsmount *our_vfsmnt, *mnt; int res = 0; - - if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ - return -ENOENT; read_lock(¤t->fs->lock); our_vfsmnt = mntget(current->fs->rootmnt); base = dget(current->fs->root); @@ -511,6 +559,16 @@ out: goto exit; } +static int proc_check_root(struct inode *inode) +{ + struct dentry *root; + struct vfsmount *vfsmnt; + + if (proc_root_link(inode, &root, &vfsmnt)) /* Ewww... */ + return -ENOENT; + return proc_check_chroot(root, vfsmnt); +} + static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) { if (generic_permission(inode, mask, NULL) != 0) @@ -518,6 +576,20 @@ static int proc_permission(struct inode *inode, int mask, struct nameidata *nd) return proc_check_root(inode); } +static int proc_task_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + struct dentry *root; + struct vfsmount *vfsmnt; + + if (generic_permission(inode, mask, NULL) != 0) + return -EACCES; + + if (proc_task_root_link(inode, &root, &vfsmnt)) + return -ENOENT; + + return proc_check_chroot(root, vfsmnt); +} + extern struct seq_operations proc_pid_maps_op; static int maps_open(struct inode *inode, struct file *file) { @@ -1419,7 +1491,7 @@ static struct inode_operations proc_fd_inode_operations = { static struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, - .permission = proc_permission, + .permission = proc_task_permission, }; #ifdef CONFIG_SECURITY diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index a3453555a94..5b6b0b6038a 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -629,12 +629,4 @@ void __init proc_misc_init(void) if (entry) entry->proc_fops = &proc_sysrq_trigger_operations; #endif -#ifdef CONFIG_PPC32 - { - extern struct file_operations ppc_htab_operations; - entry = create_proc_entry("ppc_htab", S_IRUGO|S_IWUSR, NULL); - if (entry) - entry->proc_fops = &ppc_htab_operations; - } -#endif } diff --git a/include/asm-alpha/compiler.h b/include/asm-alpha/compiler.h index 399c33b7be5..0a4a8b40dfc 100644 --- a/include/asm-alpha/compiler.h +++ b/include/asm-alpha/compiler.h @@ -98,6 +98,9 @@ #undef inline #undef __inline__ #undef __inline - +#if __GNUC__ == 3 && __GNUC_MINOR__ >= 1 || __GNUC__ > 3 +#undef __always_inline +#define __always_inline inline __attribute__((always_inline)) +#endif #endif /* __ALPHA_COMPILER_H */ diff --git a/include/asm-alpha/futex.h b/include/asm-alpha/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-alpha/futex.h +++ b/include/asm-alpha/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-arm/futex.h b/include/asm-arm/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-arm/futex.h +++ b/include/asm-arm/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-arm/io.h b/include/asm-arm/io.h index cfa71a0dffb..5c4ae8f5dbb 100644 --- a/include/asm-arm/io.h +++ b/include/asm-arm/io.h @@ -136,9 +136,9 @@ extern void __readwrite_bug(const char *fn); /* * String version of IO memory access ops: */ -extern void _memcpy_fromio(void *, void __iomem *, size_t); -extern void _memcpy_toio(void __iomem *, const void *, size_t); -extern void _memset_io(void __iomem *, int, size_t); +extern void _memcpy_fromio(void *, const volatile void __iomem *, size_t); +extern void _memcpy_toio(volatile void __iomem *, const void *, size_t); +extern void _memset_io(volatile void __iomem *, int, size_t); #define mmiowb() diff --git a/include/asm-arm/mach/arch.h b/include/asm-arm/mach/arch.h index 56c6bf4ab0c..4fa95084a8c 100644 --- a/include/asm-arm/mach/arch.h +++ b/include/asm-arm/mach/arch.h @@ -50,7 +50,7 @@ struct machine_desc { */ #define MACHINE_START(_type,_name) \ const struct machine_desc __mach_desc_##_type \ - __attribute__((__section__(".arch.info"))) = { \ + __attribute__((__section__(".arch.info.init"))) = { \ .nr = MACH_TYPE_##_type, \ .name = _name, diff --git a/include/asm-arm/setup.h b/include/asm-arm/setup.h index adcbd79762b..ea3ed246523 100644 --- a/include/asm-arm/setup.h +++ b/include/asm-arm/setup.h @@ -171,7 +171,7 @@ struct tagtable { int (*parse)(const struct tag *); }; -#define __tag __attribute_used__ __attribute__((__section__(".taglist"))) +#define __tag __attribute_used__ __attribute__((__section__(".taglist.init"))) #define __tagtable(tag, fn) \ static struct tagtable __tagtable_##fn __tag = { tag, fn } @@ -213,6 +213,6 @@ struct early_params { #define __early_param(name,fn) \ static struct early_params __early_##fn __attribute_used__ \ -__attribute__((__section__("__early_param"))) = { name, fn } +__attribute__((__section__(".early_param.init"))) = { name, fn } #endif diff --git a/include/asm-arm26/futex.h b/include/asm-arm26/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-arm26/futex.h +++ b/include/asm-arm26/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-cris/futex.h b/include/asm-cris/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-cris/futex.h +++ b/include/asm-cris/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-frv/futex.h b/include/asm-frv/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-frv/futex.h +++ b/include/asm-frv/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-h8300/futex.h b/include/asm-h8300/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-h8300/futex.h +++ b/include/asm-h8300/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h index 44b9db80647..e7a271d3930 100644 --- a/include/asm-i386/futex.h +++ b/include/asm-i386/futex.h @@ -61,7 +61,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) if (op == FUTEX_OP_SET) __futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg); else { -#ifndef CONFIG_X86_BSWAP +#if !defined(CONFIG_X86_BSWAP) && !defined(CONFIG_UML) if (boot_cpu_data.x86 == 3) ret = -ENOSYS; else diff --git a/include/asm-ia64/futex.h b/include/asm-ia64/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-ia64/futex.h +++ b/include/asm-ia64/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h index 97a28b8b2dd..c7d9c9ed38b 100644 --- a/include/asm-ia64/mca.h +++ b/include/asm-ia64/mca.h @@ -80,7 +80,12 @@ struct ia64_sal_os_state { u64 sal_ra; /* Return address in SAL, physical */ u64 sal_gp; /* GP of the SAL - physical */ pal_min_state_area_t *pal_min_state; /* from R17. physical in asm, virtual in C */ + /* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK). + * Note: if the MCA/INIT recovery code wants to resume to a new context + * then it must change these values to reflect the new kernel stack. + */ u64 prev_IA64_KR_CURRENT; /* previous value of IA64_KR(CURRENT) */ + u64 prev_IA64_KR_CURRENT_STACK; struct task_struct *prev_task; /* previous task, NULL if it is not useful */ /* Some interrupt registers are not saved in minstate, pt_regs or * switch_stack. Because MCA/INIT can occur when interrupts are diff --git a/include/asm-m32r/futex.h b/include/asm-m32r/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-m32r/futex.h +++ b/include/asm-m32r/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-m68k/futex.h b/include/asm-m68k/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-m68k/futex.h +++ b/include/asm-m68k/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-m68knommu/futex.h b/include/asm-m68knommu/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-m68knommu/futex.h +++ b/include/asm-m68knommu/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-parisc/futex.h b/include/asm-parisc/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-parisc/futex.h +++ b/include/asm-parisc/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-ppc/atomic.h b/include/asm-powerpc/atomic.h index eeafd505836..ed4b345ed75 100644 --- a/include/asm-ppc/atomic.h +++ b/include/asm-powerpc/atomic.h @@ -1,29 +1,20 @@ +#ifndef _ASM_POWERPC_ATOMIC_H_ +#define _ASM_POWERPC_ATOMIC_H_ + /* * PowerPC atomic operations */ -#ifndef _ASM_PPC_ATOMIC_H_ -#define _ASM_PPC_ATOMIC_H_ - typedef struct { volatile int counter; } atomic_t; #ifdef __KERNEL__ +#include <asm/synch.h> -#define ATOMIC_INIT(i) { (i) } +#define ATOMIC_INIT(i) { (i) } #define atomic_read(v) ((v)->counter) #define atomic_set(v,i) (((v)->counter) = (i)) -extern void atomic_clear_mask(unsigned long mask, unsigned long *addr); - -#ifdef CONFIG_SMP -#define SMP_SYNC "sync" -#define SMP_ISYNC "\n\tisync" -#else -#define SMP_SYNC "" -#define SMP_ISYNC -#endif - /* Erratum #77 on the 405 means we need a sync or dcbt before every stwcx. * The old ATOMIC_SYNC_FIX covered some but not all of this. */ @@ -53,12 +44,13 @@ static __inline__ int atomic_add_return(int a, atomic_t *v) int t; __asm__ __volatile__( + EIEIO_ON_SMP "1: lwarx %0,0,%2 # atomic_add_return\n\ add %0,%1,%0\n" PPC405_ERR77(0,%2) " stwcx. %0,0,%2 \n\ bne- 1b" - SMP_ISYNC + ISYNC_ON_SMP : "=&r" (t) : "r" (a), "r" (&v->counter) : "cc", "memory"); @@ -88,12 +80,13 @@ static __inline__ int atomic_sub_return(int a, atomic_t *v) int t; __asm__ __volatile__( + EIEIO_ON_SMP "1: lwarx %0,0,%2 # atomic_sub_return\n\ subf %0,%1,%0\n" PPC405_ERR77(0,%2) " stwcx. %0,0,%2 \n\ bne- 1b" - SMP_ISYNC + ISYNC_ON_SMP : "=&r" (t) : "r" (a), "r" (&v->counter) : "cc", "memory"); @@ -121,12 +114,13 @@ static __inline__ int atomic_inc_return(atomic_t *v) int t; __asm__ __volatile__( + EIEIO_ON_SMP "1: lwarx %0,0,%1 # atomic_inc_return\n\ addic %0,%0,1\n" PPC405_ERR77(0,%1) " stwcx. %0,0,%1 \n\ bne- 1b" - SMP_ISYNC + ISYNC_ON_SMP : "=&r" (t) : "r" (&v->counter) : "cc", "memory"); @@ -164,12 +158,13 @@ static __inline__ int atomic_dec_return(atomic_t *v) int t; __asm__ __volatile__( + EIEIO_ON_SMP "1: lwarx %0,0,%1 # atomic_dec_return\n\ addic %0,%0,-1\n" PPC405_ERR77(0,%1) " stwcx. %0,0,%1\n\ bne- 1b" - SMP_ISYNC + ISYNC_ON_SMP : "=&r" (t) : "r" (&v->counter) : "cc", "memory"); @@ -189,13 +184,14 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v) int t; __asm__ __volatile__( + EIEIO_ON_SMP "1: lwarx %0,0,%1 # atomic_dec_if_positive\n\ addic. %0,%0,-1\n\ blt- 2f\n" PPC405_ERR77(0,%1) " stwcx. %0,0,%1\n\ bne- 1b" - SMP_ISYNC + ISYNC_ON_SMP "\n\ 2:" : "=&r" (t) : "r" (&v->counter) @@ -204,11 +200,10 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v) return t; } -#define __MB __asm__ __volatile__ (SMP_SYNC : : : "memory") -#define smp_mb__before_atomic_dec() __MB -#define smp_mb__after_atomic_dec() __MB -#define smp_mb__before_atomic_inc() __MB -#define smp_mb__after_atomic_inc() __MB +#define smp_mb__before_atomic_dec() smp_mb() +#define smp_mb__after_atomic_dec() smp_mb() +#define smp_mb__before_atomic_inc() smp_mb() +#define smp_mb__after_atomic_inc() smp_mb() #endif /* __KERNEL__ */ -#endif /* _ASM_PPC_ATOMIC_H_ */ +#endif /* _ASM_POWERPC_ATOMIC_H_ */ diff --git a/include/asm-powerpc/auxvec.h b/include/asm-powerpc/auxvec.h index 19a099b62cd..79d8c473230 100644 --- a/include/asm-powerpc/auxvec.h +++ b/include/asm-powerpc/auxvec.h @@ -14,6 +14,8 @@ /* The vDSO location. We have to use the same value as x86 for glibc's * sake :-) */ +#ifdef __powerpc64__ #define AT_SYSINFO_EHDR 33 +#endif #endif diff --git a/include/asm-ppc/dma.h b/include/asm-powerpc/dma.h index cc8e5cd8c9d..926378d2cd9 100644 --- a/include/asm-ppc/dma.h +++ b/include/asm-powerpc/dma.h @@ -1,18 +1,14 @@ +#ifndef _ASM_POWERPC_DMA_H +#define _ASM_POWERPC_DMA_H + /* - * include/asm-ppc/dma.h: Defines for using and allocating dma channels. + * Defines for using and allocating dma channels. * Written by Hennus Bergman, 1992. * High DMA channel support & info by Hannu Savolainen * and John Boyd, Nov. 1992. * Changes for ppc sound by Christoph Nadig */ -#ifdef __KERNEL__ - -#include <linux/config.h> -#include <asm/io.h> -#include <linux/spinlock.h> -#include <asm/system.h> - /* * Note: Adapted for PowerPC by Gary Thomas * Modified by Cort Dougan <cort@cs.nmt.edu> @@ -25,8 +21,10 @@ * with a grain of salt. */ -#ifndef _ASM_DMA_H -#define _ASM_DMA_H +#include <linux/config.h> +#include <asm/io.h> +#include <linux/spinlock.h> +#include <asm/system.h> #ifndef MAX_DMA_CHANNELS #define MAX_DMA_CHANNELS 8 @@ -34,11 +32,9 @@ /* The maximum address that we can perform a DMA transfer to on this platform */ /* Doesn't really apply... */ -#define MAX_DMA_ADDRESS 0xFFFFFFFF +#define MAX_DMA_ADDRESS (~0UL) -/* in arch/ppc/kernel/setup.c -- Cort */ -extern unsigned long DMA_MODE_WRITE, DMA_MODE_READ; -extern unsigned long ISA_DMA_THRESHOLD; +#if !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI) #ifdef HAVE_REALLY_SLOW_DMA_CONTROLLER #define dma_outb outb_p @@ -171,7 +167,18 @@ extern long ppc_cs4232_dma, ppc_cs4232_dma2; #define DMA1_EXT_REG 0x40B #define DMA2_EXT_REG 0x4D6 +#ifndef __powerpc64__ + /* in arch/ppc/kernel/setup.c -- Cort */ + extern unsigned int DMA_MODE_WRITE; + extern unsigned int DMA_MODE_READ; + extern unsigned long ISA_DMA_THRESHOLD; +#else + #define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ + #define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ +#endif + #define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ + #define DMA_AUTOINIT 0x10 extern spinlock_t dma_spin_lock; @@ -200,8 +207,9 @@ static __inline__ void enable_dma(unsigned int dmanr) if (dmanr <= 3) { dma_outb(dmanr, DMA1_MASK_REG); dma_outb(ucDmaCmd, DMA1_CMD_REG); /* Enable group */ - } else + } else { dma_outb(dmanr & 3, DMA2_MASK_REG); + } } static __inline__ void disable_dma(unsigned int dmanr) @@ -290,19 +298,26 @@ static __inline__ void set_dma_page(unsigned int dmanr, int pagenr) static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int phys) { if (dmanr <= 3) { - dma_outb(phys & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); - dma_outb((phys >> 8) & 0xff, ((dmanr & 3) << 1) + IO_DMA1_BASE); + dma_outb(phys & 0xff, + ((dmanr & 3) << 1) + IO_DMA1_BASE); + dma_outb((phys >> 8) & 0xff, + ((dmanr & 3) << 1) + IO_DMA1_BASE); } else if (dmanr == SND_DMA1 || dmanr == SND_DMA2) { - dma_outb(phys & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); - dma_outb((phys >> 8) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb(phys & 0xff, + ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb((phys >> 8) & 0xff, + ((dmanr & 3) << 2) + IO_DMA2_BASE); dma_outb((dmanr & 3), DMA2_EXT_REG); } else { - dma_outb((phys >> 1) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); - dma_outb((phys >> 9) & 0xff, ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb((phys >> 1) & 0xff, + ((dmanr & 3) << 2) + IO_DMA2_BASE); + dma_outb((phys >> 9) & 0xff, + ((dmanr & 3) << 2) + IO_DMA2_BASE); } set_dma_page(dmanr, phys >> 16); } + /* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for * a specific DMA channel. * You must ensure the parameters are valid. @@ -315,21 +330,24 @@ static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) { count--; if (dmanr <= 3) { - dma_outb(count & 0xff, ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); - dma_outb((count >> 8) & 0xff, ((dmanr & 3) << 1) + 1 + - IO_DMA1_BASE); + dma_outb(count & 0xff, + ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); + dma_outb((count >> 8) & 0xff, + ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE); } else if (dmanr == SND_DMA1 || dmanr == SND_DMA2) { - dma_outb(count & 0xff, ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); - dma_outb((count >> 8) & 0xff, ((dmanr & 3) << 2) + 2 + - IO_DMA2_BASE); + dma_outb(count & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + dma_outb((count >> 8) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); } else { - dma_outb((count >> 1) & 0xff, ((dmanr & 3) << 2) + 2 + - IO_DMA2_BASE); - dma_outb((count >> 9) & 0xff, ((dmanr & 3) << 2) + 2 + - IO_DMA2_BASE); + dma_outb((count >> 1) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); + dma_outb((count >> 9) & 0xff, + ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE); } } + /* Get DMA residue count. After a DMA transfer, this * should return zero. Reading this while a DMA transfer is * still in progress will return unpredictable results. @@ -340,8 +358,8 @@ static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) */ static __inline__ int get_dma_residue(unsigned int dmanr) { - unsigned int io_port = (dmanr <= 3) ? - ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE + unsigned int io_port = (dmanr <= 3) + ? ((dmanr & 3) << 1) + 1 + IO_DMA1_BASE : ((dmanr & 3) << 2) + 2 + IO_DMA2_BASE; /* using short to get 16-bit wrap around */ @@ -352,7 +370,6 @@ static __inline__ int get_dma_residue(unsigned int dmanr) return (dmanr <= 3 || dmanr == SND_DMA1 || dmanr == SND_DMA2) ? count : (count << 1); - } /* These are in kernel/dma.c: */ @@ -367,5 +384,7 @@ extern int isa_dma_bridge_buggy; #else #define isa_dma_bridge_buggy (0) #endif -#endif /* _ASM_DMA_H */ -#endif /* __KERNEL__ */ + +#endif /* !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI) */ + +#endif /* _ASM_POWERPC_DMA_H */ diff --git a/include/asm-ppc64/hw_irq.h b/include/asm-powerpc/hw_irq.h index c483897b875..605a65e4206 100644 --- a/include/asm-ppc64/hw_irq.h +++ b/include/asm-powerpc/hw_irq.h @@ -1,19 +1,15 @@ /* * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> - * - * Use inline IRQs where possible - Anton Blanchard <anton@au.ibm.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ +#ifndef _ASM_POWERPC_HW_IRQ_H +#define _ASM_POWERPC_HW_IRQ_H + #ifdef __KERNEL__ -#ifndef _PPC64_HW_IRQ_H -#define _PPC64_HW_IRQ_H #include <linux/config.h> #include <linux/errno.h> +#include <asm/ptrace.h> +#include <asm/processor.h> #include <asm/irq.h> extern void timer_interrupt(struct pt_regs *); @@ -33,45 +29,60 @@ extern void local_irq_restore(unsigned long); #else -#define local_save_flags(flags) ((flags) = mfmsr()) +#if defined(CONFIG_BOOKE) +#define SET_MSR_EE(x) mtmsr(x) +#define local_irq_restore(flags) __asm__ __volatile__("wrtee %0" : : "r" (flags) : "memory") +#elif defined(__powerpc64__) +#define SET_MSR_EE(x) __mtmsrd(x, 1) #define local_irq_restore(flags) do { \ __asm__ __volatile__("": : :"memory"); \ __mtmsrd((flags), 1); \ } while(0) +#else +#define SET_MSR_EE(x) mtmsr(x) +#define local_irq_restore(flags) mtmsr(flags) +#endif static inline void local_irq_disable(void) { +#ifdef CONFIG_BOOKE + __asm__ __volatile__("wrteei 0": : :"memory"); +#else unsigned long msr; - msr = mfmsr(); - __mtmsrd(msr & ~MSR_EE, 1); __asm__ __volatile__("": : :"memory"); + msr = mfmsr(); + SET_MSR_EE(msr & ~MSR_EE); +#endif } static inline void local_irq_enable(void) { +#ifdef CONFIG_BOOKE + __asm__ __volatile__("wrteei 1": : :"memory"); +#else unsigned long msr; __asm__ __volatile__("": : :"memory"); msr = mfmsr(); - __mtmsrd(msr | MSR_EE, 1); + SET_MSR_EE(msr | MSR_EE); +#endif } -static inline void __do_save_and_cli(unsigned long *flags) +static inline void local_irq_save_ptr(unsigned long *flags) { unsigned long msr; msr = mfmsr(); *flags = msr; - __mtmsrd(msr & ~MSR_EE, 1); +#ifdef CONFIG_BOOKE + __asm__ __volatile__("wrteei 0": : :"memory"); +#else + SET_MSR_EE(msr & ~MSR_EE); +#endif __asm__ __volatile__("": : :"memory"); } -#define local_irq_save(flags) __do_save_and_cli(&flags) - -#define irqs_disabled() \ -({ \ - unsigned long flags; \ - local_save_flags(flags); \ - !(flags & MSR_EE); \ -}) +#define local_save_flags(flags) ((flags) = mfmsr()) +#define local_irq_save(flags) local_irq_save_ptr(&flags) +#define irqs_disabled() ((mfmsr() & MSR_EE) == 0) #endif /* CONFIG_PPC_ISERIES */ @@ -99,6 +110,6 @@ static inline void __do_save_and_cli(unsigned long *flags) */ struct hw_interrupt_type; static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {} - -#endif /* _PPC64_HW_IRQ_H */ -#endif /* __KERNEL__ */ + +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/include/asm-powerpc/kdebug.h b/include/asm-powerpc/kdebug.h new file mode 100644 index 00000000000..7c55abf597f --- /dev/null +++ b/include/asm-powerpc/kdebug.h @@ -0,0 +1,42 @@ +#ifndef _POWERPC_KDEBUG_H +#define _POWERPC_KDEBUG_H 1 + +/* nearly identical to x86_64/i386 code */ + +#include <linux/notifier.h> + +struct pt_regs; + +struct die_args { + struct pt_regs *regs; + const char *str; + long err; + int trapnr; + int signr; +}; + +/* + Note - you should never unregister because that can race with NMIs. + If you really want to do it first unregister - then synchronize_sched - + then free. + */ +int register_die_notifier(struct notifier_block *nb); +extern struct notifier_block *powerpc_die_chain; + +/* Grossly misnamed. */ +enum die_val { + DIE_OOPS = 1, + DIE_IABR_MATCH, + DIE_DABR_MATCH, + DIE_BPT, + DIE_SSTEP, + DIE_PAGE_FAULT, +}; + +static inline int notify_die(enum die_val val,char *str,struct pt_regs *regs,long err,int trap, int sig) +{ + struct die_args args = { .regs=regs, .str=str, .err=err, .trapnr=trap,.signr=sig }; + return notifier_call_chain(&powerpc_die_chain, val, &args); +} + +#endif diff --git a/include/asm-powerpc/kprobes.h b/include/asm-powerpc/kprobes.h new file mode 100644 index 00000000000..d9129d2b038 --- /dev/null +++ b/include/asm-powerpc/kprobes.h @@ -0,0 +1,67 @@ +#ifndef _ASM_KPROBES_H +#define _ASM_KPROBES_H +/* + * Kernel Probes (KProbes) + * include/asm-ppc64/kprobes.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2002, 2004 + * + * 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel + * Probes initial implementation ( includes suggestions from + * Rusty Russell). + * 2004-Nov Modified for PPC64 by Ananth N Mavinakayanahalli + * <ananth@in.ibm.com> + */ +#include <linux/types.h> +#include <linux/ptrace.h> + +struct pt_regs; + +typedef unsigned int kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0x7fe00008 /* trap */ +#define MAX_INSN_SIZE 1 + +#define IS_TW(instr) (((instr) & 0xfc0007fe) == 0x7c000008) +#define IS_TD(instr) (((instr) & 0xfc0007fe) == 0x7c000088) +#define IS_TDI(instr) (((instr) & 0xfc000000) == 0x08000000) +#define IS_TWI(instr) (((instr) & 0xfc000000) == 0x0c000000) + +#define JPROBE_ENTRY(pentry) (kprobe_opcode_t *)((func_descr_t *)pentry) + +#define is_trap(instr) (IS_TW(instr) || IS_TD(instr) || \ + IS_TWI(instr) || IS_TDI(instr)) + +#define ARCH_SUPPORTS_KRETPROBES +void kretprobe_trampoline(void); + +/* Architecture specific copy of original instruction */ +struct arch_specific_insn { + /* copy of original instruction */ + kprobe_opcode_t *insn; +}; + +#ifdef CONFIG_KPROBES +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); +#else /* !CONFIG_KPROBES */ +static inline int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return 0; +} +#endif +#endif /* _ASM_KPROBES_H */ diff --git a/include/asm-powerpc/mpic.h b/include/asm-powerpc/mpic.h new file mode 100644 index 00000000000..f1e24f4b2d1 --- /dev/null +++ b/include/asm-powerpc/mpic.h @@ -0,0 +1,279 @@ +#include <linux/irq.h> + +/* + * Global registers + */ + +#define MPIC_GREG_BASE 0x01000 + +#define MPIC_GREG_FEATURE_0 0x00000 +#define MPIC_GREG_FEATURE_LAST_SRC_MASK 0x07ff0000 +#define MPIC_GREG_FEATURE_LAST_SRC_SHIFT 16 +#define MPIC_GREG_FEATURE_LAST_CPU_MASK 0x00001f00 +#define MPIC_GREG_FEATURE_LAST_CPU_SHIFT 8 +#define MPIC_GREG_FEATURE_VERSION_MASK 0xff +#define MPIC_GREG_FEATURE_1 0x00010 +#define MPIC_GREG_GLOBAL_CONF_0 0x00020 +#define MPIC_GREG_GCONF_RESET 0x80000000 +#define MPIC_GREG_GCONF_8259_PTHROU_DIS 0x20000000 +#define MPIC_GREG_GCONF_BASE_MASK 0x000fffff +#define MPIC_GREG_GLOBAL_CONF_1 0x00030 +#define MPIC_GREG_VENDOR_0 0x00040 +#define MPIC_GREG_VENDOR_1 0x00050 +#define MPIC_GREG_VENDOR_2 0x00060 +#define MPIC_GREG_VENDOR_3 0x00070 +#define MPIC_GREG_VENDOR_ID 0x00080 +#define MPIC_GREG_VENDOR_ID_STEPPING_MASK 0x00ff0000 +#define MPIC_GREG_VENDOR_ID_STEPPING_SHIFT 16 +#define MPIC_GREG_VENDOR_ID_DEVICE_ID_MASK 0x0000ff00 +#define MPIC_GREG_VENDOR_ID_DEVICE_ID_SHIFT 8 +#define MPIC_GREG_VENDOR_ID_VENDOR_ID_MASK 0x000000ff +#define MPIC_GREG_PROCESSOR_INIT 0x00090 +#define MPIC_GREG_IPI_VECTOR_PRI_0 0x000a0 +#define MPIC_GREG_IPI_VECTOR_PRI_1 0x000b0 +#define MPIC_GREG_IPI_VECTOR_PRI_2 0x000c0 +#define MPIC_GREG_IPI_VECTOR_PRI_3 0x000d0 +#define MPIC_GREG_SPURIOUS 0x000e0 +#define MPIC_GREG_TIMER_FREQ 0x000f0 + +/* + * + * Timer registers + */ +#define MPIC_TIMER_BASE 0x01100 +#define MPIC_TIMER_STRIDE 0x40 + +#define MPIC_TIMER_CURRENT_CNT 0x00000 +#define MPIC_TIMER_BASE_CNT 0x00010 +#define MPIC_TIMER_VECTOR_PRI 0x00020 +#define MPIC_TIMER_DESTINATION 0x00030 + +/* + * Per-Processor registers + */ + +#define MPIC_CPU_THISBASE 0x00000 +#define MPIC_CPU_BASE 0x20000 +#define MPIC_CPU_STRIDE 0x01000 + +#define MPIC_CPU_IPI_DISPATCH_0 0x00040 +#define MPIC_CPU_IPI_DISPATCH_1 0x00050 +#define MPIC_CPU_IPI_DISPATCH_2 0x00060 +#define MPIC_CPU_IPI_DISPATCH_3 0x00070 +#define MPIC_CPU_CURRENT_TASK_PRI 0x00080 +#define MPIC_CPU_TASKPRI_MASK 0x0000000f +#define MPIC_CPU_WHOAMI 0x00090 +#define MPIC_CPU_WHOAMI_MASK 0x0000001f +#define MPIC_CPU_INTACK 0x000a0 +#define MPIC_CPU_EOI 0x000b0 + +/* + * Per-source registers + */ + +#define MPIC_IRQ_BASE 0x10000 +#define MPIC_IRQ_STRIDE 0x00020 +#define MPIC_IRQ_VECTOR_PRI 0x00000 +#define MPIC_VECPRI_MASK 0x80000000 +#define MPIC_VECPRI_ACTIVITY 0x40000000 /* Read Only */ +#define MPIC_VECPRI_PRIORITY_MASK 0x000f0000 +#define MPIC_VECPRI_PRIORITY_SHIFT 16 +#define MPIC_VECPRI_VECTOR_MASK 0x000007ff +#define MPIC_VECPRI_POLARITY_POSITIVE 0x00800000 +#define MPIC_VECPRI_POLARITY_NEGATIVE 0x00000000 +#define MPIC_VECPRI_POLARITY_MASK 0x00800000 +#define MPIC_VECPRI_SENSE_LEVEL 0x00400000 +#define MPIC_VECPRI_SENSE_EDGE 0x00000000 +#define MPIC_VECPRI_SENSE_MASK 0x00400000 +#define MPIC_IRQ_DESTINATION 0x00010 + +#define MPIC_MAX_IRQ_SOURCES 2048 +#define MPIC_MAX_CPUS 32 +#define MPIC_MAX_ISU 32 + +/* + * Special vector numbers (internal use only) + */ +#define MPIC_VEC_SPURRIOUS 255 +#define MPIC_VEC_IPI_3 254 +#define MPIC_VEC_IPI_2 253 +#define MPIC_VEC_IPI_1 252 +#define MPIC_VEC_IPI_0 251 + +/* unused */ +#define MPIC_VEC_TIMER_3 250 +#define MPIC_VEC_TIMER_2 249 +#define MPIC_VEC_TIMER_1 248 +#define MPIC_VEC_TIMER_0 247 + +/* Type definition of the cascade handler */ +typedef int (*mpic_cascade_t)(struct pt_regs *regs, void *data); + +#ifdef CONFIG_MPIC_BROKEN_U3 +/* Fixup table entry */ +struct mpic_irq_fixup +{ + u8 __iomem *base; + unsigned int irq; +}; +#endif /* CONFIG_MPIC_BROKEN_U3 */ + + +/* The instance data of a given MPIC */ +struct mpic +{ + /* The "linux" controller struct */ + hw_irq_controller hc_irq; +#ifdef CONFIG_SMP + hw_irq_controller hc_ipi; +#endif + const char *name; + /* Flags */ + unsigned int flags; + /* How many irq sources in a given ISU */ + unsigned int isu_size; + unsigned int isu_shift; + unsigned int isu_mask; + /* Offset of irq vector numbers */ + unsigned int irq_offset; + unsigned int irq_count; + /* Offset of ipi vector numbers */ + unsigned int ipi_offset; + /* Number of sources */ + unsigned int num_sources; + /* Number of CPUs */ + unsigned int num_cpus; + /* cascade handler */ + mpic_cascade_t cascade; + void *cascade_data; + unsigned int cascade_vec; + /* senses array */ + unsigned char *senses; + unsigned int senses_count; + +#ifdef CONFIG_MPIC_BROKEN_U3 + /* The fixup table */ + struct mpic_irq_fixup *fixups; + spinlock_t fixup_lock; +#endif + + /* The various ioremap'ed bases */ + volatile u32 __iomem *gregs; + volatile u32 __iomem *tmregs; + volatile u32 __iomem *cpuregs[MPIC_MAX_CPUS]; + volatile u32 __iomem *isus[MPIC_MAX_ISU]; + + /* link */ + struct mpic *next; +}; + +/* This is the primary controller, only that one has IPIs and + * has afinity control. A non-primary MPIC always uses CPU0 + * registers only + */ +#define MPIC_PRIMARY 0x00000001 +/* Set this for a big-endian MPIC */ +#define MPIC_BIG_ENDIAN 0x00000002 +/* Broken U3 MPIC */ +#define MPIC_BROKEN_U3 0x00000004 +/* Broken IPI registers (autodetected) */ +#define MPIC_BROKEN_IPI 0x00000008 +/* MPIC wants a reset */ +#define MPIC_WANTS_RESET 0x00000010 + +/* Allocate the controller structure and setup the linux irq descs + * for the range if interrupts passed in. No HW initialization is + * actually performed. + * + * @phys_addr: physial base address of the MPIC + * @flags: flags, see constants above + * @isu_size: number of interrupts in an ISU. Use 0 to use a + * standard ISU-less setup (aka powermac) + * @irq_offset: first irq number to assign to this mpic + * @irq_count: number of irqs to use with this mpic IRQ sources. Pass 0 + * to match the number of sources + * @ipi_offset: first irq number to assign to this mpic IPI sources, + * used only on primary mpic + * @senses: array of sense values + * @senses_num: number of entries in the array + * + * Note about the sense array. If none is passed, all interrupts are + * setup to be level negative unless MPIC_BROKEN_U3 is set in which + * case they are edge positive (and the array is ignored anyway). + * The values in the array start at the first source of the MPIC, + * that is senses[0] correspond to linux irq "irq_offset". + */ +extern struct mpic *mpic_alloc(unsigned long phys_addr, + unsigned int flags, + unsigned int isu_size, + unsigned int irq_offset, + unsigned int irq_count, + unsigned int ipi_offset, + unsigned char *senses, + unsigned int senses_num, + const char *name); + +/* Assign ISUs, to call before mpic_init() + * + * @mpic: controller structure as returned by mpic_alloc() + * @isu_num: ISU number + * @phys_addr: physical address of the ISU + */ +extern void mpic_assign_isu(struct mpic *mpic, unsigned int isu_num, + unsigned long phys_addr); + +/* Initialize the controller. After this has been called, none of the above + * should be called again for this mpic + */ +extern void mpic_init(struct mpic *mpic); + +/* Setup a cascade. Currently, only one cascade is supported this + * way, though you can always do a normal request_irq() and add + * other cascades this way. You should call this _after_ having + * added all the ISUs + * + * @irq_no: "linux" irq number of the cascade (that is offset'ed vector) + * @handler: cascade handler function + */ +extern void mpic_setup_cascade(unsigned int irq_no, mpic_cascade_t hanlder, + void *data); + +/* + * All of the following functions must only be used after the + * ISUs have been assigned and the controller fully initialized + * with mpic_init() + */ + + +/* Change/Read the priority of an interrupt. Default is 8 for irqs and + * 10 for IPIs. You can call this on both IPIs and IRQ numbers, but the + * IPI number is then the offset'ed (linux irq number mapped to the IPI) + */ +extern void mpic_irq_set_priority(unsigned int irq, unsigned int pri); +extern unsigned int mpic_irq_get_priority(unsigned int irq); + +/* Setup a non-boot CPU */ +extern void mpic_setup_this_cpu(void); + +/* Clean up for kexec (or cpu offline or ...) */ +extern void mpic_teardown_this_cpu(int secondary); + +/* Get the current cpu priority for this cpu (0..15) */ +extern int mpic_cpu_get_priority(void); + +/* Set the current cpu priority for this cpu */ +extern void mpic_cpu_set_priority(int prio); + +/* Request IPIs on primary mpic */ +extern void mpic_request_ipis(void); + +/* Send an IPI (non offseted number 0..3) */ +extern void mpic_send_ipi(unsigned int ipi_no, unsigned int cpu_mask); + +/* Fetch interrupt from a given mpic */ +extern int mpic_get_one_irq(struct mpic *mpic, struct pt_regs *regs); +/* This one gets to the primary mpic */ +extern int mpic_get_irq(struct pt_regs *regs); + +/* global mpic for pSeries */ +extern struct mpic *pSeries_mpic; diff --git a/include/asm-powerpc/ppc_asm.h b/include/asm-powerpc/ppc_asm.h index 553035cda00..4efa71878fa 100644 --- a/include/asm-powerpc/ppc_asm.h +++ b/include/asm-powerpc/ppc_asm.h @@ -75,8 +75,11 @@ #define REST_32EVRS(n,s,base) REST_16EVRS(n,s,base); REST_16EVRS(n+16,s,base) /* Macros to adjust thread priority for Iseries hardware multithreading */ +#define HMT_VERY_LOW or 31,31,31 # very low priority\n" #define HMT_LOW or 1,1,1 +#define HMT_MEDIUM_LOW or 6,6,6 # medium low priority\n" #define HMT_MEDIUM or 2,2,2 +#define HMT_MEDIUM_HIGH or 5,5,5 # medium high priority\n" #define HMT_HIGH or 3,3,3 /* handle instructions that older assemblers may not know */ diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h new file mode 100644 index 00000000000..f97a5f1761b --- /dev/null +++ b/include/asm-powerpc/reg.h @@ -0,0 +1,446 @@ +/* + * Contains the definition of registers common to all PowerPC variants. + * If a register definition has been changed in a different PowerPC + * variant, we will case it in #ifndef XXX ... #endif, and have the + * number used in the Programming Environments Manual For 32-Bit + * Implementations of the PowerPC Architecture (a.k.a. Green Book) here. + */ + +#ifdef __KERNEL__ +#ifndef __ASM_PPC_REGS_H__ +#define __ASM_PPC_REGS_H__ + +#include <linux/stringify.h> + +/* Pickup Book E specific registers. */ +#if defined(CONFIG_BOOKE) || defined(CONFIG_40x) +#include <asm/reg_booke.h> +#endif + +/* Machine State Register (MSR) Fields */ +#define MSR_SF (1<<63) +#define MSR_ISF (1<<61) +#define MSR_VEC (1<<25) /* Enable AltiVec */ +#define MSR_POW (1<<18) /* Enable Power Management */ +#define MSR_WE (1<<18) /* Wait State Enable */ +#define MSR_TGPR (1<<17) /* TLB Update registers in use */ +#define MSR_CE (1<<17) /* Critical Interrupt Enable */ +#define MSR_ILE (1<<16) /* Interrupt Little Endian */ +#define MSR_EE (1<<15) /* External Interrupt Enable */ +#define MSR_PR (1<<14) /* Problem State / Privilege Level */ +#define MSR_FP (1<<13) /* Floating Point enable */ +#define MSR_ME (1<<12) /* Machine Check Enable */ +#define MSR_FE0 (1<<11) /* Floating Exception mode 0 */ +#define MSR_SE (1<<10) /* Single Step */ +#define MSR_BE (1<<9) /* Branch Trace */ +#define MSR_DE (1<<9) /* Debug Exception Enable */ +#define MSR_FE1 (1<<8) /* Floating Exception mode 1 */ +#define MSR_IP (1<<6) /* Exception prefix 0x000/0xFFF */ +#define MSR_IR (1<<5) /* Instruction Relocate */ +#define MSR_DR (1<<4) /* Data Relocate */ +#define MSR_PE (1<<3) /* Protection Enable */ +#define MSR_PX (1<<2) /* Protection Exclusive Mode */ +#define MSR_RI (1<<1) /* Recoverable Exception */ +#define MSR_LE (1<<0) /* Little Endian */ + +/* Default MSR for kernel mode. */ +#ifdef CONFIG_APUS_FAST_EXCEPT +#define MSR_KERNEL (MSR_ME|MSR_IP|MSR_RI|MSR_IR|MSR_DR) +#endif + +#ifndef MSR_KERNEL +#define MSR_KERNEL (MSR_ME|MSR_RI|MSR_IR|MSR_DR) +#endif + +#define MSR_USER (MSR_KERNEL|MSR_PR|MSR_EE) + +/* Floating Point Status and Control Register (FPSCR) Fields */ +#define FPSCR_FX 0x80000000 /* FPU exception summary */ +#define FPSCR_FEX 0x40000000 /* FPU enabled exception summary */ +#define FPSCR_VX 0x20000000 /* Invalid operation summary */ +#define FPSCR_OX 0x10000000 /* Overflow exception summary */ +#define FPSCR_UX 0x08000000 /* Underflow exception summary */ +#define FPSCR_ZX 0x04000000 /* Zero-divide exception summary */ +#define FPSCR_XX 0x02000000 /* Inexact exception summary */ +#define FPSCR_VXSNAN 0x01000000 /* Invalid op for SNaN */ +#define FPSCR_VXISI 0x00800000 /* Invalid op for Inv - Inv */ +#define FPSCR_VXIDI 0x00400000 /* Invalid op for Inv / Inv */ +#define FPSCR_VXZDZ 0x00200000 /* Invalid op for Zero / Zero */ +#define FPSCR_VXIMZ 0x00100000 /* Invalid op for Inv * Zero */ +#define FPSCR_VXVC 0x00080000 /* Invalid op for Compare */ +#define FPSCR_FR 0x00040000 /* Fraction rounded */ +#define FPSCR_FI 0x00020000 /* Fraction inexact */ +#define FPSCR_FPRF 0x0001f000 /* FPU Result Flags */ +#define FPSCR_FPCC 0x0000f000 /* FPU Condition Codes */ +#define FPSCR_VXSOFT 0x00000400 /* Invalid op for software request */ +#define FPSCR_VXSQRT 0x00000200 /* Invalid op for square root */ +#define FPSCR_VXCVI 0x00000100 /* Invalid op for integer convert */ +#define FPSCR_VE 0x00000080 /* Invalid op exception enable */ +#define FPSCR_OE 0x00000040 /* IEEE overflow exception enable */ +#define FPSCR_UE 0x00000020 /* IEEE underflow exception enable */ +#define FPSCR_ZE 0x00000010 /* IEEE zero divide exception enable */ +#define FPSCR_XE 0x00000008 /* FP inexact exception enable */ +#define FPSCR_NI 0x00000004 /* FPU non IEEE-Mode */ +#define FPSCR_RN 0x00000003 /* FPU rounding control */ + +/* Special Purpose Registers (SPRNs)*/ +#define SPRN_CTR 0x009 /* Count Register */ +#define SPRN_DABR 0x3F5 /* Data Address Breakpoint Register */ +#define DABR_TRANSLATION (1UL << 2) +#define SPRN_DAR 0x013 /* Data Address Register */ +#define SPRN_DSISR 0x012 /* Data Storage Interrupt Status Register */ +#define DSISR_NOHPTE 0x40000000 /* no translation found */ +#define DSISR_PROTFAULT 0x08000000 /* protection fault */ +#define DSISR_ISSTORE 0x02000000 /* access was a store */ +#define DSISR_DABRMATCH 0x00400000 /* hit data breakpoint */ +#define DSISR_NOSEGMENT 0x00200000 /* STAB/SLB miss */ +#define SPRN_TBRL 0x10C /* Time Base Read Lower Register (user, R/O) */ +#define SPRN_TBRU 0x10D /* Time Base Read Upper Register (user, R/O) */ +#define SPRN_TBWL 0x11C /* Time Base Lower Register (super, R/W) */ +#define SPRN_TBWU 0x11D /* Time Base Upper Register (super, R/W) */ +#define SPRN_HIOR 0x137 /* 970 Hypervisor interrupt offset */ +#define SPRN_DBAT0L 0x219 /* Data BAT 0 Lower Register */ +#define SPRN_DBAT0U 0x218 /* Data BAT 0 Upper Register */ +#define SPRN_DBAT1L 0x21B /* Data BAT 1 Lower Register */ +#define SPRN_DBAT1U 0x21A /* Data BAT 1 Upper Register */ +#define SPRN_DBAT2L 0x21D /* Data BAT 2 Lower Register */ +#define SPRN_DBAT2U 0x21C /* Data BAT 2 Upper Register */ +#define SPRN_DBAT3L 0x21F /* Data BAT 3 Lower Register */ +#define SPRN_DBAT3U 0x21E /* Data BAT 3 Upper Register */ +#define SPRN_DBAT4L 0x239 /* Data BAT 4 Lower Register */ +#define SPRN_DBAT4U 0x238 /* Data BAT 4 Upper Register */ +#define SPRN_DBAT5L 0x23B /* Data BAT 5 Lower Register */ +#define SPRN_DBAT5U 0x23A /* Data BAT 5 Upper Register */ +#define SPRN_DBAT6L 0x23D /* Data BAT 6 Lower Register */ +#define SPRN_DBAT6U 0x23C /* Data BAT 6 Upper Register */ +#define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */ +#define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */ + +#define SPRN_DEC 0x016 /* Decrement Register */ +#define SPRN_DER 0x095 /* Debug Enable Regsiter */ +#define DER_RSTE 0x40000000 /* Reset Interrupt */ +#define DER_CHSTPE 0x20000000 /* Check Stop */ +#define DER_MCIE 0x10000000 /* Machine Check Interrupt */ +#define DER_EXTIE 0x02000000 /* External Interrupt */ +#define DER_ALIE 0x01000000 /* Alignment Interrupt */ +#define DER_PRIE 0x00800000 /* Program Interrupt */ +#define DER_FPUVIE 0x00400000 /* FP Unavailable Interrupt */ +#define DER_DECIE 0x00200000 /* Decrementer Interrupt */ +#define DER_SYSIE 0x00040000 /* System Call Interrupt */ +#define DER_TRE 0x00020000 /* Trace Interrupt */ +#define DER_SEIE 0x00004000 /* FP SW Emulation Interrupt */ +#define DER_ITLBMSE 0x00002000 /* Imp. Spec. Instruction TLB Miss */ +#define DER_ITLBERE 0x00001000 /* Imp. Spec. Instruction TLB Error */ +#define DER_DTLBMSE 0x00000800 /* Imp. Spec. Data TLB Miss */ +#define DER_DTLBERE 0x00000400 /* Imp. Spec. Data TLB Error */ +#define DER_LBRKE 0x00000008 /* Load/Store Breakpoint Interrupt */ +#define DER_IBRKE 0x00000004 /* Instruction Breakpoint Interrupt */ +#define DER_EBRKE 0x00000002 /* External Breakpoint Interrupt */ +#define DER_DPIE 0x00000001 /* Dev. Port Nonmaskable Request */ +#define SPRN_DMISS 0x3D0 /* Data TLB Miss Register */ +#define SPRN_EAR 0x11A /* External Address Register */ +#define SPRN_HASH1 0x3D2 /* Primary Hash Address Register */ +#define SPRN_HASH2 0x3D3 /* Secondary Hash Address Resgister */ +#define SPRN_HID0 0x3F0 /* Hardware Implementation Register 0 */ +#define HID0_EMCP (1<<31) /* Enable Machine Check pin */ +#define HID0_EBA (1<<29) /* Enable Bus Address Parity */ +#define HID0_EBD (1<<28) /* Enable Bus Data Parity */ +#define HID0_SBCLK (1<<27) +#define HID0_EICE (1<<26) +#define HID0_TBEN (1<<26) /* Timebase enable - 745x */ +#define HID0_ECLK (1<<25) +#define HID0_PAR (1<<24) +#define HID0_STEN (1<<24) /* Software table search enable - 745x */ +#define HID0_HIGH_BAT (1<<23) /* Enable high BATs - 7455 */ +#define HID0_DOZE (1<<23) +#define HID0_NAP (1<<22) +#define HID0_SLEEP (1<<21) +#define HID0_DPM (1<<20) +#define HID0_BHTCLR (1<<18) /* Clear branch history table - 7450 */ +#define HID0_XAEN (1<<17) /* Extended addressing enable - 7450 */ +#define HID0_NHR (1<<16) /* Not hard reset (software bit-7450)*/ +#define HID0_ICE (1<<15) /* Instruction Cache Enable */ +#define HID0_DCE (1<<14) /* Data Cache Enable */ +#define HID0_ILOCK (1<<13) /* Instruction Cache Lock */ +#define HID0_DLOCK (1<<12) /* Data Cache Lock */ +#define HID0_ICFI (1<<11) /* Instr. Cache Flash Invalidate */ +#define HID0_DCI (1<<10) /* Data Cache Invalidate */ +#define HID0_SPD (1<<9) /* Speculative disable */ +#define HID0_DAPUEN (1<<8) /* Debug APU enable */ +#define HID0_SGE (1<<7) /* Store Gathering Enable */ +#define HID0_SIED (1<<7) /* Serial Instr. Execution [Disable] */ +#define HID0_DFCA (1<<6) /* Data Cache Flush Assist */ +#define HID0_LRSTK (1<<4) /* Link register stack - 745x */ +#define HID0_BTIC (1<<5) /* Branch Target Instr Cache Enable */ +#define HID0_ABE (1<<3) /* Address Broadcast Enable */ +#define HID0_FOLD (1<<3) /* Branch Folding enable - 745x */ +#define HID0_BHTE (1<<2) /* Branch History Table Enable */ +#define HID0_BTCD (1<<1) /* Branch target cache disable */ +#define HID0_NOPDST (1<<1) /* No-op dst, dstt, etc. instr. */ +#define HID0_NOPTI (1<<0) /* No-op dcbt and dcbst instr. */ + +#define SPRN_HID1 0x3F1 /* Hardware Implementation Register 1 */ +#define HID1_EMCP (1<<31) /* 7450 Machine Check Pin Enable */ +#define HID1_DFS (1<<22) /* 7447A Dynamic Frequency Scaling */ +#define HID1_PC0 (1<<16) /* 7450 PLL_CFG[0] */ +#define HID1_PC1 (1<<15) /* 7450 PLL_CFG[1] */ +#define HID1_PC2 (1<<14) /* 7450 PLL_CFG[2] */ +#define HID1_PC3 (1<<13) /* 7450 PLL_CFG[3] */ +#define HID1_SYNCBE (1<<11) /* 7450 ABE for sync, eieio */ +#define HID1_ABE (1<<10) /* 7450 Address Broadcast Enable */ +#define HID1_PS (1<<16) /* 750FX PLL selection */ +#define SPRN_HID2 0x3F8 /* Hardware Implementation Register 2 */ +#define SPRN_IABR 0x3F2 /* Instruction Address Breakpoint Register */ +#define SPRN_HID4 0x3F4 /* 970 HID4 */ +#define SPRN_HID5 0x3F6 /* 970 HID5 */ +#if !defined(SPRN_IAC1) && !defined(SPRN_IAC2) +#define SPRN_IAC1 0x3F4 /* Instruction Address Compare 1 */ +#define SPRN_IAC2 0x3F5 /* Instruction Address Compare 2 */ +#endif +#define SPRN_IBAT0L 0x211 /* Instruction BAT 0 Lower Register */ +#define SPRN_IBAT0U 0x210 /* Instruction BAT 0 Upper Register */ +#define SPRN_IBAT1L 0x213 /* Instruction BAT 1 Lower Register */ +#define SPRN_IBAT1U 0x212 /* Instruction BAT 1 Upper Register */ +#define SPRN_IBAT2L 0x215 /* Instruction BAT 2 Lower Register */ +#define SPRN_IBAT2U 0x214 /* Instruction BAT 2 Upper Register */ +#define SPRN_IBAT3L 0x217 /* Instruction BAT 3 Lower Register */ +#define SPRN_IBAT3U 0x216 /* Instruction BAT 3 Upper Register */ +#define SPRN_IBAT4L 0x231 /* Instruction BAT 4 Lower Register */ +#define SPRN_IBAT4U 0x230 /* Instruction BAT 4 Upper Register */ +#define SPRN_IBAT5L 0x233 /* Instruction BAT 5 Lower Register */ +#define SPRN_IBAT5U 0x232 /* Instruction BAT 5 Upper Register */ +#define SPRN_IBAT6L 0x235 /* Instruction BAT 6 Lower Register */ +#define SPRN_IBAT6U 0x234 /* Instruction BAT 6 Upper Register */ +#define SPRN_IBAT7L 0x237 /* Instruction BAT 7 Lower Register */ +#define SPRN_IBAT7U 0x236 /* Instruction BAT 7 Upper Register */ +#define SPRN_ICMP 0x3D5 /* Instruction TLB Compare Register */ +#define SPRN_ICTC 0x3FB /* Instruction Cache Throttling Control Reg */ +#define SPRN_ICTRL 0x3F3 /* 1011 7450 icache and interrupt ctrl */ +#define ICTRL_EICE 0x08000000 /* enable icache parity errs */ +#define ICTRL_EDC 0x04000000 /* enable dcache parity errs */ +#define ICTRL_EICP 0x00000100 /* enable icache par. check */ +#define SPRN_IMISS 0x3D4 /* Instruction TLB Miss Register */ +#define SPRN_IMMR 0x27E /* Internal Memory Map Register */ +#define SPRN_L2CR 0x3F9 /* Level 2 Cache Control Regsiter */ +#define SPRN_L2CR2 0x3f8 +#define L2CR_L2E 0x80000000 /* L2 enable */ +#define L2CR_L2PE 0x40000000 /* L2 parity enable */ +#define L2CR_L2SIZ_MASK 0x30000000 /* L2 size mask */ +#define L2CR_L2SIZ_256KB 0x10000000 /* L2 size 256KB */ +#define L2CR_L2SIZ_512KB 0x20000000 /* L2 size 512KB */ +#define L2CR_L2SIZ_1MB 0x30000000 /* L2 size 1MB */ +#define L2CR_L2CLK_MASK 0x0e000000 /* L2 clock mask */ +#define L2CR_L2CLK_DISABLED 0x00000000 /* L2 clock disabled */ +#define L2CR_L2CLK_DIV1 0x02000000 /* L2 clock / 1 */ +#define L2CR_L2CLK_DIV1_5 0x04000000 /* L2 clock / 1.5 */ +#define L2CR_L2CLK_DIV2 0x08000000 /* L2 clock / 2 */ +#define L2CR_L2CLK_DIV2_5 0x0a000000 /* L2 clock / 2.5 */ +#define L2CR_L2CLK_DIV3 0x0c000000 /* L2 clock / 3 */ +#define L2CR_L2RAM_MASK 0x01800000 /* L2 RAM type mask */ +#define L2CR_L2RAM_FLOW 0x00000000 /* L2 RAM flow through */ +#define L2CR_L2RAM_PIPE 0x01000000 /* L2 RAM pipelined */ +#define L2CR_L2RAM_PIPE_LW 0x01800000 /* L2 RAM pipelined latewr */ +#define L2CR_L2DO 0x00400000 /* L2 data only */ +#define L2CR_L2I 0x00200000 /* L2 global invalidate */ +#define L2CR_L2CTL 0x00100000 /* L2 RAM control */ +#define L2CR_L2WT 0x00080000 /* L2 write-through */ +#define L2CR_L2TS 0x00040000 /* L2 test support */ +#define L2CR_L2OH_MASK 0x00030000 /* L2 output hold mask */ +#define L2CR_L2OH_0_5 0x00000000 /* L2 output hold 0.5 ns */ +#define L2CR_L2OH_1_0 0x00010000 /* L2 output hold 1.0 ns */ +#define L2CR_L2SL 0x00008000 /* L2 DLL slow */ +#define L2CR_L2DF 0x00004000 /* L2 differential clock */ +#define L2CR_L2BYP 0x00002000 /* L2 DLL bypass */ +#define L2CR_L2IP 0x00000001 /* L2 GI in progress */ +#define L2CR_L2IO_745x 0x00100000 /* L2 instr. only (745x) */ +#define L2CR_L2DO_745x 0x00010000 /* L2 data only (745x) */ +#define L2CR_L2REP_745x 0x00001000 /* L2 repl. algorithm (745x) */ +#define L2CR_L2HWF_745x 0x00000800 /* L2 hardware flush (745x) */ +#define SPRN_L3CR 0x3FA /* Level 3 Cache Control Regsiter */ +#define L3CR_L3E 0x80000000 /* L3 enable */ +#define L3CR_L3PE 0x40000000 /* L3 data parity enable */ +#define L3CR_L3APE 0x20000000 /* L3 addr parity enable */ +#define L3CR_L3SIZ 0x10000000 /* L3 size */ +#define L3CR_L3CLKEN 0x08000000 /* L3 clock enable */ +#define L3CR_L3RES 0x04000000 /* L3 special reserved bit */ +#define L3CR_L3CLKDIV 0x03800000 /* L3 clock divisor */ +#define L3CR_L3IO 0x00400000 /* L3 instruction only */ +#define L3CR_L3SPO 0x00040000 /* L3 sample point override */ +#define L3CR_L3CKSP 0x00030000 /* L3 clock sample point */ +#define L3CR_L3PSP 0x0000e000 /* L3 P-clock sample point */ +#define L3CR_L3REP 0x00001000 /* L3 replacement algorithm */ +#define L3CR_L3HWF 0x00000800 /* L3 hardware flush */ +#define L3CR_L3I 0x00000400 /* L3 global invalidate */ +#define L3CR_L3RT 0x00000300 /* L3 SRAM type */ +#define L3CR_L3NIRCA 0x00000080 /* L3 non-integer ratio clock adj. */ +#define L3CR_L3DO 0x00000040 /* L3 data only mode */ +#define L3CR_PMEN 0x00000004 /* L3 private memory enable */ +#define L3CR_PMSIZ 0x00000001 /* L3 private memory size */ +#define SPRN_MSSCR0 0x3f6 /* Memory Subsystem Control Register 0 */ +#define SPRN_MSSSR0 0x3f7 /* Memory Subsystem Status Register 1 */ +#define SPRN_LDSTCR 0x3f8 /* Load/Store control register */ +#define SPRN_LDSTDB 0x3f4 /* */ +#define SPRN_LR 0x008 /* Link Register */ +#define SPRN_MMCR0 0x3B8 /* Monitor Mode Control Register 0 */ +#define SPRN_MMCR1 0x3BC /* Monitor Mode Control Register 1 */ +#ifndef SPRN_PIR +#define SPRN_PIR 0x3FF /* Processor Identification Register */ +#endif +#define SPRN_PMC1 0x3B9 /* Performance Counter Register 1 */ +#define SPRN_PMC2 0x3BA /* Performance Counter Register 2 */ +#define SPRN_PMC3 0x3BD /* Performance Counter Register 3 */ +#define SPRN_PMC4 0x3BE /* Performance Counter Register 4 */ +#define SPRN_PTEHI 0x3D5 /* 981 7450 PTE HI word (S/W TLB load) */ +#define SPRN_PTELO 0x3D6 /* 982 7450 PTE LO word (S/W TLB load) */ +#define SPRN_PVR 0x11F /* Processor Version Register */ +#define SPRN_RPA 0x3D6 /* Required Physical Address Register */ +#define SPRN_SDA 0x3BF /* Sampled Data Address Register */ +#define SPRN_SDR1 0x019 /* MMU Hash Base Register */ +#define SPRN_SIA 0x3BB /* Sampled Instruction Address Register */ +#define SPRN_SPRG0 0x110 /* Special Purpose Register General 0 */ +#define SPRN_SPRG1 0x111 /* Special Purpose Register General 1 */ +#define SPRN_SPRG2 0x112 /* Special Purpose Register General 2 */ +#define SPRN_SPRG3 0x113 /* Special Purpose Register General 3 */ +#define SPRN_SPRG4 0x114 /* Special Purpose Register General 4 */ +#define SPRN_SPRG5 0x115 /* Special Purpose Register General 5 */ +#define SPRN_SPRG6 0x116 /* Special Purpose Register General 6 */ +#define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */ +#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ +#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ +#ifndef SPRN_SVR +#define SPRN_SVR 0x11E /* System Version Register */ +#endif +#define SPRN_THRM1 0x3FC /* Thermal Management Register 1 */ +/* these bits were defined in inverted endian sense originally, ugh, confusing */ +#define THRM1_TIN (1 << 31) +#define THRM1_TIV (1 << 30) +#define THRM1_THRES(x) ((x&0x7f)<<23) +#define THRM3_SITV(x) ((x&0x3fff)<<1) +#define THRM1_TID (1<<2) +#define THRM1_TIE (1<<1) +#define THRM1_V (1<<0) +#define SPRN_THRM2 0x3FD /* Thermal Management Register 2 */ +#define SPRN_THRM3 0x3FE /* Thermal Management Register 3 */ +#define THRM3_E (1<<0) +#define SPRN_TLBMISS 0x3D4 /* 980 7450 TLB Miss Register */ +#define SPRN_UMMCR0 0x3A8 /* User Monitor Mode Control Register 0 */ +#define SPRN_UMMCR1 0x3AC /* User Monitor Mode Control Register 0 */ +#define SPRN_UPMC1 0x3A9 /* User Performance Counter Register 1 */ +#define SPRN_UPMC2 0x3AA /* User Performance Counter Register 2 */ +#define SPRN_UPMC3 0x3AD /* User Performance Counter Register 3 */ +#define SPRN_UPMC4 0x3AE /* User Performance Counter Register 4 */ +#define SPRN_USIA 0x3AB /* User Sampled Instruction Address Register */ +#define SPRN_VRSAVE 0x100 /* Vector Register Save Register */ +#define SPRN_XER 0x001 /* Fixed Point Exception Register */ + +/* Bit definitions for MMCR0 and PMC1 / PMC2. */ +#define MMCR0_PMC1_CYCLES (1 << 7) +#define MMCR0_PMC1_ICACHEMISS (5 << 7) +#define MMCR0_PMC1_DTLB (6 << 7) +#define MMCR0_PMC2_DCACHEMISS 0x6 +#define MMCR0_PMC2_CYCLES 0x1 +#define MMCR0_PMC2_ITLB 0x7 +#define MMCR0_PMC2_LOADMISSTIME 0x5 +#define MMCR0_PMXE (1 << 26) + +/* Processor Version Register */ + +/* Processor Version Register (PVR) field extraction */ + +#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ +#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ + +/* + * IBM has further subdivided the standard PowerPC 16-bit version and + * revision subfields of the PVR for the PowerPC 403s into the following: + */ + +#define PVR_FAM(pvr) (((pvr) >> 20) & 0xFFF) /* Family field */ +#define PVR_MEM(pvr) (((pvr) >> 16) & 0xF) /* Member field */ +#define PVR_CORE(pvr) (((pvr) >> 12) & 0xF) /* Core field */ +#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */ +#define PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */ +#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */ + +/* Processor Version Numbers */ + +#define PVR_403GA 0x00200000 +#define PVR_403GB 0x00200100 +#define PVR_403GC 0x00200200 +#define PVR_403GCX 0x00201400 +#define PVR_405GP 0x40110000 +#define PVR_STB03XXX 0x40310000 +#define PVR_NP405H 0x41410000 +#define PVR_NP405L 0x41610000 +#define PVR_601 0x00010000 +#define PVR_602 0x00050000 +#define PVR_603 0x00030000 +#define PVR_603e 0x00060000 +#define PVR_603ev 0x00070000 +#define PVR_603r 0x00071000 +#define PVR_604 0x00040000 +#define PVR_604e 0x00090000 +#define PVR_604r 0x000A0000 +#define PVR_620 0x00140000 +#define PVR_740 0x00080000 +#define PVR_750 PVR_740 +#define PVR_740P 0x10080000 +#define PVR_750P PVR_740P +#define PVR_7400 0x000C0000 +#define PVR_7410 0x800C0000 +#define PVR_7450 0x80000000 +#define PVR_8540 0x80200000 +#define PVR_8560 0x80200000 +/* + * For the 8xx processors, all of them report the same PVR family for + * the PowerPC core. The various versions of these processors must be + * differentiated by the version number in the Communication Processor + * Module (CPM). + */ +#define PVR_821 0x00500000 +#define PVR_823 PVR_821 +#define PVR_850 PVR_821 +#define PVR_860 PVR_821 +#define PVR_8240 0x00810100 +#define PVR_8245 0x80811014 +#define PVR_8260 PVR_8240 + +#if 0 +/* Segment Registers */ +#define SR0 0 +#define SR1 1 +#define SR2 2 +#define SR3 3 +#define SR4 4 +#define SR5 5 +#define SR6 6 +#define SR7 7 +#define SR8 8 +#define SR9 9 +#define SR10 10 +#define SR11 11 +#define SR12 12 +#define SR13 13 +#define SR14 14 +#define SR15 15 +#endif + +/* Macros for setting and retrieving special purpose registers */ +#ifndef __ASSEMBLY__ +#define mfmsr() ({unsigned int rval; \ + asm volatile("mfmsr %0" : "=r" (rval)); rval;}) +#define mtmsr(v) asm volatile("mtmsr %0" : : "r" (v)) + +#define mfspr(rn) ({unsigned int rval; \ + asm volatile("mfspr %0," __stringify(rn) \ + : "=r" (rval)); rval;}) +#define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : : "r" (v)) + +#define mfsrin(v) ({unsigned int rval; \ + asm volatile("mfsrin %0,%1" : "=r" (rval) : "r" (v)); \ + rval;}) + +#define proc_trap() asm volatile("trap") +#endif /* __ASSEMBLY__ */ +#endif /* __ASM_PPC_REGS_H__ */ +#endif /* __KERNEL__ */ diff --git a/include/asm-ppc64/rwsem.h b/include/asm-powerpc/rwsem.h index bd5c2f09357..0a5b83a3c94 100644 --- a/include/asm-ppc64/rwsem.h +++ b/include/asm-powerpc/rwsem.h @@ -1,18 +1,14 @@ +#ifndef _ASM_POWERPC_RWSEM_H +#define _ASM_POWERPC_RWSEM_H + +#ifdef __KERNEL__ + /* * include/asm-ppc64/rwsem.h: R/W semaphores for PPC using the stuff * in lib/rwsem.c. Adapted largely from include/asm-i386/rwsem.h * by Paul Mackerras <paulus@samba.org>. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. */ -#ifndef _PPC64_RWSEM_H -#define _PPC64_RWSEM_H - -#ifdef __KERNEL__ #include <linux/list.h> #include <linux/spinlock.h> #include <asm/atomic.h> @@ -163,5 +159,5 @@ static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) return atomic_add_return(delta, (atomic_t *)(&sem->count)); } -#endif /* __KERNEL__ */ -#endif /* _PPC_RWSEM_XADD_H */ +#endif /* __KERNEL__ */ +#endif /* _ASM_POWERPC_RWSEM_H */ diff --git a/include/asm-ppc64/seccomp.h b/include/asm-powerpc/seccomp.h index c130c334bda..1e1cfe12882 100644 --- a/include/asm-ppc64/seccomp.h +++ b/include/asm-powerpc/seccomp.h @@ -1,11 +1,6 @@ -#ifndef _ASM_SECCOMP_H - -#include <linux/thread_info.h> /* already defines TIF_32BIT */ - -#ifndef TIF_32BIT -#error "unexpected TIF_32BIT on ppc64" -#endif +#ifndef _ASM_POWERPC_SECCOMP_H +#include <linux/thread_info.h> #include <linux/unistd.h> #define __NR_seccomp_read __NR_read @@ -18,4 +13,4 @@ #define __NR_seccomp_exit_32 __NR_exit #define __NR_seccomp_sigreturn_32 __NR_sigreturn -#endif /* _ASM_SECCOMP_H */ +#endif /* _ASM_POWERPC_SECCOMP_H */ diff --git a/include/asm-ppc64/semaphore.h b/include/asm-powerpc/semaphore.h index aefe7753ea4..fd42fe97158 100644 --- a/include/asm-ppc64/semaphore.h +++ b/include/asm-powerpc/semaphore.h @@ -1,5 +1,5 @@ -#ifndef _PPC64_SEMAPHORE_H -#define _PPC64_SEMAPHORE_H +#ifndef _ASM_POWERPC_SEMAPHORE_H +#define _ASM_POWERPC_SEMAPHORE_H /* * Remove spinlock-based RW semaphores; RW semaphore definitions are @@ -95,4 +95,4 @@ static inline void up(struct semaphore * sem) #endif /* __KERNEL__ */ -#endif /* !(_PPC64_SEMAPHORE_H) */ +#endif /* _ASM_POWERPC_SEMAPHORE_H */ diff --git a/include/asm-powerpc/synch.h b/include/asm-powerpc/synch.h new file mode 100644 index 00000000000..4660c0394a7 --- /dev/null +++ b/include/asm-powerpc/synch.h @@ -0,0 +1,51 @@ +#ifndef _ASM_POWERPC_SYNCH_H +#define _ASM_POWERPC_SYNCH_H + +#include <linux/config.h> + +#ifdef __powerpc64__ +#define __SUBARCH_HAS_LWSYNC +#endif + +#ifdef __SUBARCH_HAS_LWSYNC +# define LWSYNC lwsync +#else +# define LWSYNC sync +#endif + + +/* + * Arguably the bitops and *xchg operations don't imply any memory barrier + * or SMP ordering, but in fact a lot of drivers expect them to imply + * both, since they do on x86 cpus. + */ +#ifdef CONFIG_SMP +#define EIEIO_ON_SMP "eieio\n" +#define ISYNC_ON_SMP "\n\tisync" +#define SYNC_ON_SMP __stringify(LWSYNC) "\n" +#else +#define EIEIO_ON_SMP +#define ISYNC_ON_SMP +#define SYNC_ON_SMP +#endif + +static inline void eieio(void) +{ + __asm__ __volatile__ ("eieio" : : : "memory"); +} + +static inline void isync(void) +{ + __asm__ __volatile__ ("isync" : : : "memory"); +} + +#ifdef CONFIG_SMP +#define eieio_on_smp() eieio() +#define isync_on_smp() isync() +#else +#define eieio_on_smp() __asm__ __volatile__("": : :"memory") +#define isync_on_smp() __asm__ __volatile__("": : :"memory") +#endif + +#endif /* _ASM_POWERPC_SYNCH_H */ + diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h new file mode 100644 index 00000000000..be542efb32d --- /dev/null +++ b/include/asm-powerpc/system.h @@ -0,0 +1,350 @@ +/* + * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> + */ +#ifndef __PPC_SYSTEM_H +#define __PPC_SYSTEM_H + +#include <linux/config.h> +#include <linux/kernel.h> + +#include <asm/hw_irq.h> +#include <asm/ppc_asm.h> + +/* + * Memory barrier. + * The sync instruction guarantees that all memory accesses initiated + * by this processor have been performed (with respect to all other + * mechanisms that access memory). The eieio instruction is a barrier + * providing an ordering (separately) for (a) cacheable stores and (b) + * loads and stores to non-cacheable memory (e.g. I/O devices). + * + * mb() prevents loads and stores being reordered across this point. + * rmb() prevents loads being reordered across this point. + * wmb() prevents stores being reordered across this point. + * read_barrier_depends() prevents data-dependent loads being reordered + * across this point (nop on PPC). + * + * We have to use the sync instructions for mb(), since lwsync doesn't + * order loads with respect to previous stores. Lwsync is fine for + * rmb(), though. Note that lwsync is interpreted as sync by + * 32-bit and older 64-bit CPUs. + * + * For wmb(), we use sync since wmb is used in drivers to order + * stores to system memory with respect to writes to the device. + * However, smp_wmb() can be a lighter-weight eieio barrier on + * SMP since it is only used to order updates to system memory. + */ +#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define rmb() __asm__ __volatile__ ("lwsync" : : : "memory") +#define wmb() __asm__ __volatile__ ("sync" : : : "memory") +#define read_barrier_depends() do { } while(0) + +#define set_mb(var, value) do { var = value; mb(); } while (0) +#define set_wmb(var, value) do { var = value; wmb(); } while (0) + +#ifdef CONFIG_SMP +#define smp_mb() mb() +#define smp_rmb() rmb() +#define smp_wmb() __asm__ __volatile__ ("eieio" : : : "memory") +#define smp_read_barrier_depends() read_barrier_depends() +#else +#define smp_mb() barrier() +#define smp_rmb() barrier() +#define smp_wmb() barrier() +#define smp_read_barrier_depends() do { } while(0) +#endif /* CONFIG_SMP */ + +#ifdef __KERNEL__ +struct task_struct; +struct pt_regs; + +#ifdef CONFIG_DEBUGGER + +extern int (*__debugger)(struct pt_regs *regs); +extern int (*__debugger_ipi)(struct pt_regs *regs); +extern int (*__debugger_bpt)(struct pt_regs *regs); +extern int (*__debugger_sstep)(struct pt_regs *regs); +extern int (*__debugger_iabr_match)(struct pt_regs *regs); +extern int (*__debugger_dabr_match)(struct pt_regs *regs); +extern int (*__debugger_fault_handler)(struct pt_regs *regs); + +#define DEBUGGER_BOILERPLATE(__NAME) \ +static inline int __NAME(struct pt_regs *regs) \ +{ \ + if (unlikely(__ ## __NAME)) \ + return __ ## __NAME(regs); \ + return 0; \ +} + +DEBUGGER_BOILERPLATE(debugger) +DEBUGGER_BOILERPLATE(debugger_ipi) +DEBUGGER_BOILERPLATE(debugger_bpt) +DEBUGGER_BOILERPLATE(debugger_sstep) +DEBUGGER_BOILERPLATE(debugger_iabr_match) +DEBUGGER_BOILERPLATE(debugger_dabr_match) +DEBUGGER_BOILERPLATE(debugger_fault_handler) + +#ifdef CONFIG_XMON +extern void xmon_init(int enable); +#endif + +#else +static inline int debugger(struct pt_regs *regs) { return 0; } +static inline int debugger_ipi(struct pt_regs *regs) { return 0; } +static inline int debugger_bpt(struct pt_regs *regs) { return 0; } +static inline int debugger_sstep(struct pt_regs *regs) { return 0; } +static inline int debugger_iabr_match(struct pt_regs *regs) { return 0; } +static inline int debugger_dabr_match(struct pt_regs *regs) { return 0; } +static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } +#endif + +extern int set_dabr(unsigned long dabr); +extern void print_backtrace(unsigned long *); +extern void show_regs(struct pt_regs * regs); +extern void flush_instruction_cache(void); +extern void hard_reset_now(void); +extern void poweroff_now(void); + +#ifdef CONFIG_6xx +extern long _get_L2CR(void); +extern long _get_L3CR(void); +extern void _set_L2CR(unsigned long); +extern void _set_L3CR(unsigned long); +#else +#define _get_L2CR() 0L +#define _get_L3CR() 0L +#define _set_L2CR(val) do { } while(0) +#define _set_L3CR(val) do { } while(0) +#endif + +extern void via_cuda_init(void); +extern void pmac_nvram_init(void); +extern void read_rtc_time(void); +extern void pmac_find_display(void); +extern void giveup_fpu(struct task_struct *); +extern void enable_kernel_fp(void); +extern void flush_fp_to_thread(struct task_struct *); +extern void enable_kernel_altivec(void); +extern void giveup_altivec(struct task_struct *); +extern void load_up_altivec(struct task_struct *); +extern void giveup_spe(struct task_struct *); +extern void load_up_spe(struct task_struct *); +extern int fix_alignment(struct pt_regs *); +extern void cvt_fd(float *from, double *to, unsigned long *fpscr); +extern void cvt_df(double *from, float *to, unsigned long *fpscr); + +#ifdef CONFIG_ALTIVEC +extern void flush_altivec_to_thread(struct task_struct *); +#else +static inline void flush_altivec_to_thread(struct task_struct *t) +{ +} +#endif + +#ifdef CONFIG_SPE +extern void flush_spe_to_thread(struct task_struct *); +#else +static inline void flush_spe_to_thread(struct task_struct *t) +{ +} +#endif + +extern int call_rtas(const char *, int, int, unsigned long *, ...); +extern void cacheable_memzero(void *p, unsigned int nb); +extern void *cacheable_memcpy(void *, const void *, unsigned int); +extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); +extern void bad_page_fault(struct pt_regs *, unsigned long, int); +extern int die(const char *, struct pt_regs *, long); +extern void _exception(int, struct pt_regs *, int, unsigned long); +#ifdef CONFIG_BOOKE_WDT +extern u32 booke_wdt_enabled; +extern u32 booke_wdt_period; +#endif /* CONFIG_BOOKE_WDT */ + +/* EBCDIC -> ASCII conversion for [0-9A-Z] on iSeries */ +extern unsigned char e2a(unsigned char); + +struct device_node; +extern void note_scsi_host(struct device_node *, void *); + +extern struct task_struct *__switch_to(struct task_struct *, + struct task_struct *); +#define switch_to(prev, next, last) ((last) = __switch_to((prev), (next))) + +struct thread_struct; +extern struct task_struct *_switch(struct thread_struct *prev, + struct thread_struct *next); + +extern unsigned int rtas_data; + +/* + * Atomic exchange + * + * Changes the memory location '*ptr' to be val and returns + * the previous value stored there. + */ +static __inline__ unsigned long +__xchg_u32(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( + EIEIO_ON_SMP +"1: lwarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stwcx. %3,0,%2 \n\ + bne- 1b" + ISYNC_ON_SMP + : "=&r" (prev), "=m" (*(volatile unsigned int *)p) + : "r" (p), "r" (val), "m" (*(volatile unsigned int *)p) + : "cc", "memory"); + + return prev; +} + +#ifdef CONFIG_PPC64 +static __inline__ unsigned long +__xchg_u64(volatile void *p, unsigned long val) +{ + unsigned long prev; + + __asm__ __volatile__( + EIEIO_ON_SMP +"1: ldarx %0,0,%2 \n" + PPC405_ERR77(0,%2) +" stdcx. %3,0,%2 \n\ + bne- 1b" + ISYNC_ON_SMP + : "=&r" (prev), "=m" (*(volatile unsigned long *)p) + : "r" (p), "r" (val), "m" (*(volatile unsigned long *)p) + : "cc", "memory"); + + return prev; +} +#endif + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid xchg(). + */ +extern void __xchg_called_with_bad_pointer(void); + +static __inline__ unsigned long +__xchg(volatile void *ptr, unsigned long x, unsigned int size) +{ + switch (size) { + case 4: + return __xchg_u32(ptr, x); +#ifdef CONFIG_PPC64 + case 8: + return __xchg_u64(ptr, x); +#endif + } + __xchg_called_with_bad_pointer(); + return x; +} + +#define xchg(ptr,x) \ + ({ \ + __typeof__(*(ptr)) _x_ = (x); \ + (__typeof__(*(ptr))) __xchg((ptr), (unsigned long)_x_, sizeof(*(ptr))); \ + }) + +#define tas(ptr) (xchg((ptr),1)) + +/* + * Compare and exchange - if *p == old, set it to new, + * and return the old value of *p. + */ +#define __HAVE_ARCH_CMPXCHG 1 + +static __inline__ unsigned long +__cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new) +{ + unsigned int prev; + + __asm__ __volatile__ ( + EIEIO_ON_SMP +"1: lwarx %0,0,%2 # __cmpxchg_u32\n\ + cmpw 0,%0,%3\n\ + bne- 2f\n" + PPC405_ERR77(0,%2) +" stwcx. %4,0,%2\n\ + bne- 1b" + ISYNC_ON_SMP + "\n\ +2:" + : "=&r" (prev), "=m" (*p) + : "r" (p), "r" (old), "r" (new), "m" (*p) + : "cc", "memory"); + + return prev; +} + +#ifdef CONFIG_PPC64 +static __inline__ unsigned long +__cmpxchg_u64(volatile long *p, unsigned long old, unsigned long new) +{ + unsigned long prev; + + __asm__ __volatile__ ( + EIEIO_ON_SMP +"1: ldarx %0,0,%2 # __cmpxchg_u64\n\ + cmpd 0,%0,%3\n\ + bne- 2f\n\ + stdcx. %4,0,%2\n\ + bne- 1b" + ISYNC_ON_SMP + "\n\ +2:" + : "=&r" (prev), "=m" (*p) + : "r" (p), "r" (old), "r" (new), "m" (*p) + : "cc", "memory"); + + return prev; +} +#endif + +/* This function doesn't exist, so you'll get a linker error + if something tries to do an invalid cmpxchg(). */ +extern void __cmpxchg_called_with_bad_pointer(void); + +static __inline__ unsigned long +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, + unsigned int size) +{ + switch (size) { + case 4: + return __cmpxchg_u32(ptr, old, new); +#ifdef CONFIG_PPC64 + case 8: + return __cmpxchg_u64(ptr, old, new); +#endif + } + __cmpxchg_called_with_bad_pointer(); + return old; +} + +#define cmpxchg(ptr,o,n) \ + ({ \ + __typeof__(*(ptr)) _o_ = (o); \ + __typeof__(*(ptr)) _n_ = (n); \ + (__typeof__(*(ptr))) __cmpxchg((ptr), (unsigned long)_o_, \ + (unsigned long)_n_, sizeof(*(ptr))); \ + }) + +#ifdef CONFIG_PPC64 +/* + * We handle most unaligned accesses in hardware. On the other hand + * unaligned DMA can be very expensive on some ppc64 IO chips (it does + * powers of 2 writes until it reaches sufficient alignment). + * + * Based on this we disable the IP header alignment in network drivers. + */ +#define NET_IP_ALIGN 0 +#endif + +#define arch_align_stack(x) (x) + +#endif /* __KERNEL__ */ +#endif /* __PPC_SYSTEM_H */ diff --git a/include/asm-ppc/futex.h b/include/asm-ppc/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-ppc/futex.h +++ b/include/asm-ppc/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-ppc/hw_irq.h b/include/asm-ppc/hw_irq.h deleted file mode 100644 index da0fa940adb..00000000000 --- a/include/asm-ppc/hw_irq.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> - */ -#ifdef __KERNEL__ -#ifndef _PPC_HW_IRQ_H -#define _PPC_HW_IRQ_H - -#include <asm/ptrace.h> -#include <asm/reg.h> -#include <asm/irq.h> - -extern void timer_interrupt(struct pt_regs *); - -#define irqs_disabled() ((mfmsr() & MSR_EE) == 0) - -static inline void local_irq_disable(void) -{ - unsigned long msr; - msr = mfmsr(); - mtmsr(msr & ~MSR_EE); - __asm__ __volatile__("": : :"memory"); -} - -static inline void local_irq_enable(void) -{ - unsigned long msr; - __asm__ __volatile__("": : :"memory"); - msr = mfmsr(); - mtmsr(msr | MSR_EE); -} - -static inline void local_irq_save_ptr(unsigned long *flags) -{ - unsigned long msr; - msr = mfmsr(); - *flags = msr; - mtmsr(msr & ~MSR_EE); - __asm__ __volatile__("": : :"memory"); -} - -#define local_save_flags(flags) ((flags) = mfmsr()) -#define local_irq_save(flags) local_irq_save_ptr(&flags) -#define local_irq_restore(flags) mtmsr(flags) - -extern void do_lost_interrupts(unsigned long); - -#define mask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->disable) irq_desc[irq].handler->disable(irq);}) -#define unmask_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->enable) irq_desc[irq].handler->enable(irq);}) -#define ack_irq(irq) ({if (irq_desc[irq].handler && irq_desc[irq].handler->ack) irq_desc[irq].handler->ack(irq);}) - -/* Should we handle this via lost interrupts and IPIs or should we don't care like - * we do now ? --BenH. - */ -struct hw_interrupt_type; -static inline void hw_resend_irq(struct hw_interrupt_type *h, unsigned int i) {} - - -#endif /* _PPC_HW_IRQ_H */ -#endif /* __KERNEL__ */ diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index 7eb7cf6360b..39caf067a31 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -8,6 +8,7 @@ #include <asm/page.h> #include <asm/byteorder.h> +#include <asm/synch.h> #include <asm/mmu.h> #define SIO_CONFIG_RA 0x398 @@ -440,16 +441,6 @@ extern inline void * phys_to_virt(unsigned long address) #define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) #define page_to_bus(page) (page_to_phys(page) + PCI_DRAM_OFFSET) -/* - * Enforce In-order Execution of I/O: - * Acts as a barrier to ensure all previous I/O accesses have - * completed before any further ones are issued. - */ -extern inline void eieio(void) -{ - __asm__ __volatile__ ("eieio" : : : "memory"); -} - /* Enforce in-order execution of data I/O. * No distinction between read/write on PPC; use eieio for all three. */ diff --git a/include/asm-ppc/irq.h b/include/asm-ppc/irq.h index bd9674807f0..137ea0cf34d 100644 --- a/include/asm-ppc/irq.h +++ b/include/asm-ppc/irq.h @@ -24,6 +24,12 @@ */ #define ARCH_HAS_IRQ_PER_CPU +#define get_irq_desc(irq) (&irq_desc[(irq)]) + +/* Define a way to iterate across irqs. */ +#define for_each_irq(i) \ + for ((i) = 0; (i) < NR_IRQS; ++(i)) + #if defined(CONFIG_40x) #include <asm/ibm4xx.h> diff --git a/include/asm-ppc/macio.h b/include/asm-ppc/macio.h index a481b772d15..b553dd4b139 100644 --- a/include/asm-ppc/macio.h +++ b/include/asm-ppc/macio.h @@ -1,7 +1,6 @@ #ifndef __MACIO_ASIC_H__ #define __MACIO_ASIC_H__ -#include <linux/mod_devicetable.h> #include <asm/of_device.h> extern struct bus_type macio_bus_type; diff --git a/include/asm-ppc/of_device.h b/include/asm-ppc/of_device.h index 4b264cfd399..575bce418f8 100644 --- a/include/asm-ppc/of_device.h +++ b/include/asm-ppc/of_device.h @@ -2,6 +2,7 @@ #define __OF_DEVICE_H__ #include <linux/device.h> +#include <linux/mod_devicetable.h> #include <asm/prom.h> /* @@ -55,7 +56,9 @@ extern int of_register_driver(struct of_platform_driver *drv); extern void of_unregister_driver(struct of_platform_driver *drv); extern int of_device_register(struct of_device *ofdev); extern void of_device_unregister(struct of_device *ofdev); -extern struct of_device *of_platform_device_create(struct device_node *np, const char *bus_id); +extern struct of_device *of_platform_device_create(struct device_node *np, + const char *bus_id, + struct device *parent); extern void of_release_dev(struct device *dev); #endif /* __OF_DEVICE_H__ */ diff --git a/include/asm-ppc/rwsem.h b/include/asm-ppc/rwsem.h deleted file mode 100644 index 3e738f483c1..00000000000 --- a/include/asm-ppc/rwsem.h +++ /dev/null @@ -1,172 +0,0 @@ -/* - * include/asm-ppc/rwsem.h: R/W semaphores for PPC using the stuff - * in lib/rwsem.c. Adapted largely from include/asm-i386/rwsem.h - * by Paul Mackerras <paulus@samba.org>. - */ - -#ifndef _PPC_RWSEM_H -#define _PPC_RWSEM_H - -#ifdef __KERNEL__ -#include <linux/list.h> -#include <linux/spinlock.h> -#include <asm/atomic.h> -#include <asm/system.h> - -/* - * the semaphore definition - */ -struct rw_semaphore { - /* XXX this should be able to be an atomic_t -- paulus */ - signed long count; -#define RWSEM_UNLOCKED_VALUE 0x00000000 -#define RWSEM_ACTIVE_BIAS 0x00000001 -#define RWSEM_ACTIVE_MASK 0x0000ffff -#define RWSEM_WAITING_BIAS (-0x00010000) -#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS -#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) - spinlock_t wait_lock; - struct list_head wait_list; -#if RWSEM_DEBUG - int debug; -#endif -}; - -/* - * initialisation - */ -#if RWSEM_DEBUG -#define __RWSEM_DEBUG_INIT , 0 -#else -#define __RWSEM_DEBUG_INIT /* */ -#endif - -#define __RWSEM_INITIALIZER(name) \ - { RWSEM_UNLOCKED_VALUE, SPIN_LOCK_UNLOCKED, \ - LIST_HEAD_INIT((name).wait_list) \ - __RWSEM_DEBUG_INIT } - -#define DECLARE_RWSEM(name) \ - struct rw_semaphore name = __RWSEM_INITIALIZER(name) - -extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); -extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); - -static inline void init_rwsem(struct rw_semaphore *sem) -{ - sem->count = RWSEM_UNLOCKED_VALUE; - spin_lock_init(&sem->wait_lock); - INIT_LIST_HEAD(&sem->wait_list); -#if RWSEM_DEBUG - sem->debug = 0; -#endif -} - -/* - * lock for reading - */ -static inline void __down_read(struct rw_semaphore *sem) -{ - if (atomic_inc_return((atomic_t *)(&sem->count)) > 0) - smp_wmb(); - else - rwsem_down_read_failed(sem); -} - -static inline int __down_read_trylock(struct rw_semaphore *sem) -{ - int tmp; - - while ((tmp = sem->count) >= 0) { - if (tmp == cmpxchg(&sem->count, tmp, - tmp + RWSEM_ACTIVE_READ_BIAS)) { - smp_wmb(); - return 1; - } - } - return 0; -} - -/* - * lock for writing - */ -static inline void __down_write(struct rw_semaphore *sem) -{ - int tmp; - - tmp = atomic_add_return(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_t *)(&sem->count)); - if (tmp == RWSEM_ACTIVE_WRITE_BIAS) - smp_wmb(); - else - rwsem_down_write_failed(sem); -} - -static inline int __down_write_trylock(struct rw_semaphore *sem) -{ - int tmp; - - tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, - RWSEM_ACTIVE_WRITE_BIAS); - smp_wmb(); - return tmp == RWSEM_UNLOCKED_VALUE; -} - -/* - * unlock after reading - */ -static inline void __up_read(struct rw_semaphore *sem) -{ - int tmp; - - smp_wmb(); - tmp = atomic_dec_return((atomic_t *)(&sem->count)); - if (tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0) - rwsem_wake(sem); -} - -/* - * unlock after writing - */ -static inline void __up_write(struct rw_semaphore *sem) -{ - smp_wmb(); - if (atomic_sub_return(RWSEM_ACTIVE_WRITE_BIAS, - (atomic_t *)(&sem->count)) < 0) - rwsem_wake(sem); -} - -/* - * implement atomic add functionality - */ -static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) -{ - atomic_add(delta, (atomic_t *)(&sem->count)); -} - -/* - * downgrade write lock to read lock - */ -static inline void __downgrade_write(struct rw_semaphore *sem) -{ - int tmp; - - smp_wmb(); - tmp = atomic_add_return(-RWSEM_WAITING_BIAS, (atomic_t *)(&sem->count)); - if (tmp < 0) - rwsem_downgrade_wake(sem); -} - -/* - * implement exchange and add functionality - */ -static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) -{ - smp_mb(); - return atomic_add_return(delta, (atomic_t *)(&sem->count)); -} - -#endif /* __KERNEL__ */ -#endif /* _PPC_RWSEM_XADD_H */ diff --git a/include/asm-ppc/seccomp.h b/include/asm-ppc/seccomp.h deleted file mode 100644 index 666c4da96d8..00000000000 --- a/include/asm-ppc/seccomp.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _ASM_SECCOMP_H - -#include <linux/unistd.h> - -#define __NR_seccomp_read __NR_read -#define __NR_seccomp_write __NR_write -#define __NR_seccomp_exit __NR_exit -#define __NR_seccomp_sigreturn __NR_rt_sigreturn - -#endif /* _ASM_SECCOMP_H */ diff --git a/include/asm-ppc/semaphore.h b/include/asm-ppc/semaphore.h deleted file mode 100644 index 89e6e73be08..00000000000 --- a/include/asm-ppc/semaphore.h +++ /dev/null @@ -1,111 +0,0 @@ -#ifndef _PPC_SEMAPHORE_H -#define _PPC_SEMAPHORE_H - -/* - * Swiped from asm-sparc/semaphore.h and modified - * -- Cort (cort@cs.nmt.edu) - * - * Stole some rw spinlock-based semaphore stuff from asm-alpha/semaphore.h - * -- Ani Joshi (ajoshi@unixbox.com) - * - * Remove spinlock-based RW semaphores; RW semaphore definitions are - * now in rwsem.h and we use the generic lib/rwsem.c implementation. - * Rework semaphores to use atomic_dec_if_positive. - * -- Paul Mackerras (paulus@samba.org) - */ - -#ifdef __KERNEL__ - -#include <asm/atomic.h> -#include <asm/system.h> -#include <linux/wait.h> -#include <linux/rwsem.h> - -struct semaphore { - /* - * Note that any negative value of count is equivalent to 0, - * but additionally indicates that some process(es) might be - * sleeping on `wait'. - */ - atomic_t count; - wait_queue_head_t wait; -}; - -#define __SEMAPHORE_INITIALIZER(name, n) \ -{ \ - .count = ATOMIC_INIT(n), \ - .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ -} - -#define __MUTEX_INITIALIZER(name) \ - __SEMAPHORE_INITIALIZER(name, 1) - -#define __DECLARE_SEMAPHORE_GENERIC(name, count) \ - struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) - -#define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name, 1) -#define DECLARE_MUTEX_LOCKED(name) __DECLARE_SEMAPHORE_GENERIC(name, 0) - -static inline void sema_init (struct semaphore *sem, int val) -{ - atomic_set(&sem->count, val); - init_waitqueue_head(&sem->wait); -} - -static inline void init_MUTEX (struct semaphore *sem) -{ - sema_init(sem, 1); -} - -static inline void init_MUTEX_LOCKED (struct semaphore *sem) -{ - sema_init(sem, 0); -} - -extern void __down(struct semaphore * sem); -extern int __down_interruptible(struct semaphore * sem); -extern void __up(struct semaphore * sem); - -extern inline void down(struct semaphore * sem) -{ - might_sleep(); - - /* - * Try to get the semaphore, take the slow path if we fail. - */ - if (atomic_dec_return(&sem->count) < 0) - __down(sem); - smp_wmb(); -} - -extern inline int down_interruptible(struct semaphore * sem) -{ - int ret = 0; - - might_sleep(); - - if (atomic_dec_return(&sem->count) < 0) - ret = __down_interruptible(sem); - smp_wmb(); - return ret; -} - -extern inline int down_trylock(struct semaphore * sem) -{ - int ret; - - ret = atomic_dec_if_positive(&sem->count) < 0; - smp_wmb(); - return ret; -} - -extern inline void up(struct semaphore * sem) -{ - smp_wmb(); - if (atomic_inc_return(&sem->count) <= 0) - __up(sem); -} - -#endif /* __KERNEL__ */ - -#endif /* !(_PPC_SEMAPHORE_H) */ diff --git a/include/asm-ppc/smp.h b/include/asm-ppc/smp.h index 829481c0a9d..79c1be3dfe6 100644 --- a/include/asm-ppc/smp.h +++ b/include/asm-ppc/smp.h @@ -45,30 +45,21 @@ extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); extern void cpu_die(void) __attribute__((noreturn)); -#define NO_PROC_ID 0xFF /* No processor magic marker */ -#define PROC_CHANGE_PENALTY 20 - #define raw_smp_processor_id() (current_thread_info()->cpu) extern int __cpu_up(unsigned int cpu); extern int smp_hw_index[]; -#define hard_smp_processor_id() (smp_hw_index[smp_processor_id()]) - -struct klock_info_struct { - unsigned long kernel_flag; - unsigned char akp; -}; - -extern struct klock_info_struct klock_info; -#define KLOCK_HELD 0xffffffff -#define KLOCK_CLEAR 0x0 +#define hard_smp_processor_id() (smp_hw_index[smp_processor_id()]) +#define get_hard_smp_processor_id(cpu) (smp_hw_index[(cpu)]) #endif /* __ASSEMBLY__ */ #else /* !(CONFIG_SMP) */ static inline void cpu_die(void) { } +#define get_hard_smp_processor_id(cpu) 0 +#define hard_smp_processor_id() 0 #endif /* !(CONFIG_SMP) */ diff --git a/include/asm-ppc/spinlock.h b/include/asm-ppc/spinlock.h index 20edcf2a6e0..5c64b75f029 100644 --- a/include/asm-ppc/spinlock.h +++ b/include/asm-ppc/spinlock.h @@ -9,7 +9,7 @@ * (the type definitions are in asm/raw_spinlock_types.h) */ -#define __raw_spin_is_locked(x) ((x)->lock != 0) +#define __raw_spin_is_locked(x) ((x)->slock != 0) #define __raw_spin_unlock_wait(lock) \ do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) @@ -31,17 +31,17 @@ static inline void __raw_spin_lock(raw_spinlock_t *lock) bne- 2b\n\ isync" : "=&r"(tmp) - : "r"(&lock->lock), "r"(1) + : "r"(&lock->slock), "r"(1) : "cr0", "memory"); } static inline void __raw_spin_unlock(raw_spinlock_t *lock) { __asm__ __volatile__("eieio # __raw_spin_unlock": : :"memory"); - lock->lock = 0; + lock->slock = 0; } -#define __raw_spin_trylock(l) (!test_and_set_bit(0,&(l)->lock)) +#define __raw_spin_trylock(l) (!test_and_set_bit(0,(volatile unsigned long *)(&(l)->slock))) /* * Read-write spinlocks, allowing multiple readers diff --git a/include/asm-ppc64/atomic.h b/include/asm-ppc64/atomic.h deleted file mode 100644 index 0e5f25e83bc..00000000000 --- a/include/asm-ppc64/atomic.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * PowerPC64 atomic operations - * - * Copyright (C) 2001 Paul Mackerras <paulus@au.ibm.com>, IBM - * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _ASM_PPC64_ATOMIC_H_ -#define _ASM_PPC64_ATOMIC_H_ - -#include <asm/memory.h> - -typedef struct { volatile int counter; } atomic_t; - -#define ATOMIC_INIT(i) { (i) } - -#define atomic_read(v) ((v)->counter) -#define atomic_set(v,i) (((v)->counter) = (i)) - -static __inline__ void atomic_add(int a, atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%3 # atomic_add\n\ - add %0,%2,%0\n\ - stwcx. %0,0,%3\n\ - bne- 1b" - : "=&r" (t), "=m" (v->counter) - : "r" (a), "r" (&v->counter), "m" (v->counter) - : "cc"); -} - -static __inline__ int atomic_add_return(int a, atomic_t *v) -{ - int t; - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: lwarx %0,0,%2 # atomic_add_return\n\ - add %0,%1,%0\n\ - stwcx. %0,0,%2\n\ - bne- 1b" - ISYNC_ON_SMP - : "=&r" (t) - : "r" (a), "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -#define atomic_add_negative(a, v) (atomic_add_return((a), (v)) < 0) - -static __inline__ void atomic_sub(int a, atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%3 # atomic_sub\n\ - subf %0,%2,%0\n\ - stwcx. %0,0,%3\n\ - bne- 1b" - : "=&r" (t), "=m" (v->counter) - : "r" (a), "r" (&v->counter), "m" (v->counter) - : "cc"); -} - -static __inline__ int atomic_sub_return(int a, atomic_t *v) -{ - int t; - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: lwarx %0,0,%2 # atomic_sub_return\n\ - subf %0,%1,%0\n\ - stwcx. %0,0,%2\n\ - bne- 1b" - ISYNC_ON_SMP - : "=&r" (t) - : "r" (a), "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -static __inline__ void atomic_inc(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_inc\n\ - addic %0,%0,1\n\ - stwcx. %0,0,%2\n\ - bne- 1b" - : "=&r" (t), "=m" (v->counter) - : "r" (&v->counter), "m" (v->counter) - : "cc"); -} - -static __inline__ int atomic_inc_return(atomic_t *v) -{ - int t; - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: lwarx %0,0,%1 # atomic_inc_return\n\ - addic %0,%0,1\n\ - stwcx. %0,0,%1\n\ - bne- 1b" - ISYNC_ON_SMP - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -/* - * atomic_inc_and_test - increment and test - * @v: pointer of type atomic_t - * - * Atomically increments @v by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) - -static __inline__ void atomic_dec(atomic_t *v) -{ - int t; - - __asm__ __volatile__( -"1: lwarx %0,0,%2 # atomic_dec\n\ - addic %0,%0,-1\n\ - stwcx. %0,0,%2\n\ - bne- 1b" - : "=&r" (t), "=m" (v->counter) - : "r" (&v->counter), "m" (v->counter) - : "cc"); -} - -static __inline__ int atomic_dec_return(atomic_t *v) -{ - int t; - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: lwarx %0,0,%1 # atomic_dec_return\n\ - addic %0,%0,-1\n\ - stwcx. %0,0,%1\n\ - bne- 1b" - ISYNC_ON_SMP - : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -#define atomic_sub_and_test(a, v) (atomic_sub_return((a), (v)) == 0) -#define atomic_dec_and_test(v) (atomic_dec_return((v)) == 0) - -/* - * Atomically test *v and decrement if it is greater than 0. - * The function returns the old value of *v minus 1. - */ -static __inline__ int atomic_dec_if_positive(atomic_t *v) -{ - int t; - - __asm__ __volatile__( - EIEIO_ON_SMP -"1: lwarx %0,0,%1 # atomic_dec_if_positive\n\ - addic. %0,%0,-1\n\ - blt- 2f\n\ - stwcx. %0,0,%1\n\ - bne- 1b" - ISYNC_ON_SMP - "\n\ -2:" : "=&r" (t) - : "r" (&v->counter) - : "cc", "memory"); - - return t; -} - -#define smp_mb__before_atomic_dec() smp_mb() -#define smp_mb__after_atomic_dec() smp_mb() -#define smp_mb__before_atomic_inc() smp_mb() -#define smp_mb__after_atomic_inc() smp_mb() - -#endif /* _ASM_PPC64_ATOMIC_H_ */ diff --git a/include/asm-ppc64/bitops.h b/include/asm-ppc64/bitops.h index a0f831224f9..dbfa42ef4a9 100644 --- a/include/asm-ppc64/bitops.h +++ b/include/asm-ppc64/bitops.h @@ -42,7 +42,7 @@ #ifdef __KERNEL__ -#include <asm/memory.h> +#include <asm/synch.h> /* * clear_bit doesn't imply a memory barrier diff --git a/include/asm-ppc64/dma.h b/include/asm-ppc64/dma.h deleted file mode 100644 index dfd1f69059b..00000000000 --- a/include/asm-ppc64/dma.h +++ /dev/null @@ -1,329 +0,0 @@ -/* - * linux/include/asm/dma.h: Defines for using and allocating dma channels. - * Written by Hennus Bergman, 1992. - * High DMA channel support & info by Hannu Savolainen - * and John Boyd, Nov. 1992. - * Changes for ppc sound by Christoph Nadig - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _ASM_DMA_H -#define _ASM_DMA_H - -#include <linux/config.h> -#include <asm/io.h> -#include <linux/spinlock.h> -#include <asm/system.h> - -#ifndef MAX_DMA_CHANNELS -#define MAX_DMA_CHANNELS 8 -#endif - -/* The maximum address that we can perform a DMA transfer to on this platform */ -/* Doesn't really apply... */ -#define MAX_DMA_ADDRESS (~0UL) - -#if !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI) - -#define dma_outb outb -#define dma_inb inb - -/* - * NOTES about DMA transfers: - * - * controller 1: channels 0-3, byte operations, ports 00-1F - * controller 2: channels 4-7, word operations, ports C0-DF - * - * - ALL registers are 8 bits only, regardless of transfer size - * - channel 4 is not used - cascades 1 into 2. - * - channels 0-3 are byte - addresses/counts are for physical bytes - * - channels 5-7 are word - addresses/counts are for physical words - * - transfers must not cross physical 64K (0-3) or 128K (5-7) boundaries - * - transfer count loaded to registers is 1 less than actual count - * - controller 2 offsets are all even (2x offsets for controller 1) - * - page registers for 5-7 don't use data bit 0, represent 128K pages - * - page registers for 0-3 use bit 0, represent 64K pages - * - * On PReP, DMA transfers are limited to the lower 16MB of _physical_ memory. - * On CHRP, the W83C553F (and VLSI Tollgate?) support full 32 bit addressing. - * Note that addresses loaded into registers must be _physical_ addresses, - * not logical addresses (which may differ if paging is active). - * - * Address mapping for channels 0-3: - * - * A23 ... A16 A15 ... A8 A7 ... A0 (Physical addresses) - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * | ... | | ... | | ... | - * P7 ... P0 A7 ... A0 A7 ... A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Address mapping for channels 5-7: - * - * A23 ... A17 A16 A15 ... A9 A8 A7 ... A1 A0 (Physical addresses) - * | ... | \ \ ... \ \ \ ... \ \ - * | ... | \ \ ... \ \ \ ... \ (not used) - * | ... | \ \ ... \ \ \ ... \ - * P7 ... P1 (0) A7 A6 ... A0 A7 A6 ... A0 - * | Page | Addr MSB | Addr LSB | (DMA registers) - * - * Again, channels 5-7 transfer _physical_ words (16 bits), so addresses - * and counts _must_ be word-aligned (the lowest address bit is _ignored_ at - * the hardware level, so odd-byte transfers aren't possible). - * - * Transfer count (_not # bytes_) is limited to 64K, represented as actual - * count - 1 : 64K => 0xFFFF, 1 => 0x0000. Thus, count is always 1 or more, - * and up to 128K bytes may be transferred on channels 5-7 in one operation. - * - */ - -/* 8237 DMA controllers */ -#define IO_DMA1_BASE 0x00 /* 8 bit slave DMA, channels 0..3 */ -#define IO_DMA2_BASE 0xC0 /* 16 bit master DMA, ch 4(=slave input)..7 */ - -/* DMA controller registers */ -#define DMA1_CMD_REG 0x08 /* command register (w) */ -#define DMA1_STAT_REG 0x08 /* status register (r) */ -#define DMA1_REQ_REG 0x09 /* request register (w) */ -#define DMA1_MASK_REG 0x0A /* single-channel mask (w) */ -#define DMA1_MODE_REG 0x0B /* mode register (w) */ -#define DMA1_CLEAR_FF_REG 0x0C /* clear pointer flip-flop (w) */ -#define DMA1_TEMP_REG 0x0D /* Temporary Register (r) */ -#define DMA1_RESET_REG 0x0D /* Master Clear (w) */ -#define DMA1_CLR_MASK_REG 0x0E /* Clear Mask */ -#define DMA1_MASK_ALL_REG 0x0F /* all-channels mask (w) */ - -#define DMA2_CMD_REG 0xD0 /* command register (w) */ -#define DMA2_STAT_REG 0xD0 /* status register (r) */ -#define DMA2_REQ_REG 0xD2 /* request register (w) */ -#define DMA2_MASK_REG 0xD4 /* single-channel mask (w) */ -#define DMA2_MODE_REG 0xD6 /* mode register (w) */ -#define DMA2_CLEAR_FF_REG 0xD8 /* clear pointer flip-flop (w) */ -#define DMA2_TEMP_REG 0xDA /* Temporary Register (r) */ -#define DMA2_RESET_REG 0xDA /* Master Clear (w) */ -#define DMA2_CLR_MASK_REG 0xDC /* Clear Mask */ -#define DMA2_MASK_ALL_REG 0xDE /* all-channels mask (w) */ - -#define DMA_ADDR_0 0x00 /* DMA address registers */ -#define DMA_ADDR_1 0x02 -#define DMA_ADDR_2 0x04 -#define DMA_ADDR_3 0x06 -#define DMA_ADDR_4 0xC0 -#define DMA_ADDR_5 0xC4 -#define DMA_ADDR_6 0xC8 -#define DMA_ADDR_7 0xCC - -#define DMA_CNT_0 0x01 /* DMA count registers */ -#define DMA_CNT_1 0x03 -#define DMA_CNT_2 0x05 -#define DMA_CNT_3 0x07 -#define DMA_CNT_4 0xC2 -#define DMA_CNT_5 0xC6 -#define DMA_CNT_6 0xCA -#define DMA_CNT_7 0xCE - -#define DMA_LO_PAGE_0 0x87 /* DMA page registers */ -#define DMA_LO_PAGE_1 0x83 -#define DMA_LO_PAGE_2 0x81 -#define DMA_LO_PAGE_3 0x82 -#define DMA_LO_PAGE_5 0x8B -#define DMA_LO_PAGE_6 0x89 -#define DMA_LO_PAGE_7 0x8A - -#define DMA_HI_PAGE_0 0x487 /* DMA page registers */ -#define DMA_HI_PAGE_1 0x483 -#define DMA_HI_PAGE_2 0x481 -#define DMA_HI_PAGE_3 0x482 -#define DMA_HI_PAGE_5 0x48B -#define DMA_HI_PAGE_6 0x489 -#define DMA_HI_PAGE_7 0x48A - -#define DMA1_EXT_REG 0x40B -#define DMA2_EXT_REG 0x4D6 - -#define DMA_MODE_READ 0x44 /* I/O to memory, no autoinit, increment, single mode */ -#define DMA_MODE_WRITE 0x48 /* memory to I/O, no autoinit, increment, single mode */ -#define DMA_MODE_CASCADE 0xC0 /* pass thru DREQ->HRQ, DACK<-HLDA only */ - -#define DMA_AUTOINIT 0x10 - -extern spinlock_t dma_spin_lock; - -static __inline__ unsigned long claim_dma_lock(void) -{ - unsigned long flags; - spin_lock_irqsave(&dma_spin_lock, flags); - return flags; -} - -static __inline__ void release_dma_lock(unsigned long flags) -{ - spin_unlock_irqrestore(&dma_spin_lock, flags); -} - -/* enable/disable a specific DMA channel */ -static __inline__ void enable_dma(unsigned int dmanr) -{ - unsigned char ucDmaCmd=0x00; - - if (dmanr != 4) - { - dma_outb(0, DMA2_MASK_REG); /* This may not be enabled */ - dma_outb(ucDmaCmd, DMA2_CMD_REG); /* Enable group */ - } - if (dmanr<=3) - { - dma_outb(dmanr, DMA1_MASK_REG); - dma_outb(ucDmaCmd, DMA1_CMD_REG); /* Enable group */ - } else - { - dma_outb(dmanr & 3, DMA2_MASK_REG); - } -} - -static __inline__ void disable_dma(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(dmanr | 4, DMA1_MASK_REG); - else - dma_outb((dmanr & 3) | 4, DMA2_MASK_REG); -} - -/* Clear the 'DMA Pointer Flip Flop'. - * Write 0 for LSB/MSB, 1 for MSB/LSB access. - * Use this once to initialize the FF to a known state. - * After that, keep track of it. :-) - * --- In order to do that, the DMA routines below should --- - * --- only be used while interrupts are disabled! --- - */ -static __inline__ void clear_dma_ff(unsigned int dmanr) -{ - if (dmanr<=3) - dma_outb(0, DMA1_CLEAR_FF_REG); - else - dma_outb(0, DMA2_CLEAR_FF_REG); -} - -/* set mode (above) for a specific DMA channel */ -static __inline__ void set_dma_mode(unsigned int dmanr, char mode) -{ - if (dmanr<=3) - dma_outb(mode | dmanr, DMA1_MODE_REG); - else - dma_outb(mode | (dmanr&3), DMA2_MODE_REG); -} - -/* Set only the page register bits of the transfer address. - * This is used for successive transfers when we know the contents of - * the lower 16 bits of the DMA current address register, but a 64k boundary - * may have been crossed. - */ -static __inline__ void set_dma_page(unsigned int dmanr, int pagenr) -{ - switch(dmanr) { - case 0: - dma_outb(pagenr, DMA_LO_PAGE_0); - dma_outb(pagenr>>8, DMA_HI_PAGE_0); - break; - case 1: - dma_outb(pagenr, DMA_LO_PAGE_1); - dma_outb(pagenr>>8, DMA_HI_PAGE_1); - break; - case 2: - dma_outb(pagenr, DMA_LO_PAGE_2); - dma_outb(pagenr>>8, DMA_HI_PAGE_2); - break; - case 3: - dma_outb(pagenr, DMA_LO_PAGE_3); - dma_outb(pagenr>>8, DMA_HI_PAGE_3); - break; - case 5: - dma_outb(pagenr & 0xfe, DMA_LO_PAGE_5); - dma_outb(pagenr>>8, DMA_HI_PAGE_5); - break; - case 6: - dma_outb(pagenr & 0xfe, DMA_LO_PAGE_6); - dma_outb(pagenr>>8, DMA_HI_PAGE_6); - break; - case 7: - dma_outb(pagenr & 0xfe, DMA_LO_PAGE_7); - dma_outb(pagenr>>8, DMA_HI_PAGE_7); - break; - } -} - - -/* Set transfer address & page bits for specific DMA channel. - * Assumes dma flipflop is clear. - */ -static __inline__ void set_dma_addr(unsigned int dmanr, unsigned int phys) -{ - if (dmanr <= 3) { - dma_outb( phys & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - dma_outb( (phys>>8) & 0xff, ((dmanr&3)<<1) + IO_DMA1_BASE ); - } else { - dma_outb( (phys>>1) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - dma_outb( (phys>>9) & 0xff, ((dmanr&3)<<2) + IO_DMA2_BASE ); - } - set_dma_page(dmanr, phys>>16); -} - - -/* Set transfer size (max 64k for DMA1..3, 128k for DMA5..7) for - * a specific DMA channel. - * You must ensure the parameters are valid. - * NOTE: from a manual: "the number of transfers is one more - * than the initial word count"! This is taken into account. - * Assumes dma flip-flop is clear. - * NOTE 2: "count" represents _bytes_ and must be even for channels 5-7. - */ -static __inline__ void set_dma_count(unsigned int dmanr, unsigned int count) -{ - count--; - if (dmanr <= 3) { - dma_outb( count & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - dma_outb( (count>>8) & 0xff, ((dmanr&3)<<1) + 1 + IO_DMA1_BASE ); - } else { - dma_outb( (count>>1) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - dma_outb( (count>>9) & 0xff, ((dmanr&3)<<2) + 2 + IO_DMA2_BASE ); - } -} - - -/* Get DMA residue count. After a DMA transfer, this - * should return zero. Reading this while a DMA transfer is - * still in progress will return unpredictable results. - * If called before the channel has been used, it may return 1. - * Otherwise, it returns the number of _bytes_ left to transfer. - * - * Assumes DMA flip-flop is clear. - */ -static __inline__ int get_dma_residue(unsigned int dmanr) -{ - unsigned int io_port = (dmanr<=3)? ((dmanr&3)<<1) + 1 + IO_DMA1_BASE - : ((dmanr&3)<<2) + 2 + IO_DMA2_BASE; - - /* using short to get 16-bit wrap around */ - unsigned short count; - - count = 1 + dma_inb(io_port); - count += dma_inb(io_port) << 8; - - return (dmanr <= 3)? count : (count<<1); -} - -/* These are in kernel/dma.c: */ -extern int request_dma(unsigned int dmanr, const char * device_id); /* reserve a DMA channel */ -extern void free_dma(unsigned int dmanr); /* release it again */ - -#ifdef CONFIG_PCI -extern int isa_dma_bridge_buggy; -#else -#define isa_dma_bridge_buggy (0) -#endif -#endif /* !defined(CONFIG_PPC_ISERIES) || defined(CONFIG_PCI) */ -#endif /* _ASM_DMA_H */ diff --git a/include/asm-ppc64/futex.h b/include/asm-ppc64/futex.h index cb2640b3a40..266b460de44 100644 --- a/include/asm-ppc64/futex.h +++ b/include/asm-ppc64/futex.h @@ -5,7 +5,7 @@ #include <linux/futex.h> #include <asm/errno.h> -#include <asm/memory.h> +#include <asm/synch.h> #include <asm/uaccess.h> #define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ diff --git a/include/asm-ppc64/io.h b/include/asm-ppc64/io.h index 59c958aea4d..bd7c9532d77 100644 --- a/include/asm-ppc64/io.h +++ b/include/asm-ppc64/io.h @@ -15,7 +15,7 @@ #ifdef CONFIG_PPC_ISERIES #include <asm/iSeries/iSeries_io.h> #endif -#include <asm/memory.h> +#include <asm/synch.h> #include <asm/delay.h> #include <asm-generic/iomap.h> diff --git a/include/asm-ppc64/memory.h b/include/asm-ppc64/memory.h deleted file mode 100644 index af53ffb5572..00000000000 --- a/include/asm-ppc64/memory.h +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef _ASM_PPC64_MEMORY_H_ -#define _ASM_PPC64_MEMORY_H_ - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/config.h> - -/* - * Arguably the bitops and *xchg operations don't imply any memory barrier - * or SMP ordering, but in fact a lot of drivers expect them to imply - * both, since they do on x86 cpus. - */ -#ifdef CONFIG_SMP -#define EIEIO_ON_SMP "eieio\n" -#define ISYNC_ON_SMP "\n\tisync" -#define SYNC_ON_SMP "lwsync\n\t" -#else -#define EIEIO_ON_SMP -#define ISYNC_ON_SMP -#define SYNC_ON_SMP -#endif - -static inline void eieio(void) -{ - __asm__ __volatile__ ("eieio" : : : "memory"); -} - -static inline void isync(void) -{ - __asm__ __volatile__ ("isync" : : : "memory"); -} - -#ifdef CONFIG_SMP -#define eieio_on_smp() eieio() -#define isync_on_smp() isync() -#else -#define eieio_on_smp() __asm__ __volatile__("": : :"memory") -#define isync_on_smp() __asm__ __volatile__("": : :"memory") -#endif - -/* Macros for adjusting thread priority (hardware multi-threading) */ -#define HMT_very_low() asm volatile("or 31,31,31 # very low priority") -#define HMT_low() asm volatile("or 1,1,1 # low priority") -#define HMT_medium_low() asm volatile("or 6,6,6 # medium low priority") -#define HMT_medium() asm volatile("or 2,2,2 # medium priority") -#define HMT_medium_high() asm volatile("or 5,5,5 # medium high priority") -#define HMT_high() asm volatile("or 3,3,3 # high priority") - -#define HMT_VERY_LOW "\tor 31,31,31 # very low priority\n" -#define HMT_LOW "\tor 1,1,1 # low priority\n" -#define HMT_MEDIUM_LOW "\tor 6,6,6 # medium low priority\n" -#define HMT_MEDIUM "\tor 2,2,2 # medium priority\n" -#define HMT_MEDIUM_HIGH "\tor 5,5,5 # medium high priority\n" -#define HMT_HIGH "\tor 3,3,3 # high priority\n" - -#endif diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h index fe5cd2f5868..6447fbee7d6 100644 --- a/include/asm-ppc64/processor.h +++ b/include/asm-ppc64/processor.h @@ -369,6 +369,14 @@ GLUE(.,name): #define mfasr() ({unsigned long rval; \ asm volatile("mfasr %0" : "=r" (rval)); rval;}) +/* Macros for adjusting thread priority (hardware multi-threading) */ +#define HMT_very_low() asm volatile("or 31,31,31 # very low priority") +#define HMT_low() asm volatile("or 1,1,1 # low priority") +#define HMT_medium_low() asm volatile("or 6,6,6 # medium low priority") +#define HMT_medium() asm volatile("or 2,2,2 # medium priority") +#define HMT_medium_high() asm volatile("or 5,5,5 # medium high priority") +#define HMT_high() asm volatile("or 3,3,3 # high priority") + static inline void set_tb(unsigned int upper, unsigned int lower) { mttbl(0); diff --git a/include/asm-ppc64/smu.h b/include/asm-ppc64/smu.h index 10b4397af9a..dee8eefe47b 100644 --- a/include/asm-ppc64/smu.h +++ b/include/asm-ppc64/smu.h @@ -1,22 +1,379 @@ +#ifndef _SMU_H +#define _SMU_H + /* * Definitions for talking to the SMU chip in newer G5 PowerMacs */ #include <linux/config.h> +#include <linux/list.h> + +/* + * Known SMU commands + * + * Most of what is below comes from looking at the Open Firmware driver, + * though this is still incomplete and could use better documentation here + * or there... + */ + + +/* + * Partition info commands + * + * I do not know what those are for at this point + */ +#define SMU_CMD_PARTITION_COMMAND 0x3e + + +/* + * Fan control + * + * This is a "mux" for fan control commands, first byte is the + * "sub" command. + */ +#define SMU_CMD_FAN_COMMAND 0x4a + + +/* + * Battery access + * + * Same command number as the PMU, could it be same syntax ? + */ +#define SMU_CMD_BATTERY_COMMAND 0x6f +#define SMU_CMD_GET_BATTERY_INFO 0x00 + +/* + * Real time clock control + * + * This is a "mux", first data byte contains the "sub" command. + * The "RTC" part of the SMU controls the date, time, powerup + * timer, but also a PRAM + * + * Dates are in BCD format on 7 bytes: + * [sec] [min] [hour] [weekday] [month day] [month] [year] + * with month being 1 based and year minus 100 + */ +#define SMU_CMD_RTC_COMMAND 0x8e +#define SMU_CMD_RTC_SET_PWRUP_TIMER 0x00 /* i: 7 bytes date */ +#define SMU_CMD_RTC_GET_PWRUP_TIMER 0x01 /* o: 7 bytes date */ +#define SMU_CMD_RTC_STOP_PWRUP_TIMER 0x02 +#define SMU_CMD_RTC_SET_PRAM_BYTE_ACC 0x20 /* i: 1 byte (address?) */ +#define SMU_CMD_RTC_SET_PRAM_AUTOINC 0x21 /* i: 1 byte (data?) */ +#define SMU_CMD_RTC_SET_PRAM_LO_BYTES 0x22 /* i: 10 bytes */ +#define SMU_CMD_RTC_SET_PRAM_HI_BYTES 0x23 /* i: 10 bytes */ +#define SMU_CMD_RTC_GET_PRAM_BYTE 0x28 /* i: 1 bytes (address?) */ +#define SMU_CMD_RTC_GET_PRAM_LO_BYTES 0x29 /* o: 10 bytes */ +#define SMU_CMD_RTC_GET_PRAM_HI_BYTES 0x2a /* o: 10 bytes */ +#define SMU_CMD_RTC_SET_DATETIME 0x80 /* i: 7 bytes date */ +#define SMU_CMD_RTC_GET_DATETIME 0x81 /* o: 7 bytes date */ + + /* + * i2c commands + * + * To issue an i2c command, first is to send a parameter block to the + * the SMU. This is a command of type 0x9a with 9 bytes of header + * eventually followed by data for a write: + * + * 0: bus number (from device-tree usually, SMU has lots of busses !) + * 1: transfer type/format (see below) + * 2: device address. For combined and combined4 type transfers, this + * is the "write" version of the address (bit 0x01 cleared) + * 3: subaddress length (0..3) + * 4: subaddress byte 0 (or only byte for subaddress length 1) + * 5: subaddress byte 1 + * 6: subaddress byte 2 + * 7: combined address (device address for combined mode data phase) + * 8: data length + * + * The transfer types are the same good old Apple ones it seems, + * that is: + * - 0x00: Simple transfer + * - 0x01: Subaddress transfer (addr write + data tx, no restart) + * - 0x02: Combined transfer (addr write + restart + data tx) + * + * This is then followed by actual data for a write. + * + * At this point, the OF driver seems to have a limitation on transfer + * sizes of 0xd bytes on reads and 0x5 bytes on writes. I do not know + * wether this is just an OF limit due to some temporary buffer size + * or if this is an SMU imposed limit. This driver has the same limitation + * for now as I use a 0x10 bytes temporary buffer as well + * + * Once that is completed, a response is expected from the SMU. This is + * obtained via a command of type 0x9a with a length of 1 byte containing + * 0 as the data byte. OF also fills the rest of the data buffer with 0xff's + * though I can't tell yet if this is actually necessary. Once this command + * is complete, at this point, all I can tell is what OF does. OF tests + * byte 0 of the reply: + * - on read, 0xfe or 0xfc : bus is busy, wait (see below) or nak ? + * - on read, 0x00 or 0x01 : reply is in buffer (after the byte 0) + * - on write, < 0 -> failure (immediate exit) + * - else, OF just exists (without error, weird) + * + * So on read, there is this wait-for-busy thing when getting a 0xfc or + * 0xfe result. OF does a loop of up to 64 retries, waiting 20ms and + * doing the above again until either the retries expire or the result + * is no longer 0xfe or 0xfc + * + * The Darwin I2C driver is less subtle though. On any non-success status + * from the response command, it waits 5ms and tries again up to 20 times, + * it doesn't differenciate between fatal errors or "busy" status. + * + * This driver provides an asynchronous paramblock based i2c command + * interface to be used either directly by low level code or by a higher + * level driver interfacing to the linux i2c layer. The current + * implementation of this relies on working timers & timer interrupts + * though, so be careful of calling context for now. This may be "fixed" + * in the future by adding a polling facility. + */ +#define SMU_CMD_I2C_COMMAND 0x9a + /* transfer types */ +#define SMU_I2C_TRANSFER_SIMPLE 0x00 +#define SMU_I2C_TRANSFER_STDSUB 0x01 +#define SMU_I2C_TRANSFER_COMBINED 0x02 + +/* + * Power supply control + * + * The "sub" command is an ASCII string in the data, the + * data lenght is that of the string. + * + * The VSLEW command can be used to get or set the voltage slewing. + * - lenght 5 (only "VSLEW") : it returns "DONE" and 3 bytes of + * reply at data offset 6, 7 and 8. + * - lenght 8 ("VSLEWxyz") has 3 additional bytes appended, and is + * used to set the voltage slewing point. The SMU replies with "DONE" + * I yet have to figure out their exact meaning of those 3 bytes in + * both cases. + * + */ +#define SMU_CMD_POWER_COMMAND 0xaa +#define SMU_CMD_POWER_RESTART "RESTART" +#define SMU_CMD_POWER_SHUTDOWN "SHUTDOWN" +#define SMU_CMD_POWER_VOLTAGE_SLEW "VSLEW" + +/* Misc commands + * + * This command seem to be a grab bag of various things + */ +#define SMU_CMD_MISC_df_COMMAND 0xdf +#define SMU_CMD_MISC_df_SET_DISPLAY_LIT 0x02 /* i: 1 byte */ +#define SMU_CMD_MISC_df_NMI_OPTION 0x04 + +/* + * Version info commands + * + * I haven't quite tried to figure out how these work + */ +#define SMU_CMD_VERSION_COMMAND 0xea + + +/* + * Misc commands + * + * This command seem to be a grab bag of various things + */ +#define SMU_CMD_MISC_ee_COMMAND 0xee +#define SMU_CMD_MISC_ee_GET_DATABLOCK_REC 0x02 +#define SMU_CMD_MISC_ee_LEDS_CTRL 0x04 /* i: 00 (00,01) [00] */ +#define SMU_CMD_MISC_ee_GET_DATA 0x05 /* i: 00 , o: ?? */ + + + +/* + * - Kernel side interface - + */ + +#ifdef __KERNEL__ + +/* + * Asynchronous SMU commands + * + * Fill up this structure and submit it via smu_queue_command(), + * and get notified by the optional done() callback, or because + * status becomes != 1 + */ + +struct smu_cmd; + +struct smu_cmd +{ + /* public */ + u8 cmd; /* command */ + int data_len; /* data len */ + int reply_len; /* reply len */ + void *data_buf; /* data buffer */ + void *reply_buf; /* reply buffer */ + int status; /* command status */ + void (*done)(struct smu_cmd *cmd, void *misc); + void *misc; + + /* private */ + struct list_head link; +}; + +/* + * Queues an SMU command, all fields have to be initialized + */ +extern int smu_queue_cmd(struct smu_cmd *cmd); + +/* + * Simple command wrapper. This structure embeds a small buffer + * to ease sending simple SMU commands from the stack + */ +struct smu_simple_cmd +{ + struct smu_cmd cmd; + u8 buffer[16]; +}; + +/* + * Queues a simple command. All fields will be initialized by that + * function + */ +extern int smu_queue_simple(struct smu_simple_cmd *scmd, u8 command, + unsigned int data_len, + void (*done)(struct smu_cmd *cmd, void *misc), + void *misc, + ...); + +/* + * Completion helper. Pass it to smu_queue_simple or as 'done' + * member to smu_queue_cmd, it will call complete() on the struct + * completion passed in the "misc" argument + */ +extern void smu_done_complete(struct smu_cmd *cmd, void *misc); /* - * Basic routines for use by architecture. To be extended as - * we understand more of the chip + * Synchronous helpers. Will spin-wait for completion of a command + */ +extern void smu_spinwait_cmd(struct smu_cmd *cmd); + +static inline void smu_spinwait_simple(struct smu_simple_cmd *scmd) +{ + smu_spinwait_cmd(&scmd->cmd); +} + +/* + * Poll routine to call if blocked with irqs off + */ +extern void smu_poll(void); + + +/* + * Init routine, presence check.... */ extern int smu_init(void); extern int smu_present(void); +struct of_device; +extern struct of_device *smu_get_ofdev(void); + + +/* + * Common command wrappers + */ extern void smu_shutdown(void); extern void smu_restart(void); -extern int smu_get_rtc_time(struct rtc_time *time); -extern int smu_set_rtc_time(struct rtc_time *time); +struct rtc_time; +extern int smu_get_rtc_time(struct rtc_time *time, int spinwait); +extern int smu_set_rtc_time(struct rtc_time *time, int spinwait); /* * SMU command buffer absolute address, exported by pmac_setup, * this is allocated very early during boot. */ extern unsigned long smu_cmdbuf_abs; + + +/* + * Kenrel asynchronous i2c interface + */ + +/* SMU i2c header, exactly matches i2c header on wire */ +struct smu_i2c_param +{ + u8 bus; /* SMU bus ID (from device tree) */ + u8 type; /* i2c transfer type */ + u8 devaddr; /* device address (includes direction) */ + u8 sublen; /* subaddress length */ + u8 subaddr[3]; /* subaddress */ + u8 caddr; /* combined address, filled by SMU driver */ + u8 datalen; /* length of transfer */ + u8 data[7]; /* data */ +}; + +#define SMU_I2C_READ_MAX 0x0d +#define SMU_I2C_WRITE_MAX 0x05 + +struct smu_i2c_cmd +{ + /* public */ + struct smu_i2c_param info; + void (*done)(struct smu_i2c_cmd *cmd, void *misc); + void *misc; + int status; /* 1 = pending, 0 = ok, <0 = fail */ + + /* private */ + struct smu_cmd scmd; + int read; + int stage; + int retries; + u8 pdata[0x10]; + struct list_head link; +}; + +/* + * Call this to queue an i2c command to the SMU. You must fill info, + * including info.data for a write, done and misc. + * For now, no polling interface is provided so you have to use completion + * callback. + */ +extern int smu_queue_i2c(struct smu_i2c_cmd *cmd); + + +#endif /* __KERNEL__ */ + +/* + * - Userland interface - + */ + +/* + * A given instance of the device can be configured for 2 different + * things at the moment: + * + * - sending SMU commands (default at open() time) + * - receiving SMU events (not yet implemented) + * + * Commands are written with write() of a command block. They can be + * "driver" commands (for example to switch to event reception mode) + * or real SMU commands. They are made of a header followed by command + * data if any. + * + * For SMU commands (not for driver commands), you can then read() back + * a reply. The reader will be blocked or not depending on how the device + * file is opened. poll() isn't implemented yet. The reply will consist + * of a header as well, followed by the reply data if any. You should + * always provide a buffer large enough for the maximum reply data, I + * recommand one page. + * + * It is illegal to send SMU commands through a file descriptor configured + * for events reception + * + */ +struct smu_user_cmd_hdr +{ + __u32 cmdtype; +#define SMU_CMDTYPE_SMU 0 /* SMU command */ +#define SMU_CMDTYPE_WANTS_EVENTS 1 /* switch fd to events mode */ + + __u8 cmd; /* SMU command byte */ + __u32 data_len; /* Lenght of data following */ +}; + +struct smu_user_reply_hdr +{ + __u32 status; /* Command status */ + __u32 reply_len; /* Lenght of data follwing */ +}; + +#endif /* _SMU_H */ diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 375015c62f2..1fbdc9f0590 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -13,7 +13,7 @@ #include <asm/page.h> #include <asm/processor.h> #include <asm/hw_irq.h> -#include <asm/memory.h> +#include <asm/synch.h> /* * Memory barrier. @@ -48,7 +48,7 @@ #ifdef CONFIG_SMP #define smp_mb() mb() #define smp_rmb() rmb() -#define smp_wmb() __asm__ __volatile__ ("eieio" : : : "memory") +#define smp_wmb() eieio() #define smp_read_barrier_depends() read_barrier_depends() #else #define smp_mb() __asm__ __volatile__("": : :"memory") diff --git a/include/asm-s390/futex.h b/include/asm-s390/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-s390/futex.h +++ b/include/asm-s390/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sh/futex.h b/include/asm-sh/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-sh/futex.h +++ b/include/asm-sh/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sh64/futex.h b/include/asm-sh64/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-sh64/futex.h +++ b/include/asm-sh64/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sparc/futex.h b/include/asm-sparc/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-sparc/futex.h +++ b/include/asm-sparc/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sparc64/cacheflush.h b/include/asm-sparc64/cacheflush.h index 51b26e81d82..ededd2659ea 100644 --- a/include/asm-sparc64/cacheflush.h +++ b/include/asm-sparc64/cacheflush.h @@ -4,13 +4,6 @@ #include <linux/config.h> #include <asm/page.h> -/* Flushing for D-cache alias handling is only needed if - * the page size is smaller than 16K. - */ -#if PAGE_SHIFT < 14 -#define DCACHE_ALIASING_POSSIBLE -#endif - #ifndef __ASSEMBLY__ #include <linux/mm.h> diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-sparc64/ide.h b/include/asm-sparc64/ide.h index 4c1098474c7..c393f815b0b 100644 --- a/include/asm-sparc64/ide.h +++ b/include/asm-sparc64/ide.h @@ -15,6 +15,7 @@ #include <asm/io.h> #include <asm/spitfire.h> #include <asm/cacheflush.h> +#include <asm/page.h> #ifndef MAX_HWIFS # ifdef CONFIG_BLK_DEV_IDEPCI diff --git a/include/asm-sparc64/page.h b/include/asm-sparc64/page.h index c9f8ef208ea..7f8d764abc4 100644 --- a/include/asm-sparc64/page.h +++ b/include/asm-sparc64/page.h @@ -21,6 +21,13 @@ #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +/* Flushing for D-cache alias handling is only needed if + * the page size is smaller than 16K. + */ +#if PAGE_SHIFT < 14 +#define DCACHE_ALIASING_POSSIBLE +#endif + #ifdef __KERNEL__ #ifndef __ASSEMBLY__ diff --git a/include/asm-sparc64/pgalloc.h b/include/asm-sparc64/pgalloc.h index b9b1914aae6..a96067cca96 100644 --- a/include/asm-sparc64/pgalloc.h +++ b/include/asm-sparc64/pgalloc.h @@ -10,6 +10,7 @@ #include <asm/spitfire.h> #include <asm/cpudata.h> #include <asm/cacheflush.h> +#include <asm/page.h> /* Page table allocation/freeing. */ #ifdef CONFIG_SMP diff --git a/include/asm-sparc64/pgtable.h b/include/asm-sparc64/pgtable.h index a2b4f5ed462..a297f6144f0 100644 --- a/include/asm-sparc64/pgtable.h +++ b/include/asm-sparc64/pgtable.h @@ -24,21 +24,23 @@ #include <asm/processor.h> #include <asm/const.h> -/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 16MB). - * The page copy blockops use 0x1000000 to 0x18000000 (16MB --> 24MB). +/* The kernel image occupies 0x4000000 to 0x1000000 (4MB --> 32MB). + * The page copy blockops can use 0x2000000 to 0x10000000. * The PROM resides in an area spanning 0xf0000000 to 0x100000000. - * The vmalloc area spans 0x140000000 to 0x200000000. + * The vmalloc area spans 0x100000000 to 0x200000000. + * Since modules need to be in the lowest 32-bits of the address space, + * we place them right before the OBP area from 0x10000000 to 0xf0000000. * There is a single static kernel PMD which maps from 0x0 to address * 0x400000000. */ -#define TLBTEMP_BASE _AC(0x0000000001000000,UL) -#define MODULES_VADDR _AC(0x0000000002000000,UL) -#define MODULES_LEN _AC(0x000000007e000000,UL) -#define MODULES_END _AC(0x0000000080000000,UL) -#define VMALLOC_START _AC(0x0000000140000000,UL) -#define VMALLOC_END _AC(0x0000000200000000,UL) +#define TLBTEMP_BASE _AC(0x0000000002000000,UL) +#define MODULES_VADDR _AC(0x0000000010000000,UL) +#define MODULES_LEN _AC(0x00000000e0000000,UL) +#define MODULES_END _AC(0x00000000f0000000,UL) #define LOW_OBP_ADDRESS _AC(0x00000000f0000000,UL) #define HI_OBP_ADDRESS _AC(0x0000000100000000,UL) +#define VMALLOC_START _AC(0x0000000100000000,UL) +#define VMALLOC_END _AC(0x0000000200000000,UL) /* XXX All of this needs to be rethought so we can take advantage * XXX cheetah's full 64-bit virtual address space, ie. no more hole diff --git a/include/asm-um/futex.h b/include/asm-um/futex.h index 2cac5ecd9d0..142ee2d8e0f 100644 --- a/include/asm-um/futex.h +++ b/include/asm-um/futex.h @@ -1,53 +1,12 @@ -#ifndef _ASM_FUTEX_H -#define _ASM_FUTEX_H - -#ifdef __KERNEL__ +#ifndef __UM_FUTEX_H +#define __UM_FUTEX_H #include <linux/futex.h> #include <asm/errno.h> +#include <asm/system.h> +#include <asm/processor.h> #include <asm/uaccess.h> -static inline int -futex_atomic_op_inuser (int encoded_op, int __user *uaddr) -{ - int op = (encoded_op >> 28) & 7; - int cmp = (encoded_op >> 24) & 15; - int oparg = (encoded_op << 8) >> 20; - int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; - if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) - oparg = 1 << oparg; - - if (! access_ok (VERIFY_WRITE, uaddr, sizeof(int))) - return -EFAULT; - - inc_preempt_count(); - - switch (op) { - case FUTEX_OP_SET: - case FUTEX_OP_ADD: - case FUTEX_OP_OR: - case FUTEX_OP_ANDN: - case FUTEX_OP_XOR: - default: - ret = -ENOSYS; - } +#include "asm/arch/futex.h" - dec_preempt_count(); - - if (!ret) { - switch (cmp) { - case FUTEX_OP_CMP_EQ: ret = (oldval == cmparg); break; - case FUTEX_OP_CMP_NE: ret = (oldval != cmparg); break; - case FUTEX_OP_CMP_LT: ret = (oldval < cmparg); break; - case FUTEX_OP_CMP_GE: ret = (oldval >= cmparg); break; - case FUTEX_OP_CMP_LE: ret = (oldval <= cmparg); break; - case FUTEX_OP_CMP_GT: ret = (oldval > cmparg); break; - default: ret = -ENOSYS; - } - } - return ret; -} - -#endif #endif diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h index ed06170e0ed..616d02b57ea 100644 --- a/include/asm-um/pgtable.h +++ b/include/asm-um/pgtable.h @@ -346,7 +346,6 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_set_val(pte, (pte_val(pte) & _PAGE_CHG_MASK), newprot); - if(pte_present(pte)) pte = pte_mknewpage(pte_mknewprot(pte)); return pte; } diff --git a/include/asm-v850/futex.h b/include/asm-v850/futex.h index 2cac5ecd9d0..9feff4ce142 100644 --- a/include/asm-v850/futex.h +++ b/include/asm-v850/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h index 24f86f0e43c..12b5732dc6e 100644 --- a/include/asm-xtensa/atomic.h +++ b/include/asm-xtensa/atomic.h @@ -22,7 +22,7 @@ typedef struct { volatile int counter; } atomic_t; #include <asm/processor.h> #include <asm/system.h> -#define ATOMIC_INIT(i) ( (atomic_t) { (i) } ) +#define ATOMIC_INIT(i) { (i) } /* * This Xtensa implementation assumes that the right mechanism diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h index d395ef226c3..e76ee889e21 100644 --- a/include/asm-xtensa/bitops.h +++ b/include/asm-xtensa/bitops.h @@ -174,7 +174,7 @@ static __inline__ int test_bit(int nr, const volatile void *addr) return 1UL & (((const volatile unsigned int *)addr)[nr>>5] >> (nr&31)); } -#if XCHAL_HAVE_NSAU +#if XCHAL_HAVE_NSA static __inline__ int __cntlz (unsigned long x) { diff --git a/include/asm-xtensa/hardirq.h b/include/asm-xtensa/hardirq.h index e07c76c36b9..aa9c1adf68d 100644 --- a/include/asm-xtensa/hardirq.h +++ b/include/asm-xtensa/hardirq.h @@ -23,6 +23,7 @@ typedef struct { unsigned int __nmi_count; /* arch dependent */ } ____cacheline_aligned irq_cpustat_t; +void ack_bad_irq(unsigned int irq); #include <linux/irq_cpustat.h> /* Standard mappings for irq_cpustat_t above */ #endif /* _XTENSA_HARDIRQ_H */ diff --git a/include/asm-xtensa/semaphore.h b/include/asm-xtensa/semaphore.h index db740b8bc6f..09e89ab3eb6 100644 --- a/include/asm-xtensa/semaphore.h +++ b/include/asm-xtensa/semaphore.h @@ -20,28 +20,19 @@ struct semaphore { atomic_t count; int sleepers; wait_queue_head_t wait; -#if WAITQUEUE_DEBUG - long __magic; -#endif }; -#if WAITQUEUE_DEBUG -# define __SEM_DEBUG_INIT(name) \ - , (int)&(name).__magic -#else -# define __SEM_DEBUG_INIT(name) -#endif - -#define __SEMAPHORE_INITIALIZER(name,count) \ - { ATOMIC_INIT(count), \ - 0, \ - __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ - __SEM_DEBUG_INIT(name) } +#define __SEMAPHORE_INITIALIZER(name,n) \ +{ \ + .count = ATOMIC_INIT(n), \ + .sleepers = 0, \ + .wait = __WAIT_QUEUE_HEAD_INITIALIZER((name).wait) \ +} -#define __MUTEX_INITIALIZER(name) \ +#define __MUTEX_INITIALIZER(name) \ __SEMAPHORE_INITIALIZER(name, 1) -#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ +#define __DECLARE_SEMAPHORE_GENERIC(name,count) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name,count) #define DECLARE_MUTEX(name) __DECLARE_SEMAPHORE_GENERIC(name,1) @@ -49,17 +40,8 @@ struct semaphore { static inline void sema_init (struct semaphore *sem, int val) { -/* - * *sem = (struct semaphore)__SEMAPHORE_INITIALIZER((*sem),val); - * - * i'd rather use the more flexible initialization above, but sadly - * GCC 2.7.2.3 emits a bogus warning. EGCS doesnt. Oh well. - */ atomic_set(&sem->count, val); init_waitqueue_head(&sem->wait); -#if WAITQUEUE_DEBUG - sem->__magic = (int)&sem->__magic; -#endif } static inline void init_MUTEX (struct semaphore *sem) @@ -81,9 +63,7 @@ extern spinlock_t semaphore_wake_lock; static inline void down(struct semaphore * sem) { -#if WAITQUEUE_DEBUG - CHECK_MAGIC(sem->__magic); -#endif + might_sleep(); if (atomic_sub_return(1, &sem->count) < 0) __down(sem); @@ -92,9 +72,8 @@ static inline void down(struct semaphore * sem) static inline int down_interruptible(struct semaphore * sem) { int ret = 0; -#if WAITQUEUE_DEBUG - CHECK_MAGIC(sem->__magic); -#endif + + might_sleep(); if (atomic_sub_return(1, &sem->count) < 0) ret = __down_interruptible(sem); @@ -104,9 +83,6 @@ static inline int down_interruptible(struct semaphore * sem) static inline int down_trylock(struct semaphore * sem) { int ret = 0; -#if WAITQUEUE_DEBUG - CHECK_MAGIC(sem->__magic); -#endif if (atomic_sub_return(1, &sem->count) < 0) ret = __down_trylock(sem); @@ -119,9 +95,6 @@ static inline int down_trylock(struct semaphore * sem) */ static inline void up(struct semaphore * sem) { -#if WAITQUEUE_DEBUG - CHECK_MAGIC(sem->__magic); -#endif if (atomic_add_return(1, &sem->count) <= 0) __up(sem); } diff --git a/include/asm-xtensa/system.h b/include/asm-xtensa/system.h index f09393232e5..9284867f1cb 100644 --- a/include/asm-xtensa/system.h +++ b/include/asm-xtensa/system.h @@ -189,20 +189,6 @@ static inline unsigned long xchg_u32(volatile int * m, unsigned long val) #define tas(ptr) (xchg((ptr),1)) -#if ( __XCC__ == 1 ) - -/* xt-xcc processes __inline__ differently than xt-gcc and decides to - * insert an out-of-line copy of function __xchg. This presents the - * unresolved symbol at link time of __xchg_called_with_bad_pointer, - * even though such a function would never be called at run-time. - * xt-gcc always inlines __xchg, and optimizes away the undefined - * bad_pointer function. - */ - -#define xchg(ptr,x) xchg_u32(ptr,x) - -#else /* assume xt-gcc */ - #define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) /* @@ -224,8 +210,6 @@ __xchg(unsigned long x, volatile void * ptr, int size) return x; } -#endif - extern void set_except_vector(int n, void *addr); static inline void spill_registers(void) diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 5fde6f4d6c1..04bd756efc6 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -5,6 +5,10 @@ * linux/byteorder_generic.h * Generic Byte-reordering support * + * The "... p" macros, like le64_to_cpup, can be used with pointers + * to unaligned data, but there will be a performance penalty on + * some architectures. Use get_unaligned for unaligned data. + * * Francois-Rene Rideau <fare@tunes.org> 19970707 * gathered all the good ideas from all asm-foo/byteorder.h into one file, * cleaned them up. diff --git a/include/linux/device.h b/include/linux/device.h index 06e5d42f2c7..95d607a48f0 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -317,6 +317,11 @@ dev_set_drvdata (struct device *dev, void *data) dev->driver_data = data; } +static inline int device_is_registered(struct device *dev) +{ + return klist_node_attached(&dev->knode_bus); +} + /* * High level routines for use by the bus drivers */ diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 17d0c0d40b0..eef0876d830 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -42,8 +42,8 @@ struct hlist_node; struct vlan_ethhdr { unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ unsigned char h_source[ETH_ALEN]; /* source ether addr */ - unsigned short h_vlan_proto; /* Should always be 0x8100 */ - unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */ + __be16 h_vlan_proto; /* Should always be 0x8100 */ + __be16 h_vlan_TCI; /* Encapsulates priority and VLAN ID */ unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */ }; @@ -55,8 +55,8 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) } struct vlan_hdr { - unsigned short h_vlan_TCI; /* Encapsulates priority and VLAN ID */ - unsigned short h_vlan_encapsulated_proto; /* packet type ID field (or len) */ + __be16 h_vlan_TCI; /* Encapsulates priority and VLAN ID */ + __be16 h_vlan_encapsulated_proto; /* packet type ID field (or len) */ }; #define VLAN_VID_MASK 0xfff diff --git a/include/linux/libata.h b/include/linux/libata.h index 022105c745f..ceee1fc42c6 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -393,6 +393,7 @@ extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_i extern void ata_pci_remove_one (struct pci_dev *pdev); #endif /* CONFIG_PCI */ extern int ata_device_add(struct ata_probe_ent *ent); +extern void ata_host_set_remove(struct ata_host_set *host_set); extern int ata_scsi_detect(Scsi_Host_Template *sht); extern int ata_scsi_ioctl(struct scsi_device *dev, int cmd, void __user *arg); extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); diff --git a/include/linux/mm.h b/include/linux/mm.h index 82d7024f076..097b3a3c693 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -136,6 +136,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_EXEC 0x00000004 #define VM_SHARED 0x00000008 +/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */ #define VM_MAYREAD 0x00000010 /* limits for mprotect() etc */ #define VM_MAYWRITE 0x00000020 #define VM_MAYEXEC 0x00000040 @@ -350,7 +351,8 @@ static inline void put_page(struct page *page) * only one copy in memory, at most, normally. * * For the non-reserved pages, page_count(page) denotes a reference count. - * page_count() == 0 means the page is free. + * page_count() == 0 means the page is free. page->lru is then used for + * freelist management in the buddy allocator. * page_count() == 1 means the page is used for exactly one purpose * (e.g. a private data page of one process). * @@ -376,10 +378,8 @@ static inline void put_page(struct page *page) * attaches, plus 1 if `private' contains something, plus one for * the page cache itself. * - * All pages belonging to an inode are in these doubly linked lists: - * mapping->clean_pages, mapping->dirty_pages and mapping->locked_pages; - * using the page->list list_head. These fields are also used for - * freelist managemet (when page_count()==0). + * Instead of keeping dirty/clean pages in per address-space lists, we instead + * now tag pages as dirty/under writeback in the radix tree. * * There is also a per-mapping radix tree mapping index to the page * in memory if present. The tree is rooted at mapping->root. diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 7e033e9271a..4ced3873681 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -133,11 +133,13 @@ enum ip_conntrack_expect_events { #include <linux/netfilter_ipv4/ip_conntrack_tcp.h> #include <linux/netfilter_ipv4/ip_conntrack_icmp.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> #include <linux/netfilter_ipv4/ip_conntrack_sctp.h> /* per conntrack: protocol private data */ union ip_conntrack_proto { /* insert conntrack proto private data here */ + struct ip_ct_gre gre; struct ip_ct_sctp sctp; struct ip_ct_tcp tcp; struct ip_ct_icmp icmp; @@ -148,6 +150,7 @@ union ip_conntrack_expect_proto { }; /* Add protocol helper include file here */ +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> #include <linux/netfilter_ipv4/ip_conntrack_amanda.h> #include <linux/netfilter_ipv4/ip_conntrack_ftp.h> #include <linux/netfilter_ipv4/ip_conntrack_irc.h> @@ -155,12 +158,20 @@ union ip_conntrack_expect_proto { /* per conntrack: application helper private data */ union ip_conntrack_help { /* insert conntrack helper private data (master) here */ + struct ip_ct_pptp_master ct_pptp_info; struct ip_ct_ftp_master ct_ftp_info; struct ip_ct_irc_master ct_irc_info; }; #ifdef CONFIG_IP_NF_NAT_NEEDED #include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_pptp.h> + +/* per conntrack: nat application helper private data */ +union ip_conntrack_nat_help { + /* insert nat helper private data here */ + struct ip_nat_pptp nat_pptp_info; +}; #endif #include <linux/types.h> @@ -223,6 +234,7 @@ struct ip_conntrack #ifdef CONFIG_IP_NF_NAT_NEEDED struct { struct ip_nat_info info; + union ip_conntrack_nat_help help; #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) int masq_index; @@ -320,11 +332,28 @@ extern void need_ip_conntrack(void); extern int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig); +extern void __ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies, + int do_acct); + +/* Refresh conntrack for this many jiffies and do accounting */ +static inline void ip_ct_refresh_acct(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __ip_ct_refresh_acct(ct, ctinfo, skb, extra_jiffies, 1); +} + /* Refresh conntrack for this many jiffies */ -extern void ip_ct_refresh_acct(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - unsigned long extra_jiffies); +static inline void ip_ct_refresh(struct ip_conntrack *ct, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + __ip_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); +} /* These are for NAT. Icky. */ /* Update TCP window tracking data when NAT mangles the packet */ @@ -372,7 +401,7 @@ extern struct ip_conntrack_expect * __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); extern struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); +ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); extern struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, diff --git a/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h new file mode 100644 index 00000000000..816144c75de --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -0,0 +1,325 @@ +/* PPTP constants and structs */ +#ifndef _CONNTRACK_PPTP_H +#define _CONNTRACK_PPTP_H + +/* state of the control session */ +enum pptp_ctrlsess_state { + PPTP_SESSION_NONE, /* no session present */ + PPTP_SESSION_ERROR, /* some session error */ + PPTP_SESSION_STOPREQ, /* stop_sess request seen */ + PPTP_SESSION_REQUESTED, /* start_sess request seen */ + PPTP_SESSION_CONFIRMED, /* session established */ +}; + +/* state of the call inside the control session */ +enum pptp_ctrlcall_state { + PPTP_CALL_NONE, + PPTP_CALL_ERROR, + PPTP_CALL_OUT_REQ, + PPTP_CALL_OUT_CONF, + PPTP_CALL_IN_REQ, + PPTP_CALL_IN_REP, + PPTP_CALL_IN_CONF, + PPTP_CALL_CLEAR_REQ, +}; + + +/* conntrack private data */ +struct ip_ct_pptp_master { + enum pptp_ctrlsess_state sstate; /* session state */ + + /* everything below is going to be per-expectation in newnat, + * since there could be more than one call within one session */ + enum pptp_ctrlcall_state cstate; /* call state */ + u_int16_t pac_call_id; /* call id of PAC, host byte order */ + u_int16_t pns_call_id; /* call id of PNS, host byte order */ + + /* in pre-2.6.11 this used to be per-expect. Now it is per-conntrack + * and therefore imposes a fixed limit on the number of maps */ + struct ip_ct_gre_keymap *keymap_orig, *keymap_reply; +}; + +/* conntrack_expect private member */ +struct ip_ct_pptp_expect { + enum pptp_ctrlcall_state cstate; /* call state */ + u_int16_t pac_call_id; /* call id of PAC */ + u_int16_t pns_call_id; /* call id of PNS */ +}; + + +#ifdef __KERNEL__ + +#define IP_CONNTR_PPTP PPTP_CONTROL_PORT + +#define PPTP_CONTROL_PORT 1723 + +#define PPTP_PACKET_CONTROL 1 +#define PPTP_PACKET_MGMT 2 + +#define PPTP_MAGIC_COOKIE 0x1a2b3c4d + +struct pptp_pkt_hdr { + __u16 packetLength; + __be16 packetType; + __be32 magicCookie; +}; + +/* PptpControlMessageType values */ +#define PPTP_START_SESSION_REQUEST 1 +#define PPTP_START_SESSION_REPLY 2 +#define PPTP_STOP_SESSION_REQUEST 3 +#define PPTP_STOP_SESSION_REPLY 4 +#define PPTP_ECHO_REQUEST 5 +#define PPTP_ECHO_REPLY 6 +#define PPTP_OUT_CALL_REQUEST 7 +#define PPTP_OUT_CALL_REPLY 8 +#define PPTP_IN_CALL_REQUEST 9 +#define PPTP_IN_CALL_REPLY 10 +#define PPTP_IN_CALL_CONNECT 11 +#define PPTP_CALL_CLEAR_REQUEST 12 +#define PPTP_CALL_DISCONNECT_NOTIFY 13 +#define PPTP_WAN_ERROR_NOTIFY 14 +#define PPTP_SET_LINK_INFO 15 + +#define PPTP_MSG_MAX 15 + +/* PptpGeneralError values */ +#define PPTP_ERROR_CODE_NONE 0 +#define PPTP_NOT_CONNECTED 1 +#define PPTP_BAD_FORMAT 2 +#define PPTP_BAD_VALUE 3 +#define PPTP_NO_RESOURCE 4 +#define PPTP_BAD_CALLID 5 +#define PPTP_REMOVE_DEVICE_ERROR 6 + +struct PptpControlHeader { + __be16 messageType; + __u16 reserved; +}; + +/* FramingCapability Bitmap Values */ +#define PPTP_FRAME_CAP_ASYNC 0x1 +#define PPTP_FRAME_CAP_SYNC 0x2 + +/* BearerCapability Bitmap Values */ +#define PPTP_BEARER_CAP_ANALOG 0x1 +#define PPTP_BEARER_CAP_DIGITAL 0x2 + +struct PptpStartSessionRequest { + __be16 protocolVersion; + __u8 reserved1; + __u8 reserved2; + __be32 framingCapability; + __be32 bearerCapability; + __be16 maxChannels; + __be16 firmwareRevision; + __u8 hostName[64]; + __u8 vendorString[64]; +}; + +/* PptpStartSessionResultCode Values */ +#define PPTP_START_OK 1 +#define PPTP_START_GENERAL_ERROR 2 +#define PPTP_START_ALREADY_CONNECTED 3 +#define PPTP_START_NOT_AUTHORIZED 4 +#define PPTP_START_UNKNOWN_PROTOCOL 5 + +struct PptpStartSessionReply { + __be16 protocolVersion; + __u8 resultCode; + __u8 generalErrorCode; + __be32 framingCapability; + __be32 bearerCapability; + __be16 maxChannels; + __be16 firmwareRevision; + __u8 hostName[64]; + __u8 vendorString[64]; +}; + +/* PptpStopReasons */ +#define PPTP_STOP_NONE 1 +#define PPTP_STOP_PROTOCOL 2 +#define PPTP_STOP_LOCAL_SHUTDOWN 3 + +struct PptpStopSessionRequest { + __u8 reason; +}; + +/* PptpStopSessionResultCode */ +#define PPTP_STOP_OK 1 +#define PPTP_STOP_GENERAL_ERROR 2 + +struct PptpStopSessionReply { + __u8 resultCode; + __u8 generalErrorCode; +}; + +struct PptpEchoRequest { + __be32 identNumber; +}; + +/* PptpEchoReplyResultCode */ +#define PPTP_ECHO_OK 1 +#define PPTP_ECHO_GENERAL_ERROR 2 + +struct PptpEchoReply { + __be32 identNumber; + __u8 resultCode; + __u8 generalErrorCode; + __u16 reserved; +}; + +/* PptpFramingType */ +#define PPTP_ASYNC_FRAMING 1 +#define PPTP_SYNC_FRAMING 2 +#define PPTP_DONT_CARE_FRAMING 3 + +/* PptpCallBearerType */ +#define PPTP_ANALOG_TYPE 1 +#define PPTP_DIGITAL_TYPE 2 +#define PPTP_DONT_CARE_BEARER_TYPE 3 + +struct PptpOutCallRequest { + __be16 callID; + __be16 callSerialNumber; + __be32 minBPS; + __be32 maxBPS; + __be32 bearerType; + __be32 framingType; + __be16 packetWindow; + __be16 packetProcDelay; + __u16 reserved1; + __be16 phoneNumberLength; + __u16 reserved2; + __u8 phoneNumber[64]; + __u8 subAddress[64]; +}; + +/* PptpCallResultCode */ +#define PPTP_OUTCALL_CONNECT 1 +#define PPTP_OUTCALL_GENERAL_ERROR 2 +#define PPTP_OUTCALL_NO_CARRIER 3 +#define PPTP_OUTCALL_BUSY 4 +#define PPTP_OUTCALL_NO_DIAL_TONE 5 +#define PPTP_OUTCALL_TIMEOUT 6 +#define PPTP_OUTCALL_DONT_ACCEPT 7 + +struct PptpOutCallReply { + __be16 callID; + __be16 peersCallID; + __u8 resultCode; + __u8 generalErrorCode; + __be16 causeCode; + __be32 connectSpeed; + __be16 packetWindow; + __be16 packetProcDelay; + __be32 physChannelID; +}; + +struct PptpInCallRequest { + __be16 callID; + __be16 callSerialNumber; + __be32 callBearerType; + __be32 physChannelID; + __be16 dialedNumberLength; + __be16 dialingNumberLength; + __u8 dialedNumber[64]; + __u8 dialingNumber[64]; + __u8 subAddress[64]; +}; + +/* PptpInCallResultCode */ +#define PPTP_INCALL_ACCEPT 1 +#define PPTP_INCALL_GENERAL_ERROR 2 +#define PPTP_INCALL_DONT_ACCEPT 3 + +struct PptpInCallReply { + __be16 callID; + __be16 peersCallID; + __u8 resultCode; + __u8 generalErrorCode; + __be16 packetWindow; + __be16 packetProcDelay; + __u16 reserved; +}; + +struct PptpInCallConnected { + __be16 peersCallID; + __u16 reserved; + __be32 connectSpeed; + __be16 packetWindow; + __be16 packetProcDelay; + __be32 callFramingType; +}; + +struct PptpClearCallRequest { + __be16 callID; + __u16 reserved; +}; + +struct PptpCallDisconnectNotify { + __be16 callID; + __u8 resultCode; + __u8 generalErrorCode; + __be16 causeCode; + __u16 reserved; + __u8 callStatistics[128]; +}; + +struct PptpWanErrorNotify { + __be16 peersCallID; + __u16 reserved; + __be32 crcErrors; + __be32 framingErrors; + __be32 hardwareOverRuns; + __be32 bufferOverRuns; + __be32 timeoutErrors; + __be32 alignmentErrors; +}; + +struct PptpSetLinkInfo { + __be16 peersCallID; + __u16 reserved; + __be32 sendAccm; + __be32 recvAccm; +}; + +union pptp_ctrl_union { + struct PptpStartSessionRequest sreq; + struct PptpStartSessionReply srep; + struct PptpStopSessionRequest streq; + struct PptpStopSessionReply strep; + struct PptpOutCallRequest ocreq; + struct PptpOutCallReply ocack; + struct PptpInCallRequest icreq; + struct PptpInCallReply icack; + struct PptpInCallConnected iccon; + struct PptpClearCallRequest clrreq; + struct PptpCallDisconnectNotify disc; + struct PptpWanErrorNotify wanerr; + struct PptpSetLinkInfo setlink; +}; + +extern int +(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +extern int +(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +extern int +(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig, + struct ip_conntrack_expect *exp_reply); + +extern void +(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp); +#endif /* __KERNEL__ */ +#endif /* _CONNTRACK_PPTP_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h new file mode 100644 index 00000000000..8d090ef82f5 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h @@ -0,0 +1,114 @@ +#ifndef _CONNTRACK_PROTO_GRE_H +#define _CONNTRACK_PROTO_GRE_H +#include <asm/byteorder.h> + +/* GRE PROTOCOL HEADER */ + +/* GRE Version field */ +#define GRE_VERSION_1701 0x0 +#define GRE_VERSION_PPTP 0x1 + +/* GRE Protocol field */ +#define GRE_PROTOCOL_PPTP 0x880B + +/* GRE Flags */ +#define GRE_FLAG_C 0x80 +#define GRE_FLAG_R 0x40 +#define GRE_FLAG_K 0x20 +#define GRE_FLAG_S 0x10 +#define GRE_FLAG_A 0x80 + +#define GRE_IS_C(f) ((f)&GRE_FLAG_C) +#define GRE_IS_R(f) ((f)&GRE_FLAG_R) +#define GRE_IS_K(f) ((f)&GRE_FLAG_K) +#define GRE_IS_S(f) ((f)&GRE_FLAG_S) +#define GRE_IS_A(f) ((f)&GRE_FLAG_A) + +/* GRE is a mess: Four different standards */ +struct gre_hdr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u16 rec:3, + srr:1, + seq:1, + key:1, + routing:1, + csum:1, + version:3, + reserved:4, + ack:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u16 csum:1, + routing:1, + key:1, + seq:1, + srr:1, + rec:3, + ack:1, + reserved:4, + version:3; +#else +#error "Adjust your <asm/byteorder.h> defines" +#endif + __u16 protocol; +}; + +/* modified GRE header for PPTP */ +struct gre_hdr_pptp { + __u8 flags; /* bitfield */ + __u8 version; /* should be GRE_VERSION_PPTP */ + __u16 protocol; /* should be GRE_PROTOCOL_PPTP */ + __u16 payload_len; /* size of ppp payload, not inc. gre header */ + __u16 call_id; /* peer's call_id for this session */ + __u32 seq; /* sequence number. Present if S==1 */ + __u32 ack; /* seq number of highest packet recieved by */ + /* sender in this session */ +}; + + +/* this is part of ip_conntrack */ +struct ip_ct_gre { + unsigned int stream_timeout; + unsigned int timeout; +}; + +#ifdef __KERNEL__ +struct ip_conntrack_expect; +struct ip_conntrack; + +/* structure for original <-> reply keymap */ +struct ip_ct_gre_keymap { + struct list_head list; + + struct ip_conntrack_tuple tuple; +}; + +/* add new tuple->key_reply pair to keymap */ +int ip_ct_gre_keymap_add(struct ip_conntrack *ct, + struct ip_conntrack_tuple *t, + int reply); + +/* delete keymap entries */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct); + + +/* get pointer to gre key, if present */ +static inline u_int32_t *gre_key(struct gre_hdr *greh) +{ + if (!greh->key) + return NULL; + if (greh->csum || greh->routing) + return (u_int32_t *) (greh+sizeof(*greh)+4); + return (u_int32_t *) (greh+sizeof(*greh)); +} + +/* get pointer ot gre csum, if present */ +static inline u_int16_t *gre_csum(struct gre_hdr *greh) +{ + if (!greh->csum) + return NULL; + return (u_int16_t *) (greh+sizeof(*greh)); +} + +#endif /* __KERNEL__ */ + +#endif /* _CONNTRACK_PROTO_GRE_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index c33f0b5e0d0..20e43f018b7 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -17,7 +17,7 @@ union ip_conntrack_manip_proto u_int16_t all; struct { - u_int16_t port; + __be16 port; } tcp; struct { u_int16_t port; @@ -28,6 +28,9 @@ union ip_conntrack_manip_proto struct { u_int16_t port; } sctp; + struct { + __be16 key; /* key is 32bit, pptp only uses 16 */ + } gre; }; /* The manipulable part of the tuple. */ @@ -61,6 +64,10 @@ struct ip_conntrack_tuple struct { u_int16_t port; } sctp; + struct { + __be16 key; /* key is 32bit, + * pptp only uses 16 */ + } gre; } u; /* The protocol. */ diff --git a/include/linux/netfilter_ipv4/ip_nat_pptp.h b/include/linux/netfilter_ipv4/ip_nat_pptp.h new file mode 100644 index 00000000000..eaf66c2e8f9 --- /dev/null +++ b/include/linux/netfilter_ipv4/ip_nat_pptp.h @@ -0,0 +1,11 @@ +/* PPTP constants and structs */ +#ifndef _NAT_PPTP_H +#define _NAT_PPTP_H + +/* conntrack private data */ +struct ip_nat_pptp { + u_int16_t pns_call_id; /* NAT'ed PNS call id */ + u_int16_t pac_call_id; /* NAT'ed PAC call id */ +}; + +#endif /* _NAT_PPTP_H */ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index 58c72a52dc6..59f70b34e02 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -455,6 +455,9 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb, /* Check for an extension */ extern int ip6t_ext_hdr(u8 nexthdr); +/* find specified header and get offset to it */ +extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, + u8 target); #define IP6T_ALIGN(s) (((s) + (__alignof__(struct ip6t_entry)-1)) & ~(__alignof__(struct ip6t_entry)-1)) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index c49d28eca56..b86a4b77007 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1268,7 +1268,8 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E -#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x037E +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2 0x037F #define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 #define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B diff --git a/include/linux/reboot.h b/include/linux/reboot.h index 3b3266ff1a9..7ab2cdb83ef 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -59,6 +59,10 @@ extern void machine_crash_shutdown(struct pt_regs *); * Architecture independent implemenations of sys_reboot commands. */ +extern void kernel_restart_prepare(char *cmd); +extern void kernel_halt_prepare(void); +extern void kernel_power_off_prepare(void); + extern void kernel_restart(char *cmd); extern void kernel_halt(void); extern void kernel_power_off(void); diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 425f58c8ea4..a6f03e47373 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -508,5 +508,7 @@ asmlinkage long sys_keyctl(int cmd, unsigned long arg2, unsigned long arg3, asmlinkage long sys_ioprio_set(int which, int who, int ioprio); asmlinkage long sys_ioprio_get(int which, int who); +asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask, + unsigned long maxnode); #endif diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h index 53184a38fdf..0e293fe733b 100644 --- a/include/rdma/ib_mad.h +++ b/include/rdma/ib_mad.h @@ -108,6 +108,13 @@ #define IB_QP1_QKEY 0x80010000 #define IB_QP_SET_QKEY 0x80000000 +enum { + IB_MGMT_MAD_DATA = 232, + IB_MGMT_RMPP_DATA = 220, + IB_MGMT_VENDOR_DATA = 216, + IB_MGMT_SA_DATA = 200 +}; + struct ib_mad_hdr { u8 base_version; u8 mgmt_class; @@ -149,20 +156,20 @@ struct ib_sa_hdr { struct ib_mad { struct ib_mad_hdr mad_hdr; - u8 data[232]; + u8 data[IB_MGMT_MAD_DATA]; }; struct ib_rmpp_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; - u8 data[220]; + u8 data[IB_MGMT_RMPP_DATA]; }; struct ib_sa_mad { struct ib_mad_hdr mad_hdr; struct ib_rmpp_hdr rmpp_hdr; struct ib_sa_hdr sa_hdr; - u8 data[200]; + u8 data[IB_MGMT_SA_DATA]; } __attribute__ ((packed)); struct ib_vendor_mad { @@ -170,7 +177,7 @@ struct ib_vendor_mad { struct ib_rmpp_hdr rmpp_hdr; u8 reserved; u8 oui[3]; - u8 data[216]; + u8 data[IB_MGMT_VENDOR_DATA]; }; struct ib_class_port_info diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 916144be208..69313ba7505 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -439,6 +439,8 @@ enum scsi_host_state { SHOST_CANCEL, SHOST_DEL, SHOST_RECOVERY, + SHOST_CANCEL_RECOVERY, + SHOST_DEL_RECOVERY, }; struct Scsi_Host { @@ -465,8 +467,6 @@ struct Scsi_Host { struct list_head eh_cmd_q; struct task_struct * ehandler; /* Error recovery thread. */ - struct semaphore * eh_wait; /* The error recovery thread waits - on this. */ struct semaphore * eh_action; /* Wait for specific actions on the host. */ unsigned int eh_active:1; /* Indicates the eh thread is awake and active if @@ -621,6 +621,13 @@ static inline struct Scsi_Host *dev_to_shost(struct device *dev) return container_of(dev, struct Scsi_Host, shost_gendev); } +static inline int scsi_host_in_recovery(struct Scsi_Host *shost) +{ + return shost->shost_state == SHOST_RECOVERY || + shost->shost_state == SHOST_CANCEL_RECOVERY || + shost->shost_state == SHOST_DEL_RECOVERY; +} + extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *); extern void scsi_flush_work(struct Scsi_Host *); diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h index 115db056dc6..b0d44543737 100644 --- a/include/scsi/scsi_transport_fc.h +++ b/include/scsi/scsi_transport_fc.h @@ -103,8 +103,8 @@ enum fc_port_state { incapable of reporting */ #define FC_PORTSPEED_1GBIT 1 #define FC_PORTSPEED_2GBIT 2 -#define FC_PORTSPEED_10GBIT 4 -#define FC_PORTSPEED_4GBIT 8 +#define FC_PORTSPEED_4GBIT 4 +#define FC_PORTSPEED_10GBIT 8 #define FC_PORTSPEED_NOT_NEGOTIATED (1 << 15) /* Speed not established */ /* diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig index 396c7873e80..46a5e5acff9 100644 --- a/kernel/power/Kconfig +++ b/kernel/power/Kconfig @@ -29,7 +29,7 @@ config PM_DEBUG config SOFTWARE_SUSPEND bool "Software Suspend" - depends on PM && SWAP && (X86 || ((FVR || PPC32) && !SMP)) + depends on PM && SWAP && (X86 && (!SMP || SUSPEND_SMP)) || ((FVR || PPC32) && !SMP) ---help--- Enable the possibility of suspending the machine. It doesn't need APM. diff --git a/kernel/power/disk.c b/kernel/power/disk.c index 2d8bf054d03..761956e813f 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -17,12 +17,12 @@ #include <linux/delay.h> #include <linux/fs.h> #include <linux/mount.h> +#include <linux/pm.h> #include "power.h" extern suspend_disk_method_t pm_disk_mode; -extern struct pm_ops * pm_ops; extern int swsusp_suspend(void); extern int swsusp_write(void); @@ -49,13 +49,11 @@ dev_t swsusp_resume_device; static void power_down(suspend_disk_method_t mode) { - unsigned long flags; int error = 0; - local_irq_save(flags); switch(mode) { case PM_DISK_PLATFORM: - device_shutdown(); + kernel_power_off_prepare(); error = pm_ops->enter(PM_SUSPEND_DISK); break; case PM_DISK_SHUTDOWN: diff --git a/kernel/power/power.h b/kernel/power/power.h index cd6a3493cc0..9c9167d910d 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -1,7 +1,7 @@ #include <linux/suspend.h> #include <linux/utsname.h> -/* With SUSPEND_CONSOLE defined, it suspend looks *really* cool, but +/* With SUSPEND_CONSOLE defined suspend looks *really* cool, but we probably do not take enough locks for switching consoles, etc, so bad things might happen. */ diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c index d967e875ee8..1cc9ff25e47 100644 --- a/kernel/power/swsusp.c +++ b/kernel/power/swsusp.c @@ -363,7 +363,7 @@ static void lock_swapdevices(void) } /** - * write_swap_page - Write one page to a fresh swap location. + * write_page - Write one page to a fresh swap location. * @addr: Address we're writing. * @loc: Place to store the entry we used. * @@ -863,6 +863,9 @@ static int alloc_image_pages(void) return 0; } +/* Free pages we allocated for suspend. Suspend pages are alocated + * before atomic copy, so we need to free them after resume. + */ void swsusp_free(void) { BUG_ON(PageNosave(virt_to_page(pagedir_save))); @@ -918,6 +921,7 @@ static int swsusp_alloc(void) pagedir_nosave = NULL; nr_copy_pages = calc_nr(nr_copy_pages); + nr_copy_pages_check = nr_copy_pages; pr_debug("suspend: (pages needed: %d + %d free: %d)\n", nr_copy_pages, PAGES_FOR_IO, nr_free_pages()); @@ -940,7 +944,6 @@ static int swsusp_alloc(void) return error; } - nr_copy_pages_check = nr_copy_pages; return 0; } @@ -1213,8 +1216,9 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist) free_pagedir(pblist); free_eaten_memory(); pblist = NULL; - } - else + /* Is this even worth handling? It should never ever happen, and we + have just lost user's state, anyway... */ + } else printk("swsusp: Relocated %d pages\n", rel); return pblist; diff --git a/kernel/printk.c b/kernel/printk.c index a967605bc2e..4b8f0f9230a 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -488,6 +488,11 @@ static int __init printk_time_setup(char *str) __setup("time", printk_time_setup); +__attribute__((weak)) unsigned long long printk_clock(void) +{ + return sched_clock(); +} + /* * This is printk. It can be called from any context. We want it to work. * @@ -565,7 +570,7 @@ asmlinkage int vprintk(const char *fmt, va_list args) loglev_char = default_message_loglevel + '0'; } - t = sched_clock(); + t = printk_clock(); nanosec_rem = do_div(t, 1000000000); tlen = sprintf(tbuf, "<%c>[%5lu.%06lu] ", diff --git a/kernel/signal.c b/kernel/signal.c index b92c3c9f8b9..5a274705ba1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -936,34 +936,31 @@ force_sig_specific(int sig, struct task_struct *t) * as soon as they're available, so putting the signal on the shared queue * will be equivalent to sending it to one such thread. */ -#define wants_signal(sig, p, mask) \ - (!sigismember(&(p)->blocked, sig) \ - && !((p)->state & mask) \ - && !((p)->flags & PF_EXITING) \ - && (task_curr(p) || !signal_pending(p))) - +static inline int wants_signal(int sig, struct task_struct *p) +{ + if (sigismember(&p->blocked, sig)) + return 0; + if (p->flags & PF_EXITING) + return 0; + if (sig == SIGKILL) + return 1; + if (p->state & (TASK_STOPPED | TASK_TRACED)) + return 0; + return task_curr(p) || !signal_pending(p); +} static void __group_complete_signal(int sig, struct task_struct *p) { - unsigned int mask; struct task_struct *t; /* - * Don't bother traced and stopped tasks (but - * SIGKILL will punch through that). - */ - mask = TASK_STOPPED | TASK_TRACED; - if (sig == SIGKILL) - mask = 0; - - /* * Now find a thread we can wake up to take the signal off the queue. * * If the main thread wants the signal, it gets first crack. * Probably the least surprising to the average bear. */ - if (wants_signal(sig, p, mask)) + if (wants_signal(sig, p)) t = p; else if (thread_group_empty(p)) /* @@ -981,7 +978,7 @@ __group_complete_signal(int sig, struct task_struct *p) t = p->signal->curr_target = p; BUG_ON(t->tgid != p->tgid); - while (!wants_signal(sig, t, mask)) { + while (!wants_signal(sig, t)) { t = next_thread(t); if (t == p->signal->curr_target) /* diff --git a/kernel/sys.c b/kernel/sys.c index f723522e698..2fa1ed18123 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -361,17 +361,35 @@ out_unlock: return retval; } +/** + * emergency_restart - reboot the system + * + * Without shutting down any hardware or taking any locks + * reboot the system. This is called when we know we are in + * trouble so this is our best effort to reboot. This is + * safe to call in interrupt context. + */ void emergency_restart(void) { machine_emergency_restart(); } EXPORT_SYMBOL_GPL(emergency_restart); -void kernel_restart(char *cmd) +/** + * kernel_restart - reboot the system + * + * Shutdown everything and perform a clean reboot. + * This is not safe to call in interrupt context. + */ +void kernel_restart_prepare(char *cmd) { notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd); system_state = SYSTEM_RESTART; device_shutdown(); +} +void kernel_restart(char *cmd) +{ + kernel_restart_prepare(cmd); if (!cmd) { printk(KERN_EMERG "Restarting system.\n"); } else { @@ -382,6 +400,12 @@ void kernel_restart(char *cmd) } EXPORT_SYMBOL_GPL(kernel_restart); +/** + * kernel_kexec - reboot the system + * + * Move into place and start executing a preloaded standalone + * executable. If nothing was preloaded return an error. + */ void kernel_kexec(void) { #ifdef CONFIG_KEXEC @@ -390,9 +414,7 @@ void kernel_kexec(void) if (!image) { return; } - notifier_call_chain(&reboot_notifier_list, SYS_RESTART, NULL); - system_state = SYSTEM_RESTART; - device_shutdown(); + kernel_restart_prepare(NULL); printk(KERN_EMERG "Starting new kernel\n"); machine_shutdown(); machine_kexec(image); @@ -400,21 +422,39 @@ void kernel_kexec(void) } EXPORT_SYMBOL_GPL(kernel_kexec); -void kernel_halt(void) +/** + * kernel_halt - halt the system + * + * Shutdown everything and perform a clean system halt. + */ +void kernel_halt_prepare(void) { notifier_call_chain(&reboot_notifier_list, SYS_HALT, NULL); system_state = SYSTEM_HALT; device_shutdown(); +} +void kernel_halt(void) +{ + kernel_halt_prepare(); printk(KERN_EMERG "System halted.\n"); machine_halt(); } EXPORT_SYMBOL_GPL(kernel_halt); -void kernel_power_off(void) +/** + * kernel_power_off - power_off the system + * + * Shutdown everything and perform a clean system power_off. + */ +void kernel_power_off_prepare(void) { notifier_call_chain(&reboot_notifier_list, SYS_POWER_OFF, NULL); system_state = SYSTEM_POWER_OFF; device_shutdown(); +} +void kernel_power_off(void) +{ + kernel_power_off_prepare(); printk(KERN_EMERG "Power down.\n"); machine_power_off(); } diff --git a/mm/mmap.c b/mm/mmap.c index 8b8e05f07cd..fa11d91242e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1640,7 +1640,7 @@ static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma) /* * Get rid of page table information in the indicated region. * - * Called with the page table lock held. + * Called with the mm semaphore held. */ static void unmap_region(struct mm_struct *mm, struct vm_area_struct *vma, struct vm_area_struct *prev, diff --git a/mm/mprotect.c b/mm/mprotect.c index e9fbd013ad9..57577f63b30 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -248,7 +248,8 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot) newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC)); - if ((newflags & ~(newflags >> 4)) & 0xf) { + /* newflags >> 4 shift VM_MAY% in place of VM_% */ + if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) { error = -EACCES; goto out; } diff --git a/mm/slab.c b/mm/slab.c index 437d3388054..c9adfce0040 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -308,12 +308,12 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; #define SIZE_L3 (1 + MAX_NUMNODES) /* - * This function may be completely optimized away if + * This function must be completely optimized away if * a constant is passed to it. Mostly the same as * what is in linux/slab.h except it returns an * index. */ -static inline int index_of(const size_t size) +static __always_inline int index_of(const size_t size) { if (__builtin_constant_p(size)) { int i = 0; @@ -329,7 +329,8 @@ static inline int index_of(const size_t size) extern void __bad_size(void); __bad_size(); } - } + } else + BUG(); return 0; } @@ -639,7 +640,7 @@ static enum { static DEFINE_PER_CPU(struct work_struct, reap_work); -static void free_block(kmem_cache_t* cachep, void** objpp, int len); +static void free_block(kmem_cache_t* cachep, void** objpp, int len, int node); static void enable_cpucache (kmem_cache_t *cachep); static void cache_reap (void *unused); static int __node_shrink(kmem_cache_t *cachep, int node); @@ -804,7 +805,7 @@ static inline void __drain_alien_cache(kmem_cache_t *cachep, struct array_cache if (ac->avail) { spin_lock(&rl3->list_lock); - free_block(cachep, ac->entry, ac->avail); + free_block(cachep, ac->entry, ac->avail, node); ac->avail = 0; spin_unlock(&rl3->list_lock); } @@ -925,7 +926,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, /* Free limit for this kmem_list3 */ l3->free_limit -= cachep->batchcount; if (nc) - free_block(cachep, nc->entry, nc->avail); + free_block(cachep, nc->entry, nc->avail, node); if (!cpus_empty(mask)) { spin_unlock(&l3->list_lock); @@ -934,7 +935,7 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, if (l3->shared) { free_block(cachep, l3->shared->entry, - l3->shared->avail); + l3->shared->avail, node); kfree(l3->shared); l3->shared = NULL; } @@ -1882,12 +1883,13 @@ static void do_drain(void *arg) { kmem_cache_t *cachep = (kmem_cache_t*)arg; struct array_cache *ac; + int node = numa_node_id(); check_irq_off(); ac = ac_data(cachep); - spin_lock(&cachep->nodelists[numa_node_id()]->list_lock); - free_block(cachep, ac->entry, ac->avail); - spin_unlock(&cachep->nodelists[numa_node_id()]->list_lock); + spin_lock(&cachep->nodelists[node]->list_lock); + free_block(cachep, ac->entry, ac->avail, node); + spin_unlock(&cachep->nodelists[node]->list_lock); ac->avail = 0; } @@ -2608,7 +2610,7 @@ done: /* * Caller needs to acquire correct kmem_list's list_lock */ -static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) +static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects, int node) { int i; struct kmem_list3 *l3; @@ -2617,14 +2619,12 @@ static void free_block(kmem_cache_t *cachep, void **objpp, int nr_objects) void *objp = objpp[i]; struct slab *slabp; unsigned int objnr; - int nodeid = 0; slabp = GET_PAGE_SLAB(virt_to_page(objp)); - nodeid = slabp->nodeid; - l3 = cachep->nodelists[nodeid]; + l3 = cachep->nodelists[node]; list_del(&slabp->list); objnr = (objp - slabp->s_mem) / cachep->objsize; - check_spinlock_acquired_node(cachep, nodeid); + check_spinlock_acquired_node(cachep, node); check_slabp(cachep, slabp); @@ -2664,13 +2664,14 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) { int batchcount; struct kmem_list3 *l3; + int node = numa_node_id(); batchcount = ac->batchcount; #if DEBUG BUG_ON(!batchcount || batchcount > ac->avail); #endif check_irq_off(); - l3 = cachep->nodelists[numa_node_id()]; + l3 = cachep->nodelists[node]; spin_lock(&l3->list_lock); if (l3->shared) { struct array_cache *shared_array = l3->shared; @@ -2686,7 +2687,7 @@ static void cache_flusharray(kmem_cache_t *cachep, struct array_cache *ac) } } - free_block(cachep, ac->entry, batchcount); + free_block(cachep, ac->entry, batchcount, node); free_done: #if STATS { @@ -2751,7 +2752,7 @@ static inline void __cache_free(kmem_cache_t *cachep, void *objp) } else { spin_lock(&(cachep->nodelists[nodeid])-> list_lock); - free_block(cachep, &objp, 1); + free_block(cachep, &objp, 1, nodeid); spin_unlock(&(cachep->nodelists[nodeid])-> list_lock); } @@ -2844,7 +2845,7 @@ void *kmem_cache_alloc_node(kmem_cache_t *cachep, unsigned int __nocast flags, i unsigned long save_flags; void *ptr; - if (nodeid == numa_node_id() || nodeid == -1) + if (nodeid == -1) return __cache_alloc(cachep, flags); if (unlikely(!cachep->nodelists[nodeid])) { @@ -3079,7 +3080,7 @@ static int alloc_kmemlist(kmem_cache_t *cachep) if ((nc = cachep->nodelists[node]->shared)) free_block(cachep, nc->entry, - nc->avail); + nc->avail, node); l3->shared = new; if (!cachep->nodelists[node]->alien) { @@ -3160,7 +3161,7 @@ static int do_tune_cpucache(kmem_cache_t *cachep, int limit, int batchcount, if (!ccold) continue; spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); - free_block(cachep, ccold->entry, ccold->avail); + free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); kfree(ccold); } @@ -3240,7 +3241,7 @@ static void drain_array_locked(kmem_cache_t *cachep, if (tofree > ac->avail) { tofree = (ac->avail+1)/2; } - free_block(cachep, ac->entry, tofree); + free_block(cachep, ac->entry, tofree, node); ac->avail -= tofree; memmove(ac->entry, &(ac->entry[tofree]), sizeof(void*)*ac->avail); diff --git a/mm/swapfile.c b/mm/swapfile.c index 0184f510aac..1dcaeda039f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1381,6 +1381,7 @@ asmlinkage long sys_swapon(const char __user * specialfile, int swap_flags) error = bd_claim(bdev, sys_swapon); if (error < 0) { bdev = NULL; + error = -EINVAL; goto bad_swap; } p->old_block_size = block_size(bdev); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 145f5cde96c..b7486488967 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -120,7 +120,7 @@ int vlan_skb_recv(struct sk_buff *skb, struct net_device *dev, unsigned short vid; struct net_device_stats *stats; unsigned short vlan_TCI; - unsigned short proto; + __be16 proto; /* vlan_TCI = ntohs(get_unaligned(&vhdr->h_vlan_TCI)); */ vlan_TCI = ntohs(vhdr->h_vlan_TCI); diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 069253f830c..2d24fb400e0 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -31,7 +31,8 @@ static inline int should_deliver(const struct net_bridge_port *p, int br_dev_queue_push_xmit(struct sk_buff *skb) { - if (skb->len > skb->dev->mtu) + /* drop mtu oversized packets except tso */ + if (skb->len > skb->dev->mtu && !skb_shinfo(skb)->tso_size) kfree_skb(skb); else { #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1b63b482416..50c0519cd70 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -43,7 +43,7 @@ * 2 of the License, or (at your option) any later version. */ -#define VERSION "0.403" +#define VERSION "0.404" #include <linux/config.h> #include <asm/uaccess.h> @@ -224,7 +224,7 @@ static inline int tkey_mismatch(t_key a, int offset, t_key b) Consider a node 'n' and its parent 'tp'. If n is a leaf, every bit in its key is significant. Its presence is - necessitaded by path compression, since during a tree traversal (when + necessitated by path compression, since during a tree traversal (when searching for a leaf - unless we are doing an insertion) we will completely ignore all skipped bits we encounter. Thus we need to verify, at the end of a potentially successful search, that we have indeed been walking the @@ -836,11 +836,12 @@ static void trie_init(struct trie *t) #endif } -/* readside most use rcu_read_lock currently dump routines +/* readside must use rcu_read_lock currently dump routines via get_fa_head and dump */ -static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) +static struct leaf_info *find_leaf_info(struct leaf *l, int plen) { + struct hlist_head *head = &l->list; struct hlist_node *node; struct leaf_info *li; @@ -853,7 +854,7 @@ static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen) static inline struct list_head * get_fa_head(struct leaf *l, int plen) { - struct leaf_info *li = find_leaf_info(&l->list, plen); + struct leaf_info *li = find_leaf_info(l, plen); if (!li) return NULL; @@ -1085,7 +1086,7 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) } if (tp && tp->pos + tp->bits > 32) - printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", + printk(KERN_WARNING "fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n", tp, tp->pos, tp->bits, key, plen); /* Rebalance the trie */ @@ -1248,7 +1249,7 @@ err: } -/* should be clalled with rcu_read_lock */ +/* should be called with rcu_read_lock */ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, struct fib_result *res) @@ -1590,7 +1591,7 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); l = fib_find_node(t, key); - li = find_leaf_info(&l->list, plen); + li = find_leaf_info(l, plen); list_del_rcu(&fa->fa_list); @@ -1714,7 +1715,6 @@ static int fn_trie_flush(struct fib_table *tb) t->revision++; - rcu_read_lock(); for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { found += trie_flush_leaf(t, l); @@ -1722,7 +1722,6 @@ static int fn_trie_flush(struct fib_table *tb) trie_leaf_remove(t, ll->key); ll = l; } - rcu_read_unlock(); if (ll && hlist_empty(&ll->list)) trie_leaf_remove(t, ll->key); @@ -1833,16 +1832,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi i++; continue; } - if (fa->fa_info->fib_nh == NULL) { - printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } - if (fa->fa_info == NULL) { - printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen); - i++; - continue; - } + BUG_ON(!fa->fa_info); if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, @@ -1965,7 +1955,7 @@ struct fib_table * __init fib_hash_init(int id) trie_main = t; if (id == RT_TABLE_LOCAL) - printk("IPv4 FIB: Using LC-trie version %s\n", VERSION); + printk(KERN_INFO "IPv4 FIB: Using LC-trie version %s\n", VERSION); return tb; } @@ -2029,7 +2019,7 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, iter->tnode = (struct tnode *) n; iter->trie = t; iter->index = 0; - iter->depth = 0; + iter->depth = 1; return n; } return NULL; @@ -2274,11 +2264,12 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "<local>:\n"); else seq_puts(seq, "<main>:\n"); - } else { - seq_indent(seq, iter->depth-1); - seq_printf(seq, " +-- %d.%d.%d.%d/%d\n", - NIPQUAD(prf), tn->pos); - } + } + seq_indent(seq, iter->depth-1); + seq_printf(seq, " +-- %d.%d.%d.%d/%d %d %d %d\n", + NIPQUAD(prf), tn->pos, tn->bits, tn->full_children, + tn->empty_children); + } else { struct leaf *l = (struct leaf *) n; int i; @@ -2287,7 +2278,7 @@ static int fib_trie_seq_show(struct seq_file *seq, void *v) seq_indent(seq, iter->depth); seq_printf(seq, " |-- %d.%d.%d.%d\n", NIPQUAD(val)); for (i = 32; i >= 0; i--) { - struct leaf_info *li = find_leaf_info(&l->list, i); + struct leaf_info *li = find_leaf_info(l, i); if (li) { struct fib_alias *fa; list_for_each_entry_rcu(fa, &li->falh, fa_list) { @@ -2383,7 +2374,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) return 0; for (i=32; i>=0; i--) { - struct leaf_info *li = find_leaf_info(&l->list, i); + struct leaf_info *li = find_leaf_info(l, i); struct fib_alias *fa; u32 mask, prefix; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index e2162d27007..3cf9b451675 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -137,6 +137,22 @@ config IP_NF_AMANDA To compile it as a module, choose M here. If unsure, say Y. +config IP_NF_PPTP + tristate 'PPTP protocol support' + help + This module adds support for PPTP (Point to Point Tunnelling + Protocol, RFC2637) conncection tracking and NAT. + + If you are running PPTP sessions over a stateful firewall or NAT + box, you may want to enable this feature. + + Please note that not all PPTP modes of operation are supported yet. + For more info, read top of the file + net/ipv4/netfilter/ip_conntrack_pptp.c + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + config IP_NF_QUEUE tristate "IP Userspace queueing via NETLINK (OBSOLETE)" help @@ -621,6 +637,12 @@ config IP_NF_NAT_AMANDA default IP_NF_NAT if IP_NF_AMANDA=y default m if IP_NF_AMANDA=m +config IP_NF_NAT_PPTP + tristate + depends on IP_NF_NAT!=n && IP_NF_PPTP!=n + default IP_NF_NAT if IP_NF_PPTP=y + default m if IP_NF_PPTP=m + # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 1ba0db74681..3d45d3c0283 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -6,6 +6,9 @@ ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o iptable_nat-objs := ip_nat_standalone.o ip_nat_rule.o ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o +ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o +ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o + # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o @@ -17,6 +20,7 @@ obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o # connection tracking helpers +obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o @@ -24,6 +28,7 @@ obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o # NAT helpers +obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index dc20881004b..fa3f914117e 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -65,7 +65,7 @@ static int help(struct sk_buff **pskb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); + ip_ct_refresh(ct, *pskb, master_timeout * HZ); /* No data? */ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index f8cd8e42961..ea65dd3e517 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -233,7 +233,7 @@ __ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) /* Just find a expectation corresponding to a tuple. */ struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) { struct ip_conntrack_expect *i; @@ -1112,45 +1112,46 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) synchronize_net(); } -static inline void ct_add_counters(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ -#ifdef CONFIG_IP_NF_CT_ACCT - if (skb) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - ntohs(skb->nh.iph->tot_len); - } -#endif -} - -/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ -void ip_ct_refresh_acct(struct ip_conntrack *ct, +/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ +void __ip_ct_refresh_acct(struct ip_conntrack *ct, enum ip_conntrack_info ctinfo, const struct sk_buff *skb, - unsigned long extra_jiffies) + unsigned long extra_jiffies, + int do_acct) { + int do_event = 0; + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + IP_NF_ASSERT(skb); + + write_lock_bh(&ip_conntrack_lock); /* If not in hash table, timer will not be active yet */ if (!is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - ct_add_counters(ct, ctinfo, skb); + do_event = 1; } else { - write_lock_bh(&ip_conntrack_lock); /* Need del_timer for race avoidance (may already be dying). */ if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - /* FIXME: We loose some REFRESH events if this function - * is called without an skb. I'll fix this later -HW */ - if (skb) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + do_event = 1; } - ct_add_counters(ct, ctinfo, skb); - write_unlock_bh(&ip_conntrack_lock); } + +#ifdef CONFIG_IP_NF_CT_ACCT + if (do_acct) { + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + ntohs(skb->nh.iph->tot_len); + } +#endif + + write_unlock_bh(&ip_conntrack_lock); + + /* must be unlocked when calling event cache */ + if (do_event) + ip_conntrack_event_cache(IPCT_REFRESH, skb); } #if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c new file mode 100644 index 00000000000..926a6684643 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -0,0 +1,806 @@ +/* + * ip_conntrack_pptp.c - Version 3.0 + * + * Connection tracking support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * Limitations: + * - We blindly assume that control connections are always + * established in PNS->PAC direction. This is a violation + * of RFFC2673 + * - We can only support one single call within each session + * + * TODO: + * - testing of incoming PPTP calls + * + * Changes: + * 2002-02-05 - Version 1.3 + * - Call ip_conntrack_unexpect_related() from + * pptp_destroy_siblings() to destroy expectations in case + * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen + * (Philip Craig <philipc@snapgear.com>) + * - Add Version information at module loadtime + * 2002-02-10 - Version 1.6 + * - move to C99 style initializers + * - remove second expectation if first arrives + * 2004-10-22 - Version 2.0 + * - merge Mandrake's 2.6.x port with recent 2.6.x API changes + * - fix lots of linear skb assumptions from Mandrake's port + * 2005-06-10 - Version 2.1 + * - use ip_conntrack_expect_free() instead of kfree() on the + * expect's (which are from the slab for quite some time) + * 2005-06-10 - Version 3.0 + * - port helper to post-2.6.11 API changes, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * 2005-07-30 - Version 3.1 + * - port helper to 2.6.13 API changes + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <net/checksum.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_conntrack.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +#define IP_CT_PPTP_VERSION "3.1" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); + +static DEFINE_SPINLOCK(ip_pptp_lock); + +int +(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq); + +int +(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply); + +void +(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp); + +#if 0 +/* PptpControlMessageType names */ +const char *pptp_msg_name[] = { + "UNKNOWN_MESSAGE", + "START_SESSION_REQUEST", + "START_SESSION_REPLY", + "STOP_SESSION_REQUEST", + "STOP_SESSION_REPLY", + "ECHO_REQUEST", + "ECHO_REPLY", + "OUT_CALL_REQUEST", + "OUT_CALL_REPLY", + "IN_CALL_REQUEST", + "IN_CALL_REPLY", + "IN_CALL_CONNECT", + "CALL_CLEAR_REQUEST", + "CALL_DISCONNECT_NOTIFY", + "WAN_ERROR_NOTIFY", + "SET_LINK_INFO" +}; +EXPORT_SYMBOL(pptp_msg_name); +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +#define SECS *HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS + +#define PPTP_GRE_TIMEOUT (10 MINS) +#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS) + +static void pptp_expectfn(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + DEBUGP("increasing timeouts\n"); + + /* increase timeout of GRE data channel conntrack entry */ + ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; + ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; + + /* Can you see how rusty this code is, compared with the pre-2.6.11 + * one? That's what happened to my shiny newnat of 2002 ;( -HW */ + + if (!ip_nat_pptp_hook_expectfn) { + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_expect *exp_other; + + /* obviously this tuple inversion only works until you do NAT */ + invert_tuplepr(&inv_t, &exp->tuple); + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&inv_t); + + exp_other = ip_conntrack_expect_find(&inv_t); + if (exp_other) { + /* delete other expectation. */ + DEBUGP("found\n"); + ip_conntrack_unexpect_related(exp_other); + ip_conntrack_expect_put(exp_other); + } else { + DEBUGP("not found\n"); + } + } else { + /* we need more than simple inversion */ + ip_nat_pptp_hook_expectfn(ct, exp); + } +} + +static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) +{ + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_expect *exp; + + DEBUGP("trying to timeout ct or exp for tuple "); + DUMP_TUPLE(t); + + h = ip_conntrack_find_get(t, NULL); + if (h) { + struct ip_conntrack *sibling = tuplehash_to_ctrack(h); + DEBUGP("setting timeout of conntrack %p to 0\n", sibling); + sibling->proto.gre.timeout = 0; + sibling->proto.gre.stream_timeout = 0; + if (del_timer(&sibling->timeout)) + sibling->timeout.function((unsigned long)sibling); + ip_conntrack_put(sibling); + return 1; + } else { + exp = ip_conntrack_expect_find(t); + if (exp) { + DEBUGP("unexpect_related of expect %p\n", exp); + ip_conntrack_unexpect_related(exp); + ip_conntrack_expect_put(exp); + return 1; + } + } + + return 0; +} + + +/* timeout GRE data connections */ +static void pptp_destroy_siblings(struct ip_conntrack *ct) +{ + struct ip_conntrack_tuple t; + + /* Since ct->sibling_list has literally rusted away in 2.6.11, + * we now need another way to find out about our sibling + * contrack and expects... -HW */ + + /* try original (pns->pac) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout original pns->pac ct/exp\n"); + + /* try reply (pac->pns) tuple */ + memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); + t.dst.protonum = IPPROTO_GRE; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + + if (!destroy_sibling_or_exp(&t)) + DEBUGP("failed to timeout reply pac->pns ct/exp\n"); +} + +/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ +static inline int +exp_gre(struct ip_conntrack *master, + u_int32_t seq, + __be16 callid, + __be16 peer_callid) +{ + struct ip_conntrack_tuple inv_tuple; + struct ip_conntrack_tuple exp_tuples[] = { + /* tuple in original direction, PNS->PAC */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, + .u = { .gre = { .key = peer_callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, + .u = { .gre = { .key = callid } }, + .protonum = IPPROTO_GRE + }, + }, + /* tuple in reply direction, PAC->PNS */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, + .u = { .gre = { .key = callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, + .u = { .gre = { .key = peer_callid } }, + .protonum = IPPROTO_GRE + }, + } + }; + struct ip_conntrack_expect *exp_orig, *exp_reply; + int ret = 1; + + exp_orig = ip_conntrack_expect_alloc(master); + if (exp_orig == NULL) + goto out; + + exp_reply = ip_conntrack_expect_alloc(master); + if (exp_reply == NULL) + goto out_put_orig; + + memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); + + exp_orig->mask.src.ip = 0xffffffff; + exp_orig->mask.src.u.all = 0; + exp_orig->mask.dst.u.all = 0; + exp_orig->mask.dst.u.gre.key = htons(0xffff); + exp_orig->mask.dst.ip = 0xffffffff; + exp_orig->mask.dst.protonum = 0xff; + + exp_orig->master = master; + exp_orig->expectfn = pptp_expectfn; + exp_orig->flags = 0; + + exp_orig->dir = IP_CT_DIR_ORIGINAL; + + /* both expectations are identical apart from tuple */ + memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); + memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); + + exp_reply->dir = !exp_orig->dir; + + if (ip_nat_pptp_hook_exp_gre) + ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + else { + + DEBUGP("calling expect_related PNS->PAC"); + DUMP_TUPLE(&exp_orig->tuple); + + if (ip_conntrack_expect_related(exp_orig) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_put_both; + } + + DEBUGP("calling expect_related PAC->PNS"); + DUMP_TUPLE(&exp_reply->tuple); + + if (ip_conntrack_expect_related(exp_reply) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_unexpect_orig; + } + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { + DEBUGP("cannot keymap_add() exp\n"); + goto out_unexpect_both; + } + + invert_tuplepr(&inv_tuple, &exp_reply->tuple); + if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(master); + DEBUGP("cannot keymap_add() exp_inv\n"); + goto out_unexpect_both; + } + ret = 0; + } + +out_put_both: + ip_conntrack_expect_put(exp_reply); +out_put_orig: + ip_conntrack_expect_put(exp_orig); +out: + return ret; + +out_unexpect_both: + ip_conntrack_unexpect_related(exp_reply); +out_unexpect_orig: + ip_conntrack_unexpect_related(exp_orig); + goto out_put_both; +} + +static inline int +pptp_inbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg; + __be16 *cid, *pcid; + u_int32_t seq; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + + msg = ntohs(ctlh->messageType); + DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.srep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms new control session */ + if (info->sstate < PPTP_SESSION_REQUESTED) { + DEBUGP("%s without START_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->srep.resultCode == PPTP_START_OK) + info->sstate = PPTP_SESSION_CONFIRMED; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_STOP_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.strep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms end of control session */ + if (info->sstate > PPTP_SESSION_STOPREQ) { + DEBUGP("%s without STOP_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->strep.resultCode == PPTP_STOP_OK) + info->sstate = PPTP_SESSION_NONE; + else + info->sstate = PPTP_SESSION_ERROR; + break; + + case PPTP_OUT_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.ocack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server accepted call, we now expect GRE frames */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->cstate != PPTP_CALL_OUT_REQ && + info->cstate != PPTP_CALL_OUT_CONF) { + DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); + break; + } + if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { + info->cstate = PPTP_CALL_NONE; + break; + } + + cid = &pptpReq->ocack.callID; + pcid = &pptpReq->ocack.peersCallID; + + info->pac_call_id = ntohs(*cid); + + if (htons(info->pns_call_id) != *pcid) { + DEBUGP("%s for unknown callid %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(*cid), ntohs(*pcid)); + + info->cstate = PPTP_CALL_OUT_CONF; + + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + break; + + case PPTP_IN_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call request */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + pcid = &pptpReq->icack.peersCallID; + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_REQ; + info->pac_call_id = ntohs(*pcid); + break; + + case PPTP_IN_CALL_CONNECT: + if (reqlen < sizeof(_pptpReq.iccon)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server tells us about incoming call established */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->sstate != PPTP_CALL_IN_REP + && info->sstate != PPTP_CALL_IN_CONF) { + DEBUGP("%s but never sent IN_CALL_REPLY\n", + pptp_msg_name[msg]); + break; + } + + pcid = &pptpReq->iccon.peersCallID; + cid = &info->pac_call_id; + + if (info->pns_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown CallID %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + info->cstate = PPTP_CALL_IN_CONF; + + /* we expect a GRE connection from PAC to PNS */ + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + + break; + + case PPTP_CALL_DISCONNECT_NOTIFY: + if (reqlen < sizeof(_pptpReq.disc)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* server confirms disconnect */ + cid = &pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + break; + + case PPTP_WAN_ERROR_NOTIFY: + break; + + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) + ? pptp_msg_name[msg]:pptp_msg_name[0], msg); + break; + } + + + if (ip_nat_pptp_hook_inbound) + return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; + +} + +static inline int +pptp_outbound_pkt(struct sk_buff **pskb, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + u_int16_t msg; + __be16 *cid, *pcid; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; + + msg = ntohs(ctlh->messageType); + DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); + + switch (msg) { + case PPTP_START_SESSION_REQUEST: + /* client requests for new control session */ + if (info->sstate != PPTP_SESSION_NONE) { + DEBUGP("%s but we already have one", + pptp_msg_name[msg]); + } + info->sstate = PPTP_SESSION_REQUESTED; + break; + case PPTP_STOP_SESSION_REQUEST: + /* client requests end of control session */ + info->sstate = PPTP_SESSION_STOPREQ; + break; + + case PPTP_OUT_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.ocreq)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + /* FIXME: break; */ + } + + /* client initiating connection to server */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", + pptp_msg_name[msg]); + break; + } + info->cstate = PPTP_CALL_OUT_REQ; + /* track PNS call id */ + cid = &pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->pns_call_id = ntohs(*cid); + break; + case PPTP_IN_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + + /* client answers incoming call */ + if (info->cstate != PPTP_CALL_IN_REQ + && info->cstate != PPTP_CALL_IN_REP) { + DEBUGP("%s without incall_req\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { + info->cstate = PPTP_CALL_NONE; + break; + } + pcid = &pptpReq->icack.peersCallID; + if (info->pac_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown call %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = ntohs(pptpReq->icack.callID); + break; + + case PPTP_CALL_CLEAR_REQUEST: + /* client requests hangup of call */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("CLEAR_CALL but no session\n"); + break; + } + /* FUTURE: iterate over all calls and check if + * call ID is valid. We don't do this without newnat, + * because we only know about last call */ + info->cstate = PPTP_CALL_CLEAR_REQ; + break; + case PPTP_SET_LINK_INFO: + break; + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* I don't have to explain these ;) */ + break; + default: + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0], msg); + /* unknown: no need to create GRE masq table entry */ + break; + } + + if (ip_nat_pptp_hook_outbound) + return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, + pptpReq); + + return NF_ACCEPT; +} + + +/* track caller id inside control connection, call expect_related */ +static int +conntrack_pptp_help(struct sk_buff **pskb, + struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) + +{ + struct pptp_pkt_hdr _pptph, *pptph; + struct tcphdr _tcph, *tcph; + u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + u_int32_t datalen; + int dir = CTINFO2DIR(ctinfo); + struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; + unsigned int nexthdr_off; + + int oldsstate, oldcstate; + int ret; + + /* don't do any tracking before tcp handshake complete */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { + DEBUGP("ctinfo = %u, skipping\n", ctinfo); + return NF_ACCEPT; + } + + nexthdr_off = (*pskb)->nh.iph->ihl*4; + tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); + BUG_ON(!tcph); + nexthdr_off += tcph->doff * 4; + datalen = tcplen - tcph->doff * 4; + + if (tcph->fin || tcph->rst) { + DEBUGP("RST/FIN received, timeouting GRE\n"); + /* can't do this after real newnat */ + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + } + + pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); + if (!pptph) { + DEBUGP("no full PPTP header, can't track\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_pptph); + datalen -= sizeof(_pptph); + + /* if it's not a control message we can't do anything with it */ + if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || + ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { + DEBUGP("not a control packet\n"); + return NF_ACCEPT; + } + + oldsstate = info->sstate; + oldcstate = info->cstate; + + spin_lock_bh(&ip_pptp_lock); + + /* FIXME: We just blindly assume that the control connection is always + * established from PNS->PAC. However, RFC makes no guarantee */ + if (dir == IP_CT_DIR_ORIGINAL) + /* client -> server (PNS -> PAC) */ + ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + else + /* server -> client (PAC -> PNS) */ + ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, + ctinfo); + DEBUGP("sstate: %d->%d, cstate: %d->%d\n", + oldsstate, info->sstate, oldcstate, info->cstate); + spin_unlock_bh(&ip_pptp_lock); + + return ret; +} + +/* control protocol helper */ +static struct ip_conntrack_helper pptp = { + .list = { NULL, NULL }, + .name = "pptp", + .me = THIS_MODULE, + .max_expected = 2, + .timeout = 5 * 60, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = IPPROTO_TCP + } + }, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = __constant_htons(0xffff) } } + }, + .dst = { .ip = 0, + .u = { .all = 0 }, + .protonum = 0xff + } + }, + .help = conntrack_pptp_help +}; + +extern void __exit ip_ct_proto_gre_fini(void); +extern int __init ip_ct_proto_gre_init(void); + +/* ip_conntrack_pptp initialization */ +static int __init init(void) +{ + int retcode; + + retcode = ip_ct_proto_gre_init(); + if (retcode < 0) + return retcode; + + DEBUGP(" registering helper\n"); + if ((retcode = ip_conntrack_helper_register(&pptp))) { + printk(KERN_ERR "Unable to register conntrack application " + "helper for pptp: %d\n", retcode); + ip_ct_proto_gre_fini(); + return retcode; + } + + printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + ip_conntrack_helper_unregister(&pptp); + ip_ct_proto_gre_fini(); + printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); + +EXPORT_SYMBOL(ip_nat_pptp_hook_outbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_inbound); +EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre); +EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 71ef19d126d..577bac22dcc 100644 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -91,7 +91,7 @@ static int help(struct sk_buff **pskb, ip_conntrack_expect_related(exp); ip_conntrack_expect_put(exp); - ip_ct_refresh_acct(ct, ctinfo, NULL, timeout * HZ); + ip_ct_refresh(ct, *pskb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c index 15aef356474..b08a432efcf 100644 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -1270,7 +1270,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; @@ -1318,7 +1318,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = ip_conntrack_expect_find_get(&tuple); + exp = ip_conntrack_expect_find(&tuple); if (!exp) return -ENOENT; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c new file mode 100644 index 00000000000..de3cb9db6f8 --- /dev/null +++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -0,0 +1,327 @@ +/* + * ip_conntrack_proto_gre.c - Version 3.0 + * + * Connection tracking protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/timer.h> +#include <linux/netfilter.h> +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/list.h> + +static DEFINE_RWLOCK(ip_ct_gre_lock); +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + +#include <linux/netfilter_ipv4/listhelp.h> +#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> + +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); + +/* shamelessly stolen from ip_conntrack_proto_udp.c */ +#define GRE_TIMEOUT (30*HZ) +#define GRE_STREAM_TIMEOUT (180*HZ) + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) +#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ + NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \ + NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key)) +#else +#define DEBUGP(x, args...) +#define DUMP_TUPLE_GRE(x) +#endif + +/* GRE KEYMAP HANDLING FUNCTIONS */ +static LIST_HEAD(gre_keymap_list); + +static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, + const struct ip_conntrack_tuple *t) +{ + return ((km->tuple.src.ip == t->src.ip) && + (km->tuple.dst.ip == t->dst.ip) && + (km->tuple.dst.protonum == t->dst.protonum) && + (km->tuple.dst.u.all == t->dst.u.all)); +} + +/* look up the source key for a given tuple */ +static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) +{ + struct ip_ct_gre_keymap *km; + u_int32_t key = 0; + + read_lock_bh(&ip_ct_gre_lock); + km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (km) + key = km->tuple.src.u.gre.key; + read_unlock_bh(&ip_ct_gre_lock); + + DEBUGP("lookup src key 0x%x up key for ", key); + DUMP_TUPLE_GRE(t); + + return key; +} + +/* add a single keymap entry, associate with specified master ct */ +int +ip_ct_gre_keymap_add(struct ip_conntrack *ct, + struct ip_conntrack_tuple *t, int reply) +{ + struct ip_ct_gre_keymap **exist_km, *km, *old; + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to add GRE keymap to non-pptp session\n"); + return -1; + } + + if (!reply) + exist_km = &ct->help.ct_pptp_info.keymap_orig; + else + exist_km = &ct->help.ct_pptp_info.keymap_reply; + + if (*exist_km) { + /* check whether it's a retransmission */ + old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (old == *exist_km) { + DEBUGP("retransmission\n"); + return 0; + } + + DEBUGP("trying to override keymap_%s for ct %p\n", + reply? "reply":"orig", ct); + return -EEXIST; + } + + km = kmalloc(sizeof(*km), GFP_ATOMIC); + if (!km) + return -ENOMEM; + + memcpy(&km->tuple, t, sizeof(*t)); + *exist_km = km; + + DEBUGP("adding new entry %p: ", km); + DUMP_TUPLE_GRE(&km->tuple); + + write_lock_bh(&ip_ct_gre_lock); + list_append(&gre_keymap_list, km); + write_unlock_bh(&ip_ct_gre_lock); + + return 0; +} + +/* destroy the keymap entries associated with specified master ct */ +void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) +{ + DEBUGP("entering for ct %p\n", ct); + + if (!ct->helper || strcmp(ct->helper->name, "pptp")) { + DEBUGP("refusing to destroy GRE keymap to non-pptp session\n"); + return; + } + + write_lock_bh(&ip_ct_gre_lock); + if (ct->help.ct_pptp_info.keymap_orig) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_orig); + list_del(&ct->help.ct_pptp_info.keymap_orig->list); + kfree(ct->help.ct_pptp_info.keymap_orig); + ct->help.ct_pptp_info.keymap_orig = NULL; + } + if (ct->help.ct_pptp_info.keymap_reply) { + DEBUGP("removing %p from list\n", + ct->help.ct_pptp_info.keymap_reply); + list_del(&ct->help.ct_pptp_info.keymap_reply->list); + kfree(ct->help.ct_pptp_info.keymap_reply); + ct->help.ct_pptp_info.keymap_reply = NULL; + } + write_unlock_bh(&ip_ct_gre_lock); +} + + +/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ + +/* invert gre part of tuple */ +static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig) +{ + tuple->dst.u.gre.key = orig->src.u.gre.key; + tuple->src.u.gre.key = orig->dst.u.gre.key; + + return 1; +} + +/* gre hdr info to tuple */ +static int gre_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple) +{ + struct gre_hdr_pptp _pgrehdr, *pgrehdr; + u_int32_t srckey; + struct gre_hdr _grehdr, *grehdr; + + /* first only delinearize old RFC1701 GRE header */ + grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); + if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + /* try to behave like "ip_conntrack_proto_generic" */ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + return 1; + } + + /* PPTP header is variable length, only need up to the call_id field */ + pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); + if (!pgrehdr) + return 1; + + if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { + DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); + return 0; + } + + tuple->dst.u.gre.key = pgrehdr->call_id; + srckey = gre_keymap_lookup(tuple); + tuple->src.u.gre.key = srckey; + + return 1; +} + +/* print gre part of tuple */ +static int gre_print_tuple(struct seq_file *s, + const struct ip_conntrack_tuple *tuple) +{ + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); +} + +/* print private data for conntrack */ +static int gre_print_conntrack(struct seq_file *s, + const struct ip_conntrack *ct) +{ + return seq_printf(s, "timeout=%u, stream_timeout=%u ", + (ct->proto.gre.timeout / HZ), + (ct->proto.gre.stream_timeout / HZ)); +} + +/* Returns verdict for packet, and may modify conntrack */ +static int gre_packet(struct ip_conntrack *ct, + const struct sk_buff *skb, + enum ip_conntrack_info conntrackinfo) +{ + /* If we've seen traffic both ways, this is a GRE connection. + * Extend timeout. */ + if (ct->status & IPS_SEEN_REPLY) { + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.stream_timeout); + /* Also, more likely to be important, and not a probe. */ + set_bit(IPS_ASSURED_BIT, &ct->status); + } else + ip_ct_refresh_acct(ct, conntrackinfo, skb, + ct->proto.gre.timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int gre_new(struct ip_conntrack *ct, + const struct sk_buff *skb) +{ + DEBUGP(": "); + DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + /* initialize to sane value. Ideally a conntrack helper + * (e.g. in case of pptp) is increasing them */ + ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; + ct->proto.gre.timeout = GRE_TIMEOUT; + + return 1; +} + +/* Called when a conntrack entry has already been removed from the hashes + * and is about to be deleted from memory */ +static void gre_destroy(struct ip_conntrack *ct) +{ + struct ip_conntrack *master = ct->master; + DEBUGP(" entering\n"); + + if (!master) + DEBUGP("no master !?!\n"); + else + ip_ct_gre_keymap_destroy(master); +} + +/* protocol helper struct */ +static struct ip_conntrack_protocol gre = { + .proto = IPPROTO_GRE, + .name = "gre", + .pkt_to_tuple = gre_pkt_to_tuple, + .invert_tuple = gre_invert_tuple, + .print_tuple = gre_print_tuple, + .print_conntrack = gre_print_conntrack, + .packet = gre_packet, + .new = gre_new, + .destroy = gre_destroy, + .me = THIS_MODULE, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, + .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, +#endif +}; + +/* ip_conntrack_proto_gre initialization */ +int __init ip_ct_proto_gre_init(void) +{ + return ip_conntrack_protocol_register(&gre); +} + +void __exit ip_ct_proto_gre_fini(void) +{ + struct list_head *pos, *n; + + /* delete all keymap entries */ + write_lock_bh(&ip_ct_gre_lock); + list_for_each_safe(pos, n, &gre_keymap_list) { + DEBUGP("deleting keymap %p at module unload time\n", pos); + list_del(pos); + kfree(pos); + } + write_unlock_bh(&ip_ct_gre_lock); + + ip_conntrack_protocol_unregister(&gre); +} + +EXPORT_SYMBOL(ip_ct_gre_keymap_add); +EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ae3e3e655db..dd476b191f4 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -989,15 +989,15 @@ EXPORT_SYMBOL(need_ip_conntrack); EXPORT_SYMBOL(ip_conntrack_helper_register); EXPORT_SYMBOL(ip_conntrack_helper_unregister); EXPORT_SYMBOL(ip_ct_iterate_cleanup); -EXPORT_SYMBOL(ip_ct_refresh_acct); +EXPORT_SYMBOL(__ip_ct_refresh_acct); EXPORT_SYMBOL(ip_conntrack_expect_alloc); EXPORT_SYMBOL(ip_conntrack_expect_put); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); EXPORT_SYMBOL(ip_conntrack_expect_related); EXPORT_SYMBOL(ip_conntrack_unexpect_related); EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); -EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); EXPORT_SYMBOL(ip_conntrack_tuple_taken); diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 1adedb743f6..c3ea891d38e 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -578,6 +578,8 @@ ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) return ret; } +EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); +EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); #endif int __init ip_nat_init(void) diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c new file mode 100644 index 00000000000..3cdd0684d30 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -0,0 +1,401 @@ +/* + * ip_nat_pptp.c - Version 3.0 + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + * + * Changes: + * 2002-02-10 - Version 1.3 + * - Use ip_nat_mangle_tcp_packet() because of cloned skb's + * in local connections (Philip Craig <philipc@snapgear.com>) + * - add checks for magicCookie and pptp version + * - make argument list of pptp_{out,in}bound_packet() shorter + * - move to C99 style initializers + * - print version number at module loadtime + * 2003-09-22 - Version 1.5 + * - use SNATed tcp sourceport as callid, since we get called before + * TCP header is mangled (Philip Craig <philipc@snapgear.com>) + * 2004-10-22 - Version 2.0 + * - kernel 2.6.x version + * 2005-06-10 - Version 3.0 + * - kernel >= 2.6.11 version, + * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/tcp.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_rule.h> +#include <linux/netfilter_ipv4/ip_nat_helper.h> +#include <linux/netfilter_ipv4/ip_nat_pptp.h> +#include <linux/netfilter_ipv4/ip_conntrack_core.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> +#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> + +#define IP_NAT_PPTP_VERSION "3.0" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); + + +#if 0 +extern const char *pptp_msg_name[]; +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(format, args...) +#endif + +static void pptp_nat_expected(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) +{ + struct ip_conntrack *master = ct->master; + struct ip_conntrack_expect *other_exp; + struct ip_conntrack_tuple t; + struct ip_ct_pptp_master *ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info; + + ct_pptp_info = &master->help.ct_pptp_info; + nat_pptp_info = &master->nat.help.nat_pptp_info; + + /* And here goes the grand finale of corrosion... */ + + if (exp->dir == IP_CT_DIR_ORIGINAL) { + DEBUGP("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); + t.dst.protonum = IPPROTO_GRE; + } else { + DEBUGP("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + t.src.u.gre.key = + htons(master->nat.help.nat_pptp_info.pns_call_id); + t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + t.dst.u.gre.key = + htons(master->nat.help.nat_pptp_info.pac_call_id); + t.dst.protonum = IPPROTO_GRE; + } + + DEBUGP("trying to unexpect other dir: "); + DUMP_TUPLE(&t); + other_exp = ip_conntrack_expect_find(&t); + if (other_exp) { + ip_conntrack_unexpect_related(other_exp); + ip_conntrack_expect_put(other_exp); + DEBUGP("success\n"); + } else { + DEBUGP("not found!\n"); + } + + ip_nat_follow_master(ct, exp); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + + u_int16_t msg, *cid = NULL, new_callid; + + new_callid = htons(ct_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid = &pptpReq->ocreq.callID; + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid = &pptpReq->icreq.callID; + break; + case PPTP_CALL_CLEAR_REQUEST: + cid = &pptpReq->clrreq.callID; + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + + IP_NF_ASSERT(cid); + + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_callid)); + + /* mangle packet */ + if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_callid), + (char *)&new_callid, + sizeof(new_callid)) == 0) + return NF_DROP; + + return NF_ACCEPT; +} + +static int +pptp_exp_gre(struct ip_conntrack_expect *expect_orig, + struct ip_conntrack_expect *expect_reply) +{ + struct ip_ct_pptp_master *ct_pptp_info = + &expect_orig->master->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = + &expect_orig->master->nat.help.nat_pptp_info; + + struct ip_conntrack *ct = expect_orig->master; + + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_tuple *orig_t, *reply_t; + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation */ + orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + + /* alter expectation for PNS->PAC direction */ + invert_tuplepr(&inv_t, &expect_orig->tuple); + expect_orig->saved_proto.gre.key = htons(nat_pptp_info->pac_call_id); + expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + inv_t.src.ip = reply_t->src.ip; + inv_t.dst.ip = reply_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + + if (!ip_conntrack_expect_related(expect_orig)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_orig)\n"); + return 1; + } + + /* alter expectation for PAC->PNS direction */ + invert_tuplepr(&inv_t, &expect_reply->tuple); + expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); + expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + inv_t.src.ip = orig_t->src.ip; + inv_t.dst.ip = orig_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + + if (!ip_conntrack_expect_related(expect_reply)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_reply)\n"); + ip_conntrack_unexpect_related(expect_orig); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { + DEBUGP("can't register original keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { + DEBUGP("can't register reply keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + ip_ct_gre_keymap_destroy(ct); + return 1; + } + + return 0; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) +{ + struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + u_int16_t msg, new_cid = 0, new_pcid, *pcid = NULL, *cid = NULL; + + int ret = NF_ACCEPT, rv; + + new_pcid = htons(nat_pptp_info->pns_call_id); + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid = &pptpReq->ocack.peersCallID; + cid = &pptpReq->ocack.callID; + break; + case PPTP_IN_CALL_CONNECT: + pcid = &pptpReq->iccon.peersCallID; + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + break; + case PPTP_WAN_ERROR_NOTIFY: + pcid = &pptpReq->wanerr.peersCallID; + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid = &pptpReq->disc.callID; + break; + case PPTP_SET_LINK_INFO: + pcid = &pptpReq->setlink.peersCallID; + break; + + default: + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + IP_NF_ASSERT(pcid); + DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(*pcid), ntohs(new_pcid)); + + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)pcid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)); + if (rv != NF_ACCEPT) + return rv; + + if (new_cid) { + IP_NF_ASSERT(cid); + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(*cid), ntohs(new_cid)); + rv = ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + (void *)cid - ((void *)ctlh - sizeof(struct pptp_pkt_hdr)), + sizeof(new_cid), + (char *)&new_cid, + sizeof(new_cid)); + if (rv != NF_ACCEPT) + return rv; + } + + /* check for earlier return value of 'switch' above */ + if (ret != NF_ACCEPT) + return ret; + + /* great, at least we don't need to resize packets */ + return NF_ACCEPT; +} + + +extern int __init ip_nat_proto_gre_init(void); +extern void __exit ip_nat_proto_gre_fini(void); + +static int __init init(void) +{ + int ret; + + DEBUGP("%s: registering NAT helper\n", __FILE__); + + ret = ip_nat_proto_gre_init(); + if (ret < 0) + return ret; + + BUG_ON(ip_nat_pptp_hook_outbound); + ip_nat_pptp_hook_outbound = &pptp_outbound_pkt; + + BUG_ON(ip_nat_pptp_hook_inbound); + ip_nat_pptp_hook_inbound = &pptp_inbound_pkt; + + BUG_ON(ip_nat_pptp_hook_exp_gre); + ip_nat_pptp_hook_exp_gre = &pptp_exp_gre; + + BUG_ON(ip_nat_pptp_hook_expectfn); + ip_nat_pptp_hook_expectfn = &pptp_nat_expected; + + printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); + return 0; +} + +static void __exit fini(void) +{ + DEBUGP("cleanup_module\n" ); + + ip_nat_pptp_hook_expectfn = NULL; + ip_nat_pptp_hook_exp_gre = NULL; + ip_nat_pptp_hook_inbound = NULL; + ip_nat_pptp_hook_outbound = NULL; + + ip_nat_proto_gre_fini(); + /* Make sure noone calls it, meanwhile */ + synchronize_net(); + + printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); +} + +module_init(init); +module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c new file mode 100644 index 00000000000..7c128540167 --- /dev/null +++ b/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -0,0 +1,214 @@ +/* + * ip_nat_proto_gre.c - Version 2.0 + * + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + */ + +#include <linux/config.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_nat_rule.h> +#include <linux/netfilter_ipv4/ip_nat_protocol.h> +#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +#if 0 +#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ + __FUNCTION__, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* is key in given range between min and max */ +static int +gre_in_range(const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype, + const union ip_conntrack_manip_proto *min, + const union ip_conntrack_manip_proto *max) +{ + u_int32_t key; + + if (maniptype == IP_NAT_MANIP_SRC) + key = tuple->src.u.gre.key; + else + key = tuple->dst.u.gre.key; + + return ntohl(key) >= ntohl(min->gre.key) + && ntohl(key) <= ntohl(max->gre.key); +} + +/* generate unique tuple ... */ +static int +gre_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_nat_range *range, + enum ip_nat_manip_type maniptype, + const struct ip_conntrack *conntrack) +{ + static u_int16_t key; + u_int16_t *keyptr; + unsigned int min, i, range_size; + + if (maniptype == IP_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + DEBUGP("%p: NATing GRE PPTP\n", conntrack); + min = 1; + range_size = 0xffff; + } else { + min = ntohl(range->min.gre.key); + range_size = ntohl(range->max.gre.key) - min + 1; + } + + DEBUGP("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; i < range_size; i++, key++) { + *keyptr = htonl(min + key % range_size); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + + DEBUGP("%p: no NAT mapping\n", conntrack); + + return 0; +} + +/* manipulate a GRE packet according to maniptype */ +static int +gre_manip_pkt(struct sk_buff **pskb, + unsigned int iphdroff, + const struct ip_conntrack_tuple *tuple, + enum ip_nat_manip_type maniptype) +{ + struct gre_hdr *greh; + struct gre_hdr_pptp *pgreh; + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); + unsigned int hdroff = iphdroff + iph->ihl*4; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8)) + return 0; + + greh = (void *)(*pskb)->data + hdroff; + pgreh = (struct gre_hdr_pptp *) greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype == IP_NAT_MANIP_DST) { + /* key manipulation is always dest */ + switch (greh->version) { + case 0: + if (!greh->key) { + DEBUGP("can't nat GRE w/o key\n"); + break; + } + if (greh->csum) { + /* FIXME: Never tested this code... */ + *(gre_csum(greh)) = + ip_nat_cheat_check(~*(gre_key(greh)), + tuple->dst.u.gre.key, + *(gre_csum(greh))); + } + *(gre_key(greh)) = tuple->dst.u.gre.key; + break; + case GRE_VERSION_PPTP: + DEBUGP("call_id -> 0x%04x\n", + ntohl(tuple->dst.u.gre.key)); + pgreh->call_id = htons(ntohl(tuple->dst.u.gre.key)); + break; + default: + DEBUGP("can't nat unknown GRE version\n"); + return 0; + break; + } + } + return 1; +} + +/* print out a nat tuple */ +static unsigned int +gre_print(char *buffer, + const struct ip_conntrack_tuple *match, + const struct ip_conntrack_tuple *mask) +{ + unsigned int len = 0; + + if (mask->src.u.gre.key) + len += sprintf(buffer + len, "srckey=0x%x ", + ntohl(match->src.u.gre.key)); + + if (mask->dst.u.gre.key) + len += sprintf(buffer + len, "dstkey=0x%x ", + ntohl(match->src.u.gre.key)); + + return len; +} + +/* print a range of keys */ +static unsigned int +gre_print_range(char *buffer, const struct ip_nat_range *range) +{ + if (range->min.gre.key != 0 + || range->max.gre.key != 0xFFFF) { + if (range->min.gre.key == range->max.gre.key) + return sprintf(buffer, "key 0x%x ", + ntohl(range->min.gre.key)); + else + return sprintf(buffer, "keys 0x%u-0x%u ", + ntohl(range->min.gre.key), + ntohl(range->max.gre.key)); + } else + return 0; +} + +/* nat helper struct */ +static struct ip_nat_protocol gre = { + .name = "GRE", + .protonum = IPPROTO_GRE, + .manip_pkt = gre_manip_pkt, + .in_range = gre_in_range, + .unique_tuple = gre_unique_tuple, + .print = gre_print, + .print_range = gre_print_range, +#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ + defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) + .range_to_nfattr = ip_nat_port_range_to_nfattr, + .nfattr_to_range = ip_nat_port_nfattr_to_range, +#endif +}; + +int __init ip_nat_proto_gre_init(void) +{ + return ip_nat_protocol_register(&gre); +} + +void __exit ip_nat_proto_gre_fini(void) +{ + ip_nat_protocol_unregister(&gre); +} diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 304bb0a1d4f..4b0d7e4d626 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -361,7 +361,7 @@ static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (type && code) { get_user(fl->fl_icmp_type, type); - __get_user(fl->fl_icmp_code, code); + get_user(fl->fl_icmp_code, code); probed = 1; } break; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index a88db28b0af..b1a63b2c6b4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -384,7 +384,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->frto_counter = 0; newtp->frto_highmark = 0; - newicsk->icsk_ca_ops = &tcp_reno; + newicsk->icsk_ca_ops = &tcp_init_congestion_ops; tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b018e31b653..d6e3d269e90 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -461,9 +461,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; - TCP_SKB_CB(buff)->sacked = - (TCP_SKB_CB(skb)->sacked & - (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL)); + TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { @@ -501,11 +499,26 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss tcp_skb_pcount(buff); tp->packets_out -= diff; + + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + tp->sacked_out -= diff; + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out -= diff; + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out -= diff; tp->left_out -= diff; } + if (diff > 0) { + /* Adjust Reno SACK estimate. */ + if (!tp->rx_opt.sack_ok) { + tp->sacked_out -= diff; + if ((int)tp->sacked_out < 0) + tp->sacked_out = 0; + tcp_sync_left_out(tp); + } + tp->fackets_out -= diff; if ((int)tp->fackets_out < 0) tp->fackets_out = 0; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1cb8adb2787..2da514b16d9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1955,6 +1955,57 @@ static void __exit fini(void) #endif } +/* + * find specified header up to transport protocol header. + * If found target header, the offset to the header is set to *offset + * and return 0. otherwise, return -1. + * + * Notes: - non-1st Fragment Header isn't skipped. + * - ESP header isn't skipped. + * - The target header may be trancated. + */ +int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target) +{ + unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data; + u8 nexthdr = skb->nh.ipv6h->nexthdr; + unsigned int len = skb->len - start; + + while (nexthdr != target) { + struct ipv6_opt_hdr _hdr, *hp; + unsigned int hdrlen; + + if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) + return -1; + hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); + if (hp == NULL) + return -1; + if (nexthdr == NEXTHDR_FRAGMENT) { + unsigned short _frag_off, *fp; + fp = skb_header_pointer(skb, + start+offsetof(struct frag_hdr, + frag_off), + sizeof(_frag_off), + &_frag_off); + if (fp == NULL) + return -1; + + if (ntohs(*fp) & ~0x7) + return -1; + hdrlen = 8; + } else if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hp->hdrlen + 2) << 2; + else + hdrlen = ipv6_optlen(hp); + + nexthdr = hp->nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *offset = start; + return 0; +} + EXPORT_SYMBOL(ip6t_register_table); EXPORT_SYMBOL(ip6t_unregister_table); EXPORT_SYMBOL(ip6t_do_table); @@ -1963,6 +2014,7 @@ EXPORT_SYMBOL(ip6t_unregister_match); EXPORT_SYMBOL(ip6t_register_target); EXPORT_SYMBOL(ip6t_unregister_target); EXPORT_SYMBOL(ip6t_ext_hdr); +EXPORT_SYMBOL(ipv6_find_hdr); module_init(init); module_exit(fini); diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index d5b94f142bb..dde37793d20 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -48,92 +48,21 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_auth_hdr *ah = NULL, _ah; + struct ip_auth_hdr *ah, _ah; const struct ip6t_ah *ahinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; - /*DEBUGP("IPv6 AH entered\n");*/ - /* if (opt->auth == 0) return 0; - * It does not filled on output */ - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_ah header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) - break; - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* AH -> evaluate */ - if (nexthdr == NEXTHDR_AUTH) { - temp |= MASK_AH; - break; - } - - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_ah match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_ah: new pointer too large! \n"); - break; - } - } - - /* AH header not found */ - if (temp != MASK_AH) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0) return 0; - if (len < sizeof(struct ip_auth_hdr)){ + ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); + if (ah == NULL) { *hotdrop = 1; return 0; } - ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); - BUG_ON(ah == NULL); + hdrlen = (ah->hdrlen + 2) << 2; DEBUGP("IPv6 AH LEN %u %u ", hdrlen, ah->hdrlen); DEBUGP("RES %04X ", ah->reserved); diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c index 540925e4a7a..c450a635e54 100644 --- a/net/ipv6/netfilter/ip6t_dst.c +++ b/net/ipv6/netfilter/ip6t_dst.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c index e39dd236fd8..24bc0cde43a 100644 --- a/net/ipv6/netfilter/ip6t_esp.c +++ b/net/ipv6/netfilter/ip6t_esp.c @@ -48,87 +48,22 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ip_esp_hdr _esp, *eh = NULL; + struct ip_esp_hdr _esp, *eh; const struct ip6t_esp *espinfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; /* Make sure this isn't an evil packet */ /*DEBUGP("ipv6_esp entered \n");*/ - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - int hdrlen; - - DEBUGP("ipv6_esp header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) - break; - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - temp |= MASK_ESP; - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) - hdrlen = 8; - else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* set the flag */ - switch (nexthdr) { - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_esp match: unknown nextheader %u\n",nexthdr); - return 0; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if (ptr > skb->len) { - DEBUGP("ipv6_esp: new pointer too large! \n"); - break; - } - } - - /* ESP header not found */ - if (temp != MASK_ESP) + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0) return 0; - if (len < sizeof(struct ip_esp_hdr)) { + eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); + if (eh == NULL) { *hotdrop = 1; return 0; } - eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp); - BUG_ON(eh == NULL); - DEBUGP("IPv6 ESP SPI %u %08X\n", ntohl(eh->spi), ntohl(eh->spi)); return (eh != NULL) diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index 4bfa30a9bc8..085d5f8eea2 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -48,90 +48,18 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct frag_hdr _frag, *fh = NULL; + struct frag_hdr _frag, *fh; const struct ip6t_frag *fraginfo = matchinfo; - unsigned int temp; - int len; - u8 nexthdr; unsigned int ptr; - unsigned int hdrlen = 0; - - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_frag header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* FRAG -> evaluate */ - if (nexthdr == NEXTHDR_FRAGMENT) { - temp |= MASK_FRAGMENT; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_frag match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_frag: new pointer too large! \n"); - break; - } - } - - /* FRAG header not found */ - if ( temp != MASK_FRAGMENT ) return 0; - - if (len < sizeof(struct frag_hdr)){ - *hotdrop = 1; - return 0; - } - fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); - BUG_ON(fh == NULL); + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0) + return 0; + + fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); + if (fh == NULL){ + *hotdrop = 1; + return 0; + } DEBUGP("INFO %04X ", fh->frag_off); DEBUGP("OFFSET %04X ", ntohs(fh->frag_off) & ~0x7); diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 27f3650d127..1d09485111d 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -63,8 +63,6 @@ match(const struct sk_buff *skb, struct ipv6_opt_hdr _optsh, *oh; const struct ip6t_opts *optinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; @@ -72,97 +70,25 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; - - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_opts header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* OPTS -> evaluate */ #if HOPBYHOP - if (nexthdr == NEXTHDR_HOP) { - temp |= MASK_HOPOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0) #else - if (nexthdr == NEXTHDR_DEST) { - temp |= MASK_DSTOPTS; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0) #endif - break; - } - + return 0; - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_opts match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_opts: new pointer is too large! \n"); - break; - } - } - - /* OPTIONS header not found */ -#if HOPBYHOP - if ( temp != MASK_HOPOPTS ) return 0; -#else - if ( temp != MASK_DSTOPTS ) return 0; -#endif - - if (len < (int)sizeof(struct ipv6_opt_hdr)){ + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen){ /* Packet smaller than it's length field */ return 0; } - oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); - BUG_ON(oh == NULL); - DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); DEBUGP("len %02X %04X %02X ", diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 2bb670037df..beb2fd5cebb 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -50,98 +50,29 @@ match(const struct sk_buff *skb, unsigned int protoff, int *hotdrop) { - struct ipv6_rt_hdr _route, *rh = NULL; + struct ipv6_rt_hdr _route, *rh; const struct ip6t_rt *rtinfo = matchinfo; unsigned int temp; - unsigned int len; - u8 nexthdr; unsigned int ptr; unsigned int hdrlen = 0; unsigned int ret = 0; struct in6_addr *ap, _addr; - /* type of the 1st exthdr */ - nexthdr = skb->nh.ipv6h->nexthdr; - /* pointer to the 1st exthdr */ - ptr = sizeof(struct ipv6hdr); - /* available length */ - len = skb->len - ptr; - temp = 0; + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0) + return 0; - while (ip6t_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr _hdr, *hp; - - DEBUGP("ipv6_rt header iteration \n"); - - /* Is there enough space for the next ext header? */ - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return 0; - /* No more exthdr -> evaluate */ - if (nexthdr == NEXTHDR_NONE) { - break; - } - /* ESP -> evaluate */ - if (nexthdr == NEXTHDR_ESP) { - break; - } - - hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); - BUG_ON(hp == NULL); - - /* Calculate the header length */ - if (nexthdr == NEXTHDR_FRAGMENT) { - hdrlen = 8; - } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(hp); - - /* ROUTING -> evaluate */ - if (nexthdr == NEXTHDR_ROUTING) { - temp |= MASK_ROUTING; - break; - } - - - /* set the flag */ - switch (nexthdr){ - case NEXTHDR_HOP: - case NEXTHDR_ROUTING: - case NEXTHDR_FRAGMENT: - case NEXTHDR_AUTH: - case NEXTHDR_DEST: - break; - default: - DEBUGP("ipv6_rt match: unknown nextheader %u\n",nexthdr); - return 0; - break; - } - - nexthdr = hp->nexthdr; - len -= hdrlen; - ptr += hdrlen; - if ( ptr > skb->len ) { - DEBUGP("ipv6_rt: new pointer is too large! \n"); - break; - } - } - - /* ROUTING header not found */ - if ( temp != MASK_ROUTING ) return 0; - - if (len < (int)sizeof(struct ipv6_rt_hdr)){ + rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); + if (rh == NULL){ *hotdrop = 1; return 0; } - if (len < hdrlen){ + hdrlen = ipv6_optlen(rh); + if (skb->len - ptr < hdrlen){ /* Pcket smaller than its length field */ return 0; } - rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); - BUG_ON(rh == NULL); - DEBUGP("IPv6 RT LEN %u %u ", hdrlen, rh->hdrlen); DEBUGP("TYPE %04X ", rh->type); DEBUGP("SGS_LEFT %u %02X\n", rh->segments_left, rh->segments_left); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 5aa3691c578..a1265a320b1 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -627,7 +627,7 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) if (type && code) { get_user(fl->fl_icmp_type, type); - __get_user(fl->fl_icmp_code, code); + get_user(fl->fl_icmp_code, code); probed = 1; } break; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8690f171c1e..ee865d88183 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -36,6 +36,11 @@ * Michal Ostrowski : Module initialization cleanup. * Ulises Alonso : Frame number limit removal and * packet_set_ring memory leak. + * Eric Biederman : Allow for > 8 byte hardware addresses. + * The convention is that longer addresses + * will simply extend the hardware address + * byte arrays at the end of sockaddr_ll + * and packet_mreq. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -161,7 +166,17 @@ struct packet_mclist int count; unsigned short type; unsigned short alen; - unsigned char addr[8]; + unsigned char addr[MAX_ADDR_LEN]; +}; +/* identical to struct packet_mreq except it has + * a longer address field. + */ +struct packet_mreq_max +{ + int mr_ifindex; + unsigned short mr_type; + unsigned short mr_alen; + unsigned char mr_address[MAX_ADDR_LEN]; }; #endif #ifdef CONFIG_PACKET_MMAP @@ -716,6 +731,8 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, err = -EINVAL; if (msg->msg_namelen < sizeof(struct sockaddr_ll)) goto out; + if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) + goto out; ifindex = saddr->sll_ifindex; proto = saddr->sll_protocol; addr = saddr->sll_addr; @@ -744,6 +761,12 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (dev->hard_header) { int res; err = -EINVAL; + if (saddr) { + if (saddr->sll_halen != dev->addr_len) + goto out_free; + if (saddr->sll_hatype != dev->type) + goto out_free; + } res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (sock->type != SOCK_DGRAM) { skb->tail = skb->data; @@ -1045,6 +1068,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; + struct sockaddr_ll *sll; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1057,16 +1081,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, #endif /* - * If the address length field is there to be filled in, we fill - * it in now. - */ - - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sizeof(struct sockaddr_ll); - - /* * Call the generic datagram receiver. This handles all sorts * of horrible races and re-entrancy so we can forget about it * in the protocol layers. @@ -1087,6 +1101,17 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; /* + * If the address length field is there to be filled in, we fill + * it in now. + */ + + sll = (struct sockaddr_ll*)skb->cb; + if (sock->type == SOCK_PACKET) + msg->msg_namelen = sizeof(struct sockaddr_pkt); + else + msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); + + /* * You lose any data beyond the buffer you gave. If it worries a * user program they can ask the device for its MTU anyway. */ @@ -1166,7 +1191,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr, sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ sll->sll_halen = 0; } - *uaddr_len = sizeof(*sll); + *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; return 0; } @@ -1199,7 +1224,7 @@ static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, i } } -static int packet_mc_add(struct sock *sk, struct packet_mreq *mreq) +static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_sock *po = pkt_sk(sk); struct packet_mclist *ml, *i; @@ -1249,7 +1274,7 @@ done: return err; } -static int packet_mc_drop(struct sock *sk, struct packet_mreq *mreq) +static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) { struct packet_mclist *ml, **mlp; @@ -1315,11 +1340,17 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv case PACKET_ADD_MEMBERSHIP: case PACKET_DROP_MEMBERSHIP: { - struct packet_mreq mreq; - if (optlen<sizeof(mreq)) + struct packet_mreq_max mreq; + int len = optlen; + memset(&mreq, 0, sizeof(mreq)); + if (len < sizeof(struct packet_mreq)) return -EINVAL; - if (copy_from_user(&mreq,optval,sizeof(mreq))) + if (len > sizeof(mreq)) + len = sizeof(mreq); + if (copy_from_user(&mreq,optval,len)) return -EFAULT; + if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) + return -EINVAL; if (optname == PACKET_ADD_MEMBERSHIP) ret = packet_mc_add(sk, &mreq); else diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 86073df418f..505c7de10c5 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2414,6 +2414,17 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); chunk->subh.shutdown_hdr = sdh; + /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT + * When a peer sends a SHUTDOWN, SCTP delivers this notification to + * inform the application that it should cease sending data. + */ + ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC); + if (!ev) { + disposition = SCTP_DISPOSITION_NOMEM; + goto out; + } + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + /* Upon the reception of the SHUTDOWN, the peer endpoint shall * - enter the SHUTDOWN-RECEIVED state, * - stop accepting new data from its SCTP user @@ -2439,17 +2450,6 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN, SCTP_U32(chunk->subh.shutdown_hdr->cum_tsn_ack)); - /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT - * When a peer sends a SHUTDOWN, SCTP delivers this notification to - * inform the application that it should cease sending data. - */ - ev = sctp_ulpevent_make_shutdown_event(asoc, 0, GFP_ATOMIC); - if (!ev) { - disposition = SCTP_DISPOSITION_NOMEM; - goto out; - } - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - out: return disposition; } diff --git a/sound/oss/au1000.c b/sound/oss/au1000.c index 4491733c9e4..2c2ae2ee01a 100644 --- a/sound/oss/au1000.c +++ b/sound/oss/au1000.c @@ -1295,7 +1295,7 @@ static int au1000_mmap(struct file *file, struct vm_area_struct *vma) unsigned long size; int ret = 0; - dbg(__FUNCTION__); + dbg("%s", __FUNCTION__); lock_kernel(); down(&s->sem); diff --git a/sound/oss/ite8172.c b/sound/oss/ite8172.c index 58f879fda97..26e5944b6ba 100644 --- a/sound/oss/ite8172.c +++ b/sound/oss/ite8172.c @@ -1859,7 +1859,7 @@ static int it8172_release(struct inode *inode, struct file *file) struct it8172_state *s = (struct it8172_state *)file->private_data; #ifdef IT8172_VERBOSE_DEBUG - dbg(__FUNCTION__); + dbg("%s", __FUNCTION__); #endif lock_kernel(); if (file->f_mode & FMODE_WRITE) diff --git a/sound/pci/atiixp_modem.c b/sound/pci/atiixp_modem.c index 8a59598167f..c1a239a4dac 100644 --- a/sound/pci/atiixp_modem.c +++ b/sound/pci/atiixp_modem.c @@ -405,7 +405,7 @@ static int snd_atiixp_acquire_codec(atiixp_t *chip) while (atiixp_read(chip, PHYS_OUT_ADDR) & ATI_REG_PHYS_OUT_ADDR_EN) { if (! timeout--) { - snd_printk(KERN_WARNING "atiixp: codec acquire timeout\n"); + snd_printk(KERN_WARNING "atiixp-modem: codec acquire timeout\n"); return -EBUSY; } udelay(1); @@ -436,7 +436,7 @@ static unsigned short snd_atiixp_codec_read(atiixp_t *chip, unsigned short codec } while (--timeout); /* time out may happen during reset */ if (reg < 0x7c) - snd_printk(KERN_WARNING "atiixp: codec read timeout (reg %x)\n", reg); + snd_printk(KERN_WARNING "atiixp-modem: codec read timeout (reg %x)\n", reg); return 0xffff; } @@ -498,7 +498,7 @@ static int snd_atiixp_aclink_reset(atiixp_t *chip) do_delay(); atiixp_update(chip, CMD, ATI_REG_CMD_AC_RESET, ATI_REG_CMD_AC_RESET); if (--timeout) { - snd_printk(KERN_ERR "atiixp: codec reset timeout\n"); + snd_printk(KERN_ERR "atiixp-modem: codec reset timeout\n"); break; } } @@ -552,7 +552,7 @@ static int snd_atiixp_codec_detect(atiixp_t *chip) atiixp_write(chip, IER, 0); /* disable irqs */ if ((chip->codec_not_ready_bits & ALL_CODEC_NOT_READY) == ALL_CODEC_NOT_READY) { - snd_printk(KERN_ERR "atiixp: no codec detected!\n"); + snd_printk(KERN_ERR "atiixp-modem: no codec detected!\n"); return -ENXIO; } return 0; @@ -635,7 +635,7 @@ static void snd_atiixp_xrun_dma(atiixp_t *chip, atiixp_dma_t *dma) { if (! dma->substream || ! dma->running) return; - snd_printdd("atiixp: XRUN detected (DMA %d)\n", dma->ops->type); + snd_printdd("atiixp-modem: XRUN detected (DMA %d)\n", dma->ops->type); snd_pcm_stop(dma->substream, SNDRV_PCM_STATE_XRUN); } @@ -1081,14 +1081,14 @@ static int __devinit snd_atiixp_mixer_new(atiixp_t *chip, int clock) ac97.scaps = AC97_SCAP_SKIP_AUDIO; if ((err = snd_ac97_mixer(pbus, &ac97, &chip->ac97[i])) < 0) { chip->ac97[i] = NULL; /* to be sure */ - snd_printdd("atiixp: codec %d not available for modem\n", i); + snd_printdd("atiixp-modem: codec %d not available for modem\n", i); continue; } codec_count++; } if (! codec_count) { - snd_printk(KERN_ERR "atiixp: no codec available\n"); + snd_printk(KERN_ERR "atiixp-modem: no codec available\n"); return -ENODEV; } @@ -1159,7 +1159,7 @@ static void __devinit snd_atiixp_proc_init(atiixp_t *chip) { snd_info_entry_t *entry; - if (! snd_card_proc_new(chip->card, "atiixp", &entry)) + if (! snd_card_proc_new(chip->card, "atiixp-modem", &entry)) snd_info_set_text_ops(entry, chip, 1024, snd_atiixp_proc_read); } diff --git a/sound/sparc/cs4231.c b/sound/sparc/cs4231.c index 2fb27c4e951..f4361c518e4 100644 --- a/sound/sparc/cs4231.c +++ b/sound/sparc/cs4231.c @@ -173,7 +173,7 @@ static cs4231_t *cs4231_list; #define CS4231_GLOBALIRQ 0x01 /* IRQ is active */ -/* definitions for codec irq status */ +/* definitions for codec irq status - CS4231_IRQ_STATUS */ #define CS4231_PLAYBACK_IRQ 0x10 #define CS4231_RECORD_IRQ 0x20 @@ -402,7 +402,7 @@ static void snd_cs4231_outm(cs4231_t *chip, unsigned char reg, udelay(100); #ifdef CONFIG_SND_DEBUG if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT) - snd_printk("outm: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value); + snd_printdd("outm: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value); #endif if (chip->calibrate_mute) { chip->image[reg] &= mask; @@ -425,6 +425,10 @@ static void snd_cs4231_dout(cs4231_t *chip, unsigned char reg, unsigned char val timeout > 0 && (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT); timeout--) udelay(100); +#ifdef CONFIG_SND_DEBUG + if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT) + snd_printdd("out: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value); +#endif __cs4231_writeb(chip, chip->mce_bit | reg, CS4231P(chip, REGSEL)); __cs4231_writeb(chip, value, CS4231P(chip, REG)); mb(); @@ -440,15 +444,12 @@ static void snd_cs4231_out(cs4231_t *chip, unsigned char reg, unsigned char valu udelay(100); #ifdef CONFIG_SND_DEBUG if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT) - snd_printk("out: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value); + snd_printdd("out: auto calibration time out - reg = 0x%x, value = 0x%x\n", reg, value); #endif __cs4231_writeb(chip, chip->mce_bit | reg, CS4231P(chip, REGSEL)); __cs4231_writeb(chip, value, CS4231P(chip, REG)); chip->image[reg] = value; mb(); -#if 0 - printk("codec out - reg 0x%x = 0x%x\n", chip->mce_bit | reg, value); -#endif } static unsigned char snd_cs4231_in(cs4231_t *chip, unsigned char reg) @@ -462,61 +463,14 @@ static unsigned char snd_cs4231_in(cs4231_t *chip, unsigned char reg) udelay(100); #ifdef CONFIG_SND_DEBUG if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT) - snd_printk("in: auto calibration time out - reg = 0x%x\n", reg); + snd_printdd("in: auto calibration time out - reg = 0x%x\n", reg); #endif __cs4231_writeb(chip, chip->mce_bit | reg, CS4231P(chip, REGSEL)); mb(); ret = __cs4231_readb(chip, CS4231P(chip, REG)); -#if 0 - printk("codec in - reg 0x%x = 0x%x\n", chip->mce_bit | reg, ret); -#endif return ret; } -#if 0 - -static void snd_cs4231_debug(cs4231_t *chip) -{ - printk("CS4231 REGS: INDEX = 0x%02x ", - __cs4231_readb(chip, CS4231P(chip, REGSEL))); - printk(" STATUS = 0x%02x\n", - __cs4231_readb(chip, CS4231P(chip, STATUS))); - printk(" 0x00: left input = 0x%02x ", snd_cs4231_in(chip, 0x00)); - printk(" 0x10: alt 1 (CFIG 2) = 0x%02x\n", snd_cs4231_in(chip, 0x10)); - printk(" 0x01: right input = 0x%02x ", snd_cs4231_in(chip, 0x01)); - printk(" 0x11: alt 2 (CFIG 3) = 0x%02x\n", snd_cs4231_in(chip, 0x11)); - printk(" 0x02: GF1 left input = 0x%02x ", snd_cs4231_in(chip, 0x02)); - printk(" 0x12: left line in = 0x%02x\n", snd_cs4231_in(chip, 0x12)); - printk(" 0x03: GF1 right input = 0x%02x ", snd_cs4231_in(chip, 0x03)); - printk(" 0x13: right line in = 0x%02x\n", snd_cs4231_in(chip, 0x13)); - printk(" 0x04: CD left input = 0x%02x ", snd_cs4231_in(chip, 0x04)); - printk(" 0x14: timer low = 0x%02x\n", snd_cs4231_in(chip, 0x14)); - printk(" 0x05: CD right input = 0x%02x ", snd_cs4231_in(chip, 0x05)); - printk(" 0x15: timer high = 0x%02x\n", snd_cs4231_in(chip, 0x15)); - printk(" 0x06: left output = 0x%02x ", snd_cs4231_in(chip, 0x06)); - printk(" 0x16: left MIC (PnP) = 0x%02x\n", snd_cs4231_in(chip, 0x16)); - printk(" 0x07: right output = 0x%02x ", snd_cs4231_in(chip, 0x07)); - printk(" 0x17: right MIC (PnP) = 0x%02x\n", snd_cs4231_in(chip, 0x17)); - printk(" 0x08: playback format = 0x%02x ", snd_cs4231_in(chip, 0x08)); - printk(" 0x18: IRQ status = 0x%02x\n", snd_cs4231_in(chip, 0x18)); - printk(" 0x09: iface (CFIG 1) = 0x%02x ", snd_cs4231_in(chip, 0x09)); - printk(" 0x19: left line out = 0x%02x\n", snd_cs4231_in(chip, 0x19)); - printk(" 0x0a: pin control = 0x%02x ", snd_cs4231_in(chip, 0x0a)); - printk(" 0x1a: mono control = 0x%02x\n", snd_cs4231_in(chip, 0x1a)); - printk(" 0x0b: init & status = 0x%02x ", snd_cs4231_in(chip, 0x0b)); - printk(" 0x1b: right line out = 0x%02x\n", snd_cs4231_in(chip, 0x1b)); - printk(" 0x0c: revision & mode = 0x%02x ", snd_cs4231_in(chip, 0x0c)); - printk(" 0x1c: record format = 0x%02x\n", snd_cs4231_in(chip, 0x1c)); - printk(" 0x0d: loopback = 0x%02x ", snd_cs4231_in(chip, 0x0d)); - printk(" 0x1d: var freq (PnP) = 0x%02x\n", snd_cs4231_in(chip, 0x1d)); - printk(" 0x0e: ply upr count = 0x%02x ", snd_cs4231_in(chip, 0x0e)); - printk(" 0x1e: rec upr count = 0x%02x\n", snd_cs4231_in(chip, 0x1e)); - printk(" 0x0f: ply lwr count = 0x%02x ", snd_cs4231_in(chip, 0x0f)); - printk(" 0x1f: rec lwr count = 0x%02x\n", snd_cs4231_in(chip, 0x1f)); -} - -#endif - /* * CS4231 detection / MCE routines */ @@ -528,11 +482,12 @@ static void snd_cs4231_busy_wait(cs4231_t *chip) /* huh.. looks like this sequence is proper for CS4231A chip (GUS MAX) */ for (timeout = 5; timeout > 0; timeout--) __cs4231_readb(chip, CS4231P(chip, REGSEL)); + /* end of cleanup sequence */ - for (timeout = 250; + for (timeout = 500; timeout > 0 && (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT); timeout--) - udelay(100); + udelay(1000); } static void snd_cs4231_mce_up(cs4231_t *chip) @@ -545,12 +500,12 @@ static void snd_cs4231_mce_up(cs4231_t *chip) udelay(100); #ifdef CONFIG_SND_DEBUG if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT) - snd_printk("mce_up - auto calibration time out (0)\n"); + snd_printdd("mce_up - auto calibration time out (0)\n"); #endif chip->mce_bit |= CS4231_MCE; timeout = __cs4231_readb(chip, CS4231P(chip, REGSEL)); if (timeout == 0x80) - snd_printk("mce_up [%p]: serious init problem - codec still busy\n", chip->port); + snd_printdd("mce_up [%p]: serious init problem - codec still busy\n", chip->port); if (!(timeout & CS4231_MCE)) __cs4231_writeb(chip, chip->mce_bit | (timeout & 0x1f), CS4231P(chip, REGSEL)); spin_unlock_irqrestore(&chip->lock, flags); @@ -563,18 +518,15 @@ static void snd_cs4231_mce_down(cs4231_t *chip) spin_lock_irqsave(&chip->lock, flags); snd_cs4231_busy_wait(chip); -#if 0 - printk("(1) timeout = %i\n", timeout); -#endif #ifdef CONFIG_SND_DEBUG if (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT) - snd_printk("mce_down [%p] - auto calibration time out (0)\n", CS4231P(chip, REGSEL)); + snd_printdd("mce_down [%p] - auto calibration time out (0)\n", CS4231P(chip, REGSEL)); #endif chip->mce_bit &= ~CS4231_MCE; timeout = __cs4231_readb(chip, CS4231P(chip, REGSEL)); __cs4231_writeb(chip, chip->mce_bit | (timeout & 0x1f), CS4231P(chip, REGSEL)); if (timeout == 0x80) - snd_printk("mce_down [%p]: serious init problem - codec still busy\n", chip->port); + snd_printdd("mce_down [%p]: serious init problem - codec still busy\n", chip->port); if ((timeout & CS4231_MCE) == 0) { spin_unlock_irqrestore(&chip->lock, flags); return; @@ -590,9 +542,7 @@ static void snd_cs4231_mce_down(cs4231_t *chip) spin_unlock_irqrestore(&chip->lock, flags); return; } -#if 0 - printk("(2) timeout = %i, jiffies = %li\n", timeout, jiffies); -#endif + /* in 10ms increments, check condition, up to 250ms */ timeout = 25; while (snd_cs4231_in(chip, CS4231_TEST_INIT) & CS4231_CALIB_IN_PROGRESS) { @@ -604,9 +554,7 @@ static void snd_cs4231_mce_down(cs4231_t *chip) msleep(10); spin_lock_irqsave(&chip->lock, flags); } -#if 0 - printk("(3) jiffies = %li\n", jiffies); -#endif + /* in 10ms increments, check condition, up to 100ms */ timeout = 10; while (__cs4231_readb(chip, CS4231P(chip, REGSEL)) & CS4231_INIT) { @@ -619,54 +567,58 @@ static void snd_cs4231_mce_down(cs4231_t *chip) spin_lock_irqsave(&chip->lock, flags); } spin_unlock_irqrestore(&chip->lock, flags); -#if 0 - printk("(4) jiffies = %li\n", jiffies); - snd_printk("mce_down - exit = 0x%x\n", __cs4231_readb(chip, CS4231P(chip, REGSEL))); -#endif } -#if 0 /* Unused for now... */ -static unsigned int snd_cs4231_get_count(unsigned char format, unsigned int size) -{ - switch (format & 0xe0) { - case CS4231_LINEAR_16: - case CS4231_LINEAR_16_BIG: - size >>= 1; - break; - case CS4231_ADPCM_16: - return size >> 2; - } - if (format & CS4231_STEREO) - size >>= 1; - return size; -} -#endif - #ifdef EBUS_SUPPORT static void snd_cs4231_ebus_advance_dma(struct ebus_dma_info *p, snd_pcm_substream_t *substream, unsigned int *periods_sent) { snd_pcm_runtime_t *runtime = substream->runtime; while (1) { - unsigned int dma_size = snd_pcm_lib_period_bytes(substream); - unsigned int offset = dma_size * (*periods_sent); + unsigned int period_size = snd_pcm_lib_period_bytes(substream); + unsigned int offset = period_size * (*periods_sent); - if (dma_size >= (1 << 24)) + if (period_size >= (1 << 24)) BUG(); - if (ebus_dma_request(p, runtime->dma_addr + offset, dma_size)) + if (ebus_dma_request(p, runtime->dma_addr + offset, period_size)) return; -#if 0 - printk("ebus_advance: Sent period %u (size[%x] offset[%x])\n", - (*periods_sent), dma_size, offset); -#endif (*periods_sent) = ((*periods_sent) + 1) % runtime->periods; } } #endif -static void cs4231_dma_trigger(cs4231_t *chip, unsigned int what, int on) +#ifdef SBUS_SUPPORT +static void snd_cs4231_sbus_advance_dma(snd_pcm_substream_t *substream, unsigned int *periods_sent) +{ + cs4231_t *chip = snd_pcm_substream_chip(substream); + snd_pcm_runtime_t *runtime = substream->runtime; + + unsigned int period_size = snd_pcm_lib_period_bytes(substream); + unsigned int offset = period_size * (*periods_sent % runtime->periods); + + if (runtime->period_size > 0xffff + 1) + BUG(); + + switch (substream->stream) { + case SNDRV_PCM_STREAM_PLAYBACK: + sbus_writel(runtime->dma_addr + offset, chip->port + APCPNVA); + sbus_writel(period_size, chip->port + APCPNC); + break; + case SNDRV_PCM_STREAM_CAPTURE: + sbus_writel(runtime->dma_addr + offset, chip->port + APCCNVA); + sbus_writel(period_size, chip->port + APCCNC); + break; + } + + (*periods_sent) = (*periods_sent + 1) % runtime->periods; +} +#endif + +static void cs4231_dma_trigger(snd_pcm_substream_t *substream, unsigned int what, int on) { + cs4231_t *chip = snd_pcm_substream_chip(substream); + #ifdef EBUS_SUPPORT if (chip->flags & CS4231_FLAG_EBUS) { if (what & CS4231_PLAYBACK_ENABLE) { @@ -694,6 +646,60 @@ static void cs4231_dma_trigger(cs4231_t *chip, unsigned int what, int on) } else { #endif #ifdef SBUS_SUPPORT + u32 csr = sbus_readl(chip->port + APCCSR); + /* I don't know why, but on sbus the period counter must + * only start counting after the first period is sent. + * Therefore this dummy thing. + */ + unsigned int dummy = 0; + + switch (what) { + case CS4231_PLAYBACK_ENABLE: + if (on) { + csr &= ~APC_XINT_PLAY; + sbus_writel(csr, chip->port + APCCSR); + + csr &= ~APC_PPAUSE; + sbus_writel(csr, chip->port + APCCSR); + + snd_cs4231_sbus_advance_dma(substream, &dummy); + + csr |= APC_GENL_INT | APC_PLAY_INT | APC_XINT_ENA | + APC_XINT_PLAY | APC_XINT_EMPT | APC_XINT_GENL | + APC_XINT_PENA | APC_PDMA_READY; + sbus_writel(csr, chip->port + APCCSR); + } else { + csr |= APC_PPAUSE; + sbus_writel(csr, chip->port + APCCSR); + + csr &= ~APC_PDMA_READY; + sbus_writel(csr, chip->port + APCCSR); + } + break; + case CS4231_RECORD_ENABLE: + if (on) { + csr &= ~APC_XINT_CAPT; + sbus_writel(csr, chip->port + APCCSR); + + csr &= ~APC_CPAUSE; + sbus_writel(csr, chip->port + APCCSR); + + snd_cs4231_sbus_advance_dma(substream, &dummy); + + csr |= APC_GENL_INT | APC_CAPT_INT | APC_XINT_ENA | + APC_XINT_CAPT | APC_XINT_CEMP | APC_XINT_GENL | + APC_CDMA_READY; + + sbus_writel(csr, chip->port + APCCSR); + } else { + csr |= APC_CPAUSE; + sbus_writel(csr, chip->port + APCCSR); + + csr &= ~APC_CDMA_READY; + sbus_writel(csr, chip->port + APCCSR); + } + break; + } #endif #ifdef EBUS_SUPPORT } @@ -725,25 +731,12 @@ static int snd_cs4231_trigger(snd_pcm_substream_t *substream, int cmd) } } -#if 0 - printk("TRIGGER: what[%x] on(%d)\n", - what, (cmd == SNDRV_PCM_TRIGGER_START)); -#endif - spin_lock_irqsave(&chip->lock, flags); if (cmd == SNDRV_PCM_TRIGGER_START) { - cs4231_dma_trigger(chip, what, 1); + cs4231_dma_trigger(substream, what, 1); chip->image[CS4231_IFACE_CTRL] |= what; - if (what & CS4231_PLAYBACK_ENABLE) { - snd_cs4231_out(chip, CS4231_PLY_LWR_CNT, 0xff); - snd_cs4231_out(chip, CS4231_PLY_UPR_CNT, 0xff); - } - if (what & CS4231_RECORD_ENABLE) { - snd_cs4231_out(chip, CS4231_REC_LWR_CNT, 0xff); - snd_cs4231_out(chip, CS4231_REC_UPR_CNT, 0xff); - } } else { - cs4231_dma_trigger(chip, what, 0); + cs4231_dma_trigger(substream, what, 0); chip->image[CS4231_IFACE_CTRL] &= ~what; } snd_cs4231_out(chip, CS4231_IFACE_CTRL, @@ -755,9 +748,7 @@ static int snd_cs4231_trigger(snd_pcm_substream_t *substream, int cmd) result = -EINVAL; break; } -#if 0 - snd_cs4231_debug(chip); -#endif + return result; } @@ -790,9 +781,6 @@ static unsigned char snd_cs4231_get_format(cs4231_t *chip, int format, int chann } if (channels > 1) rformat |= CS4231_STEREO; -#if 0 - snd_printk("get_format: 0x%x (mode=0x%x)\n", format, mode); -#endif return rformat; } @@ -944,7 +932,7 @@ static void snd_cs4231_init(cs4231_t *chip) snd_cs4231_mce_down(chip); #ifdef SNDRV_DEBUG_MCE - snd_printk("init: (1)\n"); + snd_printdd("init: (1)\n"); #endif snd_cs4231_mce_up(chip); spin_lock_irqsave(&chip->lock, flags); @@ -957,7 +945,7 @@ static void snd_cs4231_init(cs4231_t *chip) snd_cs4231_mce_down(chip); #ifdef SNDRV_DEBUG_MCE - snd_printk("init: (2)\n"); + snd_printdd("init: (2)\n"); #endif snd_cs4231_mce_up(chip); @@ -967,7 +955,7 @@ static void snd_cs4231_init(cs4231_t *chip) snd_cs4231_mce_down(chip); #ifdef SNDRV_DEBUG_MCE - snd_printk("init: (3) - afei = 0x%x\n", chip->image[CS4231_ALT_FEATURE_1]); + snd_printdd("init: (3) - afei = 0x%x\n", chip->image[CS4231_ALT_FEATURE_1]); #endif spin_lock_irqsave(&chip->lock, flags); @@ -981,7 +969,7 @@ static void snd_cs4231_init(cs4231_t *chip) snd_cs4231_mce_down(chip); #ifdef SNDRV_DEBUG_MCE - snd_printk("init: (4)\n"); + snd_printdd("init: (4)\n"); #endif snd_cs4231_mce_up(chip); @@ -991,7 +979,7 @@ static void snd_cs4231_init(cs4231_t *chip) snd_cs4231_mce_down(chip); #ifdef SNDRV_DEBUG_MCE - snd_printk("init: (5)\n"); + snd_printdd("init: (5)\n"); #endif } @@ -1022,6 +1010,7 @@ static int snd_cs4231_open(cs4231_t *chip, unsigned int mode) CS4231_RECORD_IRQ | CS4231_TIMER_IRQ); snd_cs4231_out(chip, CS4231_IRQ_STATUS, 0); + spin_unlock_irqrestore(&chip->lock, flags); chip->mode = mode; @@ -1136,11 +1125,21 @@ static int snd_cs4231_playback_hw_free(snd_pcm_substream_t *substream) static int snd_cs4231_playback_prepare(snd_pcm_substream_t *substream) { cs4231_t *chip = snd_pcm_substream_chip(substream); + snd_pcm_runtime_t *runtime = substream->runtime; unsigned long flags; spin_lock_irqsave(&chip->lock, flags); + chip->image[CS4231_IFACE_CTRL] &= ~(CS4231_PLAYBACK_ENABLE | CS4231_PLAYBACK_PIO); + + if (runtime->period_size > 0xffff + 1) + BUG(); + + snd_cs4231_out(chip, CS4231_PLY_LWR_CNT, (runtime->period_size - 1) & 0x00ff); + snd_cs4231_out(chip, CS4231_PLY_UPR_CNT, (runtime->period_size - 1) >> 8 & 0x00ff); + chip->p_periods_sent = 0; + spin_unlock_irqrestore(&chip->lock, flags); return 0; @@ -1172,12 +1171,16 @@ static int snd_cs4231_capture_hw_free(snd_pcm_substream_t *substream) static int snd_cs4231_capture_prepare(snd_pcm_substream_t *substream) { cs4231_t *chip = snd_pcm_substream_chip(substream); + snd_pcm_runtime_t *runtime = substream->runtime; unsigned long flags; spin_lock_irqsave(&chip->lock, flags); chip->image[CS4231_IFACE_CTRL] &= ~(CS4231_RECORD_ENABLE | CS4231_RECORD_PIO); + snd_cs4231_out(chip, CS4231_REC_LWR_CNT, (runtime->period_size - 1) & 0x00ff); + snd_cs4231_out(chip, CS4231_REC_LWR_CNT, (runtime->period_size - 1) >> 8 & 0x00ff); + spin_unlock_irqrestore(&chip->lock, flags); return 0; @@ -1196,53 +1199,61 @@ static void snd_cs4231_overrange(cs4231_t *chip) chip->capture_substream->runtime->overrange++; } -static void snd_cs4231_generic_interrupt(cs4231_t *chip) +static irqreturn_t snd_cs4231_generic_interrupt(cs4231_t *chip) { unsigned long flags; unsigned char status; + /*This is IRQ is not raised by the cs4231*/ + if (!(__cs4231_readb(chip, CS4231P(chip, STATUS)) & CS4231_GLOBALIRQ)) + return IRQ_NONE; + status = snd_cs4231_in(chip, CS4231_IRQ_STATUS); - if (!status) - return; if (status & CS4231_TIMER_IRQ) { if (chip->timer) snd_timer_interrupt(chip->timer, chip->timer->sticks); } - if (status & CS4231_PLAYBACK_IRQ) - snd_pcm_period_elapsed(chip->playback_substream); - if (status & CS4231_RECORD_IRQ) { + + if (status & CS4231_RECORD_IRQ) snd_cs4231_overrange(chip); - snd_pcm_period_elapsed(chip->capture_substream); - } /* ACK the CS4231 interrupt. */ spin_lock_irqsave(&chip->lock, flags); snd_cs4231_outm(chip, CS4231_IRQ_STATUS, ~CS4231_ALL_IRQS | ~status, 0); spin_unlock_irqrestore(&chip->lock, flags); + + return 0; } #ifdef SBUS_SUPPORT static irqreturn_t snd_cs4231_sbus_interrupt(int irq, void *dev_id, struct pt_regs *regs) { cs4231_t *chip = dev_id; - u32 csr; - - csr = sbus_readl(chip->port + APCCSR); - if (!(csr & (APC_INT_PENDING | - APC_PLAY_INT | - APC_CAPT_INT | - APC_GENL_INT | - APC_XINT_PEMP | - APC_XINT_CEMP))) - return IRQ_NONE; /* ACK the APC interrupt. */ + u32 csr = sbus_readl(chip->port + APCCSR); + sbus_writel(csr, chip->port + APCCSR); - snd_cs4231_generic_interrupt(chip); + if ((chip->image[CS4231_IFACE_CTRL] & CS4231_PLAYBACK_ENABLE) && + (csr & APC_PLAY_INT) && + (csr & APC_XINT_PNVA) && + !(csr & APC_XINT_EMPT)) { + snd_cs4231_sbus_advance_dma(chip->playback_substream, + &chip->p_periods_sent); + snd_pcm_period_elapsed(chip->playback_substream); + } - return IRQ_HANDLED; + if ((chip->image[CS4231_IFACE_CTRL] & CS4231_RECORD_ENABLE) && + (csr & APC_CAPT_INT) && + (csr & APC_XINT_CNVA)) { + snd_cs4231_sbus_advance_dma(chip->capture_substream, + &chip->c_periods_sent); + snd_pcm_period_elapsed(chip->capture_substream); + } + + return snd_cs4231_generic_interrupt(chip); } #endif @@ -1290,7 +1301,8 @@ static snd_pcm_uframes_t snd_cs4231_playback_pointer(snd_pcm_substream_t *substr #ifdef EBUS_SUPPORT } #endif - ptr += (period_bytes - residue); + ptr += period_bytes - residue; + return bytes_to_frames(substream->runtime, ptr); } @@ -1314,7 +1326,7 @@ static snd_pcm_uframes_t snd_cs4231_capture_pointer(snd_pcm_substream_t * substr #ifdef EBUS_SUPPORT } #endif - ptr += (period_bytes - residue); + ptr += period_bytes - residue; return bytes_to_frames(substream->runtime, ptr); } @@ -1328,9 +1340,6 @@ static int snd_cs4231_probe(cs4231_t *chip) int i, id, vers; unsigned char *ptr; -#if 0 - snd_cs4231_debug(chip); -#endif id = vers = 0; for (i = 0; i < 50; i++) { mb(); @@ -1985,13 +1994,13 @@ static int __init snd_cs4231_sbus_create(snd_card_t *card, chip->port = sbus_ioremap(&sdev->resource[0], 0, chip->regs_size, "cs4231"); if (!chip->port) { - snd_printk("cs4231-%d: Unable to map chip registers.\n", dev); + snd_printdd("cs4231-%d: Unable to map chip registers.\n", dev); return -EIO; } if (request_irq(sdev->irqs[0], snd_cs4231_sbus_interrupt, SA_SHIRQ, "cs4231", chip)) { - snd_printk("cs4231-%d: Unable to grab SBUS IRQ %s\n", + snd_printdd("cs4231-%d: Unable to grab SBUS IRQ %s\n", dev, __irq_itoa(sdev->irqs[0])); snd_cs4231_sbus_free(chip); @@ -2113,29 +2122,29 @@ static int __init snd_cs4231_ebus_create(snd_card_t *card, chip->eb2c.regs = ioremap(edev->resource[2].start, 0x10); if (!chip->port || !chip->eb2p.regs || !chip->eb2c.regs) { snd_cs4231_ebus_free(chip); - snd_printk("cs4231-%d: Unable to map chip registers.\n", dev); + snd_printdd("cs4231-%d: Unable to map chip registers.\n", dev); return -EIO; } if (ebus_dma_register(&chip->eb2c)) { snd_cs4231_ebus_free(chip); - snd_printk("cs4231-%d: Unable to register EBUS capture DMA\n", dev); + snd_printdd("cs4231-%d: Unable to register EBUS capture DMA\n", dev); return -EBUSY; } if (ebus_dma_irq_enable(&chip->eb2c, 1)) { snd_cs4231_ebus_free(chip); - snd_printk("cs4231-%d: Unable to enable EBUS capture IRQ\n", dev); + snd_printdd("cs4231-%d: Unable to enable EBUS capture IRQ\n", dev); return -EBUSY; } if (ebus_dma_register(&chip->eb2p)) { snd_cs4231_ebus_free(chip); - snd_printk("cs4231-%d: Unable to register EBUS play DMA\n", dev); + snd_printdd("cs4231-%d: Unable to register EBUS play DMA\n", dev); return -EBUSY; } if (ebus_dma_irq_enable(&chip->eb2p, 1)) { snd_cs4231_ebus_free(chip); - snd_printk("cs4231-%d: Unable to enable EBUS play IRQ\n", dev); + snd_printdd("cs4231-%d: Unable to enable EBUS play IRQ\n", dev); return -EBUSY; } |