-rw-r--r--  Documentation/networking/e100.txt | 158
-rw-r--r--  Documentation/networking/e1000.txt | 620
-rw-r--r--  MAINTAINERS | 16
-rw-r--r--  arch/alpha/mm/init.c | 2
-rw-r--r--  arch/arm/mm/consistent.c | 4
-rw-r--r--  arch/arm/mm/init.c | 2
-rw-r--r--  arch/arm26/mm/init.c | 2
-rw-r--r--  arch/cris/mm/init.c | 2
-rw-r--r--  arch/frv/kernel/frv_ksyms.c | 1
-rw-r--r--  arch/frv/mm/dma-alloc.c | 4
-rw-r--r--  arch/frv/mm/init.c | 6
-rw-r--r--  arch/h8300/kernel/h8300_ksyms.c | 1
-rw-r--r--  arch/h8300/mm/init.c | 4
-rw-r--r--  arch/i386/kernel/efi.c | 2
-rw-r--r--  arch/i386/kernel/smp.c | 28
-rw-r--r--  arch/i386/kernel/sys_i386.c | 25
-rw-r--r--  arch/i386/kernel/timers/timer_hpet.c | 2
-rw-r--r--  arch/i386/kernel/timers/timer_tsc.c | 2
-rw-r--r--  arch/i386/mm/hugetlbpage.c | 12
-rw-r--r--  arch/i386/mm/init.c | 6
-rw-r--r--  arch/i386/mm/pageattr.c | 20
-rw-r--r--  arch/ia64/Kconfig | 19
-rw-r--r--  arch/ia64/configs/tiger_defconfig | 2
-rw-r--r--  arch/ia64/kernel/acpi.c | 14
-rw-r--r--  arch/ia64/kernel/entry.S | 14
-rw-r--r--  arch/ia64/kernel/iosapic.c | 6
-rw-r--r--  arch/ia64/kernel/irq.c | 13
-rw-r--r--  arch/ia64/kernel/mca.c | 90
-rw-r--r--  arch/ia64/kernel/perfmon.c | 5
-rw-r--r--  arch/ia64/kernel/signal.c | 101
-rw-r--r--  arch/ia64/kernel/smpboot.c | 114
-rw-r--r--  arch/ia64/kernel/time.c | 9
-rw-r--r--  arch/ia64/kernel/topology.c | 2
-rw-r--r--  arch/ia64/mm/contig.c | 4
-rw-r--r--  arch/ia64/mm/discontig.c | 9
-rw-r--r--  arch/ia64/mm/hugetlbpage.c | 5
-rw-r--r--  arch/ia64/mm/init.c | 6
-rw-r--r--  arch/ia64/sn/kernel/Makefile | 3
-rw-r--r--  arch/ia64/sn/kernel/pio_phys.S | 71
-rw-r--r--  arch/ia64/sn/kernel/setup.c | 6
-rw-r--r--  arch/ia64/sn/kernel/sn2/sn2_smp.c | 21
-rw-r--r--  arch/ia64/sn/kernel/xpc_channel.c | 102
-rw-r--r--  arch/ia64/sn/kernel/xpc_main.c | 1
-rw-r--r--  arch/ia64/sn/kernel/xpc_partition.c | 28
-rw-r--r--  arch/ia64/sn/pci/tioce_provider.c | 326
-rw-r--r--  arch/m32r/mm/init.c | 4
-rw-r--r--  arch/m68k/mm/init.c | 2
-rw-r--r--  arch/m68k/mm/memory.c | 2
-rw-r--r--  arch/m68k/mm/motorola.c | 2
-rw-r--r--  arch/m68knommu/kernel/m68k_ksyms.c | 1
-rw-r--r--  arch/m68knommu/mm/init.c | 4
-rw-r--r--  arch/mips/arc/memory.c | 2
-rw-r--r--  arch/mips/dec/prom/memory.c | 2
-rw-r--r--  arch/mips/mips-boards/generic/memory.c | 2
-rw-r--r--  arch/mips/mips-boards/sim/sim_mem.c | 2
-rw-r--r--  arch/mips/mm/init.c | 11
-rw-r--r--  arch/mips/sgi-ip27/ip27-memory.c | 2
-rw-r--r--  arch/parisc/mm/init.c | 4
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 15
-rw-r--r--  arch/powerpc/mm/init_32.c | 4
-rw-r--r--  arch/powerpc/mm/init_64.c | 4
-rw-r--r--  arch/powerpc/mm/mem.c | 6
-rw-r--r--  arch/powerpc/platforms/cell/setup.c | 2
-rw-r--r--  arch/ppc/kernel/dma-mapping.c | 4
-rw-r--r--  arch/ppc/mm/init.c | 6
-rw-r--r--  arch/s390/mm/init.c | 4
-rw-r--r--  arch/sh/mm/consistent.c | 3
-rw-r--r--  arch/sh/mm/hugetlbpage.c | 12
-rw-r--r--  arch/sh/mm/init.c | 4
-rw-r--r--  arch/sh64/mm/hugetlbpage.c | 12
-rw-r--r--  arch/sh64/mm/init.c | 4
-rw-r--r--  arch/sparc/kernel/sun4d_smp.c | 6
-rw-r--r--  arch/sparc/kernel/sun4m_smp.c | 6
-rw-r--r--  arch/sparc/mm/init.c | 6
-rw-r--r--  arch/sparc64/mm/hugetlbpage.c | 12
-rw-r--r--  arch/sparc64/mm/init.c | 4
-rw-r--r--  arch/um/kernel/mem.c | 4
-rw-r--r--  arch/um/kernel/physmem.c | 2
-rw-r--r--  arch/x86_64/kernel/time.c | 2
-rw-r--r--  arch/x86_64/kernel/x8664_ksyms.c | 1
-rw-r--r--  arch/x86_64/mm/init.c | 6
-rw-r--r--  arch/x86_64/mm/pageattr.c | 63
-rw-r--r--  arch/xtensa/mm/init.c | 2
-rw-r--r--  arch/xtensa/mm/pgtable.c | 24
-rw-r--r--  drivers/char/snsc.h | 5
-rw-r--r--  drivers/char/snsc_event.c | 32
-rw-r--r--  drivers/char/tb0219.c | 24
-rw-r--r--  drivers/char/vr41xx_giu.c | 19
-rw-r--r--  drivers/char/vr41xx_rtc.c | 30
-rw-r--r--  drivers/char/watchdog/mv64x60_wdt.c | 20
-rw-r--r--  drivers/firmware/dcdbas.c | 110
-rw-r--r--  drivers/md/dm.c | 43
-rw-r--r--  drivers/media/dvb/bt8xx/Makefile | 2
-rw-r--r--  drivers/net/mv643xx_eth.h | 18
-rw-r--r--  drivers/net/pcnet32.c | 4143
-rw-r--r--  drivers/net/skfp/fplustm.c | 12
-rw-r--r--  drivers/net/skge.c | 275
-rw-r--r--  drivers/net/skge.h | 1
-rw-r--r--  drivers/net/sky2.c | 583
-rw-r--r--  drivers/net/sky2.h | 22
-rw-r--r--  drivers/net/smc91x.c | 53
-rw-r--r--  drivers/net/smc91x.h | 474
-rw-r--r--  drivers/scsi/iscsi_tcp.c | 2
-rw-r--r--  drivers/scsi/sg.c | 37
-rw-r--r--  drivers/serial/Kconfig | 2
-rw-r--r--  drivers/serial/serial_txx9.c | 77
-rw-r--r--  drivers/serial/vr41xx_siu.c | 24
-rw-r--r--  drivers/sn/ioc4.c | 41
-rw-r--r--  drivers/video/acornfb.c | 2
-rw-r--r--  drivers/video/i810/i810_main.c | 2
-rw-r--r--  fs/9p/vfs_inode.c | 3
-rw-r--r--  fs/buffer.c | 62
-rw-r--r--  fs/hugetlbfs/inode.c | 92
-rw-r--r--  fs/ocfs2/super.c | 2
-rw-r--r--  fs/ramfs/file-nommu.c | 3
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 1
-rw-r--r--  include/asm-i386/acpi.h | 10
-rw-r--r--  include/asm-i386/pgtable.h | 5
-rw-r--r--  include/asm-ia64/intel_intrin.h | 134
-rw-r--r--  include/asm-ia64/machvec.h | 13
-rw-r--r--  include/asm-ia64/machvec_sn2.h | 4
-rw-r--r--  include/asm-ia64/mca.h | 2
-rw-r--r--  include/asm-ia64/mutex.h | 93
-rw-r--r--  include/asm-ia64/page.h | 2
-rw-r--r--  include/asm-ia64/pgtable.h | 5
-rw-r--r--  include/asm-ia64/processor.h | 3
-rw-r--r--  include/asm-ia64/signal.h | 2
-rw-r--r--  include/asm-ia64/sn/addrs.h | 8
-rw-r--r--  include/asm-ia64/sn/rw_mmr.h | 56
-rw-r--r--  include/asm-ia64/sn/tioce.h | 36
-rw-r--r--  include/asm-ia64/sn/xpc.h | 22
-rw-r--r--  include/asm-ia64/system.h | 7
-rw-r--r--  include/asm-ia64/thread_info.h | 12
-rw-r--r--  include/asm-powerpc/pgtable.h | 5
-rw-r--r--  include/asm-s390/pgalloc.h | 7
-rw-r--r--  include/asm-sh64/pgalloc.h | 16
-rw-r--r--  include/asm-x86_64/pgtable.h | 4
-rw-r--r--  include/linux/hugetlb.h | 45
-rw-r--r--  include/linux/migrate.h | 36
-rw-r--r--  include/linux/mm.h | 48
-rw-r--r--  include/linux/mm_inline.h | 2
-rw-r--r--  include/linux/page-flags.h | 24
-rw-r--r--  include/linux/rtc.h | 4
-rw-r--r--  include/linux/slab.h | 3
-rw-r--r--  include/linux/smp.h | 23
-rw-r--r--  include/linux/swap.h | 38
-rw-r--r--  kernel/fork.c | 6
-rw-r--r--  kernel/sched.c | 6
-rw-r--r--  kernel/softirq.c | 20
-rw-r--r--  lib/string.c | 1
-rw-r--r--  mm/Kconfig | 6
-rw-r--r--  mm/Makefile | 2
-rw-r--r--  mm/filemap.c | 2
-rw-r--r--  mm/hugetlb.c | 286
-rw-r--r--  mm/internal.h | 34
-rw-r--r--  mm/memory.c | 21
-rw-r--r--  mm/mempolicy.c | 117
-rw-r--r--  mm/mempool.c | 4
-rw-r--r--  mm/migrate.c | 655
-rw-r--r--  mm/mmap.c | 10
-rw-r--r--  mm/mprotect.c | 12
-rw-r--r--  mm/nommu.c | 4
-rw-r--r--  mm/page_alloc.c | 113
-rw-r--r--  mm/readahead.c | 32
-rw-r--r--  mm/rmap.c | 14
-rw-r--r--  mm/shmem.c | 7
-rw-r--r--  mm/slab.c | 890
-rw-r--r--  mm/swap.c | 64
-rw-r--r--  mm/swap_state.c | 1
-rw-r--r--  mm/swapfile.c | 2
-rw-r--r--  mm/vmscan.c | 882
-rw-r--r--  security/keys/process_keys.c | 7
-rw-r--r--  security/selinux/hooks.c | 14
-rw-r--r--  security/selinux/selinuxfs.c | 112
-rw-r--r--  security/selinux/ss/services.c | 9
175 files changed, 6839 insertions(+), 5527 deletions(-)
diff --git a/Documentation/networking/e100.txt b/Documentation/networking/e100.txt
index 4ef9f7cd5dc..944aa55e79f 100644
--- a/Documentation/networking/e100.txt
+++ b/Documentation/networking/e100.txt
@@ -1,16 +1,17 @@
Linux* Base Driver for the Intel(R) PRO/100 Family of Adapters
==============================================================
-November 17, 2004
-
+November 15, 2005
Contents
========
- In This Release
- Identifying Your Adapter
+- Building and Installation
- Driver Configuration Parameters
- Additional Configurations
+- Known Issues
- Support
@@ -18,18 +19,30 @@ In This Release
===============
This file describes the Linux* Base Driver for the Intel(R) PRO/100 Family of
-Adapters, version 3.3.x. This driver supports 2.4.x and 2.6.x kernels.
+Adapters. This driver includes support for Itanium(R)2-based systems.
+
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel PRO/100 adapter.
+
+The following features are now available in supported kernels:
+ - Native VLANs
+ - Channel Bonding (teaming)
+ - SNMP
+
+Channel Bonding documentation can be found in the Linux kernel source:
+/Documentation/networking/bonding.txt
+
Identifying Your Adapter
========================
-For more information on how to identify your adapter, go to the Adapter &
+For more information on how to identify your adapter, go to the Adapter &
Driver ID Guide at:
http://support.intel.com/support/network/adapter/pro100/21397.htm
-For the latest Intel network drivers for Linux, refer to the following
-website. In the search field, enter your adapter name or type, or use the
+For the latest Intel network drivers for Linux, refer to the following
+website. In the search field, enter your adapter name or type, or use the
networking link on the left to search for your adapter:
http://downloadfinder.intel.com/scripts-df/support_intel.asp
@@ -40,73 +53,75 @@ Driver Configuration Parameters
The default value for each parameter is generally the recommended setting,
unless otherwise noted.
-Rx Descriptors: Number of receive descriptors. A receive descriptor is a data
- structure that describes a receive buffer and its attributes to the network
- controller. The data in the descriptor is used by the controller to write
- data from the controller to host memory. In the 3.0.x driver the valid
- range for this parameter is 64-256. The default value is 64. This parameter
- can be changed using the command
-
+Rx Descriptors: Number of receive descriptors. A receive descriptor is a data
+ structure that describes a receive buffer and its attributes to the network
+ controller. The data in the descriptor is used by the controller to write
+ data from the controller to host memory. In the 3.x.x driver the valid range
+ for this parameter is 64-256. The default value is 64. This parameter can be
+ changed using the command:
+
ethtool -G eth? rx n, where n is the number of desired rx descriptors.
-Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a
- data structure that describes a transmit buffer and its attributes to the
- network controller. The data in the descriptor is used by the controller to
- read data from the host memory to the controller. In the 3.0.x driver the
- valid range for this parameter is 64-256. The default value is 64. This
- parameter can be changed using the command
+Tx Descriptors: Number of transmit descriptors. A transmit descriptor is a data
+ structure that describes a transmit buffer and its attributes to the network
+ controller. The data in the descriptor is used by the controller to read
+ data from the host memory to the controller. In the 3.x.x driver the valid
+ range for this parameter is 64-256. The default value is 64. This parameter
+ can be changed using the command:
ethtool -G eth? tx n, where n is the number of desired tx descriptors.
-Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by
- default. Ethtool can be used as follows to force speed/duplex.
+Speed/Duplex: The driver auto-negotiates the link speed and duplex settings by
+ default. Ethtool can be used as follows to force speed/duplex.
ethtool -s eth? autoneg off speed {10|100} duplex {full|half}
NOTE: setting the speed/duplex to incorrect values will cause the link to
fail.
-Event Log Message Level: The driver uses the message level flag to log events
- to syslog. The message level can be set at driver load time. It can also be
- set using the command
+Event Log Message Level: The driver uses the message level flag to log events
+ to syslog. The message level can be set at driver load time. It can also be
+ set using the command:
ethtool -s eth? msglvl n
+
Additional Configurations
=========================
Configuring the Driver on Different Distributions
-------------------------------------------------
- Configuring a network driver to load properly when the system is started is
- distribution dependent. Typically, the configuration process involves adding
- an alias line to /etc/modules.conf as well as editing other system startup
- scripts and/or configuration files. Many popular Linux distributions ship
- with tools to make these changes for you. To learn the proper way to
- configure a network device for your system, refer to your distribution
- documentation. If during this process you are asked for the driver or module
- name, the name for the Linux Base Driver for the Intel PRO/100 Family of
- Adapters is e100.
+ Configuring a network driver to load properly when the system is started is
+ distribution dependent. Typically, the configuration process involves adding
+ an alias line to /etc/modules.conf or /etc/modprobe.conf as well as editing
+ other system startup scripts and/or configuration files. Many popular Linux
+ distributions ship with tools to make these changes for you. To learn the
+ proper way to configure a network device for your system, refer to your
+ distribution documentation. If during this process you are asked for the
+ driver or module name, the name for the Linux Base Driver for the Intel
+ PRO/100 Family of Adapters is e100.
- As an example, if you install the e100 driver for two PRO/100 adapters
- (eth0 and eth1), add the following to modules.conf:
+ As an example, if you install the e100 driver for two PRO/100 adapters
+ (eth0 and eth1), add the following to modules.conf or modprobe.conf:
alias eth0 e100
alias eth1 e100
Viewing Link Messages
---------------------
- In order to see link messages and other Intel driver information on your
- console, you must set the dmesg level up to six. This can be done by
- entering the following on the command line before loading the e100 driver:
+ In order to see link messages and other Intel driver information on your
+ console, you must set the dmesg level up to six. This can be done by
+ entering the following on the command line before loading the e100 driver:
dmesg -n 8
- If you wish to see all messages issued by the driver, including debug
+ If you wish to see all messages issued by the driver, including debug
messages, set the dmesg level to eight.
NOTE: This setting is not saved across reboots.
+
Ethtool
-------
@@ -114,29 +129,27 @@ Additional Configurations
diagnostics, as well as displaying statistical information. Ethtool
version 1.6 or later is required for this functionality.
- The latest release of ethtool can be found at:
- http://sf.net/projects/gkernel.
+ The latest release of ethtool can be found from
+ http://sourceforge.net/projects/gkernel.
- NOTE: This driver uses mii support from the kernel. As a result, when
- there is no link, ethtool will report speed/duplex to be 10/half.
+ NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support
+ for a more complete ethtool feature set can be enabled by upgrading
+ ethtool to ethtool-1.8.1.
- NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support
- for a more complete ethtool feature set can be enabled by upgrading
- ethtool to ethtool-1.8.1.
Enabling Wake on LAN* (WoL)
---------------------------
- WoL is provided through the Ethtool* utility. Ethtool is included with Red
- Hat* 8.0. For other Linux distributions, download and install Ethtool from
- the following website: http://sourceforge.net/projects/gkernel.
+ WoL is provided through the Ethtool* utility. Ethtool is included with Red
+ Hat* 8.0. For other Linux distributions, download and install Ethtool from
+ the following website: http://sourceforge.net/projects/gkernel.
- For instructions on enabling WoL with Ethtool, refer to the Ethtool man
- page.
+ For instructions on enabling WoL with Ethtool, refer to the Ethtool man page.
WoL will be enabled on the system during the next shut down or reboot. For
- this driver version, in order to enable WoL, the e100 driver must be
+ this driver version, in order to enable WoL, the e100 driver must be
loaded when shutting down or rebooting the system.
+
NAPI
----
@@ -144,6 +157,25 @@ Additional Configurations
See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
+ Multiple Interfaces on Same Ethernet Broadcast Network
+ ------------------------------------------------------
+
+ Due to the default ARP behavior on Linux, it is not possible to have
+ one system on two IP networks in the same Ethernet broadcast domain
+ (non-partitioned switch) behave as expected. All Ethernet interfaces
+ will respond to IP traffic for any IP address assigned to the system.
+ This results in unbalanced receive traffic.
+
+ If you have multiple interfaces in a server, either turn on ARP
+ filtering by
+
+ (1) entering: echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+ (this only works if your kernel's version is higher than 2.4.5), or
+
+ (2) installing the interfaces in separate broadcast domains (either
+ in different switches or in a switch partitioned to VLANs).
+
+
Support
=======
@@ -151,20 +183,24 @@ For general information, go to the Intel support website at:
http://support.intel.com
+ or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related to
-the issue to linux.nics@intel.com.
+kernel with a supported adapter, email the specific information related to the
+issue to e1000-devel@lists.sourceforge.net.
License
=======
-This software program is released under the terms of a license agreement
-between you ('Licensee') and Intel. Do not use or load this software or any
-associated materials (collectively, the 'Software') until you have carefully
-read the full terms and conditions of the LICENSE located in this software
-package. By loading or using the Software, you agree to the terms of this
-Agreement. If you do not agree with the terms of this Agreement, do not
-install or use the Software.
+This software program is released under the terms of a license agreement
+between you ('Licensee') and Intel. Do not use or load this software or any
+associated materials (collectively, the 'Software') until you have carefully
+read the full terms and conditions of the file COPYING located in this software
+package. By loading or using the Software, you agree to the terms of this
+Agreement. If you do not agree with the terms of this Agreement, do not install
+or use the Software.
* Other names and brands may be claimed as the property of others.
diff --git a/Documentation/networking/e1000.txt b/Documentation/networking/e1000.txt
index 2ebd4058d46..71fe15af356 100644
--- a/Documentation/networking/e1000.txt
+++ b/Documentation/networking/e1000.txt
@@ -1,7 +1,7 @@
Linux* Base Driver for the Intel(R) PRO/1000 Family of Adapters
===============================================================
-November 17, 2004
+November 15, 2005
Contents
@@ -20,254 +20,316 @@ In This Release
===============
This file describes the Linux* Base Driver for the Intel(R) PRO/1000 Family
-of Adapters, version 5.x.x.
+of Adapters. This driver includes support for Itanium(R)2-based systems.
-For questions related to hardware requirements, refer to the documentation
-supplied with your Intel PRO/1000 adapter. All hardware requirements listed
+For questions related to hardware requirements, refer to the documentation
+supplied with your Intel PRO/1000 adapter. All hardware requirements listed
apply to use with Linux.
-Native VLANs are now available with supported kernels.
+The following features are now available in supported kernels:
+ - Native VLANs
+ - Channel Bonding (teaming)
+ - SNMP
+
+Channel Bonding documentation can be found in the Linux kernel source:
+/Documentation/networking/bonding.txt
+
+The driver information previously displayed in the /proc filesystem is not
+supported in this release. Alternatively, you can use ethtool (version 1.6
+or later), lspci, and ifconfig to obtain the same information.
+
+Instructions on updating ethtool can be found in the section "Additional
+Configurations" later in this document.
+
Identifying Your Adapter
========================
-For more information on how to identify your adapter, go to the Adapter &
+For more information on how to identify your adapter, go to the Adapter &
Driver ID Guide at:
http://support.intel.com/support/network/adapter/pro100/21397.htm
-For the latest Intel network drivers for Linux, refer to the following
-website. In the search field, enter your adapter name or type, or use the
+For the latest Intel network drivers for Linux, refer to the following
+website. In the search field, enter your adapter name or type, or use the
networking link on the left to search for your adapter:
http://downloadfinder.intel.com/scripts-df/support_intel.asp
-Command Line Parameters
-=======================
-If the driver is built as a module, the following optional parameters are
-used by entering them on the command line with the modprobe or insmod command
-using this syntax:
+Command Line Parameters
+=======================
+
+If the driver is built as a module, the following optional parameters
+are used by entering them on the command line with the modprobe or insmod
+command using this syntax:
modprobe e1000 [<option>=<VAL1>,<VAL2>,...]
- insmod e1000 [<option>=<VAL1>,<VAL2>,...]
+ insmod e1000 [<option>=<VAL1>,<VAL2>,...]
For example, with two PRO/1000 PCI adapters, entering:
insmod e1000 TxDescriptors=80,128
-loads the e1000 driver with 80 TX descriptors for the first adapter and 128 TX
-descriptors for the second adapter.
+loads the e1000 driver with 80 TX descriptors for the first adapter and 128
+TX descriptors for the second adapter.
The default value for each parameter is generally the recommended setting,
-unless otherwise noted. Also, if the driver is statically built into the
-kernel, the driver is loaded with the default values for all the parameters.
-Ethtool can be used to change some of the parameters at runtime.
+unless otherwise noted.
+
+NOTES: For more information about the AutoNeg, Duplex, and Speed
+ parameters, see the "Speed and Duplex Configuration" section in
+ this document.
- NOTES: For more information about the AutoNeg, Duplex, and Speed
- parameters, see the "Speed and Duplex Configuration" section in
- this document.
+ For more information about the InterruptThrottleRate,
+ RxIntDelay, TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay
+ parameters, see the application note at:
+ http://www.intel.com/design/network/applnots/ap450.htm
- For more information about the InterruptThrottleRate, RxIntDelay,
- TxIntDelay, RxAbsIntDelay, and TxAbsIntDelay parameters, see the
- application note at:
- http://www.intel.com/design/network/applnots/ap450.htm
+ A descriptor describes a data buffer and attributes related to
+ the data buffer. This information is accessed by the hardware.
- A descriptor describes a data buffer and attributes related to the
- data buffer. This information is accessed by the hardware.
-AutoNeg (adapters using copper connections only)
-Valid Range: 0x01-0x0F, 0x20-0x2F
+AutoNeg
+-------
+(Supported only on adapters with copper connections)
+Valid Range: 0x01-0x0F, 0x20-0x2F
Default Value: 0x2F
- This parameter is a bit mask that specifies which speed and duplex
- settings the board advertises. When this parameter is used, the Speed and
- Duplex parameters must not be specified.
- NOTE: Refer to the Speed and Duplex section of this readme for more
- information on the AutoNeg parameter.
-
-Duplex (adapters using copper connections only)
-Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full)
+
+This parameter is a bit mask that specifies which speed and duplex
+settings the board advertises. When this parameter is used, the Speed
+and Duplex parameters must not be specified.
+
+NOTE: Refer to the Speed and Duplex section of this readme for more
+ information on the AutoNeg parameter.
+
+
+Duplex
+------
+(Supported only on adapters with copper connections)
+Valid Range: 0-2 (0=auto-negotiate, 1=half, 2=full)
Default Value: 0
- Defines the direction in which data is allowed to flow. Can be either one
- or two-directional. If both Duplex and the link partner are set to auto-
- negotiate, the board auto-detects the correct duplex. If the link partner
- is forced (either full or half), Duplex defaults to half-duplex.
+
+Defines the direction in which data is allowed to flow. Can be either
+one or two-directional. If both Duplex and the link partner are set to
+auto-negotiate, the board auto-detects the correct duplex. If the link
+partner is forced (either full or half), Duplex defaults to half-duplex.
+
FlowControl
-Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
-Default: Read flow control settings from the EEPROM
- This parameter controls the automatic generation(Tx) and response(Rx) to
- Ethernet PAUSE frames.
+----------
+Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
+Default Value: Reads flow control settings from the EEPROM
+
+This parameter controls the automatic generation(Tx) and response(Rx)
+to Ethernet PAUSE frames.
+
InterruptThrottleRate
-Valid Range: 100-100000 (0=off, 1=dynamic)
+---------------------
+(not supported on Intel 82542, 82543 or 82544-based adapters)
+Valid Range: 100-100000 (0=off, 1=dynamic)
Default Value: 8000
- This value represents the maximum number of interrupts per second the
- controller generates. InterruptThrottleRate is another setting used in
- interrupt moderation. Dynamic mode uses a heuristic algorithm to adjust
- InterruptThrottleRate based on the current traffic load.
-Un-supported Adapters: InterruptThrottleRate is NOT supported by 82542, 82543
- or 82544-based adapters.
-
- NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and
- RxAbsIntDelay parameters. In other words, minimizing the receive
- and/or transmit absolute delays does not force the controller to
- generate more interrupts than what the Interrupt Throttle Rate
- allows.
- CAUTION: If you are using the Intel PRO/1000 CT Network Connection
- (controller 82547), setting InterruptThrottleRate to a value
- greater than 75,000, may hang (stop transmitting) adapters under
- certain network conditions. If this occurs a NETDEV WATCHDOG
- message is logged in the system event log. In addition, the
- controller is automatically reset, restoring the network
- connection. To eliminate the potential for the hang, ensure
- that InterruptThrottleRate is set no greater than 75,000 and is
- not set to 0.
- NOTE: When e1000 is loaded with default settings and multiple adapters are
- in use simultaneously, the CPU utilization may increase non-linearly.
- In order to limit the CPU utilization without impacting the overall
- throughput, we recommend that you load the driver as follows:
-
- insmod e1000.o InterruptThrottleRate=3000,3000,3000
-
- This sets the InterruptThrottleRate to 3000 interrupts/sec for the
- first, second, and third instances of the driver. The range of 2000 to
- 3000 interrupts per second works on a majority of systems and is a
- good starting point, but the optimal value will be platform-specific.
- If CPU utilization is not a concern, use RX_POLLING (NAPI) and default
- driver settings.
+
+This value represents the maximum number of interrupts per second the
+controller generates. InterruptThrottleRate is another setting used in
+interrupt moderation. Dynamic mode uses a heuristic algorithm to adjust
+InterruptThrottleRate based on the current traffic load.
+
+NOTE: InterruptThrottleRate takes precedence over the TxAbsIntDelay and
+ RxAbsIntDelay parameters. In other words, minimizing the receive
+ and/or transmit absolute delays does not force the controller to
+ generate more interrupts than what the Interrupt Throttle Rate
+ allows.
+
+CAUTION: If you are using the Intel PRO/1000 CT Network Connection
+ (controller 82547), setting InterruptThrottleRate to a value
+ greater than 75,000, may hang (stop transmitting) adapters
+ under certain network conditions. If this occurs a NETDEV
+ WATCHDOG message is logged in the system event log. In
+ addition, the controller is automatically reset, restoring
+ the network connection. To eliminate the potential for the
+ hang, ensure that InterruptThrottleRate is set no greater
+ than 75,000 and is not set to 0.
+
+NOTE: When e1000 is loaded with default settings and multiple adapters
+ are in use simultaneously, the CPU utilization may increase non-
+ linearly. In order to limit the CPU utilization without impacting
+ the overall throughput, we recommend that you load the driver as
+ follows:
+
+ insmod e1000.o InterruptThrottleRate=3000,3000,3000
+
+ This sets the InterruptThrottleRate to 3000 interrupts/sec for
+ the first, second, and third instances of the driver. The range
+ of 2000 to 3000 interrupts per second works on a majority of
+ systems and is a good starting point, but the optimal value will
+ be platform-specific. If CPU utilization is not a concern, use
+ RX_POLLING (NAPI) and default driver settings.
+
RxDescriptors
-Valid Range: 80-256 for 82542 and 82543-based adapters
- 80-4096 for all other supported adapters
+-------------
+Valid Range: 80-256 for 82542 and 82543-based adapters
+ 80-4096 for all other supported adapters
Default Value: 256
- This value is the number of receive descriptors allocated by the driver.
- Increasing this value allows the driver to buffer more incoming packets.
- Each descriptor is 16 bytes. A receive buffer is allocated for each
- descriptor and can either be 2048 or 4096 bytes long, depending on the MTU
- setting. An incoming packet can span one or more receive descriptors.
- The maximum MTU size is 16110.
+This value specifies the number of receive descriptors allocated by the
+driver. Increasing this value allows the driver to buffer more incoming
+packets. Each descriptor is 16 bytes. A receive buffer is also
+allocated for each descriptor and is 2048 bytes.
- NOTE: MTU designates the frame size. It only needs to be set for Jumbo
- Frames.
- NOTE: Depending on the available system resources, the request for a
- higher number of receive descriptors may be denied. In this case,
- use a lower number.
RxIntDelay
-Valid Range: 0-65535 (0=off)
+----------
+Valid Range: 0-65535 (0=off)
Default Value: 0
- This value delays the generation of receive interrupts in units of 1.024
- microseconds. Receive interrupt reduction can improve CPU efficiency if
- properly tuned for specific network traffic. Increasing this value adds
- extra latency to frame reception and can end up decreasing the throughput
- of TCP traffic. If the system is reporting dropped receives, this value
- may be set too high, causing the driver to run out of available receive
- descriptors.
-
- CAUTION: When setting RxIntDelay to a value other than 0, adapters may
- hang (stop transmitting) under certain network conditions. If
- this occurs a NETDEV WATCHDOG message is logged in the system
- event log. In addition, the controller is automatically reset,
- restoring the network connection. To eliminate the potential for
- the hang ensure that RxIntDelay is set to 0.
-
-RxAbsIntDelay (82540, 82545 and later adapters only)
-Valid Range: 0-65535 (0=off)
+
+This value delays the generation of receive interrupts in units of 1.024
+microseconds. Receive interrupt reduction can improve CPU efficiency if
+properly tuned for specific network traffic. Increasing this value adds
+extra latency to frame reception and can end up decreasing the throughput
+of TCP traffic. If the system is reporting dropped receives, this value
+may be set too high, causing the driver to run out of available receive
+descriptors.
+
+CAUTION: When setting RxIntDelay to a value other than 0, adapters may
+ hang (stop transmitting) under certain network conditions. If
+ this occurs a NETDEV WATCHDOG message is logged in the system
+ event log. In addition, the controller is automatically reset,
+ restoring the network connection. To eliminate the potential
+ for the hang ensure that RxIntDelay is set to 0.
+
+
+RxAbsIntDelay
+-------------
+(This parameter is supported only on 82540, 82545 and later adapters.)
+Valid Range: 0-65535 (0=off)
Default Value: 128
- This value, in units of 1.024 microseconds, limits the delay in which a
- receive interrupt is generated. Useful only if RxIntDelay is non-zero,
- this value ensures that an interrupt is generated after the initial
- packet is received within the set amount of time. Proper tuning,
- along with RxIntDelay, may improve traffic throughput in specific network
- conditions.
-
-Speed (adapters using copper connections only)
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+receive interrupt is generated. Useful only if RxIntDelay is non-zero,
+this value ensures that an interrupt is generated after the initial
+packet is received within the set amount of time. Proper tuning,
+along with RxIntDelay, may improve traffic throughput in specific network
+conditions.
+
+
+Speed
+-----
+(This parameter is supported only on adapters with copper connections.)
Valid Settings: 0, 10, 100, 1000
-Default Value: 0 (auto-negotiate at all supported speeds)
- Speed forces the line speed to the specified value in megabits per second
- (Mbps). If this parameter is not specified or is set to 0 and the link
- partner is set to auto-negotiate, the board will auto-detect the correct
- speed. Duplex should also be set when Speed is set to either 10 or 100.
+Default Value: 0 (auto-negotiate at all supported speeds)
+
+Speed forces the line speed to the specified value in megabits per second
+(Mbps). If this parameter is not specified or is set to 0 and the link
+partner is set to auto-negotiate, the board will auto-detect the correct
+speed. Duplex should also be set when Speed is set to either 10 or 100.
+
TxDescriptors
-Valid Range: 80-256 for 82542 and 82543-based adapters
- 80-4096 for all other supported adapters
+-------------
+Valid Range: 80-256 for 82542 and 82543-based adapters
+ 80-4096 for all other supported adapters
Default Value: 256
- This value is the number of transmit descriptors allocated by the driver.
- Increasing this value allows the driver to queue more transmits. Each
- descriptor is 16 bytes.
- NOTE: Depending on the available system resources, the request for a
- higher number of transmit descriptors may be denied. In this case,
- use a lower number.
+This value is the number of transmit descriptors allocated by the driver.
+Increasing this value allows the driver to queue more transmits. Each
+descriptor is 16 bytes.
+
+NOTE: Depending on the available system resources, the request for a
+ higher number of transmit descriptors may be denied. In this case,
+ use a lower number.
+
TxIntDelay
-Valid Range: 0-65535 (0=off)
+----------
+Valid Range: 0-65535 (0=off)
Default Value: 64
- This value delays the generation of transmit interrupts in units of
- 1.024 microseconds. Transmit interrupt reduction can improve CPU
- efficiency if properly tuned for specific network traffic. If the
- system is reporting dropped transmits, this value may be set too high
- causing the driver to run out of available transmit descriptors.
-
-TxAbsIntDelay (82540, 82545 and later adapters only)
-Valid Range: 0-65535 (0=off)
+
+This value delays the generation of transmit interrupts in units of
+1.024 microseconds. Transmit interrupt reduction can improve CPU
+efficiency if properly tuned for specific network traffic. If the
+system is reporting dropped transmits, this value may be set too high
+causing the driver to run out of available transmit descriptors.
+
+
+TxAbsIntDelay
+-------------
+(This parameter is supported only on 82540, 82545 and later adapters.)
+Valid Range: 0-65535 (0=off)
Default Value: 64
- This value, in units of 1.024 microseconds, limits the delay in which a
- transmit interrupt is generated. Useful only if TxIntDelay is non-zero,
- this value ensures that an interrupt is generated after the initial
- packet is sent on the wire within the set amount of time. Proper tuning,
- along with TxIntDelay, may improve traffic throughput in specific
- network conditions.
-
-XsumRX (not available on the 82542-based adapter)
-Valid Range: 0-1
+
+This value, in units of 1.024 microseconds, limits the delay in which a
+transmit interrupt is generated. Useful only if TxIntDelay is non-zero,
+this value ensures that an interrupt is generated after the initial
+packet is sent on the wire within the set amount of time. Proper tuning,
+along with TxIntDelay, may improve traffic throughput in specific
+network conditions.
+
+XsumRX
+------
+(This parameter is NOT supported on the 82542-based adapter.)
+Valid Range: 0-1
Default Value: 1
- A value of '1' indicates that the driver should enable IP checksum
- offload for received packets (both UDP and TCP) to the adapter hardware.
+
+A value of '1' indicates that the driver should enable IP checksum
+offload for received packets (both UDP and TCP) to the adapter hardware.
+
Speed and Duplex Configuration
==============================
-Three keywords are used to control the speed and duplex configuration. These
-keywords are Speed, Duplex, and AutoNeg.
+Three keywords are used to control the speed and duplex configuration.
+These keywords are Speed, Duplex, and AutoNeg.
-If the board uses a fiber interface, these keywords are ignored, and the
+If the board uses a fiber interface, these keywords are ignored, and the
fiber interface board only links at 1000 Mbps full-duplex.
For copper-based boards, the keywords interact as follows:
- The default operation is auto-negotiate. The board advertises all supported
- speed and duplex combinations, and it links at the highest common speed and
- duplex mode IF the link partner is set to auto-negotiate.
+ The default operation is auto-negotiate. The board advertises all
+ supported speed and duplex combinations, and it links at the highest
+ common speed and duplex mode IF the link partner is set to auto-negotiate.
- If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps is
- advertised (The 1000BaseT spec requires auto-negotiation.)
+ If Speed = 1000, limited auto-negotiation is enabled and only 1000 Mbps
+ is advertised (The 1000BaseT spec requires auto-negotiation.)
If Speed = 10 or 100, then both Speed and Duplex should be set. Auto-
- negotiation is disabled, and the AutoNeg parameter is ignored. Partner SHOULD
- also be forced.
+ negotiation is disabled, and the AutoNeg parameter is ignored. Partner
+ SHOULD also be forced.
+
+The AutoNeg parameter is used when more control is required over the
+auto-negotiation process. It should be used when you wish to control which
+speed and duplex combinations are advertised during the auto-negotiation
+process.
+
+The parameter may be specified as either a decimal or hexadecimal value as
+determined by the bitmap below.
-The AutoNeg parameter is used when more control is required over the auto-
-negotiation process. When this parameter is used, Speed and Duplex parameters
-must not be specified. The following table describes supported values for the
-AutoNeg parameter:
+Bit position   7     6     5     4     3     2     1     0
+Decimal Value  128   64    32    16    8     4     2     1
+Hex value      80    40    20    10    8     4     2     1
+Speed (Mbps)   N/A   N/A   1000  N/A   100   100   10    10
+Duplex                     Full        Full  Half  Full  Half
-Speed (Mbps) 1000 100 100 10 10
-Duplex Full Full Half Full Half
-Value (in base 16) 0x20 0x08 0x04 0x02 0x01
+Some examples of using AutoNeg:
-Example: insmod e1000 AutoNeg=0x03, loads e1000 and specifies (10 full duplex,
-10 half duplex) for negotiation with the peer.
+ modprobe e1000 AutoNeg=0x01 (Restricts autonegotiation to 10 Half)
+ modprobe e1000 AutoNeg=1 (Same as above)
+ modprobe e1000 AutoNeg=0x02 (Restricts autonegotiation to 10 Full)
+ modprobe e1000 AutoNeg=0x03 (Restricts autonegotiation to 10 Half or 10 Full)
+ modprobe e1000 AutoNeg=0x04 (Restricts autonegotiation to 100 Half)
+ modprobe e1000 AutoNeg=0x05 (Restricts autonegotiation to 10 Half or 100
+ Half)
+ modprobe e1000 AutoNeg=0x020 (Restricts autonegotiation to 1000 Full)
+ modprobe e1000 AutoNeg=32 (Same as above)
-Note that setting AutoNeg does not guarantee that the board will link at the
-highest specified speed or duplex mode, but the board will link at the
-highest possible speed/duplex of the link partner IF the link partner is also
-set to auto-negotiate. If the link partner is forced speed/duplex, the
-adapter MUST be forced to the same speed/duplex.
+Note that when this parameter is used, Speed and Duplex must not be specified.
+
+If the link partner is forced to a specific speed and duplex, then this
+parameter should not be used. Instead, use the Speed and Duplex parameters
+previously mentioned to force the adapter to the same speed and duplex.
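
[Worked example, derived from the bitmap table above rather than from an
option list in the original text: because AutoNeg is a bit mask, values can
be OR'd together. 0x20 (1000 Full) combined with 0x08 (100 Full) gives
0x28, which falls inside the valid 0x20-0x2F range:

    modprobe e1000 AutoNeg=0x28 (Restricts autonegotiation to 1000 Full
                                 or 100 Full)
]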
Additional Configurations
@@ -276,19 +338,19 @@ Additional Configurations
Configuring the Driver on Different Distributions
-------------------------------------------------
- Configuring a network driver to load properly when the system is started is
- distribution dependent. Typically, the configuration process involves adding
- an alias line to /etc/modules.conf as well as editing other system startup
- scripts and/or configuration files. Many popular Linux distributions ship
- with tools to make these changes for you. To learn the proper way to
- configure a network device for your system, refer to your distribution
- documentation. If during this process you are asked for the driver or module
- name, the name for the Linux Base Driver for the Intel PRO/1000 Family of
- Adapters is e1000.
+ Configuring a network driver to load properly when the system is started
+ is distribution dependent. Typically, the configuration process involves
+ adding an alias line to /etc/modules.conf or /etc/modprobe.conf as well
+ as editing other system startup scripts and/or configuration files. Many
+ popular Linux distributions ship with tools to make these changes for you.
+ To learn the proper way to configure a network device for your system,
+ refer to your distribution documentation. If during this process you are
+ asked for the driver or module name, the name for the Linux Base Driver
+ for the Intel PRO/1000 Family of Adapters is e1000.
- As an example, if you install the e1000 driver for two PRO/1000 adapters
- (eth0 and eth1) and set the speed and duplex to 10full and 100half, add the
- following to modules.conf:
+ As an example, if you install the e1000 driver for two PRO/1000 adapters
+ (eth0 and eth1) and set the speed and duplex to 10full and 100half, add
+   the following to modules.conf or modprobe.conf:
alias eth0 e1000
alias eth1 e1000
@@ -297,9 +359,9 @@ Additional Configurations
Viewing Link Messages
---------------------
- Link messages will not be displayed to the console if the distribution is
- restricting system messages. In order to see network driver link messages on
- your console, set dmesg to eight by entering the following:
+ Link messages will not be displayed to the console if the distribution is
+ restricting system messages. In order to see network driver link messages
+ on your console, set dmesg to eight by entering the following:
dmesg -n 8
@@ -308,22 +370,42 @@ Additional Configurations
Jumbo Frames
------------
- The driver supports Jumbo Frames for all adapters except 82542-based
- adapters. Jumbo Frames support is enabled by changing the MTU to a value
- larger than the default of 1500. Use the ifconfig command to increase the
- MTU size. For example:
+ The driver supports Jumbo Frames for all adapters except 82542 and
+ 82573-based adapters. Jumbo Frames support is enabled by changing the
+ MTU to a value larger than the default of 1500. Use the ifconfig command
+ to increase the MTU size. For example:
+
+ ifconfig eth<x> mtu 9000 up
+
+ This setting is not saved across reboots. It can be made permanent if
+ you add:
+
+ MTU=9000
- ifconfig ethx mtu 9000 up
+ to the file /etc/sysconfig/network-scripts/ifcfg-eth<x>. This example
+ applies to the Red Hat distributions; other distributions may store this
+ setting in a different location.
- The maximum MTU setting for Jumbo Frames is 16110. This value coincides
- with the maximum Jumbo Frames size of 16128.
+ Notes:
- NOTE: Jumbo Frames are supported at 1000 Mbps only. Using Jumbo Frames at
- 10 or 100 Mbps may result in poor performance or loss of link.
+ - To enable Jumbo Frames, increase the MTU size on the interface beyond
+ 1500.
+ - The maximum MTU setting for Jumbo Frames is 16110. This value coincides
+ with the maximum Jumbo Frames size of 16128.
+ - Using Jumbo Frames at 10 or 100 Mbps may result in poor performance or
+ loss of link.
+ - Some Intel gigabit adapters that support Jumbo Frames have a frame size
+ limit of 9238 bytes, with a corresponding MTU size limit of 9216 bytes.
+ The adapters with this limitation are based on the Intel 82571EB and
+ 82572EI controllers, which correspond to these product names:
+ Intel® PRO/1000 PT Dual Port Server Adapter
+ Intel® PRO/1000 PF Dual Port Server Adapter
+ Intel® PRO/1000 PT Server Adapter
+ Intel® PRO/1000 PT Desktop Adapter
+ Intel® PRO/1000 PF Server Adapter
+ - The Intel PRO/1000 PM Network Connection does not support jumbo frames.
- NOTE: MTU designates the frame size. To enable Jumbo Frames, increase the
- MTU size on the interface beyond 1500.
Ethtool
-------
@@ -333,32 +415,41 @@ Additional Configurations
version 1.6 or later is required for this functionality.
The latest release of ethtool can be found from
- http://sf.net/projects/gkernel.
+ http://sourceforge.net/projects/gkernel.
- NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support
- for a more complete ethtool feature set can be enabled by upgrading
- ethtool to ethtool-1.8.1.
+ NOTE: Ethtool 1.6 only supports a limited set of ethtool options. Support
+ for a more complete ethtool feature set can be enabled by upgrading
+ ethtool to ethtool-1.8.1.
Enabling Wake on LAN* (WoL)
---------------------------
WoL is configured through the Ethtool* utility. Ethtool is included with
- all versions of Red Hat after Red Hat 7.2. For other Linux distributions,
- download and install Ethtool from the following website:
+ all versions of Red Hat after Red Hat 7.2. For other Linux distributions,
+ download and install Ethtool from the following website:
http://sourceforge.net/projects/gkernel.
- For instructions on enabling WoL with Ethtool, refer to the website listed
+ For instructions on enabling WoL with Ethtool, refer to the website listed
above.
- WoL will be enabled on the system during the next shut down or reboot.
- For this driver version, in order to enable WoL, the e1000 driver must be
+ WoL will be enabled on the system during the next shut down or reboot.
+ For this driver version, in order to enable WoL, the e1000 driver must be
loaded when shutting down or rebooting the system.
NAPI
----
NAPI (Rx polling mode) is supported in the e1000 driver. NAPI is enabled
- or disabled based on the configuration of the kernel.
+ or disabled based on the configuration of the kernel. To override
+ the default, use the following compile-time flags.
+
+ To enable NAPI, compile the driver module, passing in a configuration option:
+
+ make CFLAGS_EXTRA=-DE1000_NAPI install
+
+ To disable NAPI, compile the driver module, passing in a configuration option:
+
+ make CFLAGS_EXTRA=-DE1000_NO_NAPI install
See www.cyberus.ca/~hadi/usenix-paper.tgz for more information on NAPI.
@@ -369,10 +460,85 @@ Known Issues
Jumbo Frames System Requirement
-------------------------------
- Memory allocation failures have been observed on Linux systems with 64 MB
- of RAM or less that are running Jumbo Frames. If you are using Jumbo Frames,
- your system may require more than the advertised minimum requirement of 64 MB
- of system memory.
+ Memory allocation failures have been observed on Linux systems with 64 MB
+ of RAM or less that are running Jumbo Frames. If you are using Jumbo
+ Frames, your system may require more than the advertised minimum
+ requirement of 64 MB of system memory.
+
+ Performance Degradation with Jumbo Frames
+ -----------------------------------------
+
+ Degradation in throughput performance may be observed in some Jumbo frames
+ environments. If this is observed, increasing the application's socket
+ buffer size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values
+ may help. See the specific application manual and
+   /usr/src/linux*/Documentation/networking/ip-sysctl.txt for more details.
+
+ Jumbo frames on Foundry BigIron 8000 switch
+ -------------------------------------------
+ There is a known issue using Jumbo frames when connected to a Foundry
+ BigIron 8000 switch. This is a 3rd party limitation. If you experience
+ loss of packets, lower the MTU size.
+
+ Multiple Interfaces on Same Ethernet Broadcast Network
+ ------------------------------------------------------
+
+ Due to the default ARP behavior on Linux, it is not possible to have
+ one system on two IP networks in the same Ethernet broadcast domain
+ (non-partitioned switch) behave as expected. All Ethernet interfaces
+ will respond to IP traffic for any IP address assigned to the system.
+ This results in unbalanced receive traffic.
+
+ If you have multiple interfaces in a server, either turn on ARP
+ filtering by entering:
+
+ echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
+ (this only works if your kernel's version is higher than 2.4.5),
+
+ NOTE: This setting is not saved across reboots. The configuration
+ change can be made permanent by adding the line:
+ net.ipv4.conf.all.arp_filter = 1
+ to the file /etc/sysctl.conf
+
+ or,
+
+ install the interfaces in separate broadcast domains (either in
+ different switches or in a switch partitioned to VLANs).
+
+ 82541/82547 can't link or are slow to link with some link partners
+ -----------------------------------------------------------------
+
+ There is a known compatibility issue with 82541/82547 and some
+ low-end switches where the link will not be established, or will
+ be slow to establish. In particular, these switches are known to
+ be incompatible with 82541/82547:
+
+ Planex FXG-08TE
+ I-O Data ETG-SH8
+
+  To work around this issue, the driver can be compiled with an override
+ of the PHY's master/slave setting. Forcing master or forcing slave
+ mode will improve time-to-link.
+
+ # make EXTRA_CFLAGS=-DE1000_MASTER_SLAVE=<n>
+
+ Where <n> is:
+
+ 0 = Hardware default
+ 1 = Master mode
+ 2 = Slave mode
+ 3 = Auto master/slave
+
+ Disable rx flow control with ethtool
+ ------------------------------------
+
+ In order to disable receive flow control using ethtool, you must turn
+ off auto-negotiation on the same command line.
+
+ For example:
+
+ ethtool -A eth? autoneg off rx off
Support
@@ -382,20 +548,24 @@ For general information, go to the Intel support website at:
http://support.intel.com
+ or the Intel Wired Networking project hosted by Sourceforge at:
+
+ http://sourceforge.net/projects/e1000
+
If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related to
-the issue to linux.nics@intel.com.
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sourceforge.net.
License
=======
-This software program is released under the terms of a license agreement
-between you ('Licensee') and Intel. Do not use or load this software or any
-associated materials (collectively, the 'Software') until you have carefully
-read the full terms and conditions of the LICENSE located in this software
-package. By loading or using the Software, you agree to the terms of this
-Agreement. If you do not agree with the terms of this Agreement, do not
+This software program is released under the terms of a license agreement
+between you ('Licensee') and Intel. Do not use or load this software or any
+associated materials (collectively, the 'Software') until you have carefully
+read the full terms and conditions of the file COPYING located in this software
+package. By loading or using the Software, you agree to the terms of this
+Agreement. If you do not agree with the terms of this Agreement, do not
install or use the Software.
* Other names and brands may be claimed as the property of others.
diff --git a/MAINTAINERS b/MAINTAINERS
index b0dc75a5e74..dd1351dc32b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1349,10 +1349,10 @@ S: Maintained
INTEL PRO/100 ETHERNET SUPPORT
P: John Ronciak
M: john.ronciak@intel.com
-P: Ganesh Venkatesan
-M: ganesh.venkatesan@intel.com
P: Jesse Brandeburg
M: jesse.brandeburg@intel.com
+P: Jeff Kirsher
+M: jeffrey.t.kirsher@intel.com
W: http://sourceforge.net/projects/e1000/
S: Supported
@@ -1361,18 +1361,22 @@ P: Jeb Cramer
M: cramerj@intel.com
P: John Ronciak
M: john.ronciak@intel.com
-P: Ganesh Venkatesan
-M: ganesh.venkatesan@intel.com
+P: Jesse Brandeburg
+M: jesse.brandeburg@intel.com
+P: Jeff Kirsher
+M: jeffrey.t.kirsher@intel.com
W: http://sourceforge.net/projects/e1000/
S: Supported
INTEL PRO/10GbE SUPPORT
+P: Jeff Kirsher
+M: jeffrey.t.kirsher@intel.com
P: Ayyappan Veeraiyan
M: ayyappan.veeraiyan@intel.com
-P: Ganesh Venkatesan
-M: ganesh.venkatesan@intel.com
P: John Ronciak
M: john.ronciak@intel.com
+P: Jesse Brandeburg
+M: jesse.brandeburg@intel.com
W: http://sourceforge.net/projects/e1000/
S: Supported
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 486d7945583..544ac5dc09e 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -357,7 +357,7 @@ free_reserved_mem(void *start, void *end)
void *__start = start;
for (; __start < end; __start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(__start));
- set_page_count(virt_to_page(__start), 1);
+ init_page_count(virt_to_page(__start));
free_page((long)__start);
totalram_pages++;
}
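
[Note on the recurring conversion above: the old idiom cleared PG_reserved
and then called set_page_count(page, 1) before free_page(). The new
init_page_count() helper expresses the same one-time refcount
initialization. A minimal sketch of the helper, assuming the definition
this series adds to include/linux/mm.h (paraphrased, not a verbatim copy):

    /* Sketch: give a page an initial refcount of 1 so that the
     * subsequent free_page() hands it back to the page allocator. */
    static inline void init_page_count(struct page *page)
    {
            atomic_set(&page->_count, 1);
    }

The same substitution repeats in the arm, arm26, cris, frv, h8300, and
other arch init paths below.]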
diff --git a/arch/arm/mm/consistent.c b/arch/arm/mm/consistent.c
index c2ee18d2075..8a1bfcd5008 100644
--- a/arch/arm/mm/consistent.c
+++ b/arch/arm/mm/consistent.c
@@ -223,6 +223,8 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
pte = consistent_pte[idx] + off;
c->vm_pages = page;
+ split_page(page, order);
+
/*
* Set the "dma handle"
*/
@@ -231,7 +233,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
do {
BUG_ON(!pte_none(*pte));
- set_page_count(page, 1);
/*
* x86 does not mark the pages reserved...
*/
@@ -250,7 +251,6 @@ __dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp,
* Free the otherwise unused pages.
*/
while (page < end) {
- set_page_count(page, 1);
__free_page(page);
page++;
}
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 8b276ee38ac..b0321e943b7 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -531,7 +531,7 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s)
for (; addr < end; addr += PAGE_SIZE) {
struct page *page = virt_to_page(addr);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(addr);
totalram_pages++;
}
diff --git a/arch/arm26/mm/init.c b/arch/arm26/mm/init.c
index 1f09a9d0fb8..e3ecaa45374 100644
--- a/arch/arm26/mm/init.c
+++ b/arch/arm26/mm/init.c
@@ -324,7 +324,7 @@ static inline void free_area(unsigned long addr, unsigned long end, char *s)
for (; addr < end; addr += PAGE_SIZE) {
struct page *page = virt_to_page(addr);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(addr);
totalram_pages++;
}
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index 31a0018b525..b7842ff213a 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -216,7 +216,7 @@ free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
diff --git a/arch/frv/kernel/frv_ksyms.c b/arch/frv/kernel/frv_ksyms.c
index 0f1c6cbc4f5..aa6b7d0a210 100644
--- a/arch/frv/kernel/frv_ksyms.c
+++ b/arch/frv/kernel/frv_ksyms.c
@@ -27,6 +27,7 @@ EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strchr);
diff --git a/arch/frv/mm/dma-alloc.c b/arch/frv/mm/dma-alloc.c
index 342823aad75..636b2f8b5d9 100644
--- a/arch/frv/mm/dma-alloc.c
+++ b/arch/frv/mm/dma-alloc.c
@@ -115,9 +115,7 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *dma_handle)
*/
if (order > 0) {
struct page *rpage = virt_to_page(page);
-
- for (i = 1; i < (1 << order); i++)
- set_page_count(rpage + i, 1);
+ split_page(rpage, order);
}
err = 0;
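
[The split_page() calls introduced above (in arch/arm/mm/consistent.c and
here in the frv DMA allocator) replace open-coded loops that gave each page
of a higher-order allocation its own reference count. A minimal sketch of
the helper, assuming the version this series adds to mm/page_alloc.c
(paraphrased; the in-tree copy may differ in detail):

    /* Sketch: split a non-compound order-N allocation into 1 << order
     * pages that can each be freed independently. */
    void split_page(struct page *page, unsigned int order)
    {
            int i;

            BUG_ON(PageCompound(page));
            BUG_ON(!page_count(page));
            for (i = 1; i < (1 << order); i++)
                    set_page_count(page + i, 1);
    }

After the split, a caller can return unused tail pages one at a time, as
the while loop at the end of __dma_alloc() above does.]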
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c
index 765088ea8a5..8899aa1a4f0 100644
--- a/arch/frv/mm/init.c
+++ b/arch/frv/mm/init.c
@@ -169,7 +169,7 @@ void __init mem_init(void)
struct page *page = &mem_map[pfn];
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
}
@@ -210,7 +210,7 @@ void __init free_initmem(void)
/* next to check that the page we free is not a partial page */
for (addr = start; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -230,7 +230,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
int pages = 0;
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
pages++;
diff --git a/arch/h8300/kernel/h8300_ksyms.c b/arch/h8300/kernel/h8300_ksyms.c
index 5cc76efaf7a..69d6ad32d56 100644
--- a/arch/h8300/kernel/h8300_ksyms.c
+++ b/arch/h8300/kernel/h8300_ksyms.c
@@ -25,6 +25,7 @@ extern char h8300_debug_device[];
/* platform dependent support */
EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strchr);
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 1e0929ddc8c..09efc4b1f03 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -196,7 +196,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
int pages = 0;
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
pages++;
@@ -219,7 +219,7 @@ free_initmem()
/* next to check that the page we free is not a partial page */
for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c
index c9cad7ba0d2..aeabb419686 100644
--- a/arch/i386/kernel/efi.c
+++ b/arch/i386/kernel/efi.c
@@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void)
unsigned long cr4;
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0);
- cpu_gdt_descr->address = __va(cpu_gdt_descr->address);
+ cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address);
load_gdt(cpu_gdt_descr);
cr4 = read_cr4();
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 218d725a5a1..d134e9643a5 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -504,27 +504,23 @@ void unlock_ipi_call_lock(void)
spin_unlock_irq(&call_lock);
}
-static struct call_data_struct * call_data;
-
-/*
- * this function sends a 'generic call function' IPI to all other CPUs
- * in the system.
- */
-
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
- int wait)
-/*
- * [SUMMARY] Run a function on all other CPUs.
- * <func> The function to run. This must be fast and non-blocking.
- * <info> An arbitrary pointer to pass to the function.
- * <nonatomic> currently unused.
- * <wait> If true, wait (atomically) until function has completed on other CPUs.
- * [RETURNS] 0 on success, else a negative status code. Does not return until
+static struct call_data_struct *call_data;
+
+/**
+ * smp_call_function(): Run a function on all other CPUs.
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: currently unused.
+ * @wait: If true, wait (atomically) until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code. Does not return until
* remote CPUs are nearly ready to execute <<func>> or have already executed it.
*
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
+int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
+ int wait)
{
struct call_data_struct data;
int cpus;
diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c
index a4a61976ecb..8fdb1fb17a5 100644
--- a/arch/i386/kernel/sys_i386.c
+++ b/arch/i386/kernel/sys_i386.c
@@ -40,14 +40,13 @@ asmlinkage int sys_pipe(unsigned long __user * fildes)
return error;
}
-/* common code for old and new mmaps */
-static inline long do_mmap2(
- unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
+asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
{
int error = -EBADF;
- struct file * file = NULL;
+ struct file *file = NULL;
+ struct mm_struct *mm = current->mm;
flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
if (!(flags & MAP_ANONYMOUS)) {
@@ -56,9 +55,9 @@ static inline long do_mmap2(
goto out;
}
- down_write(&current->mm->mmap_sem);
+ down_write(&mm->mmap_sem);
error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
- up_write(&current->mm->mmap_sem);
+ up_write(&mm->mmap_sem);
if (file)
fput(file);
@@ -66,13 +65,6 @@ out:
return error;
}
-asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
- unsigned long prot, unsigned long flags,
- unsigned long fd, unsigned long pgoff)
-{
- return do_mmap2(addr, len, prot, flags, fd, pgoff);
-}
-
/*
* Perform the select(nd, in, out, ex, tv) and mmap() system
* calls. Linux/i386 didn't use to be able to handle more than
@@ -101,7 +93,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg)
if (a.offset & ~PAGE_MASK)
goto out;
- err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT);
+ err = sys_mmap2(a.addr, a.len, a.prot, a.flags,
+ a.fd, a.offset >> PAGE_SHIFT);
out:
return err;
}
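
Folding do_mmap2() into sys_mmap2() loses nothing: old_mmap() differed only in fetching its arguments from a user-space struct and in taking a byte offset rather than a page offset, and the shift above performs that conversion. A small worked example of the offset arithmetic:

    /* Byte offset vs. page offset with the usual 4 KiB pages: */
    unsigned long byte_off = 1UL << 20;               /* 1 MiB into the file */
    unsigned long pgoff = byte_off >> PAGE_SHIFT;     /* 256 when PAGE_SHIFT == 12 */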
diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c
index be242723c33..17a6fe7166e 100644
--- a/arch/i386/kernel/timers/timer_hpet.c
+++ b/arch/i386/kernel/timers/timer_hpet.c
@@ -46,7 +46,7 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED;
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static unsigned long cyc2ns_scale;
+static unsigned long cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c
index a7f5a2aceba..5e41ee29c8c 100644
--- a/arch/i386/kernel/timers/timer_tsc.c
+++ b/arch/i386/kernel/timers/timer_tsc.c
@@ -74,7 +74,7 @@ late_initcall(start_lost_tick_compensation);
*
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
*/
-static unsigned long cyc2ns_scale;
+static unsigned long cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
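
Tagging cyc2ns_scale as __read_mostly moves it into a section of rarely-written variables, so the hot timer path that reads it does not share a cache line with frequently-modified data; the variable is written once at calibration and only read thereafter. With CYC2NS_SCALE_FACTOR = 10 it acts as a fixed-point multiplier, as in the conversion helpers these files already contain (reproduced here for orientation, unchanged by this patch):

    static inline void set_cyc2ns_scale(unsigned long cpu_khz)
    {
            cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR) / cpu_khz;
    }

    static inline unsigned long long cycles_2_ns(unsigned long long cyc)
    {
            return (cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR;
    }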
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index d524127c9af..a7d89158541 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -48,18 +48,6 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
return (pte_t *) pmd;
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- return 0;
-}
-
#if 0 /* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 2700f01994b..7ba55a6e2db 100644
--- a/arch/i386/mm/init.c
+++ b/arch/i386/mm/init.c
@@ -270,7 +270,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
static void __meminit free_new_highpage(struct page *page)
{
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
@@ -727,7 +727,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
memset((void *)addr, 0xcc, PAGE_SIZE);
free_page(addr);
totalram_pages++;
@@ -766,7 +766,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
index d0cadb33b54..92c3d9f0e73 100644
--- a/arch/i386/mm/pageattr.c
+++ b/arch/i386/mm/pageattr.c
@@ -51,6 +51,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
if (!base)
return NULL;
+ /*
+ * page_private is used to track the number of entries in
+ * the page table page that have non standard attributes.
+ */
+ SetPagePrivate(base);
+ page_private(base) = 0;
+
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
@@ -143,11 +150,12 @@ __change_page_attr(struct page *page, pgprot_t prot)
return -ENOMEM;
set_pmd_pte(kpte,address,mk_pte(split, ref_prot));
kpte_page = split;
- }
- get_page(kpte_page);
+ }
+ page_private(kpte_page)++;
} else if ((pte_val(*kpte) & _PAGE_PSE) == 0) {
set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL));
- __put_page(kpte_page);
+ BUG_ON(page_private(kpte_page) == 0);
+ page_private(kpte_page)--;
} else
BUG();
@@ -157,10 +165,8 @@ __change_page_attr(struct page *page, pgprot_t prot)
* replace it with a largepage.
*/
if (!PageReserved(kpte_page)) {
- /* memleak and potential failed 2M page regeneration */
- BUG_ON(!page_count(kpte_page));
-
- if (cpu_has_pse && (page_count(kpte_page) == 1)) {
+ if (cpu_has_pse && (page_private(kpte_page) == 0)) {
+ ClearPagePrivate(kpte_page);
list_add(&kpte_page->lru, &df_list);
revert_page(kpte_page, address);
}
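
The pageattr.c hunks above replace page-refcount games with an explicit counter kept in page_private(): each PTE in a split kernel page-table page that carries non-standard attributes bumps the counter, restoring a PTE to PAGE_KERNEL drops it, and at zero the 4K table may be folded back into a large page. A hypothetical helper condensing that final decision (names taken from the hunks above; locking and TLB flushing omitted):

    /* Sketch only: when no PTE in the split table is non-standard any
     * more, queue the table for large-page regeneration. */
    static void try_revert(struct page *kpte_page, unsigned long address)
    {
            if (PageReserved(kpte_page))
                    return;
            if (cpu_has_pse && page_private(kpte_page) == 0) {
                    ClearPagePrivate(kpte_page);
                    list_add(&kpte_page->lru, &df_list);
                    revert_page(kpte_page, address);
            }
    }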
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index a85ea9d37f0..ff7ae6b664e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -271,6 +271,25 @@ config SCHED_SMT
Intel IA64 chips with MultiThreading at a cost of slightly increased
overhead in some places. If unsure say N here.
+config PERMIT_BSP_REMOVE
+ bool "Support removal of Bootstrap Processor"
+ depends on HOTPLUG_CPU
+ default n
+ ---help---
+ Say Y here if your platform SAL supports removal of the BSP with
+ HOTPLUG_CPU support.
+
+config FORCE_CPEI_RETARGET
+ bool "Force assumption that CPEI can be re-targetted"
+ depends on PERMIT_BSP_REMOVE
+ default n
+ ---help---
+ Say Y if you need to force the assumption that CPEI can be re-targeted to
+ any cpu in the system. This hint is available via ACPI 3.0 specifications.
+ Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP.
+ This option is useful for enabling this feature on older BIOSes as well.
+ You can also enable this with the boot command line option force_cpei=1.
+
config PREEMPT
bool "Preemptible Kernel"
help
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 125568118b8..766bf495543 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -116,6 +116,8 @@ CONFIG_FORCE_MAX_ZONEORDER=17
CONFIG_SMP=y
CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
+CONFIG_PERMIT_BSP_REMOVE=y
+CONFIG_FORCE_CPEI_RETARGET=y
# CONFIG_SCHED_SMT is not set
# CONFIG_PREEMPT is not set
CONFIG_SELECT_MEMORY_MODEL=y
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index ecd44bdc839..4722ec51c70 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -284,19 +284,24 @@ acpi_parse_plat_int_src(acpi_table_entry_header * header,
return 0;
}
+#ifdef CONFIG_HOTPLUG_CPU
unsigned int can_cpei_retarget(void)
{
extern int cpe_vector;
+ extern unsigned int force_cpei_retarget;
/*
* Only if CPEI is supported and the override flag
* is present; otherwise report that it is re-targetable
* if we are in polling mode.
*/
- if (cpe_vector > 0 && !acpi_cpei_override)
- return 0;
- else
- return 1;
+ if (cpe_vector > 0) {
+ if (acpi_cpei_override || force_cpei_retarget)
+ return 1;
+ else
+ return 0;
+ }
+ return 1;
}
unsigned int is_cpu_cpei_target(unsigned int cpu)
@@ -315,6 +320,7 @@ void set_cpei_target_cpu(unsigned int cpu)
{
acpi_cpei_phys_cpuid = cpu_physical_id(cpu);
}
+#endif
unsigned int get_cpei_target_cpu(void)
{
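
The restructured can_cpei_retarget() reads as a single boolean: CPEI may be re-targeted either when there is no CPEI vector (polling mode) or when an override says so. An equivalent one-liner, shown only to make the truth table explicit (hypothetical name):

    unsigned int can_cpei_retarget_equiv(void)
    {
            return (cpe_vector <= 0) || acpi_cpei_override || force_cpei_retarget;
    }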
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 930fdfca6dd..0e3eda99e54 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1102,9 +1102,6 @@ skip_rbs_switch:
st8 [r2]=r8
st8 [r3]=r10
.work_pending:
- tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
-(p6) br.cond.sptk.few .sigdelayed
- ;;
tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
(p6) br.cond.sptk.few .notify
#ifdef CONFIG_PREEMPT
@@ -1131,17 +1128,6 @@ skip_rbs_switch:
(pLvSys)br.cond.sptk.few .work_pending_syscall_end
br.cond.sptk.many .work_processed_kernel // don't re-check
-// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
-// it could not be delivered. Deliver it now. The signal might be for us and
-// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
-// signal.
-
-.sigdelayed:
- br.call.sptk.many rp=do_sigdelayed
- cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
-(pLvSys)br.cond.sptk.few .work_pending_syscall_end
- br.cond.sptk.many .work_processed_kernel // re-check
-
.work_pending_syscall_end:
adds r2=PT(R8)+16,r12
adds r3=PT(R10)+16,r12
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
index 574084f343f..8832c553230 100644
--- a/arch/ia64/kernel/iosapic.c
+++ b/arch/ia64/kernel/iosapic.c
@@ -631,6 +631,7 @@ get_target_cpu (unsigned int gsi, int vector)
{
#ifdef CONFIG_SMP
static int cpu = -1;
+ extern int cpe_vector;
/*
* In case of vector shared by multiple RTEs, all RTEs that
@@ -653,6 +654,11 @@ get_target_cpu (unsigned int gsi, int vector)
if (!cpu_online(smp_processor_id()))
return cpu_physical_id(smp_processor_id());
+#ifdef CONFIG_ACPI
+ if (cpe_vector > 0 && vector == IA64_CPEP_VECTOR)
+ return get_cpei_target_cpu();
+#endif
+
#ifdef CONFIG_NUMA
{
int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
index d33244c3275..5ce908ef9c9 100644
--- a/arch/ia64/kernel/irq.c
+++ b/arch/ia64/kernel/irq.c
@@ -163,8 +163,19 @@ void fixup_irqs(void)
{
unsigned int irq;
extern void ia64_process_pending_intr(void);
+ extern void ia64_disable_timer(void);
+ extern volatile int time_keeper_id;
+
+ ia64_disable_timer();
+
+ /*
+ * Find a new timesync master
+ */
+ if (smp_processor_id() == time_keeper_id) {
+ time_keeper_id = first_cpu(cpu_online_map);
+ printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id);
+ }
- ia64_set_itv(1<<16);
/*
* Phase 1: Locate irq's bound to this cpu and
* relocate them for cpu removal.
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index ee7eec9ee57..b57e723f194 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -281,14 +281,10 @@ ia64_mca_log_sal_error_record(int sal_info_type)
ia64_sal_clear_state_info(sal_info_type);
}
-/*
- * platform dependent error handling
- */
-#ifndef PLATFORM_MCA_HANDLERS
-
#ifdef CONFIG_ACPI
int cpe_vector = -1;
+int ia64_cpe_irq = -1;
static irqreturn_t
ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
@@ -377,8 +373,6 @@ ia64_mca_register_cpev (int cpev)
}
#endif /* CONFIG_ACPI */
-#endif /* PLATFORM_MCA_HANDLERS */
-
/*
* ia64_mca_cmc_vector_setup
*
@@ -630,6 +624,32 @@ copy_reg(const u64 *fr, u64 fnat, u64 *tr, u64 *tnat)
*tnat |= (nat << tslot);
}
+/* Change the comm field on the MCA/INIT task to include the pid that
+ * was interrupted, it makes for easier debugging. If that pid was 0
+ * (swapper or nested MCA/INIT) then use the start of the previous comm
+ * field suffixed with its cpu.
+ */
+
+static void
+ia64_mca_modify_comm(const task_t *previous_current)
+{
+ char *p, comm[sizeof(current->comm)];
+ if (previous_current->pid)
+ snprintf(comm, sizeof(comm), "%s %d",
+ current->comm, previous_current->pid);
+ else {
+ int l;
+ if ((p = strchr(previous_current->comm, ' ')))
+ l = p - previous_current->comm;
+ else
+ l = strlen(previous_current->comm);
+ snprintf(comm, sizeof(comm), "%s %*s %d",
+ current->comm, l, previous_current->comm,
+ task_thread_info(previous_current)->cpu);
+ }
+ memcpy(current->comm, comm, sizeof(current->comm));
+}
+
/* On entry to this routine, we are running on the per cpu stack, see
* mca_asm.h. The original stack has not been touched by this event. Some of
* the original stack's registers will be in the RBS on this stack. This stack
@@ -648,7 +668,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
struct ia64_sal_os_state *sos,
const char *type)
{
- char *p, comm[sizeof(current->comm)];
+ char *p;
ia64_va va;
extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */
const pal_min_state_area_t *ms = sos->pal_min_state;
@@ -721,6 +741,10 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
/* Verify the previous stack state before we change it */
if (user_mode(regs)) {
msg = "occurred in user space";
+ /* previous_current is guaranteed to be valid when the task was
+ * in user space, so ...
+ */
+ ia64_mca_modify_comm(previous_current);
goto no_mod;
}
if (r13 != sos->prev_IA64_KR_CURRENT) {
@@ -750,25 +774,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
goto no_mod;
}
- /* Change the comm field on the MCA/INT task to include the pid that
- * was interrupted, it makes for easier debugging. If that pid was 0
- * (swapper or nested MCA/INIT) then use the start of the previous comm
- * field suffixed with its cpu.
- */
- if (previous_current->pid)
- snprintf(comm, sizeof(comm), "%s %d",
- current->comm, previous_current->pid);
- else {
- int l;
- if ((p = strchr(previous_current->comm, ' ')))
- l = p - previous_current->comm;
- else
- l = strlen(previous_current->comm);
- snprintf(comm, sizeof(comm), "%s %*s %d",
- current->comm, l, previous_current->comm,
- task_thread_info(previous_current)->cpu);
- }
- memcpy(current->comm, comm, sizeof(current->comm));
+ ia64_mca_modify_comm(previous_current);
/* Make the original task look blocked. First stack a struct pt_regs,
* describing the state at the time of interrupt. mca_asm.S built a
@@ -908,7 +914,7 @@ no_mod:
static void
ia64_wait_for_slaves(int monarch)
{
- int c, wait = 0;
+ int c, wait = 0, missing = 0;
for_each_online_cpu(c) {
if (c == monarch)
continue;
@@ -919,15 +925,32 @@ ia64_wait_for_slaves(int monarch)
}
}
if (!wait)
- return;
+ goto all_in;
for_each_online_cpu(c) {
if (c == monarch)
continue;
if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) {
udelay(5*1000000); /* wait 5 seconds for slaves (arbitrary) */
+ if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+ missing = 1;
break;
}
}
+ if (!missing)
+ goto all_in;
+ printk(KERN_INFO "OS MCA slave did not rendezvous on cpu");
+ for_each_online_cpu(c) {
+ if (c == monarch)
+ continue;
+ if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE)
+ printk(" %d", c);
+ }
+ printk("\n");
+ return;
+
+all_in:
+ printk(KERN_INFO "All OS MCA slaves have reached rendezvous\n");
+ return;
}
/*
@@ -953,6 +976,10 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
task_t *previous_current;
oops_in_progress = 1; /* FIXME: make printk NMI/MCA/INIT safe */
+ console_loglevel = 15; /* make sure printks make it to console */
+ printk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d monarch=%ld\n",
+ sos->proc_state_param, cpu, sos->monarch);
+
previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
monarch_cpu = cpu;
if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, 0, 0, 0)
@@ -1444,11 +1471,13 @@ void __devinit
ia64_mca_cpu_init(void *cpu_data)
{
void *pal_vaddr;
+ static int first_time = 1;
- if (smp_processor_id() == 0) {
+ if (first_time) {
void *mca_data;
int cpu;
+ first_time = 0;
mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
* NR_CPUS + KERNEL_STACK_SIZE);
mca_data = (void *)(((unsigned long)mca_data +
@@ -1704,6 +1733,7 @@ ia64_mca_late_init(void)
desc = irq_descp(irq);
desc->status |= IRQ_PER_CPU;
setup_irq(irq, &mca_cpe_irqaction);
+ ia64_cpe_irq = irq;
}
ia64_mca_register_cpev(cpe_vector);
IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 9c5194b385d..077f21216b6 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -6722,6 +6722,7 @@ __initcall(pfm_init);
void
pfm_init_percpu (void)
{
+ static int first_time = 1;
/*
* make sure no measurement is active
* (may inherit programmed PMCs from EFI).
@@ -6734,8 +6735,10 @@ pfm_init_percpu (void)
*/
pfm_unfreeze_pmu();
- if (smp_processor_id() == 0)
+ if (first_time) {
register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
+ first_time = 0;
+ }
ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
ia64_srlz_d();
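
Both this conversion in pfm_init_percpu() and the one in ia64_mca_cpu_init() above (plus the per_cpu_init() variants later in the patch) replace the test smp_processor_id() == 0 with a static first_time flag. Once PERMIT_BSP_REMOVE lets CPU 0 go offline and return, running on CPU 0 no longer implies a first-ever call, so one-shot setup must track itself. The idiom, assuming early bring-up serialises the callers so no locking is needed:

    void example_percpu_init(void)          /* illustrative name */
    {
            static int first_time = 1;

            if (first_time) {
                    first_time = 0;
                    /* one-shot work: bootmem allocation, IRQ registration, ... */
            }
            /* per-CPU work continues here for every caller */
    }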
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 463f6bb44d0..1d7903ee212 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -588,104 +588,3 @@ ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
}
return 0;
}
-
-/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
- * could not be delivered. It is important that the target process is not
- * allowed to do any more work in user space. Possible cases for the target
- * process:
- *
- * - It is sleeping and will wake up soon. Store the data in the current task,
- * the signal will be sent when the current task returns from the next
- * interrupt.
- *
- * - It is running in user context. Store the data in the current task, the
- * signal will be sent when the current task returns from the next interrupt.
- *
- * - It is running in kernel context on this or another cpu and will return to
- * user context. Store the data in the target task, the signal will be sent
- * to itself when the target task returns to user space.
- *
- * - It is running in kernel context on this cpu and will sleep before
- * returning to user context. Because this is also the current task, the
- * signal will not get delivered and the task could sleep indefinitely.
- * Store the data in the idle task for this cpu, the signal will be sent
- * after the idle task processes its next interrupt.
- *
- * To cover all cases, store the data in the target task, the current task and
- * the idle task on this cpu. Whatever happens, the signal will be delivered
- * to the target task before it can do any useful user space work. Multiple
- * deliveries have no unwanted side effects.
- *
- * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
- * disabled. It must not take any locks nor use kernel structures or services
- * that require locks.
- */
-
-/* To ensure that we get the right pid, check its start time. To avoid extra
- * include files in thread_info.h, convert the task start_time to unsigned long,
- * giving us a cycle time of > 580 years.
- */
-static inline unsigned long
-start_time_ul(const struct task_struct *t)
-{
- return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
-}
-
-void
-set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
-{
- struct task_struct *t;
- unsigned long start_time = 0;
- int i;
-
- for (i = 1; i <= 3; ++i) {
- switch (i) {
- case 1:
- t = find_task_by_pid(pid);
- if (t)
- start_time = start_time_ul(t);
- break;
- case 2:
- t = current;
- break;
- default:
- t = idle_task(smp_processor_id());
- break;
- }
-
- if (!t)
- return;
- task_thread_info(t)->sigdelayed.signo = signo;
- task_thread_info(t)->sigdelayed.code = code;
- task_thread_info(t)->sigdelayed.addr = addr;
- task_thread_info(t)->sigdelayed.start_time = start_time;
- task_thread_info(t)->sigdelayed.pid = pid;
- wmb();
- set_tsk_thread_flag(t, TIF_SIGDELAYED);
- }
-}
-
-/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
- * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
- */
-
-void
-do_sigdelayed(void)
-{
- struct siginfo siginfo;
- pid_t pid;
- struct task_struct *t;
-
- clear_thread_flag(TIF_SIGDELAYED);
- memset(&siginfo, 0, sizeof(siginfo));
- siginfo.si_signo = current_thread_info()->sigdelayed.signo;
- siginfo.si_code = current_thread_info()->sigdelayed.code;
- siginfo.si_addr = current_thread_info()->sigdelayed.addr;
- pid = current_thread_info()->sigdelayed.pid;
- t = find_task_by_pid(pid);
- if (!t)
- return;
- if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
- return;
- force_sig_info(siginfo.si_signo, &siginfo, t);
-}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
index b681ef34a86..c4b633b36da 100644
--- a/arch/ia64/kernel/smpboot.c
+++ b/arch/ia64/kernel/smpboot.c
@@ -70,6 +70,12 @@
#endif
#ifdef CONFIG_HOTPLUG_CPU
+#ifdef CONFIG_PERMIT_BSP_REMOVE
+#define bsp_remove_ok 1
+#else
+#define bsp_remove_ok 0
+#endif
+
/*
* Store all idle threads, this can be reused instead of creating
* a new thread. Also avoids complicated thread destroy functionality
@@ -104,7 +110,7 @@ struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0];
/*
* ITC synchronization related stuff:
*/
-#define MASTER 0
+#define MASTER (0)
#define SLAVE (SMP_CACHE_BYTES/8)
#define NUM_ROUNDS 64 /* magic value */
@@ -151,6 +157,27 @@ char __initdata no_int_routing;
unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
+#ifdef CONFIG_FORCE_CPEI_RETARGET
+#define CPEI_OVERRIDE_DEFAULT (1)
+#else
+#define CPEI_OVERRIDE_DEFAULT (0)
+#endif
+
+unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT;
+
+static int __init
+cmdl_force_cpei(char *str)
+{
+ int value = 0;
+
+ get_option (&str, &value);
+ force_cpei_retarget = value;
+
+ return 1;
+}
+
+__setup("force_cpei=", cmdl_force_cpei);
+
static int __init
nointroute (char *str)
{
@@ -161,6 +188,27 @@ nointroute (char *str)
__setup("nointroute", nointroute);
+static void fix_b0_for_bsp(void)
+{
+#ifdef CONFIG_HOTPLUG_CPU
+ int cpuid;
+ static int fix_bsp_b0 = 1;
+
+ cpuid = smp_processor_id();
+
+ /*
+ * Cache the b0 value on the first AP that comes up
+ */
+ if (!(fix_bsp_b0 && cpuid))
+ return;
+
+ sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0];
+ printk ("Fixed BSP b0 value from CPU %d\n", cpuid);
+
+ fix_bsp_b0 = 0;
+#endif
+}
+
void
sync_master (void *arg)
{
@@ -327,8 +375,9 @@ smp_setup_percpu_timer (void)
static void __devinit
smp_callin (void)
{
- int cpuid, phys_id;
+ int cpuid, phys_id, itc_master;
extern void ia64_init_itm(void);
+ extern volatile int time_keeper_id;
#ifdef CONFIG_PERFMON
extern void pfm_init_percpu(void);
@@ -336,6 +385,7 @@ smp_callin (void)
cpuid = smp_processor_id();
phys_id = hard_smp_processor_id();
+ itc_master = time_keeper_id;
if (cpu_online(cpuid)) {
printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
@@ -343,6 +393,8 @@ smp_callin (void)
BUG();
}
+ fix_b0_for_bsp();
+
lock_ipi_calllock();
cpu_set(cpuid, cpu_online_map);
unlock_ipi_calllock();
@@ -365,8 +417,8 @@ smp_callin (void)
* calls spin_unlock_bh(), which calls
* local_bh_enable(), which bugs out if irqs are not enabled...
*/
- Dprintk("Going to syncup ITC with BP.\n");
- ia64_sync_itc(0);
+ Dprintk("Going to syncup ITC with ITC Master.\n");
+ ia64_sync_itc(itc_master);
}
/*
@@ -635,6 +687,47 @@ remove_siblinginfo(int cpu)
}
extern void fixup_irqs(void);
+
+int migrate_platform_irqs(unsigned int cpu)
+{
+ int new_cpei_cpu;
+ irq_desc_t *desc = NULL;
+ cpumask_t mask;
+ int retval = 0;
+
+ /*
+ * don't permit the CPEI target to be removed.
+ */
+ if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) {
+ printk ("CPU (%d) is CPEI Target\n", cpu);
+ if (can_cpei_retarget()) {
+ /*
+ * Now re-target the CPEI to a different processor
+ */
+ new_cpei_cpu = any_online_cpu(cpu_online_map);
+ mask = cpumask_of_cpu(new_cpei_cpu);
+ set_cpei_target_cpu(new_cpei_cpu);
+ desc = irq_descp(ia64_cpe_irq);
+ /*
+ * Switch over immediately for now; we should eventually fake this
+ * interrupt like other interrupts, but we need to study CPEI
+ * behaviour with polling before making changes.
+ */
+ if (desc) {
+ desc->handler->disable(ia64_cpe_irq);
+ desc->handler->set_affinity(ia64_cpe_irq, mask);
+ desc->handler->enable(ia64_cpe_irq);
+ printk ("Re-targetting CPEI to cpu %d\n", new_cpei_cpu);
+ }
+ }
+ if (!desc) {
+ printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu);
+ retval = -EBUSY;
+ }
+ }
+ return retval;
+}
+
/* must be called with cpucontrol mutex held */
int __cpu_disable(void)
{
@@ -643,8 +736,17 @@ int __cpu_disable(void)
/*
* don't permit removal of the boot processor for now
*/
- if (cpu == 0)
- return -EBUSY;
+ if (cpu == 0 && !bsp_remove_ok) {
+ printk ("Your platform does not support removal of BSP\n");
+ return (-EBUSY);
+ }
+
+ cpu_clear(cpu, cpu_online_map);
+
+ if (migrate_platform_irqs(cpu)) {
+ cpu_set(cpu, cpu_online_map);
+ return (-EBUSY);
+ }
remove_siblinginfo(cpu);
cpu_clear(cpu, cpu_online_map);
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 307d01e15b2..ac167436e93 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -32,7 +32,7 @@
extern unsigned long wall_jiffies;
-#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */
+volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */
#ifdef CONFIG_IA64_DEBUG_IRQ
@@ -71,7 +71,7 @@ timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
new_itm += local_cpu_data->itm_delta;
- if (smp_processor_id() == TIME_KEEPER_ID) {
+ if (smp_processor_id() == time_keeper_id) {
/*
* Here we are in the timer irq handler. We have irqs locally
* disabled, but we don't know if the timer_bh is running on
@@ -236,6 +236,11 @@ static struct irqaction timer_irqaction = {
.name = "timer"
};
+void __devinit ia64_disable_timer(void)
+{
+ ia64_set_itv(1 << 16);
+}
+
void __init
time_init (void)
{
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
index 6e5eea19fa6..3b6fd798c4d 100644
--- a/arch/ia64/kernel/topology.c
+++ b/arch/ia64/kernel/topology.c
@@ -36,7 +36,7 @@ int arch_register_cpu(int num)
parent = &sysfs_nodes[cpu_to_node(num)];
#endif /* CONFIG_NUMA */
-#ifdef CONFIG_ACPI
+#if defined (CONFIG_ACPI) && defined (CONFIG_HOTPLUG_CPU)
/*
* If CPEI cannot be re-targeted and this is the
* CPEI target, then don't create the control file
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index acaaec4e468..9855ba31809 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -181,13 +181,15 @@ per_cpu_init (void)
{
void *cpu_data;
int cpu;
+ static int first_time = 1;
/*
* get_free_pages() cannot be used before cpu_init() done. BSP
* allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
* get_zeroed_page().
*/
- if (smp_processor_id() == 0) {
+ if (first_time) {
+ first_time = 0;
cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
for (cpu = 0; cpu < NR_CPUS; cpu++) {
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c87d6d1d581..573d5cc63e2 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -528,12 +528,17 @@ void __init find_memory(void)
void *per_cpu_init(void)
{
int cpu;
+ static int first_time = 1;
+
if (smp_processor_id() != 0)
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
- for (cpu = 0; cpu < NR_CPUS; cpu++)
- per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+ if (first_time) {
+ first_time = 0;
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
+ }
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index 2d13889d0a9..9dbc7dadd16 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -68,9 +68,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr)
#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
/*
- * This function checks for proper alignment of input addr and len parameters.
+ * Don't actually need to do any preparation, but need to make sure
+ * the address is in the right region.
*/
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
+int prepare_hugepage_range(unsigned long addr, unsigned long len)
{
if (len & ~HPAGE_MASK)
return -EINVAL;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index b38b6d213c1..08d94e6bfa1 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -197,7 +197,7 @@ free_initmem (void)
eaddr = (unsigned long) ia64_imva(__init_end);
while (addr < eaddr) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
++totalram_pages;
addr += PAGE_SIZE;
@@ -252,7 +252,7 @@ free_initrd_mem (unsigned long start, unsigned long end)
continue;
page = virt_to_page(start);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(start);
++totalram_pages;
}
@@ -640,7 +640,7 @@ mem_init (void)
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
num_physpages++;
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
index 3e9b4eea741..ab9c48c8801 100644
--- a/arch/ia64/sn/kernel/Makefile
+++ b/arch/ia64/sn/kernel/Makefile
@@ -10,7 +10,8 @@
CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
- huberror.o io_init.o iomv.o klconflib.o sn2/
+ huberror.o io_init.o iomv.o klconflib.o pio_phys.o \
+ sn2/
obj-$(CONFIG_IA64_GENERIC) += machvec.o
obj-$(CONFIG_SGI_TIOCX) += tiocx.o
obj-$(CONFIG_IA64_SGI_SN_XP) += xp.o
diff --git a/arch/ia64/sn/kernel/pio_phys.S b/arch/ia64/sn/kernel/pio_phys.S
new file mode 100644
index 00000000000..3c7d48d6ecb
--- /dev/null
+++ b/arch/ia64/sn/kernel/pio_phys.S
@@ -0,0 +1,71 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2000-2005 Silicon Graphics, Inc. All rights reserved.
+ *
+ * This file contains macros used to access MMR registers via
+ * uncached physical addresses.
+ * pio_phys_read_mmr - read an MMR
+ * pio_phys_write_mmr - write an MMR
+ * pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
+ * Second MMR will be skipped if address is NULL
+ *
+ * Addresses passed to these routines should be uncached physical addresses
+ * ie., 0x80000....
+ */
+
+
+
+#include <asm/asmmacro.h>
+#include <asm/page.h>
+
+GLOBAL_ENTRY(pio_phys_read_mmr)
+ .prologue
+ .regstk 1,0,0,0
+ .body
+ mov r2=psr
+ rsm psr.i | psr.dt
+ ;;
+ srlz.d
+ ld8.acq r8=[r32]
+ ;;
+ mov psr.l=r2;;
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_phys_read_mmr)
+
+GLOBAL_ENTRY(pio_phys_write_mmr)
+ .prologue
+ .regstk 2,0,0,0
+ .body
+ mov r2=psr
+ rsm psr.i | psr.dt
+ ;;
+ srlz.d
+ st8.rel [r32]=r33
+ ;;
+ mov psr.l=r2;;
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_phys_write_mmr)
+
+GLOBAL_ENTRY(pio_atomic_phys_write_mmrs)
+ .prologue
+ .regstk 4,0,0,0
+ .body
+ mov r2=psr
+ cmp.ne p9,p0=r34,r0;
+ rsm psr.i | psr.dt | psr.ic
+ ;;
+ srlz.d
+ st8.rel [r32]=r33
+(p9) st8.rel [r34]=r35
+ ;;
+ mov psr.l=r2;;
+ srlz.d
+ br.ret.sptk.many rp
+END(pio_atomic_phys_write_mmrs)
+
+
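
The new pio_phys.S entry points follow the normal ia64 calling convention (in0..in3 arrive in r32..r35), so the rest of the kernel can call them as ordinary C functions. Plausible prototypes, presumably declared in an SN header not shown in this hunk (the exact types are an assumption):

    extern u64 pio_phys_read_mmr(volatile u64 *mmr);
    extern void pio_phys_write_mmr(volatile u64 *mmr, u64 val);
    extern void pio_atomic_phys_write_mmrs(volatile u64 *mmr1, u64 val1,
                                           volatile u64 *mmr2, u64 val2);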
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index 5b84836c217..8b6d5c84470 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -3,7 +3,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 1999,2001-2005 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 1999,2001-2006 Silicon Graphics, Inc. All rights reserved.
*/
#include <linux/config.h>
@@ -498,6 +498,7 @@ void __init sn_setup(char **cmdline_p)
* for sn.
*/
pm_power_off = ia64_sn_power_down;
+ current->thread.flags |= IA64_THREAD_MIGRATION;
}
/**
@@ -660,7 +661,8 @@ void __init sn_cpu_init(void)
SH2_PIO_WRITE_STATUS_1, SH2_PIO_WRITE_STATUS_3};
u64 *pio;
pio = is_shub1() ? pio1 : pio2;
- pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
+ pda->pio_write_status_addr =
+ (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid, pio[slice]);
pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
}
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index b2e1e746b47..d9d306c79f2 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -93,6 +93,27 @@ static inline unsigned long wait_piowc(void)
return (ws & SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK) != 0;
}
+/**
+ * sn_migrate - SN-specific task migration actions
+ * @task: Task being migrated to new CPU
+ *
+ * SN2 PIO writes from separate CPUs are not guaranteed to arrive in order.
+ * Context switching user threads which have memory-mapped MMIO may cause
+ * PIOs to issue from separate CPUs, thus the PIO writes must be drained
+ * from the previous CPU's Shub before execution resumes on the new CPU.
+ */
+void sn_migrate(struct task_struct *task)
+{
+ pda_t *last_pda = pdacpu(task_thread_info(task)->last_cpu);
+ volatile unsigned long *adr = last_pda->pio_write_status_addr;
+ unsigned long val = last_pda->pio_write_status_val;
+
+ /* Drain PIO writes from old CPU's Shub */
+ while (unlikely((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK)
+ != val))
+ cpu_relax();
+}
+
void sn_tlb_migrate_finish(struct mm_struct *mm)
{
/* flush_tlb_mm is inefficient if more than 1 users of mm */
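
sn_migrate() only spins until the previous CPU's Shub reports no pending PIO writes; something in the context-switch path must invoke it for tasks flagged with IA64_THREAD_MIGRATION, which sn_setup() above now sets on the boot thread so children inherit it. One plausible shape for that hook, with a hypothetical macro name (only sn_migrate() and the flag come from this diff):

    /* Hypothetical switch-in hook: drain the old CPU's Shub when a
     * flagged task resumes on a different CPU. */
    #define SN_MIGRATE_TASK(task)                                       \
    do {                                                                \
            if ((task)->thread.flags & IA64_THREAD_MIGRATION &&         \
                task_thread_info(task)->last_cpu != task_cpu(task))     \
                    sn_migrate(task);                                   \
    } while (0)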
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index cdf6856ce08..d0abddd9ffe 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -21,7 +21,6 @@
#include <linux/sched.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <asm/sn/bte.h>
@@ -30,6 +29,31 @@
/*
+ * Guarantee that the kzalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kzalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kzalloc will give us cacheline aligned memory by default */
+ *base = kzalloc(size, flags);
+ if (*base == NULL) {
+ return NULL;
+ }
+ if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
+ return *base;
+ }
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kzalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL) {
+ return NULL;
+ }
+ return (void *) L1_CACHE_ALIGN((u64) *base);
+}
+
+
+/*
* Set up the initial values for the XPartition Communication channels.
*/
static void
@@ -93,20 +117,19 @@ xpc_setup_infrastructure(struct xpc_partition *part)
* Allocate all of the channel structures as a contiguous chunk of
* memory.
*/
- part->channels = kmalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
+ part->channels = kzalloc(sizeof(struct xpc_channel) * XPC_NCHANNELS,
GFP_KERNEL);
if (part->channels == NULL) {
dev_err(xpc_chan, "can't get memory for channels\n");
return xpcNoMemory;
}
- memset(part->channels, 0, sizeof(struct xpc_channel) * XPC_NCHANNELS);
part->nchannels = XPC_NCHANNELS;
/* allocate all the required GET/PUT values */
- part->local_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+ part->local_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
GFP_KERNEL, &part->local_GPs_base);
if (part->local_GPs == NULL) {
kfree(part->channels);
@@ -115,55 +138,51 @@ xpc_setup_infrastructure(struct xpc_partition *part)
"values\n");
return xpcNoMemory;
}
- memset(part->local_GPs, 0, XPC_GP_SIZE);
- part->remote_GPs = xpc_kmalloc_cacheline_aligned(XPC_GP_SIZE,
+ part->remote_GPs = xpc_kzalloc_cacheline_aligned(XPC_GP_SIZE,
GFP_KERNEL, &part->remote_GPs_base);
if (part->remote_GPs == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
dev_err(xpc_chan, "can't get memory for remote get/put "
"values\n");
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->remote_GPs, 0, XPC_GP_SIZE);
/* allocate all the required open and close args */
- part->local_openclose_args = xpc_kmalloc_cacheline_aligned(
+ part->local_openclose_args = xpc_kzalloc_cacheline_aligned(
XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
&part->local_openclose_args_base);
if (part->local_openclose_args == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
+ dev_err(xpc_chan, "can't get memory for local connect args\n");
kfree(part->remote_GPs_base);
part->remote_GPs = NULL;
- dev_err(xpc_chan, "can't get memory for local connect args\n");
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->local_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
- part->remote_openclose_args = xpc_kmalloc_cacheline_aligned(
+ part->remote_openclose_args = xpc_kzalloc_cacheline_aligned(
XPC_OPENCLOSE_ARGS_SIZE, GFP_KERNEL,
&part->remote_openclose_args_base);
if (part->remote_openclose_args == NULL) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
+ dev_err(xpc_chan, "can't get memory for remote connect args\n");
kfree(part->local_openclose_args_base);
part->local_openclose_args = NULL;
- dev_err(xpc_chan, "can't get memory for remote connect args\n");
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcNoMemory;
}
- memset(part->remote_openclose_args, 0, XPC_OPENCLOSE_ARGS_SIZE);
xpc_initialize_channels(part, partid);
@@ -186,18 +205,18 @@ xpc_setup_infrastructure(struct xpc_partition *part)
ret = request_irq(SGI_XPC_NOTIFY, xpc_notify_IRQ_handler, SA_SHIRQ,
part->IPI_owner, (void *) (u64) partid);
if (ret != 0) {
- kfree(part->channels);
- part->channels = NULL;
- kfree(part->local_GPs_base);
- part->local_GPs = NULL;
- kfree(part->remote_GPs_base);
- part->remote_GPs = NULL;
- kfree(part->local_openclose_args_base);
- part->local_openclose_args = NULL;
- kfree(part->remote_openclose_args_base);
- part->remote_openclose_args = NULL;
dev_err(xpc_chan, "can't register NOTIFY IRQ handler, "
"errno=%d\n", -ret);
+ kfree(part->remote_openclose_args_base);
+ part->remote_openclose_args = NULL;
+ kfree(part->local_openclose_args_base);
+ part->local_openclose_args = NULL;
+ kfree(part->remote_GPs_base);
+ part->remote_GPs = NULL;
+ kfree(part->local_GPs_base);
+ part->local_GPs = NULL;
+ kfree(part->channels);
+ part->channels = NULL;
return xpcLackOfResources;
}
@@ -446,22 +465,20 @@ xpc_allocate_local_msgqueue(struct xpc_channel *ch)
for (nentries = ch->local_nentries; nentries > 0; nentries--) {
nbytes = nentries * ch->msg_size;
- ch->local_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
+ ch->local_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
GFP_KERNEL,
&ch->local_msgqueue_base);
if (ch->local_msgqueue == NULL) {
continue;
}
- memset(ch->local_msgqueue, 0, nbytes);
nbytes = nentries * sizeof(struct xpc_notify);
- ch->notify_queue = kmalloc(nbytes, GFP_KERNEL);
+ ch->notify_queue = kzalloc(nbytes, GFP_KERNEL);
if (ch->notify_queue == NULL) {
kfree(ch->local_msgqueue_base);
ch->local_msgqueue = NULL;
continue;
}
- memset(ch->notify_queue, 0, nbytes);
spin_lock_irqsave(&ch->lock, irq_flags);
if (nentries < ch->local_nentries) {
@@ -501,13 +518,12 @@ xpc_allocate_remote_msgqueue(struct xpc_channel *ch)
for (nentries = ch->remote_nentries; nentries > 0; nentries--) {
nbytes = nentries * ch->msg_size;
- ch->remote_msgqueue = xpc_kmalloc_cacheline_aligned(nbytes,
+ ch->remote_msgqueue = xpc_kzalloc_cacheline_aligned(nbytes,
GFP_KERNEL,
&ch->remote_msgqueue_base);
if (ch->remote_msgqueue == NULL) {
continue;
}
- memset(ch->remote_msgqueue, 0, nbytes);
spin_lock_irqsave(&ch->lock, irq_flags);
if (nentries < ch->remote_nentries) {
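
xpc_kzalloc_cacheline_aligned() works because L1_CACHE_ALIGN() rounds up to the next cache-line boundary, so an already-aligned pointer compares equal to its rounded value, and padding the allocation by L1_CACHE_BYTES always leaves room to round up. The definition it leans on reads roughly as:

    /* Round x up to the next L1 cache-line boundary; an aligned x is
     * returned unchanged, which is exactly what the equality test checks. */
    #define L1_CACHE_ALIGN(x) (((x) + (L1_CACHE_BYTES - 1)) & ~(L1_CACHE_BYTES - 1))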
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index 8cbf1643257..99b123a6421 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -52,7 +52,6 @@
#include <linux/syscalls.h>
#include <linux/cache.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/completion.h>
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 88a730e6cfd..94211429fd0 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -81,6 +81,31 @@ char ____cacheline_aligned xpc_remote_copy_buffer[XPC_RP_HEADER_SIZE +
/*
+ * Guarantee that the kmalloc'd memory is cacheline aligned.
+ */
+static void *
+xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
+{
+ /* see if kmalloc will give us cacheline aligned memory by default */
+ *base = kmalloc(size, flags);
+ if (*base == NULL) {
+ return NULL;
+ }
+ if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
+ return *base;
+ }
+ kfree(*base);
+
+ /* nope, we'll have to do it ourselves */
+ *base = kmalloc(size + L1_CACHE_BYTES, flags);
+ if (*base == NULL) {
+ return NULL;
+ }
+ return (void *) L1_CACHE_ALIGN((u64) *base);
+}
+
+
+/*
* Given a nasid, get the physical address of the partition's reserved page
* for that nasid. This function returns 0 on any error.
*/
@@ -1038,13 +1063,12 @@ xpc_discovery(void)
remote_vars = (struct xpc_vars *) remote_rp;
- discovered_nasids = kmalloc(sizeof(u64) * xp_nasid_mask_words,
+ discovered_nasids = kzalloc(sizeof(u64) * xp_nasid_mask_words,
GFP_KERNEL);
if (discovered_nasids == NULL) {
kfree(remote_rp_base);
return;
}
- memset(discovered_nasids, 0, sizeof(u64) * xp_nasid_mask_words);
rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
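
The kmalloc()+memset() pairs removed in this file and in xpc_channel.c above are precisely what kzalloc() performs, so the conversion is behaviour-preserving. For illustration (hypothetical name, not a kernel function):

    static inline void *kzalloc_equiv(size_t size, gfp_t flags)
    {
            void *p = kmalloc(size, flags);

            if (p)
                    memset(p, 0, size);
            return p;
    }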
diff --git a/arch/ia64/sn/pci/tioce_provider.c b/arch/ia64/sn/pci/tioce_provider.c
index e52831ed93e..fa073cc4b56 100644
--- a/arch/ia64/sn/pci/tioce_provider.c
+++ b/arch/ia64/sn/pci/tioce_provider.c
@@ -15,6 +15,124 @@
#include <asm/sn/pcidev.h>
#include <asm/sn/pcibus_provider_defs.h>
#include <asm/sn/tioce_provider.h>
+#include <asm/sn/sn2/sn_hwperf.h>
+
+/*
+ * 1/26/2006
+ *
+ * WAR for SGI PV 944642. For revA TIOCE, need to use the following recipe
+ * (taken from the above PV) before and after accessing tioce internal MMR's
+ * to avoid tioce lockups.
+ *
+ * The recipe as taken from the PV:
+ *
+ * if(mmr address < 0x45000) {
+ * if(mmr address == 0 or 0x80)
+ * mmr wrt or read address 0xc0
+ * else if(mmr address == 0x148 or 0x200)
+ * mmr wrt or read address 0x28
+ * else
+ * mmr wrt or read address 0x158
+ *
+ * do desired mmr access (rd or wrt)
+ *
+ * if(mmr address == 0x100)
+ * mmr wrt or read address 0x38
+ * mmr wrt or read address 0xb050
+ * } else
+ * do desired mmr access
+ *
+ * According to hw, we can use reads instead of writes to the above addresses
+ *
+ * Note this WAR can only be used for accessing internal MMR's in the
+ * TIOCE Coretalk Address Range 0x0 - 0x07ff_ffff. This includes the
+ * "Local CE Registers and Memories" and "PCI Compatible Config Space" address
+ * spaces from table 2-1 of the "CE Programmer's Reference Overview" document.
+ *
+ * All registers defined in struct tioce will meet that criteria.
+ */
+
+static inline void
+tioce_mmr_war_pre(struct tioce_kernel *kern, void *mmr_addr)
+{
+ u64 mmr_base;
+ u64 mmr_offset;
+
+ if (kern->ce_common->ce_rev != TIOCE_REV_A)
+ return;
+
+ mmr_base = kern->ce_common->ce_pcibus.bs_base;
+ mmr_offset = (u64)mmr_addr - mmr_base;
+
+ if (mmr_offset < 0x45000) {
+ u64 mmr_war_offset;
+
+ if (mmr_offset == 0 || mmr_offset == 0x80)
+ mmr_war_offset = 0xc0;
+ else if (mmr_offset == 0x148 || mmr_offset == 0x200)
+ mmr_war_offset = 0x28;
+ else
+ mmr_war_offset = 0x158;
+
+ readq_relaxed((void *)(mmr_base + mmr_war_offset));
+ }
+}
+
+static inline void
+tioce_mmr_war_post(struct tioce_kernel *kern, void *mmr_addr)
+{
+ u64 mmr_base;
+ u64 mmr_offset;
+
+ if (kern->ce_common->ce_rev != TIOCE_REV_A)
+ return;
+
+ mmr_base = kern->ce_common->ce_pcibus.bs_base;
+ mmr_offset = (u64)mmr_addr - mmr_base;
+
+ if (mmr_offset < 0x45000) {
+ if (mmr_offset == 0x100)
+ readq_relaxed((void *)(mmr_base + 0x38));
+ readq_relaxed((void *)(mmr_base + 0xb050));
+ }
+}
+
+/* load mmr contents into a variable */
+#define tioce_mmr_load(kern, mmrp, varp) do {\
+ tioce_mmr_war_pre(kern, mmrp); \
+ *(varp) = readq_relaxed(mmrp); \
+ tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store variable contents into mmr */
+#define tioce_mmr_store(kern, mmrp, varp) do {\
+ tioce_mmr_war_pre(kern, mmrp); \
+ writeq(*varp, mmrp); \
+ tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* store immediate value into mmr */
+#define tioce_mmr_storei(kern, mmrp, val) do {\
+ tioce_mmr_war_pre(kern, mmrp); \
+ writeq(val, mmrp); \
+ tioce_mmr_war_post(kern, mmrp); \
+} while (0)
+
+/* set bits (immediate value) into mmr */
+#define tioce_mmr_seti(kern, mmrp, bits) do {\
+ u64 tmp; \
+ tioce_mmr_load(kern, mmrp, &tmp); \
+ tmp |= (bits); \
+ tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
+
+/* clear bits (immediate value) in mmr */
+#define tioce_mmr_clri(kern, mmrp, bits) do { \
+ u64 tmp; \
+ tioce_mmr_load(kern, mmrp, &tmp); \
+ tmp &= ~(bits); \
+ tioce_mmr_store(kern, mmrp, &tmp); \
+} while (0)
/**
* Bus address ranges for the 5 flavors of TIOCE DMA
@@ -62,9 +180,9 @@
#define TIOCE_ATE_M40 2
#define TIOCE_ATE_M40S 3
-#define KB(x) ((x) << 10)
-#define MB(x) ((x) << 20)
-#define GB(x) ((x) << 30)
+#define KB(x) ((u64)(x) << 10)
+#define MB(x) ((u64)(x) << 20)
+#define GB(x) ((u64)(x) << 30)
/**
* tioce_dma_d64 - create a DMA mapping using 64-bit direct mode
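
The u64 casts added to KB/MB/GB matter because the macro argument is typically a plain int: without the cast, GB(4) shifts a 32-bit int past its sign bit (undefined behaviour, usually yielding 0), while the cast widens first. For example:

    /* With the old int-typed macro, 4 << 30 overflows a 32-bit int;
     * with the u64 cast the intended values are produced: */
    u64 four_gb = GB(4);              /* 0x100000000ULL */
    u64 pages = MB(256) / KB(256);    /* 1024 pages of 256 KiB in 256 MiB */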
@@ -151,7 +269,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
int last;
int entries;
int nates;
- int pagesize;
+ u64 pagesize;
u64 *ate_shadow;
u64 *ate_reg;
u64 addr;
@@ -228,7 +346,7 @@ tioce_alloc_map(struct tioce_kernel *ce_kern, int type, int port,
ate = ATE_MAKE(addr, pagesize);
ate_shadow[i + j] = ate;
- writeq(ate, &ate_reg[i + j]);
+ tioce_mmr_storei(ce_kern, &ate_reg[i + j], ate);
addr += pagesize;
}
@@ -272,7 +390,8 @@ tioce_dma_d32(struct pci_dev *pdev, u64 ct_addr)
u64 tmp;
ce_kern->ce_port[port].dirmap_shadow = ct_upper;
- writeq(ct_upper, &ce_mmr->ce_ure_dir_map[port]);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+ ct_upper);
tmp = ce_mmr->ce_ure_dir_map[port];
dma_ok = 1;
} else
@@ -344,7 +463,8 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
if (TIOCE_D32_ADDR(bus_addr)) {
if (--ce_kern->ce_port[port].dirmap_refcnt == 0) {
ce_kern->ce_port[port].dirmap_shadow = 0;
- writeq(0, &ce_mmr->ce_ure_dir_map[port]);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_dir_map[port],
+ 0);
}
} else {
struct tioce_dmamap *map;
@@ -365,7 +485,7 @@ tioce_dma_unmap(struct pci_dev *pdev, dma_addr_t bus_addr, int dir)
} else if (--map->refcnt == 0) {
for (i = 0; i < map->ate_count; i++) {
map->ate_shadow[i] = 0;
- map->ate_hw[i] = 0;
+ tioce_mmr_storei(ce_kern, &map->ate_hw[i], 0);
}
list_del(&map->ce_dmamap_list);
@@ -486,7 +606,7 @@ tioce_do_dma_map(struct pci_dev *pdev, u64 paddr, size_t byte_count,
spin_unlock_irqrestore(&ce_kern->ce_lock, flags);
dma_map_done:
- if (mapaddr & barrier)
+ if (mapaddr && barrier)
mapaddr = tioce_dma_barrier(mapaddr, 1);
return mapaddr;
@@ -541,17 +661,61 @@ tioce_error_intr_handler(int irq, void *arg, struct pt_regs *pt)
soft->ce_pcibus.bs_persist_segment,
soft->ce_pcibus.bs_persist_busnum, 0, 0, 0, 0, 0);
+ if (ret_stuff.v0)
+ panic("tioce_error_intr_handler: Fatal TIOCE error");
+
return IRQ_HANDLED;
}
/**
+ * tioce_reserve_m32 - reserve M32 ate's for the indicated address range
+ * @tioce_kernel: TIOCE context to reserve ate's for
+ * @base: starting bus address to reserve
+ * @limit: last bus address to reserve
+ *
+ * If base/limit falls within the range of bus space mapped through the
+ * M32 space, reserve the resources corresponding to the range.
+ */
+static void
+tioce_reserve_m32(struct tioce_kernel *ce_kern, u64 base, u64 limit)
+{
+ int ate_index, last_ate, ps;
+ struct tioce *ce_mmr;
+
+ if (!TIOCE_M32_ADDR(base))
+ return;
+
+ ce_mmr = (struct tioce *)ce_kern->ce_common->ce_pcibus.bs_base;
+ ps = ce_kern->ce_ate3240_pagesize;
+ ate_index = ATE_PAGE(base, ps);
+ last_ate = ate_index + ATE_NPAGES(base, limit-base+1, ps) - 1;
+
+ if (ate_index < 64)
+ ate_index = 64;
+
+ while (ate_index <= last_ate) {
+ u64 ate;
+
+ ate = ATE_MAKE(0xdeadbeef, ps);
+ ce_kern->ce_ate3240_shadow[ate_index] = ate;
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_ure_ate3240[ate_index],
+ ate);
+ ate_index++;
+ }
+}
+
+/**
* tioce_kern_init - init kernel structures related to a given TIOCE
* @tioce_common: ptr to a cached tioce_common struct that originated in prom
- */ static struct tioce_kernel *
+ */
+static struct tioce_kernel *
tioce_kern_init(struct tioce_common *tioce_common)
{
int i;
+ int ps;
+ int dev;
u32 tmp;
+ unsigned int seg, bus;
struct tioce *tioce_mmr;
struct tioce_kernel *tioce_kern;
@@ -572,9 +736,10 @@ tioce_kern_init(struct tioce_common *tioce_common)
* here to use pci_read_config_xxx() so use the raw_pci_ops vector.
*/
- raw_pci_ops->read(tioce_common->ce_pcibus.bs_persist_segment,
- tioce_common->ce_pcibus.bs_persist_busnum,
- PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1, &tmp);
+ seg = tioce_common->ce_pcibus.bs_persist_segment;
+ bus = tioce_common->ce_pcibus.bs_persist_busnum;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(2, 0), PCI_SECONDARY_BUS, 1,&tmp);
tioce_kern->ce_port1_secondary = (u8) tmp;
/*
@@ -583,18 +748,76 @@ tioce_kern_init(struct tioce_common *tioce_common)
*/
tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
- __sn_clrq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_PAGESIZE_MASK);
- __sn_setq_relaxed(&tioce_mmr->ce_ure_page_map, CE_URE_256K_PAGESIZE);
- tioce_kern->ce_ate3240_pagesize = KB(256);
+ tioce_mmr_clri(tioce_kern, &tioce_mmr->ce_ure_page_map,
+ CE_URE_PAGESIZE_MASK);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_ure_page_map,
+ CE_URE_256K_PAGESIZE);
+ ps = tioce_kern->ce_ate3240_pagesize = KB(256);
for (i = 0; i < TIOCE_NUM_M40_ATES; i++) {
tioce_kern->ce_ate40_shadow[i] = 0;
- writeq(0, &tioce_mmr->ce_ure_ate40[i]);
+ tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate40[i], 0);
}
for (i = 0; i < TIOCE_NUM_M3240_ATES; i++) {
tioce_kern->ce_ate3240_shadow[i] = 0;
- writeq(0, &tioce_mmr->ce_ure_ate3240[i]);
+ tioce_mmr_storei(tioce_kern, &tioce_mmr->ce_ure_ate3240[i], 0);
+ }
+
+ /*
+ * Reserve ATE's corresponding to reserved address ranges. These
+ * include:
+ *
+ * Memory space covered by each PPB mem base/limit register
+ * Memory space covered by each PPB prefetch base/limit register
+ *
+ * These bus ranges are for pio (downstream) traffic only, and so
+ * cannot be used for DMA.
+ */
+
+ for (dev = 1; dev <= 2; dev++) {
+ u64 base, limit;
+
+ /* mem base/limit */
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_MEMORY_BASE, 2, &tmp);
+ base = (u64)tmp << 16;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_MEMORY_LIMIT, 2, &tmp);
+ limit = (u64)tmp << 16;
+ limit |= 0xfffffUL;
+
+ if (base < limit)
+ tioce_reserve_m32(tioce_kern, base, limit);
+
+ /*
+ * prefetch mem base/limit. The tioce ppb's have 64-bit
+ * decoders, so read the upper portions w/o checking the
+ * attributes.
+ */
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_MEMORY_BASE, 2, &tmp);
+ base = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_BASE_UPPER32, 4, &tmp);
+ base |= (u64)tmp << 32;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_MEMORY_LIMIT, 2, &tmp);
+
+ limit = ((u64)tmp & PCI_PREF_RANGE_MASK) << 16;
+ limit |= 0xfffffUL;
+
+ raw_pci_ops->read(seg, bus, PCI_DEVFN(dev, 0),
+ PCI_PREF_LIMIT_UPPER32, 4, &tmp);
+ limit |= (u64)tmp << 32;
+
+ if ((base < limit) && TIOCE_M32_ADDR(base))
+ tioce_reserve_m32(tioce_kern, base, limit);
}
return tioce_kern;
@@ -614,6 +837,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
{
struct pcidev_info *pcidev_info;
struct tioce_common *ce_common;
+ struct tioce_kernel *ce_kern;
struct tioce *ce_mmr;
u64 force_int_val;
@@ -629,6 +853,29 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+ ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
+
+ /*
+ * TIOCE Rev A workaround (PV 945826), force an interrupt by writing
+ * the TIO_INTx register directly (1/26/2006)
+ */
+ if (ce_common->ce_rev == TIOCE_REV_A) {
+ u64 int_bit_mask = (1ULL << sn_irq_info->irq_int_bit);
+ u64 status;
+
+ tioce_mmr_load(ce_kern, &ce_mmr->ce_adm_int_status, &status);
+ if (status & int_bit_mask) {
+ u64 force_irq = (1 << 8) | sn_irq_info->irq_irq;
+ u64 ctalk = sn_irq_info->irq_xtalkaddr;
+ u64 nasid, offset;
+
+ nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
+ offset = (ctalk & CTALK_NODE_OFFSET);
+ HUB_S(TIO_IOSPACE_ADDR(nasid, offset), force_irq);
+ }
+
+ return;
+ }
/*
* irq_int_bit is originally set up by prom, and holds the interrupt
@@ -666,7 +913,7 @@ tioce_force_interrupt(struct sn_irq_info *sn_irq_info)
default:
return;
}
- writeq(force_int_val, &ce_mmr->ce_adm_force_int);
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_force_int, force_int_val);
}
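
As context for the Rev A path above: the interrupt's CrossTalk address encodes the target NASID and a node-local offset, and the workaround rebuilds a TIO register address from those pieces before storing the force-interrupt value with HUB_S(). A sketch of just the address math (helper name hypothetical; masks as used above):

/* Sketch: derive the TIO register address the Rev A workaround writes */
static inline volatile u64 *tio_force_int_addr(u64 ctalk)
{
        u64 nasid = (ctalk & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
        u64 offset = ctalk & CTALK_NODE_OFFSET;

        return (volatile u64 *)TIO_IOSPACE_ADDR(nasid, offset);
}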
/**
@@ -685,6 +932,7 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
{
struct pcidev_info *pcidev_info;
struct tioce_common *ce_common;
+ struct tioce_kernel *ce_kern;
struct tioce *ce_mmr;
int bit;
u64 vector;
@@ -695,14 +943,15 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
ce_common = (struct tioce_common *)pcidev_info->pdi_pcibus_info;
ce_mmr = (struct tioce *)ce_common->ce_pcibus.bs_base;
+ ce_kern = (struct tioce_kernel *)ce_common->ce_kernel_private;
bit = sn_irq_info->irq_int_bit;
- __sn_setq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+ tioce_mmr_seti(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
vector = (u64)sn_irq_info->irq_irq << INTR_VECTOR_SHFT;
vector |= sn_irq_info->irq_xtalkaddr;
- writeq(vector, &ce_mmr->ce_adm_int_dest[bit]);
- __sn_clrq_relaxed(&ce_mmr->ce_adm_int_mask, (1UL << bit));
+ tioce_mmr_storei(ce_kern, &ce_mmr->ce_adm_int_dest[bit], vector);
+ tioce_mmr_clri(ce_kern, &ce_mmr->ce_adm_int_mask, (1UL << bit));
tioce_force_interrupt(sn_irq_info);
}
@@ -721,7 +970,11 @@ tioce_target_interrupt(struct sn_irq_info *sn_irq_info)
static void *
tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *controller)
{
+ int my_nasid;
+ cnodeid_t my_cnode, mem_cnode;
struct tioce_common *tioce_common;
+ struct tioce_kernel *tioce_kern;
+ struct tioce *tioce_mmr;
/*
* Allocate kernel bus soft and copy from prom.
@@ -734,11 +987,23 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
memcpy(tioce_common, prom_bussoft, sizeof(struct tioce_common));
tioce_common->ce_pcibus.bs_base |= __IA64_UNCACHED_OFFSET;
- if (tioce_kern_init(tioce_common) == NULL) {
+ tioce_kern = tioce_kern_init(tioce_common);
+ if (tioce_kern == NULL) {
kfree(tioce_common);
return NULL;
}
+ /*
+ * Clear out any transient errors before registering the error
+ * interrupt handler.
+ */
+
+ tioce_mmr = (struct tioce *)tioce_common->ce_pcibus.bs_base;
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_int_status_alias, ~0ULL);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_adm_error_summary_alias,
+ ~0ULL);
+ tioce_mmr_seti(tioce_kern, &tioce_mmr->ce_dre_comp_err_addr, ~0ULL);
+
if (request_irq(SGI_PCIASIC_ERROR,
tioce_error_intr_handler,
SA_SHIRQ, "TIOCE error", (void *)tioce_common))
@@ -750,6 +1015,21 @@ tioce_bus_fixup(struct pcibus_bussoft *prom_bussoft, struct pci_controller *cont
tioce_common->ce_pcibus.bs_persist_segment,
tioce_common->ce_pcibus.bs_persist_busnum);
+ /*
+ * identify closest nasid for memory allocations
+ */
+
+ my_nasid = NASID_GET(tioce_common->ce_pcibus.bs_base);
+ my_cnode = nasid_to_cnodeid(my_nasid);
+
+ if (sn_hwperf_get_nearest_node(my_cnode, &mem_cnode, NULL) < 0) {
+ printk(KERN_WARNING "tioce_bus_fixup: failed to find "
+ "closest node with MEM to TIO node %d\n", my_cnode);
+ mem_cnode = (cnodeid_t)-1; /* use any node */
+ }
+
+ controller->node = mem_cnode;
+
return tioce_common;
}
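
Recording the nearest memory node in controller->node lets later per-bus allocations stay close to the TIO. A hedged sketch of how such an allocation might consult it (helper name hypothetical; a node of -1 falls back to any node, matching the warning path above):

/* Sketch: node-local allocation steered by controller->node */
static void *tioce_node_kmalloc(struct pci_controller *controller, size_t size)
{
        if (controller->node < 0)       /* "use any node" fallback */
                return kmalloc(size, GFP_KERNEL);

        return kmalloc_node(size, GFP_KERNEL, controller->node);
}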
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 6facf15b04f..c9e7dad860b 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -226,7 +226,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -244,7 +244,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
unsigned long p;
for (p = start; p < end; p += PAGE_SIZE) {
ClearPageReserved(virt_to_page(p));
- set_page_count(virt_to_page(p), 1);
+ init_page_count(virt_to_page(p));
free_page(p);
totalram_pages++;
}
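
The set_page_count(page, 1) -> init_page_count(page) substitution repeated throughout this patch is mechanical: a freshly unreserved page gets its reference count initialized through a dedicated accessor instead of a raw count write. Roughly, the new helper amounts to the following (sketch; the real definition lives in include/linux/mm.h):

/* Sketch: initialize the refcount of a page about to be handed back */
static inline void init_page_count(struct page *page)
{
        atomic_set(&page->_count, 1);
}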
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index c45beb95594..a190e39c907 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -137,7 +137,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
int pages = 0;
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
pages++;
diff --git a/arch/m68k/mm/memory.c b/arch/m68k/mm/memory.c
index 559942ce0e1..d6d582a5abb 100644
--- a/arch/m68k/mm/memory.c
+++ b/arch/m68k/mm/memory.c
@@ -54,7 +54,7 @@ void __init init_pointer_table(unsigned long ptable)
/* unreserve the page so it's possible to free that page */
PD_PAGE(dp)->flags &= ~(1 << PG_reserved);
- set_page_count(PD_PAGE(dp), 1);
+ init_page_count(PD_PAGE(dp));
return;
}
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index d855fec2631..afb57eeafdc 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -276,7 +276,7 @@ void free_initmem(void)
addr = (unsigned long)&__init_begin;
for (; addr < (unsigned long)&__init_end; addr += PAGE_SIZE) {
virt_to_page(addr)->flags &= ~(1 << PG_reserved);
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
diff --git a/arch/m68knommu/kernel/m68k_ksyms.c b/arch/m68knommu/kernel/m68k_ksyms.c
index eddb8d3e130..d844c755945 100644
--- a/arch/m68knommu/kernel/m68k_ksyms.c
+++ b/arch/m68knommu/kernel/m68k_ksyms.c
@@ -26,6 +26,7 @@ EXPORT_SYMBOL(__ioremap);
EXPORT_SYMBOL(iounmap);
EXPORT_SYMBOL(dump_fpu);
EXPORT_SYMBOL(strnlen);
+EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strrchr);
EXPORT_SYMBOL(strstr);
EXPORT_SYMBOL(strchr);
diff --git a/arch/m68knommu/mm/init.c b/arch/m68knommu/mm/init.c
index 89f0b554ffb..d79503fe6e4 100644
--- a/arch/m68knommu/mm/init.c
+++ b/arch/m68knommu/mm/init.c
@@ -195,7 +195,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
int pages = 0;
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
pages++;
@@ -218,7 +218,7 @@ free_initmem()
/* next to check that the page we free is not a partial page */
for (; addr + PAGE_SIZE < (unsigned long)(&__init_end); addr +=PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
diff --git a/arch/mips/arc/memory.c b/arch/mips/arc/memory.c
index 958d2eb7886..8a9ef58cc39 100644
--- a/arch/mips/arc/memory.c
+++ b/arch/mips/arc/memory.c
@@ -158,7 +158,7 @@ unsigned long __init prom_free_prom_memory(void)
while (addr < boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size) {
ClearPageReserved(virt_to_page(__va(addr)));
- set_page_count(virt_to_page(__va(addr)), 1);
+ init_page_count(virt_to_page(__va(addr)));
free_page((unsigned long)__va(addr));
addr += PAGE_SIZE;
freed += PAGE_SIZE;
diff --git a/arch/mips/dec/prom/memory.c b/arch/mips/dec/prom/memory.c
index 81cb5a76cfb..1edaf3074ee 100644
--- a/arch/mips/dec/prom/memory.c
+++ b/arch/mips/dec/prom/memory.c
@@ -118,7 +118,7 @@ unsigned long __init prom_free_prom_memory(void)
addr = PAGE_SIZE;
while (addr < end) {
ClearPageReserved(virt_to_page(__va(addr)));
- set_page_count(virt_to_page(__va(addr)), 1);
+ init_page_count(virt_to_page(__va(addr)));
free_page((unsigned long)__va(addr));
addr += PAGE_SIZE;
}
diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c
index 2c8afd77a20..ee5e70c95cf 100644
--- a/arch/mips/mips-boards/generic/memory.c
+++ b/arch/mips/mips-boards/generic/memory.c
@@ -174,7 +174,7 @@ unsigned long __init prom_free_prom_memory(void)
while (addr < boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size) {
ClearPageReserved(virt_to_page(__va(addr)));
- set_page_count(virt_to_page(__va(addr)), 1);
+ init_page_count(virt_to_page(__va(addr)));
free_page((unsigned long)__va(addr));
addr += PAGE_SIZE;
freed += PAGE_SIZE;
diff --git a/arch/mips/mips-boards/sim/sim_mem.c b/arch/mips/mips-boards/sim/sim_mem.c
index 0dbd7435bb2..1ec4e75656b 100644
--- a/arch/mips/mips-boards/sim/sim_mem.c
+++ b/arch/mips/mips-boards/sim/sim_mem.c
@@ -117,7 +117,7 @@ unsigned long __init prom_free_prom_memory(void)
while (addr < boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size) {
ClearPageReserved(virt_to_page(__va(addr)));
- set_page_count(virt_to_page(__va(addr)), 1);
+ init_page_count(virt_to_page(__va(addr)));
free_page((unsigned long)__va(addr));
addr += PAGE_SIZE;
freed += PAGE_SIZE;
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 0ff9a348b84..52f7d59fe61 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -54,7 +54,8 @@ unsigned long empty_zero_page, zero_page_mask;
*/
unsigned long setup_zero_pages(void)
{
- unsigned long order, size;
+ unsigned int order;
+ unsigned long size;
struct page *page;
if (cpu_has_vce)
@@ -67,9 +68,9 @@ unsigned long setup_zero_pages(void)
panic("Oh boy, that early out of memory?");
page = virt_to_page(empty_zero_page);
+ split_page(page, order);
while (page < virt_to_page(empty_zero_page + (PAGE_SIZE << order))) {
SetPageReserved(page);
- set_page_count(page, 1);
page++;
}
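
setup_zero_pages() now calls split_page() once instead of hand-setting each tail page's count. In rough terms the helper gives every order-0 sub-page of a higher-order allocation its own reference, so each can later be freed independently (sketch; the real version in mm/page_alloc.c also sanity-checks compound pages):

/* Sketch: make each sub-page of a 2^order allocation individually freeable */
void split_page(struct page *page, unsigned int order)
{
        int i;

        for (i = 1; i < (1 << order); i++)
                set_page_count(page + i, 1);    /* head page already counted */
}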
@@ -244,7 +245,7 @@ void __init mem_init(void)
#ifdef CONFIG_LIMITED_DMA
set_page_address(page, lowmem_page_address(page));
#endif
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
@@ -291,7 +292,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
@@ -314,7 +315,7 @@ void free_initmem(void)
page = addr;
#endif
ClearPageReserved(virt_to_page(page));
- set_page_count(virt_to_page(page), 1);
+ init_page_count(virt_to_page(page));
free_page(page);
totalram_pages++;
freed += PAGE_SIZE;
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index ed93a979295..e0d095daa5e 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -559,7 +559,7 @@ void __init mem_init(void)
/* if (!page_is_ram(pgnr)) continue; */
/* commented out until page_is_ram works */
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
totalram_pages++;
}
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 7847ca13d6c..852eda3953d 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -398,7 +398,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
num_physpages++;
totalram_pages++;
@@ -1018,7 +1018,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
num_physpages++;
totalram_pages++;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b51bb28c054..7370f9f33e2 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -133,21 +133,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return __pte(old);
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- if (! (within_hugepage_low_range(addr, len)
- || within_hugepage_high_range(addr, len)) )
- return -EINVAL;
- return 0;
-}
-
struct slb_flush_info {
struct mm_struct *mm;
u16 newareas;
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 7d0d75c1184..b57fb3a2b7b 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -216,7 +216,7 @@ static void free_sec(unsigned long start, unsigned long end, const char *name)
while (start < end) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
cnt++;
start += PAGE_SIZE;
@@ -248,7 +248,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 81cfb0c2ec5..bacb71c8981 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -140,7 +140,7 @@ void free_initmem(void)
for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
memset((void *)addr, 0xcc, PAGE_SIZE);
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -155,7 +155,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 550517c2dd4..454cac01d8c 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -108,8 +108,8 @@ EXPORT_SYMBOL(phys_mem_access_prot);
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 0);
- free_cold_page(page);
+ init_page_count(page);
+ __free_page(page);
totalram_pages++;
num_physpages++;
}
@@ -376,7 +376,7 @@ void __init mem_init(void)
struct page *page = pfn_to_page(pfn);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c
index b33a4443f5a..fec8e65b36e 100644
--- a/arch/powerpc/platforms/cell/setup.c
+++ b/arch/powerpc/platforms/cell/setup.c
@@ -115,7 +115,7 @@ static void __init cell_spuprop_present(struct device_node *spe,
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
struct page *page = pfn_to_page(pfn);
set_page_links(page, ZONE_DMA, node_id, pfn);
- set_page_count(page, 1);
+ init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page);
INIT_LIST_HEAD(&page->lru);
diff --git a/arch/ppc/kernel/dma-mapping.c b/arch/ppc/kernel/dma-mapping.c
index 685fd0defe2..61465ec88bc 100644
--- a/arch/ppc/kernel/dma-mapping.c
+++ b/arch/ppc/kernel/dma-mapping.c
@@ -223,6 +223,8 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
pte_t *pte = consistent_pte + CONSISTENT_OFFSET(vaddr);
struct page *end = page + (1 << order);
+ split_page(page, order);
+
/*
* Set the "dma handle"
*/
@@ -231,7 +233,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
do {
BUG_ON(!pte_none(*pte));
- set_page_count(page, 1);
SetPageReserved(page);
set_pte_at(&init_mm, vaddr,
pte, mk_pte(page, pgprot_noncached(PAGE_KERNEL)));
@@ -244,7 +245,6 @@ __dma_alloc_coherent(size_t size, dma_addr_t *handle, gfp_t gfp)
* Free the otherwise unused pages.
*/
while (page < end) {
- set_page_count(page, 1);
__free_page(page);
page++;
}
diff --git a/arch/ppc/mm/init.c b/arch/ppc/mm/init.c
index 134db5c0420..cb1c294fb93 100644
--- a/arch/ppc/mm/init.c
+++ b/arch/ppc/mm/init.c
@@ -140,7 +140,7 @@ static void free_sec(unsigned long start, unsigned long end, const char *name)
while (start < end) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
cnt++;
start += PAGE_SIZE;
@@ -172,7 +172,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
@@ -441,7 +441,7 @@ void __init mem_init(void)
struct page *page = mem_map + pfn;
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index df953383724..a055894f3bd 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -292,7 +292,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -307,7 +307,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index df3a9e452cc..ee73e30263a 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -23,6 +23,7 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle)
page = alloc_pages(gfp, order);
if (!page)
return NULL;
+ split_page(page, order);
ret = page_address(page);
*handle = virt_to_phys(ret);
@@ -37,8 +38,6 @@ void *consistent_alloc(gfp_t gfp, size_t size, dma_addr_t *handle)
end = page + (1 << order);
while (++page < end) {
- set_page_count(page, 1);
-
/* Free any unused pages */
if (page >= free) {
__free_page(page);
diff --git a/arch/sh/mm/hugetlbpage.c b/arch/sh/mm/hugetlbpage.c
index 6b7a7688c98..a3568fd5150 100644
--- a/arch/sh/mm/hugetlbpage.c
+++ b/arch/sh/mm/hugetlbpage.c
@@ -84,18 +84,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return entry;
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- return 0;
-}
-
struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index e342565f75f..77b4a838fe1 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -273,7 +273,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -286,7 +286,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
unsigned long p;
for (p = start; p < end; p += PAGE_SIZE) {
ClearPageReserved(virt_to_page(p));
- set_page_count(virt_to_page(p), 1);
+ init_page_count(virt_to_page(p));
free_page(p);
totalram_pages++;
}
diff --git a/arch/sh64/mm/hugetlbpage.c b/arch/sh64/mm/hugetlbpage.c
index ed6a505b3ee..3d89f2a6c78 100644
--- a/arch/sh64/mm/hugetlbpage.c
+++ b/arch/sh64/mm/hugetlbpage.c
@@ -84,18 +84,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return entry;
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- return 0;
-}
-
struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
diff --git a/arch/sh64/mm/init.c b/arch/sh64/mm/init.c
index a65e8bb2c3c..1169757fb38 100644
--- a/arch/sh64/mm/init.c
+++ b/arch/sh64/mm/init.c
@@ -173,7 +173,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
free_page(addr);
totalram_pages++;
}
@@ -186,7 +186,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
unsigned long p;
for (p = start; p < end; p += PAGE_SIZE) {
ClearPageReserved(virt_to_page(p));
- set_page_count(virt_to_page(p), 1);
+ init_page_count(virt_to_page(p));
free_page(p);
totalram_pages++;
}
diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c
index 40d426cce82..4219dd2ce3a 100644
--- a/arch/sparc/kernel/sun4d_smp.c
+++ b/arch/sparc/kernel/sun4d_smp.c
@@ -266,19 +266,19 @@ void __init smp4d_boot_cpus(void)
/* Free unneeded trap tables */
ClearPageReserved(virt_to_page(trapbase_cpu1));
- set_page_count(virt_to_page(trapbase_cpu1), 1);
+ init_page_count(virt_to_page(trapbase_cpu1));
free_page((unsigned long)trapbase_cpu1);
totalram_pages++;
num_physpages++;
ClearPageReserved(virt_to_page(trapbase_cpu2));
- set_page_count(virt_to_page(trapbase_cpu2), 1);
+ init_page_count(virt_to_page(trapbase_cpu2));
free_page((unsigned long)trapbase_cpu2);
totalram_pages++;
num_physpages++;
ClearPageReserved(virt_to_page(trapbase_cpu3));
- set_page_count(virt_to_page(trapbase_cpu3), 1);
+ init_page_count(virt_to_page(trapbase_cpu3));
free_page((unsigned long)trapbase_cpu3);
totalram_pages++;
num_physpages++;
diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c
index a21f27d10e5..fbbd8a474c4 100644
--- a/arch/sparc/kernel/sun4m_smp.c
+++ b/arch/sparc/kernel/sun4m_smp.c
@@ -233,21 +233,21 @@ void __init smp4m_boot_cpus(void)
/* Free unneeded trap tables */
if (!cpu_isset(i, cpu_present_map)) {
ClearPageReserved(virt_to_page(trapbase_cpu1));
- set_page_count(virt_to_page(trapbase_cpu1), 1);
+ init_page_count(virt_to_page(trapbase_cpu1));
free_page((unsigned long)trapbase_cpu1);
totalram_pages++;
num_physpages++;
}
if (!cpu_isset(2, cpu_present_map)) {
ClearPageReserved(virt_to_page(trapbase_cpu2));
- set_page_count(virt_to_page(trapbase_cpu2), 1);
+ init_page_count(virt_to_page(trapbase_cpu2));
free_page((unsigned long)trapbase_cpu2);
totalram_pages++;
num_physpages++;
}
if (!cpu_isset(3, cpu_present_map)) {
ClearPageReserved(virt_to_page(trapbase_cpu3));
- set_page_count(virt_to_page(trapbase_cpu3), 1);
+ init_page_count(virt_to_page(trapbase_cpu3));
free_page((unsigned long)trapbase_cpu3);
totalram_pages++;
num_physpages++;
diff --git a/arch/sparc/mm/init.c b/arch/sparc/mm/init.c
index c03babaa049..89866973246 100644
--- a/arch/sparc/mm/init.c
+++ b/arch/sparc/mm/init.c
@@ -383,7 +383,7 @@ void map_high_region(unsigned long start_pfn, unsigned long end_pfn)
struct page *page = pfn_to_page(tmp);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalhigh_pages++;
}
@@ -480,7 +480,7 @@ void free_initmem (void)
p = virt_to_page(addr);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
totalram_pages++;
num_physpages++;
@@ -497,7 +497,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
struct page *p = virt_to_page(start);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
num_physpages++;
}
diff --git a/arch/sparc64/mm/hugetlbpage.c b/arch/sparc64/mm/hugetlbpage.c
index a7a24869d04..280dc7958a1 100644
--- a/arch/sparc64/mm/hugetlbpage.c
+++ b/arch/sparc64/mm/hugetlbpage.c
@@ -263,18 +263,6 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
return entry;
}
-/*
- * This function checks for proper alignment of input addr and len parameters.
- */
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
-{
- if (len & ~HPAGE_MASK)
- return -EINVAL;
- if (addr & ~HPAGE_MASK)
- return -EINVAL;
- return 0;
-}
-
struct page *follow_huge_addr(struct mm_struct *mm,
unsigned long address, int write)
{
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index c2b556106fc..2ae143ba50d 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -1461,7 +1461,7 @@ void free_initmem(void)
p = virt_to_page(page);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
num_physpages++;
totalram_pages++;
@@ -1477,7 +1477,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
struct page *p = virt_to_page(start);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
num_physpages++;
totalram_pages++;
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index fa4f915be5c..92cce96b5e2 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -57,7 +57,7 @@ static void setup_highmem(unsigned long highmem_start,
for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){
page = &mem_map[highmem_pfn + i];
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
}
}
@@ -296,7 +296,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
(end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
index 544665e0451..0e65340eee3 100644
--- a/arch/um/kernel/physmem.c
+++ b/arch/um/kernel/physmem.c
@@ -279,7 +279,7 @@ int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
for(i = 0; i < total_pages; i++){
p = &map[i];
- set_page_count(p, 0);
+ memset(p, 0, sizeof(struct page));
SetPageReserved(p);
INIT_LIST_HEAD(&p->lru);
}
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c
index 3080f84bf7b..ee5ce3d3cbc 100644
--- a/arch/x86_64/kernel/time.c
+++ b/arch/x86_64/kernel/time.c
@@ -477,7 +477,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
return IRQ_HANDLED;
}
-static unsigned int cyc2ns_scale;
+static unsigned int cyc2ns_scale __read_mostly;
#define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */
static inline void set_cyc2ns_scale(unsigned long cpu_khz)
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c
index 3496abc8d37..c9dc7e46731 100644
--- a/arch/x86_64/kernel/x8664_ksyms.c
+++ b/arch/x86_64/kernel/x8664_ksyms.c
@@ -124,6 +124,7 @@ extern void * __memcpy(void *,const void *,__kernel_size_t);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(strlen);
+EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(memmove);
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c
index 7af1742aa95..40ed13d263c 100644
--- a/arch/x86_64/mm/init.c
+++ b/arch/x86_64/mm/init.c
@@ -486,7 +486,7 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size)
void online_page(struct page *page)
{
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
__free_page(page);
totalram_pages++;
num_physpages++;
@@ -592,7 +592,7 @@ void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
- set_page_count(virt_to_page(addr), 1);
+ init_page_count(virt_to_page(addr));
memset((void *)(addr & ~(PAGE_SIZE-1)), 0xcc, PAGE_SIZE);
free_page(addr);
totalram_pages++;
@@ -632,7 +632,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page(start);
totalram_pages++;
}
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c
index 35f1f1aab06..531ad21447b 100644
--- a/arch/x86_64/mm/pageattr.c
+++ b/arch/x86_64/mm/pageattr.c
@@ -45,6 +45,13 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot,
pte_t *pbase;
if (!base)
return NULL;
+ /*
+ * page_private is used to track the number of entries in
+ * the page table page that have non-standard attributes.
+ */
+ SetPagePrivate(base);
+ page_private(base) = 0;
+
address = __pa(address);
addr = address & LARGE_PAGE_MASK;
pbase = (pte_t *)page_address(base);
@@ -77,26 +84,12 @@ static inline void flush_map(unsigned long address)
on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
}
-struct deferred_page {
- struct deferred_page *next;
- struct page *fpage;
- unsigned long address;
-};
-static struct deferred_page *df_list; /* protected by init_mm.mmap_sem */
+static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
-static inline void save_page(unsigned long address, struct page *fpage)
+static inline void save_page(struct page *fpage)
{
- struct deferred_page *df;
- df = kmalloc(sizeof(struct deferred_page), GFP_KERNEL);
- if (!df) {
- flush_map(address);
- __free_page(fpage);
- } else {
- df->next = df_list;
- df->fpage = fpage;
- df->address = address;
- df_list = df;
- }
+ fpage->lru.next = (struct list_head *)deferred_pages;
+ deferred_pages = fpage;
}
/*
@@ -138,8 +131,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
set_pte(kpte, pfn_pte(pfn, prot));
} else {
/*
- * split_large_page will take the reference for this change_page_attr
- * on the split page.
+ * split_large_page will take the reference for this
+ * change_page_attr on the split page.
*/
struct page *split;
@@ -151,23 +144,20 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot,
set_pte(kpte,mk_pte(split, ref_prot2));
kpte_page = split;
}
- get_page(kpte_page);
+ page_private(kpte_page)++;
} else if ((kpte_flags & _PAGE_PSE) == 0) {
set_pte(kpte, pfn_pte(pfn, ref_prot));
- __put_page(kpte_page);
+ BUG_ON(page_private(kpte_page) == 0);
+ page_private(kpte_page)--;
} else
BUG();
/* on x86-64 the direct mapping set at boot is not using 4k pages */
BUG_ON(PageReserved(kpte_page));
- switch (page_count(kpte_page)) {
- case 1:
- save_page(address, kpte_page);
+ if (page_private(kpte_page) == 0) {
+ save_page(kpte_page);
revert_page(address, ref_prot);
- break;
- case 0:
- BUG(); /* memleak and failed 2M page regeneration */
}
return 0;
}
@@ -220,17 +210,18 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot)
void global_flush_tlb(void)
{
- struct deferred_page *df, *next_df;
+ struct page *dpage;
down_read(&init_mm.mmap_sem);
- df = xchg(&df_list, NULL);
+ dpage = xchg(&deferred_pages, NULL);
up_read(&init_mm.mmap_sem);
- flush_map((df && !df->next) ? df->address : 0);
- for (; df; df = next_df) {
- next_df = df->next;
- if (df->fpage)
- __free_page(df->fpage);
- kfree(df);
+
+ flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
+ while (dpage) {
+ struct page *tmp = dpage;
+ dpage = (struct page *)dpage->lru.next;
+ ClearPagePrivate(tmp);
+ __free_page(tmp);
}
}
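
Two ideas drive the pageattr.c rework above, and they are worth stating together: page_private() of a split kernel page-table page counts how many of its 4k PTEs carry non-standard attributes, and pages waiting for the global TLB flush are chained through their otherwise unused lru.next field instead of a kmalloc'd list. A condensed sketch of the bookkeeping step (function name hypothetical; save_page() as above):

/* Sketch: per-split-page attribute accounting as in __change_page_attr() */
static void account_pte_attr(struct page *kpte_page, int becomes_nonstd)
{
        if (becomes_nonstd) {
                page_private(kpte_page)++;      /* one more PTE differs */
        } else {
                BUG_ON(page_private(kpte_page) == 0);
                page_private(kpte_page)--;      /* PTE back to default prot */
        }

        /* all PTEs standard again: queue the page, restore the 2M mapping */
        if (page_private(kpte_page) == 0)
                save_page(kpte_page);
}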
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 5a91d6c9e66..e1be4235f36 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -272,7 +272,7 @@ free_reserved_mem(void *start, void *end)
{
for (; start < end; start += PAGE_SIZE) {
ClearPageReserved(virt_to_page(start));
- set_page_count(virt_to_page(start), 1);
+ init_page_count(virt_to_page(start));
free_page((unsigned long)start);
totalram_pages++;
}
diff --git a/arch/xtensa/mm/pgtable.c b/arch/xtensa/mm/pgtable.c
index e5e119c820e..7d28914d11c 100644
--- a/arch/xtensa/mm/pgtable.c
+++ b/arch/xtensa/mm/pgtable.c
@@ -14,25 +14,21 @@
pte_t* pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte, p;
+ pte_t *pte = NULL, *p;
int color = ADDR_COLOR(address);
int i;
p = (pte_t*) __get_free_pages(GFP_KERNEL|__GFP_REPEAT, COLOR_ORDER);
if (likely(p)) {
- struct page *page;
-
- for (i = 0; i < COLOR_SIZE; i++, p++) {
- page = virt_to_page(pte);
-
- set_page_count(page, 1);
- ClearPageCompound(page);
+ split_page(virt_to_page(p), COLOR_ORDER);
+ for (i = 0; i < COLOR_SIZE; i++) {
if (ADDR_COLOR(p) == color)
pte = p;
else
free_page(p);
+ p += PTRS_PER_PTE;
}
clear_page(pte);
}
@@ -49,20 +45,20 @@ int flush;
struct page* pte_alloc_one(struct mm_struct *mm, unsigned long address)
{
- struct page *page, p;
+ struct page *page = NULL, *p;
int color = ADDR_COLOR(address);
p = alloc_pages(GFP_KERNEL | __GFP_REPEAT, PTE_ORDER);
if (likely(p)) {
- for (i = 0; i < PAGE_ORDER; i++) {
- set_page_count(p, 1);
- ClearPageCompound(p);
+ split_page(p, COLOR_ORDER);
- if (PADDR_COLOR(page_address(pg)) == color)
+ for (i = 0; i < PAGE_ORDER; i++) {
+ if (PADDR_COLOR(page_address(p)) == color)
page = p;
else
- free_page(p);
+ __free_page(p);
+ p++;
}
clear_highpage(page);
}
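
Both xtensa allocators above follow the same color-selection pattern: over-allocate a group of pages, split_page() it, keep the one whose cache color matches the faulting address, and free the rest. The selection step in isolation (sketch; COLOR_SIZE and PADDR_COLOR as in the xtensa headers):

/* Sketch: keep the matching-color page from a split group, free the rest */
static struct page *pick_colored_page(struct page *group, int color)
{
        struct page *match = NULL;
        int i;

        for (i = 0; i < COLOR_SIZE; i++, group++) {
                if (PADDR_COLOR(page_address(group)) == color)
                        match = group;          /* the one we keep */
                else
                        __free_page(group);
        }
        return match;
}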
diff --git a/drivers/char/snsc.h b/drivers/char/snsc.h
index a9efc13cc85..8a98169b60c 100644
--- a/drivers/char/snsc.h
+++ b/drivers/char/snsc.h
@@ -5,7 +5,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
*/
/*
@@ -70,6 +70,9 @@ struct sysctl_data_s {
#define EV_CLASS_TEST_WARNING 0x6000ul
#define EV_CLASS_PWRD_NOTIFY 0x8000ul
+/* ENV class codes */
+#define ENV_PWRDN_PEND 0x4101ul
+
#define EV_SEVERITY_POWER_STABLE 0x0000ul
#define EV_SEVERITY_POWER_LOW_WARNING 0x0100ul
#define EV_SEVERITY_POWER_HIGH_WARNING 0x0200ul
diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c
index baaa365285f..a4fa507eed9 100644
--- a/drivers/char/snsc_event.c
+++ b/drivers/char/snsc_event.c
@@ -5,7 +5,7 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
+ * Copyright (C) 2004-2006 Silicon Graphics, Inc. All rights reserved.
*/
/*
@@ -187,7 +187,8 @@ scdrv_event_severity(int code)
static void
scdrv_dispatch_event(char *event, int len)
{
- int code, esp_code, src;
+ static int snsc_shutting_down = 0;
+ int code, esp_code, src, class;
char desc[CHUNKSIZE];
char *severity;
@@ -199,9 +200,25 @@ scdrv_dispatch_event(char *event, int len)
/* how urgent is the message? */
severity = scdrv_event_severity(code);
- if ((code & EV_CLASS_MASK) == EV_CLASS_PWRD_NOTIFY) {
+ class = (code & EV_CLASS_MASK);
+
+ if (class == EV_CLASS_PWRD_NOTIFY || code == ENV_PWRDN_PEND) {
struct task_struct *p;
+ if (snsc_shutting_down)
+ return;
+
+ snsc_shutting_down = 1;
+
+ /* give a message for each type of event */
+ if (class == EV_CLASS_PWRD_NOTIFY)
+ printk(KERN_NOTICE "Power off indication received."
+ " Sending SIGPWR to init...\n");
+ else if (code == ENV_PWRDN_PEND)
+ printk(KERN_CRIT "WARNING: Shutting down the system"
+ " due to a critical environmental condition."
+ " Sending SIGPWR to init...\n");
+
/* give a SIGPWR signal to init proc */
/* first find init's task */
@@ -210,12 +227,11 @@ scdrv_dispatch_event(char *event, int len)
if (p->pid == 1)
break;
}
- if (p) { /* we found init's task */
- printk(KERN_EMERG "Power off indication received. Initiating power fail sequence...\n");
+ if (p) {
force_sig(SIGPWR, p);
- } else { /* failed to find init's task - just give message(s) */
- printk(KERN_WARNING "Failed to find init proc to handle power off!\n");
- printk("%s|$(0x%x)%s\n", severity, esp_code, desc);
+ } else {
+ printk(KERN_ERR "Failed to signal init!\n");
+ snsc_shutting_down = 0; /* so we can try again */
}
read_unlock(&tasklist_lock);
} else {
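
Compressed out of the driver context, the signalling pattern above looks like this (sketch; the pid-1 walk matches the 2.6.16-era tasklist API used in the hunk):

/* Sketch: deliver SIGPWR to init, as scdrv_dispatch_event() does */
static void signal_init_sigpwr(void)
{
        struct task_struct *p;

        read_lock(&tasklist_lock);
        for_each_process(p) {
                if (p->pid == 1) {              /* init */
                        force_sig(SIGPWR, p);
                        break;
                }
        }
        read_unlock(&tasklist_lock);
}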
diff --git a/drivers/char/tb0219.c b/drivers/char/tb0219.c
index ac2a297ce37..a80c8321087 100644
--- a/drivers/char/tb0219.c
+++ b/drivers/char/tb0219.c
@@ -283,7 +283,7 @@ static void tb0219_pci_irq_init(void)
vr41xx_set_irq_level(TB0219_PCI_SLOT3_PIN, IRQ_LEVEL_LOW);
}
-static int tb0219_probe(struct platform_device *dev)
+static int __devinit tb0219_probe(struct platform_device *dev)
{
int retval;
@@ -319,7 +319,7 @@ static int tb0219_probe(struct platform_device *dev)
return 0;
}
-static int tb0219_remove(struct platform_device *dev)
+static int __devexit tb0219_remove(struct platform_device *dev)
{
_machine_restart = old_machine_restart;
@@ -335,19 +335,26 @@ static struct platform_device *tb0219_platform_device;
static struct platform_driver tb0219_device_driver = {
.probe = tb0219_probe,
- .remove = tb0219_remove,
+ .remove = __devexit_p(tb0219_remove),
.driver = {
.name = "TB0219",
+ .owner = THIS_MODULE,
},
};
-static int __devinit tanbac_tb0219_init(void)
+static int __init tanbac_tb0219_init(void)
{
int retval;
- tb0219_platform_device = platform_device_register_simple("TB0219", -1, NULL, 0);
- if (IS_ERR(tb0219_platform_device))
- return PTR_ERR(tb0219_platform_device);
+ tb0219_platform_device = platform_device_alloc("TB0219", -1);
+ if (!tb0219_platform_device)
+ return -ENOMEM;
+
+ retval = platform_device_add(tb0219_platform_device);
+ if (retval < 0) {
+ platform_device_put(tb0219_platform_device);
+ return retval;
+ }
retval = platform_driver_register(&tb0219_device_driver);
if (retval < 0)
@@ -356,10 +363,9 @@ static int __devinit tanbac_tb0219_init(void)
return retval;
}
-static void __devexit tanbac_tb0219_exit(void)
+static void __exit tanbac_tb0219_exit(void)
{
platform_driver_unregister(&tb0219_device_driver);
-
platform_device_unregister(tb0219_platform_device);
}
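
This conversion, and the vr41xx and dcdbas conversions below, all trade platform_device_register_simple() for the alloc/add pair so that a failed add can be unwound by putting the reference alloc took. The generic shape of the idiom, as a sketch (helper name hypothetical):

/* Sketch: the platform_device_alloc/add/put idiom these drivers adopt */
static struct platform_device *register_pdev(const char *name)
{
        struct platform_device *pdev;
        int error;

        pdev = platform_device_alloc(name, -1);
        if (!pdev)
                return ERR_PTR(-ENOMEM);

        error = platform_device_add(pdev);
        if (error) {
                platform_device_put(pdev);      /* drop alloc's reference */
                return ERR_PTR(error);
        }
        return pdev;
}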
diff --git a/drivers/char/vr41xx_giu.c b/drivers/char/vr41xx_giu.c
index 2267c7b8179..05e6e814d86 100644
--- a/drivers/char/vr41xx_giu.c
+++ b/drivers/char/vr41xx_giu.c
@@ -613,7 +613,7 @@ static struct file_operations gpio_fops = {
.release = gpio_release,
};
-static int giu_probe(struct platform_device *dev)
+static int __devinit giu_probe(struct platform_device *dev)
{
unsigned long start, size, flags = 0;
unsigned int nr_pins = 0;
@@ -697,7 +697,7 @@ static int giu_probe(struct platform_device *dev)
return cascade_irq(GIUINT_IRQ, giu_get_irq);
}
-static int giu_remove(struct platform_device *dev)
+static int __devexit giu_remove(struct platform_device *dev)
{
iounmap(giu_base);
@@ -712,9 +712,10 @@ static struct platform_device *giu_platform_device;
static struct platform_driver giu_device_driver = {
.probe = giu_probe,
- .remove = giu_remove,
+ .remove = __devexit_p(giu_remove),
.driver = {
.name = "GIU",
+ .owner = THIS_MODULE,
},
};
@@ -722,9 +723,15 @@ static int __init vr41xx_giu_init(void)
{
int retval;
- giu_platform_device = platform_device_register_simple("GIU", -1, NULL, 0);
- if (IS_ERR(giu_platform_device))
- return PTR_ERR(giu_platform_device);
+ giu_platform_device = platform_device_alloc("GIU", -1);
+ if (!giu_platform_device)
+ return -ENOMEM;
+
+ retval = platform_device_add(giu_platform_device);
+ if (retval < 0) {
+ platform_device_put(giu_platform_device);
+ return retval;
+ }
retval = platform_driver_register(&giu_device_driver);
if (retval < 0)
diff --git a/drivers/char/vr41xx_rtc.c b/drivers/char/vr41xx_rtc.c
index bc1b4a15212..b109d9a502d 100644
--- a/drivers/char/vr41xx_rtc.c
+++ b/drivers/char/vr41xx_rtc.c
@@ -558,7 +558,7 @@ static struct miscdevice rtc_miscdevice = {
.fops = &rtc_fops,
};
-static int rtc_probe(struct platform_device *pdev)
+static int __devinit rtc_probe(struct platform_device *pdev)
{
unsigned int irq;
int retval;
@@ -631,7 +631,7 @@ static int rtc_probe(struct platform_device *pdev)
return 0;
}
-static int rtc_remove(struct platform_device *dev)
+static int __devexit rtc_remove(struct platform_device *dev)
{
int retval;
@@ -653,13 +653,14 @@ static struct platform_device *rtc_platform_device;
static struct platform_driver rtc_device_driver = {
.probe = rtc_probe,
- .remove = rtc_remove,
+ .remove = __devexit_p(rtc_remove),
.driver = {
.name = rtc_name,
+ .owner = THIS_MODULE,
},
};
-static int __devinit vr41xx_rtc_init(void)
+static int __init vr41xx_rtc_init(void)
{
int retval;
@@ -684,10 +685,20 @@ static int __devinit vr41xx_rtc_init(void)
break;
}
- rtc_platform_device = platform_device_register_simple("RTC", -1,
- rtc_resource, ARRAY_SIZE(rtc_resource));
- if (IS_ERR(rtc_platform_device))
- return PTR_ERR(rtc_platform_device);
+ rtc_platform_device = platform_device_alloc("RTC", -1);
+ if (!rtc_platform_device)
+ return -ENOMEM;
+
+ retval = platform_device_add_resources(rtc_platform_device,
+ rtc_resource, ARRAY_SIZE(rtc_resource));
+
+ if (retval == 0)
+ retval = platform_device_add(rtc_platform_device);
+
+ if (retval < 0) {
+ platform_device_put(rtc_platform_device);
+ return retval;
+ }
retval = platform_driver_register(&rtc_device_driver);
if (retval < 0)
@@ -696,10 +707,9 @@ static int __devinit vr41xx_rtc_init(void)
return retval;
}
-static void __devexit vr41xx_rtc_exit(void)
+static void __exit vr41xx_rtc_exit(void)
{
platform_driver_unregister(&rtc_device_driver);
-
platform_device_unregister(rtc_platform_device);
}
diff --git a/drivers/char/watchdog/mv64x60_wdt.c b/drivers/char/watchdog/mv64x60_wdt.c
index 00d9ef04a36..f1b9cf89f15 100644
--- a/drivers/char/watchdog/mv64x60_wdt.c
+++ b/drivers/char/watchdog/mv64x60_wdt.c
@@ -228,15 +228,25 @@ static int __init mv64x60_wdt_init(void)
printk(KERN_INFO "MV64x60 watchdog driver\n");
- mv64x60_wdt_dev = platform_device_register_simple(MV64x60_WDT_NAME,
- -1, NULL, 0);
- if (IS_ERR(mv64x60_wdt_dev)) {
- ret = PTR_ERR(mv64x60_wdt_dev);
+ mv64x60_wdt_dev = platform_device_alloc(MV64x60_WDT_NAME, -1);
+ if (!mv64x60_wdt_dev) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = platform_device_add(mv64x60_wdt_dev);
+ if (ret) {
+ platform_device_put(mv64x60_wdt_dev);
goto out;
}
ret = platform_driver_register(&mv64x60_wdt_driver);
- out:
+ if (ret) {
+ platform_device_unregister(mv64x60_wdt_dev);
+ goto out;
+ }
+
+ out:
return ret;
}
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index 4652512f7d1..3a4e5c5b4e1 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -530,30 +530,27 @@ static DCDBAS_DEV_ATTR_RW(host_control_action);
static DCDBAS_DEV_ATTR_RW(host_control_smi_type);
static DCDBAS_DEV_ATTR_RW(host_control_on_shutdown);
-static struct device_attribute *dcdbas_dev_attrs[] = {
- &dev_attr_smi_data_buf_size,
- &dev_attr_smi_data_buf_phys_addr,
- &dev_attr_smi_request,
- &dev_attr_host_control_action,
- &dev_attr_host_control_smi_type,
- &dev_attr_host_control_on_shutdown,
+static struct attribute *dcdbas_dev_attrs[] = {
+ &dev_attr_smi_data_buf_size.attr,
+ &dev_attr_smi_data_buf_phys_addr.attr,
+ &dev_attr_smi_request.attr,
+ &dev_attr_host_control_action.attr,
+ &dev_attr_host_control_smi_type.attr,
+ &dev_attr_host_control_on_shutdown.attr,
NULL
};
-/**
- * dcdbas_init: initialize driver
- */
-static int __init dcdbas_init(void)
+static struct attribute_group dcdbas_attr_group = {
+ .attrs = dcdbas_dev_attrs,
+};
+
+static int __devinit dcdbas_probe(struct platform_device *dev)
{
- int i;
+ int i, error;
host_control_action = HC_ACTION_NONE;
host_control_smi_type = HC_SMITYPE_NONE;
- dcdbas_pdev = platform_device_register_simple(DRIVER_NAME, -1, NULL, 0);
- if (IS_ERR(dcdbas_pdev))
- return PTR_ERR(dcdbas_pdev);
-
/*
* BIOS SMI calls require buffer addresses be in 32-bit address space.
* This is done by setting the DMA mask below.
@@ -561,19 +558,79 @@ static int __init dcdbas_init(void)
dcdbas_pdev->dev.coherent_dma_mask = DMA_32BIT_MASK;
dcdbas_pdev->dev.dma_mask = &dcdbas_pdev->dev.coherent_dma_mask;
+ error = sysfs_create_group(&dev->dev.kobj, &dcdbas_attr_group);
+ if (error)
+ return error;
+
+ for (i = 0; dcdbas_bin_attrs[i]; i++) {
+ error = sysfs_create_bin_file(&dev->dev.kobj,
+ dcdbas_bin_attrs[i]);
+ if (error) {
+ while (--i >= 0)
+ sysfs_remove_bin_file(&dev->dev.kobj,
+ dcdbas_bin_attrs[i]);
+ sysfs_remove_group(&dev->dev.kobj, &dcdbas_attr_group);
+ return error;
+ }
+ }
+
register_reboot_notifier(&dcdbas_reboot_nb);
+ dev_info(&dev->dev, "%s (version %s)\n",
+ DRIVER_DESCRIPTION, DRIVER_VERSION);
+
+ return 0;
+}
+
+static int __devexit dcdbas_remove(struct platform_device *dev)
+{
+ int i;
+
+ unregister_reboot_notifier(&dcdbas_reboot_nb);
for (i = 0; dcdbas_bin_attrs[i]; i++)
- sysfs_create_bin_file(&dcdbas_pdev->dev.kobj,
- dcdbas_bin_attrs[i]);
+ sysfs_remove_bin_file(&dev->dev.kobj, dcdbas_bin_attrs[i]);
+ sysfs_remove_group(&dev->dev.kobj, &dcdbas_attr_group);
- for (i = 0; dcdbas_dev_attrs[i]; i++)
- device_create_file(&dcdbas_pdev->dev, dcdbas_dev_attrs[i]);
+ return 0;
+}
- dev_info(&dcdbas_pdev->dev, "%s (version %s)\n",
- DRIVER_DESCRIPTION, DRIVER_VERSION);
+static struct platform_driver dcdbas_driver = {
+ .driver = {
+ .name = DRIVER_NAME,
+ .owner = THIS_MODULE,
+ },
+ .probe = dcdbas_probe,
+ .remove = __devexit_p(dcdbas_remove),
+};
+
+/**
+ * dcdbas_init: initialize driver
+ */
+static int __init dcdbas_init(void)
+{
+ int error;
+
+ error = platform_driver_register(&dcdbas_driver);
+ if (error)
+ return error;
+
+ dcdbas_pdev = platform_device_alloc(DRIVER_NAME, -1);
+ if (!dcdbas_pdev) {
+ error = -ENOMEM;
+ goto err_unregister_driver;
+ }
+
+ error = platform_device_add(dcdbas_pdev);
+ if (error)
+ goto err_free_device;
return 0;
+
+ err_free_device:
+ platform_device_put(dcdbas_pdev);
+ err_unregister_driver:
+ platform_driver_unregister(&dcdbas_driver);
+ return error;
}
/**
@@ -588,6 +645,15 @@ static void __exit dcdbas_exit(void)
unregister_reboot_notifier(&dcdbas_reboot_nb);
smi_data_buf_free();
platform_device_unregister(dcdbas_pdev);
+ platform_driver_unregister(&dcdbas_driver);
+
+ /*
+ * We have to free the buffer here instead of in dcdbas_remove
+ * because only in the module exit function can we be sure
+ * that all sysfs attributes belonging to this module have
+ * been released.
+ */
+ smi_data_buf_free();
}
module_init(dcdbas_init);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 88d60202b9d..26b08ee425c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -533,30 +533,35 @@ static void __clone_and_map(struct clone_info *ci)
} else {
/*
- * Create two copy bios to deal with io that has
- * been split across a target.
+ * Handle a bvec that must be split between two or more targets.
*/
struct bio_vec *bv = bio->bi_io_vec + ci->idx;
+ sector_t remaining = to_sector(bv->bv_len);
+ unsigned int offset = 0;
- clone = split_bvec(bio, ci->sector, ci->idx,
- bv->bv_offset, max);
- __map_bio(ti, clone, tio);
+ do {
+ if (offset) {
+ ti = dm_table_find_target(ci->map, ci->sector);
+ max = max_io_len(ci->md, ci->sector, ti);
- ci->sector += max;
- ci->sector_count -= max;
- ti = dm_table_find_target(ci->map, ci->sector);
-
- len = to_sector(bv->bv_len) - max;
- clone = split_bvec(bio, ci->sector, ci->idx,
- bv->bv_offset + to_bytes(max), len);
- tio = alloc_tio(ci->md);
- tio->io = ci->io;
- tio->ti = ti;
- memset(&tio->info, 0, sizeof(tio->info));
- __map_bio(ti, clone, tio);
+ tio = alloc_tio(ci->md);
+ tio->io = ci->io;
+ tio->ti = ti;
+ memset(&tio->info, 0, sizeof(tio->info));
+ }
+
+ len = min(remaining, max);
+
+ clone = split_bvec(bio, ci->sector, ci->idx,
+ bv->bv_offset + offset, len);
+
+ __map_bio(ti, clone, tio);
+
+ ci->sector += len;
+ ci->sector_count -= len;
+ offset += to_bytes(len);
+ } while (remaining -= len);
- ci->sector += len;
- ci->sector_count -= len;
ci->idx++;
}
}
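
The new do/while generalizes the old two-clone special case: one bvec may now cross any number of target boundaries. The arithmetic alone, with equal-sized targets assumed purely for illustration (hypothetical helper; the driver re-looks-up the target per iteration instead):

/* Sketch: count the pieces a bvec of 'len' sectors splits into when
 * every target spans 'target_len' sectors, starting at 'sector'. */
static unsigned int split_count(unsigned long long sector,
                                unsigned long long len,
                                unsigned long long target_len)
{
        unsigned int pieces = 0;

        while (len) {
                /* room left in the target containing 'sector' */
                unsigned long long max = target_len - (sector % target_len);
                unsigned long long chunk = len < max ? len : max;

                sector += chunk;
                len -= chunk;
                pieces++;
        }
        return pieces;
}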
diff --git a/drivers/media/dvb/bt8xx/Makefile b/drivers/media/dvb/bt8xx/Makefile
index 9d197efb481..d188e4c670b 100644
--- a/drivers/media/dvb/bt8xx/Makefile
+++ b/drivers/media/dvb/bt8xx/Makefile
@@ -1,3 +1,3 @@
obj-$(CONFIG_DVB_BT8XX) += bt878.o dvb-bt8xx.o dst.o dst_ca.o
-EXTRA_CFLAGS = -Idrivers/media/dvb/dvb-core/ -Idrivers/media/video/bt8xx -Idrivers/media/dvb/frontends
+EXTRA_CFLAGS = -Idrivers/media/dvb/dvb-core/ -Idrivers/media/video -Idrivers/media/dvb/frontends
diff --git a/drivers/net/mv643xx_eth.h b/drivers/net/mv643xx_eth.h
index 7754d1974b9..4262c1da6d4 100644
--- a/drivers/net/mv643xx_eth.h
+++ b/drivers/net/mv643xx_eth.h
@@ -42,13 +42,23 @@
#define MAX_DESCS_PER_SKB 1
#endif
+/*
+ * The MV643XX HW requires 8-byte alignment. However, when I/O
+ * is non-cache-coherent, we need to ensure that the I/O buffers
+ * we use don't share cache lines with other data.
+ */
+#if defined(CONFIG_DMA_NONCOHERENT) || defined(CONFIG_NOT_COHERENT_CACHE)
+#define ETH_DMA_ALIGN L1_CACHE_BYTES
+#else
+#define ETH_DMA_ALIGN 8
+#endif
+
#define ETH_VLAN_HLEN 4
#define ETH_FCS_LEN 4
-#define ETH_DMA_ALIGN 8 /* hw requires 8-byte alignment */
-#define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */
+#define ETH_HW_IP_ALIGN 2 /* hw aligns IP header */
#define ETH_WRAPPER_LEN (ETH_HW_IP_ALIGN + ETH_HLEN + \
- ETH_VLAN_HLEN + ETH_FCS_LEN)
-#define ETH_RX_SKB_SIZE ((dev->mtu + ETH_WRAPPER_LEN + 7) & ~0x7)
+ ETH_VLAN_HLEN + ETH_FCS_LEN)
+#define ETH_RX_SKB_SIZE (dev->mtu + ETH_WRAPPER_LEN + ETH_DMA_ALIGN)
#define ETH_RX_QUEUES_ENABLED (1 << 0) /* use only Q0 for receive */
#define ETH_TX_QUEUES_ENABLED (1 << 0) /* use only Q0 for transmit */
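
With ETH_RX_SKB_SIZE padded by a full ETH_DMA_ALIGN rather than rounded up to 8 bytes, a receive buffer can be slid forward so its data pointer starts on a cache-line boundary on non-coherent systems. A hedged sketch of that alignment step (the driver's rx-refill code is not part of this hunk; function name hypothetical):

/* Sketch: push skb->data up to the next ETH_DMA_ALIGN boundary */
static void mv643xx_align_rx_skb(struct sk_buff *skb)
{
        unsigned long misalign =
                (unsigned long)skb->data & (ETH_DMA_ALIGN - 1);

        if (misalign)
                skb_reserve(skb, ETH_DMA_ALIGN - misalign);
}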
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 7e900572eaf..9595f74da93 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -22,12 +22,12 @@
*************************************************************************/
#define DRV_NAME "pcnet32"
-#define DRV_VERSION "1.31c"
-#define DRV_RELDATE "01.Nov.2005"
+#define DRV_VERSION "1.32"
+#define DRV_RELDATE "18.Mar.2006"
#define PFX DRV_NAME ": "
-static const char * const version =
-DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n";
+static const char *const version =
+ DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n";
#include <linux/module.h>
#include <linux/kernel.h>
@@ -58,18 +58,23 @@ DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE " tsbogend@alpha.franken.de\n";
* PCI device identifiers for "new style" Linux PCI Device Drivers
*/
static struct pci_device_id pcnet32_pci_tbl[] = {
- { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
- { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 },
- /*
- * Adapters that were sold with IBM's RS/6000 or pSeries hardware have
- * the incorrect vendor id.
- */
- { PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_AMD_LANCE, PCI_ANY_ID, PCI_ANY_ID,
- PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, 0 },
- { 0, }
+ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE_HOME,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+ { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_LANCE,
+ PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
+
+ /*
+ * Adapters that were sold with IBM's RS/6000 or pSeries hardware have
+ * the incorrect vendor id.
+ */
+ { PCI_VENDOR_ID_TRIDENT, PCI_DEVICE_ID_AMD_LANCE,
+ PCI_ANY_ID, PCI_ANY_ID,
+ PCI_CLASS_NETWORK_ETHERNET << 8, 0xffff00, 0},
+
+ { } /* terminate list */
};
-MODULE_DEVICE_TABLE (pci, pcnet32_pci_tbl);
+MODULE_DEVICE_TABLE(pci, pcnet32_pci_tbl);
static int cards_found;
@@ -77,13 +82,11 @@ static int cards_found;
* VLB I/O addresses
*/
static unsigned int pcnet32_portlist[] __initdata =
- { 0x300, 0x320, 0x340, 0x360, 0 };
-
-
+ { 0x300, 0x320, 0x340, 0x360, 0 };
static int pcnet32_debug = 0;
-static int tx_start = 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */
-static int pcnet32vlb; /* check for VLB cards ? */
+static int tx_start = 1; /* Mapping -- 0:20, 1:64, 2:128, 3:~220 (depends on chip vers) */
+static int pcnet32vlb; /* check for VLB cards ? */
static struct net_device *pcnet32_dev;
@@ -110,32 +113,34 @@ static int rx_copybreak = 200;
* to internal options
*/
static const unsigned char options_mapping[] = {
- PCNET32_PORT_ASEL, /* 0 Auto-select */
- PCNET32_PORT_AUI, /* 1 BNC/AUI */
- PCNET32_PORT_AUI, /* 2 AUI/BNC */
- PCNET32_PORT_ASEL, /* 3 not supported */
- PCNET32_PORT_10BT | PCNET32_PORT_FD, /* 4 10baseT-FD */
- PCNET32_PORT_ASEL, /* 5 not supported */
- PCNET32_PORT_ASEL, /* 6 not supported */
- PCNET32_PORT_ASEL, /* 7 not supported */
- PCNET32_PORT_ASEL, /* 8 not supported */
- PCNET32_PORT_MII, /* 9 MII 10baseT */
- PCNET32_PORT_MII | PCNET32_PORT_FD, /* 10 MII 10baseT-FD */
- PCNET32_PORT_MII, /* 11 MII (autosel) */
- PCNET32_PORT_10BT, /* 12 10BaseT */
- PCNET32_PORT_MII | PCNET32_PORT_100, /* 13 MII 100BaseTx */
- PCNET32_PORT_MII | PCNET32_PORT_100 | PCNET32_PORT_FD, /* 14 MII 100BaseTx-FD */
- PCNET32_PORT_ASEL /* 15 not supported */
+ PCNET32_PORT_ASEL, /* 0 Auto-select */
+ PCNET32_PORT_AUI, /* 1 BNC/AUI */
+ PCNET32_PORT_AUI, /* 2 AUI/BNC */
+ PCNET32_PORT_ASEL, /* 3 not supported */
+ PCNET32_PORT_10BT | PCNET32_PORT_FD, /* 4 10baseT-FD */
+ PCNET32_PORT_ASEL, /* 5 not supported */
+ PCNET32_PORT_ASEL, /* 6 not supported */
+ PCNET32_PORT_ASEL, /* 7 not supported */
+ PCNET32_PORT_ASEL, /* 8 not supported */
+ PCNET32_PORT_MII, /* 9 MII 10baseT */
+ PCNET32_PORT_MII | PCNET32_PORT_FD, /* 10 MII 10baseT-FD */
+ PCNET32_PORT_MII, /* 11 MII (autosel) */
+ PCNET32_PORT_10BT, /* 12 10BaseT */
+ PCNET32_PORT_MII | PCNET32_PORT_100, /* 13 MII 100BaseTx */
+ /* 14 MII 100BaseTx-FD */
+ PCNET32_PORT_MII | PCNET32_PORT_100 | PCNET32_PORT_FD,
+ PCNET32_PORT_ASEL /* 15 not supported */
};
static const char pcnet32_gstrings_test[][ETH_GSTRING_LEN] = {
- "Loopback test (offline)"
+ "Loopback test (offline)"
};
+
#define PCNET32_TEST_LEN (sizeof(pcnet32_gstrings_test) / ETH_GSTRING_LEN)
-#define PCNET32_NUM_REGS 168
+#define PCNET32_NUM_REGS 136
-#define MAX_UNITS 8 /* More are supported, limit only on options */
+#define MAX_UNITS 8 /* More are supported, limit only on options */
static int options[MAX_UNITS];
static int full_duplex[MAX_UNITS];
static int homepna[MAX_UNITS];
@@ -151,124 +156,6 @@ static int homepna[MAX_UNITS];
*/
/*
- * History:
- * v0.01: Initial version
- * only tested on Alpha Noname Board
- * v0.02: changed IRQ handling for new interrupt scheme (dev_id)
- * tested on a ASUS SP3G
- * v0.10: fixed an odd problem with the 79C974 in a Compaq Deskpro XL
- * looks like the 974 doesn't like stopping and restarting in a
- * short period of time; now we do a reinit of the lance; the
- * bug was triggered by doing ifconfig eth0 <ip> broadcast <addr>
- * and hangs the machine (thanks to Klaus Liedl for debugging)
- * v0.12: by suggestion from Donald Becker: Renamed driver to pcnet32,
- * made it standalone (no need for lance.c)
- * v0.13: added additional PCI detecting for special PCI devices (Compaq)
- * v0.14: stripped down additional PCI probe (thanks to David C Niemi
- * and sveneric@xs4all.nl for testing this on their Compaq boxes)
- * v0.15: added 79C965 (VLB) probe
- * added interrupt sharing for PCI chips
- * v0.16: fixed set_multicast_list on Alpha machines
- * v0.17: removed hack from dev.c; now pcnet32 uses ethif_probe in Space.c
- * v0.19: changed setting of autoselect bit
- * v0.20: removed additional Compaq PCI probe; there is now a working one
- * in arch/i386/bios32.c
- * v0.21: added endian conversion for ppc, from work by cort@cs.nmt.edu
- * v0.22: added printing of status to ring dump
- * v0.23: changed enet_statistics to net_devive_stats
- * v0.90: added multicast filter
- * added module support
- * changed irq probe to new style
- * added PCnetFast chip id
- * added fix for receive stalls with Intel saturn chipsets
- * added in-place rx skbs like in the tulip driver
- * minor cleanups
- * v0.91: added PCnetFast+ chip id
- * back port to 2.0.x
- * v1.00: added some stuff from Donald Becker's 2.0.34 version
- * added support for byte counters in net_dev_stats
- * v1.01: do ring dumps, only when debugging the driver
- * increased the transmit timeout
- * v1.02: fixed memory leak in pcnet32_init_ring()
- * v1.10: workaround for stopped transmitter
- * added port selection for modules
- * detect special T1/E1 WAN card and setup port selection
- * v1.11: fixed wrong checking of Tx errors
- * v1.20: added check of return value kmalloc (cpeterso@cs.washington.edu)
- * added save original kmalloc addr for freeing (mcr@solidum.com)
- * added support for PCnetHome chip (joe@MIT.EDU)
- * rewritten PCI card detection
- * added dwio mode to get driver working on some PPC machines
- * v1.21: added mii selection and mii ioctl
- * v1.22: changed pci scanning code to make PPC people happy
- * fixed switching to 32bit mode in pcnet32_open() (thanks
- * to Michael Richard <mcr@solidum.com> for noticing this one)
- * added sub vendor/device id matching (thanks again to
- * Michael Richard <mcr@solidum.com>)
- * added chip id for 79c973/975 (thanks to Zach Brown <zab@zabbo.net>)
- * v1.23 fixed small bug, when manual selecting MII speed/duplex
- * v1.24 Applied Thomas' patch to use TxStartPoint and thus decrease TxFIFO
- * underflows. Added tx_start_pt module parameter. Increased
- * TX_RING_SIZE from 16 to 32. Added #ifdef'd code to use DXSUFLO
- * for FAST[+] chipsets. <kaf@fc.hp.com>
- * v1.24ac Added SMP spinlocking - Alan Cox <alan@redhat.com>
- * v1.25kf Added No Interrupt on successful Tx for some Tx's <kaf@fc.hp.com>
- * v1.26 Converted to pci_alloc_consistent, Jamey Hicks / George France
- * <jamey@crl.dec.com>
- * - Fixed a few bugs, related to running the controller in 32bit mode.
- * 23 Oct, 2000. Carsten Langgaard, carstenl@mips.com
- * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved.
- * v1.26p Fix oops on rmmod+insmod; plug i/o resource leak - Paul Gortmaker
- * v1.27 improved CSR/PROM address detection, lots of cleanups,
- * new pcnet32vlb module option, HP-PARISC support,
- * added module parameter descriptions,
- * initial ethtool support - Helge Deller <deller@gmx.de>
- * v1.27a Sun Feb 10 2002 Go Taniguchi <go@turbolinux.co.jp>
- * use alloc_etherdev and register_netdev
- * fix pci probe not incrementing cards_found
- * FD auto negotiate error workaround for xSeries250
- * clean up and use new mii module
- * v1.27b Sep 30 2002 Kent Yoder <yoder1@us.ibm.com>
- * Added timer for cable connection state changes.
- * v1.28 20 Feb 2004 Don Fry <brazilnut@us.ibm.com>
- * Jon Mason <jonmason@us.ibm.com>, Chinmay Albal <albal@in.ibm.com>
- * Now uses ethtool_ops, netif_msg_* and generic_mii_ioctl.
- * Fixes bogus 'Bus master arbitration failure', pci_[un]map_single
- * length errors, and transmit hangs. Cleans up after errors in open.
- * Jim Lewis <jklewis@us.ibm.com> added ethernet loopback test.
- * Thomas Munck Steenholdt <tmus@tmus.dk> non-mii ioctl corrections.
- * v1.29 6 Apr 2004 Jim Lewis <jklewis@us.ibm.com> added physical
- * identification code (blink led's) and register dump.
- * Don Fry added timer for 971/972 so skbufs don't remain on tx ring
- * forever.
- * v1.30 18 May 2004 Don Fry removed timer and Last Transmit Interrupt
- * (ltint) as they added complexity and didn't give good throughput.
- * v1.30a 22 May 2004 Don Fry limit frames received during interrupt.
- * v1.30b 24 May 2004 Don Fry fix bogus tx carrier errors with 79c973,
- * assisted by Bruce Penrod <bmpenrod@endruntechnologies.com>.
- * v1.30c 25 May 2004 Don Fry added netif_wake_queue after pcnet32_restart.
- * v1.30d 01 Jun 2004 Don Fry discard oversize rx packets.
- * v1.30e 11 Jun 2004 Don Fry recover after fifo error and rx hang.
- * v1.30f 16 Jun 2004 Don Fry cleanup IRQ to allow 0 and 1 for PCI,
- * expanding on suggestions from Ralf Baechle <ralf@linux-mips.org>,
- * and Brian Murphy <brian@murphy.dk>.
- * v1.30g 22 Jun 2004 Patrick Simmons <psimmons@flash.net> added option
- * homepna for selecting HomePNA mode for PCNet/Home 79C978.
- * v1.30h 24 Jun 2004 Don Fry correctly select auto, speed, duplex in bcr32.
- * v1.30i 28 Jun 2004 Don Fry change to use module_param.
- * v1.30j 29 Apr 2005 Don Fry fix skb/map leak with loopback test.
- * v1.31 02 Sep 2005 Hubert WS Lin <wslin@tw.ibm.c0m> added set_ringparam().
- * v1.31a 12 Sep 2005 Hubert WS Lin <wslin@tw.ibm.c0m> set min ring size to 4
- * to allow loopback test to work unchanged.
- * v1.31b 06 Oct 2005 Don Fry changed alloc_ring to show name of device
- * if allocation fails
- * v1.31c 01 Nov 2005 Don Fry Allied Telesyn 2700/2701 FX are 100Mbit only.
- * Force 100Mbit FD if Auto (ASEL) is selected.
- * See Bugzilla 2669 and 4551.
- */
-
-
-/*
* Set the number of Tx and Rx buffers, using Log_2(# buffers).
* Reasonable default values are 4 Tx buffers, and 16 Rx buffers.
* That translates to 2 (4 == 2^^2) and 4 (16 == 2^^4).
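* A worked example of where these log2 values end up: per the AMD
* databook layout, the init block's tlen_rlen word carries TLEN in bits
* 12-15 and RLEN in bits 4-7 (hence tx_len_bits = log2 << 12 and
* rx_len_bits = log2 << 4 elsewhere in this file), so the defaults give:
*
*	 4 Tx buffers -> log2 = 2 -> tx_len_bits = 2 << 12 = 0x2000
*	16 Rx buffers -> log2 = 4 -> rx_len_bits = 4 << 4  = 0x0040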
@@ -303,42 +190,42 @@ static int homepna[MAX_UNITS];
/* The PCNET32 Rx and Tx ring descriptors. */
struct pcnet32_rx_head {
- u32 base;
- s16 buf_length;
- s16 status;
- u32 msg_length;
- u32 reserved;
+ u32 base;
+ s16 buf_length;
+ s16 status;
+ u32 msg_length;
+ u32 reserved;
};
struct pcnet32_tx_head {
- u32 base;
- s16 length;
- s16 status;
- u32 misc;
- u32 reserved;
+ u32 base;
+ s16 length;
+ s16 status;
+ u32 misc;
+ u32 reserved;
};
/* The PCNET32 32-Bit initialization block, described in databook. */
struct pcnet32_init_block {
- u16 mode;
- u16 tlen_rlen;
- u8 phys_addr[6];
- u16 reserved;
- u32 filter[2];
- /* Receive and transmit ring base, along with extra bits. */
- u32 rx_ring;
- u32 tx_ring;
+ u16 mode;
+ u16 tlen_rlen;
+ u8 phys_addr[6];
+ u16 reserved;
+ u32 filter[2];
+ /* Receive and transmit ring base, along with extra bits. */
+ u32 rx_ring;
+ u32 tx_ring;
};
/* PCnet32 access functions */
struct pcnet32_access {
- u16 (*read_csr)(unsigned long, int);
- void (*write_csr)(unsigned long, int, u16);
- u16 (*read_bcr)(unsigned long, int);
- void (*write_bcr)(unsigned long, int, u16);
- u16 (*read_rap)(unsigned long);
- void (*write_rap)(unsigned long, u16);
- void (*reset)(unsigned long);
+ u16 (*read_csr) (unsigned long, int);
+ void (*write_csr) (unsigned long, int, u16);
+ u16 (*read_bcr) (unsigned long, int);
+ void (*write_bcr) (unsigned long, int, u16);
+ u16 (*read_rap) (unsigned long);
+ void (*write_rap) (unsigned long, u16);
+ void (*reset) (unsigned long);
};
/*
@@ -346,760 +233,794 @@ struct pcnet32_access {
* so the structure should be allocated using pci_alloc_consistent().
*/
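/*
 * Why coherent memory: init_block is the first member below and its bus
 * address (lp->dma_addr) is what the chip is pointed at, so CPU and
 * device must share a consistent view without explicit cache flushing.
 * A minimal allocation sketch, mirroring pcnet32_probe1 further down
 * (error handling trimmed):
 *
 *	struct pcnet32_private *lp;
 *	dma_addr_t lp_dma_addr;
 *
 *	lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr);
 *	if (lp == NULL)
 *		return -ENOMEM;
 *	memset(lp, 0, sizeof(*lp));
 *	lp->dma_addr = lp_dma_addr;
 */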
struct pcnet32_private {
- struct pcnet32_init_block init_block;
- /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */
- struct pcnet32_rx_head *rx_ring;
- struct pcnet32_tx_head *tx_ring;
- dma_addr_t dma_addr; /* DMA address of beginning of this
- object, returned by
- pci_alloc_consistent */
- struct pci_dev *pci_dev; /* Pointer to the associated pci device
- structure */
- const char *name;
- /* The saved address of a sent-in-place packet/buffer, for skfree(). */
- struct sk_buff **tx_skbuff;
- struct sk_buff **rx_skbuff;
- dma_addr_t *tx_dma_addr;
- dma_addr_t *rx_dma_addr;
- struct pcnet32_access a;
- spinlock_t lock; /* Guard lock */
- unsigned int cur_rx, cur_tx; /* The next free ring entry */
- unsigned int rx_ring_size; /* current rx ring size */
- unsigned int tx_ring_size; /* current tx ring size */
- unsigned int rx_mod_mask; /* rx ring modular mask */
- unsigned int tx_mod_mask; /* tx ring modular mask */
- unsigned short rx_len_bits;
- unsigned short tx_len_bits;
- dma_addr_t rx_ring_dma_addr;
- dma_addr_t tx_ring_dma_addr;
- unsigned int dirty_rx, dirty_tx; /* The ring entries to be free()ed. */
- struct net_device_stats stats;
- char tx_full;
- int options;
- unsigned int shared_irq:1, /* shared irq possible */
- dxsuflo:1, /* disable transmit stop on uflo */
- mii:1; /* mii port available */
- struct net_device *next;
- struct mii_if_info mii_if;
- struct timer_list watchdog_timer;
- struct timer_list blink_timer;
- u32 msg_enable; /* debug message level */
+ struct pcnet32_init_block init_block;
+ /* The Tx and Rx ring entries must be aligned on 16-byte boundaries in 32bit mode. */
+ struct pcnet32_rx_head *rx_ring;
+ struct pcnet32_tx_head *tx_ring;
+ dma_addr_t dma_addr;/* DMA address of beginning of this
+ object, returned by pci_alloc_consistent */
+ struct pci_dev *pci_dev;
+ const char *name;
+ /* The saved address of a sent-in-place packet/buffer, for skfree(). */
+ struct sk_buff **tx_skbuff;
+ struct sk_buff **rx_skbuff;
+ dma_addr_t *tx_dma_addr;
+ dma_addr_t *rx_dma_addr;
+ struct pcnet32_access a;
+ spinlock_t lock; /* Guard lock */
+ unsigned int cur_rx, cur_tx; /* The next free ring entry */
+ unsigned int rx_ring_size; /* current rx ring size */
+ unsigned int tx_ring_size; /* current tx ring size */
+ unsigned int rx_mod_mask; /* rx ring modular mask */
+ unsigned int tx_mod_mask; /* tx ring modular mask */
+ unsigned short rx_len_bits;
+ unsigned short tx_len_bits;
+ dma_addr_t rx_ring_dma_addr;
+ dma_addr_t tx_ring_dma_addr;
+ unsigned int dirty_rx, /* ring entries to be freed. */
+ dirty_tx;
+
+ struct net_device_stats stats;
+ char tx_full;
+ char phycount; /* number of phys found */
+ int options;
+ unsigned int shared_irq:1, /* shared irq possible */
+ dxsuflo:1, /* disable transmit stop on uflo */
+ mii:1; /* mii port available */
+ struct net_device *next;
+ struct mii_if_info mii_if;
+ struct timer_list watchdog_timer;
+ struct timer_list blink_timer;
+ u32 msg_enable; /* debug message level */
+
+ /* each bit indicates an available PHY */
+ u32 phymask;
};
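/*
 * Illustrative use of the two new PHY fields above: bit j of phymask is
 * set when a PHY answers at MII address j, and phycount tracks how many
 * bits are set, e.g. recording a PHY discovered at address 3:
 *
 *	lp->phymask |= (1 << 3);
 *	lp->phycount++;
 *
 * pcnet32_get_regs() below walks phymask to dump 32 registers per PHY.
 */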
static void pcnet32_probe_vlbus(void);
-static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *);
-static int pcnet32_probe1(unsigned long, int, struct pci_dev *);
-static int pcnet32_open(struct net_device *);
-static int pcnet32_init_ring(struct net_device *);
-static int pcnet32_start_xmit(struct sk_buff *, struct net_device *);
-static int pcnet32_rx(struct net_device *);
-static void pcnet32_tx_timeout (struct net_device *dev);
+static int pcnet32_probe_pci(struct pci_dev *, const struct pci_device_id *);
+static int pcnet32_probe1(unsigned long, int, struct pci_dev *);
+static int pcnet32_open(struct net_device *);
+static int pcnet32_init_ring(struct net_device *);
+static int pcnet32_start_xmit(struct sk_buff *, struct net_device *);
+static int pcnet32_rx(struct net_device *);
+static void pcnet32_tx_timeout(struct net_device *dev);
static irqreturn_t pcnet32_interrupt(int, void *, struct pt_regs *);
-static int pcnet32_close(struct net_device *);
+static int pcnet32_close(struct net_device *);
static struct net_device_stats *pcnet32_get_stats(struct net_device *);
static void pcnet32_load_multicast(struct net_device *dev);
static void pcnet32_set_multicast_list(struct net_device *);
-static int pcnet32_ioctl(struct net_device *, struct ifreq *, int);
+static int pcnet32_ioctl(struct net_device *, struct ifreq *, int);
static void pcnet32_watchdog(struct net_device *);
static int mdio_read(struct net_device *dev, int phy_id, int reg_num);
-static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val);
+static void mdio_write(struct net_device *dev, int phy_id, int reg_num,
+ int val);
static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits);
static void pcnet32_ethtool_test(struct net_device *dev,
- struct ethtool_test *eth_test, u64 *data);
-static int pcnet32_loopback_test(struct net_device *dev, uint64_t *data1);
+ struct ethtool_test *eth_test, u64 * data);
+static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1);
static int pcnet32_phys_id(struct net_device *dev, u32 data);
static void pcnet32_led_blink_callback(struct net_device *dev);
static int pcnet32_get_regs_len(struct net_device *dev);
static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs,
- void *ptr);
+ void *ptr);
static void pcnet32_purge_tx_ring(struct net_device *dev);
static int pcnet32_alloc_ring(struct net_device *dev, char *name);
static void pcnet32_free_ring(struct net_device *dev);
-
+static void pcnet32_check_media(struct net_device *dev, int verbose);
enum pci_flags_bit {
- PCI_USES_IO=1, PCI_USES_MEM=2, PCI_USES_MASTER=4,
- PCI_ADDR0=0x10<<0, PCI_ADDR1=0x10<<1, PCI_ADDR2=0x10<<2, PCI_ADDR3=0x10<<3,
+ PCI_USES_IO = 1, PCI_USES_MEM = 2, PCI_USES_MASTER = 4,
+ PCI_ADDR0 = 0x10 << 0, PCI_ADDR1 = 0x10 << 1, PCI_ADDR2 =
+ 0x10 << 2, PCI_ADDR3 = 0x10 << 3,
};
-
-static u16 pcnet32_wio_read_csr (unsigned long addr, int index)
+static u16 pcnet32_wio_read_csr(unsigned long addr, int index)
{
- outw (index, addr+PCNET32_WIO_RAP);
- return inw (addr+PCNET32_WIO_RDP);
+ outw(index, addr + PCNET32_WIO_RAP);
+ return inw(addr + PCNET32_WIO_RDP);
}
-static void pcnet32_wio_write_csr (unsigned long addr, int index, u16 val)
+static void pcnet32_wio_write_csr(unsigned long addr, int index, u16 val)
{
- outw (index, addr+PCNET32_WIO_RAP);
- outw (val, addr+PCNET32_WIO_RDP);
+ outw(index, addr + PCNET32_WIO_RAP);
+ outw(val, addr + PCNET32_WIO_RDP);
}
-static u16 pcnet32_wio_read_bcr (unsigned long addr, int index)
+static u16 pcnet32_wio_read_bcr(unsigned long addr, int index)
{
- outw (index, addr+PCNET32_WIO_RAP);
- return inw (addr+PCNET32_WIO_BDP);
+ outw(index, addr + PCNET32_WIO_RAP);
+ return inw(addr + PCNET32_WIO_BDP);
}
-static void pcnet32_wio_write_bcr (unsigned long addr, int index, u16 val)
+static void pcnet32_wio_write_bcr(unsigned long addr, int index, u16 val)
{
- outw (index, addr+PCNET32_WIO_RAP);
- outw (val, addr+PCNET32_WIO_BDP);
+ outw(index, addr + PCNET32_WIO_RAP);
+ outw(val, addr + PCNET32_WIO_BDP);
}
-static u16 pcnet32_wio_read_rap (unsigned long addr)
+static u16 pcnet32_wio_read_rap(unsigned long addr)
{
- return inw (addr+PCNET32_WIO_RAP);
+ return inw(addr + PCNET32_WIO_RAP);
}
-static void pcnet32_wio_write_rap (unsigned long addr, u16 val)
+static void pcnet32_wio_write_rap(unsigned long addr, u16 val)
{
- outw (val, addr+PCNET32_WIO_RAP);
+ outw(val, addr + PCNET32_WIO_RAP);
}
-static void pcnet32_wio_reset (unsigned long addr)
+static void pcnet32_wio_reset(unsigned long addr)
{
- inw (addr+PCNET32_WIO_RESET);
+ inw(addr + PCNET32_WIO_RESET);
}
-static int pcnet32_wio_check (unsigned long addr)
+static int pcnet32_wio_check(unsigned long addr)
{
- outw (88, addr+PCNET32_WIO_RAP);
- return (inw (addr+PCNET32_WIO_RAP) == 88);
+ outw(88, addr + PCNET32_WIO_RAP);
+ return (inw(addr + PCNET32_WIO_RAP) == 88);
}
static struct pcnet32_access pcnet32_wio = {
- .read_csr = pcnet32_wio_read_csr,
- .write_csr = pcnet32_wio_write_csr,
- .read_bcr = pcnet32_wio_read_bcr,
- .write_bcr = pcnet32_wio_write_bcr,
- .read_rap = pcnet32_wio_read_rap,
- .write_rap = pcnet32_wio_write_rap,
- .reset = pcnet32_wio_reset
+ .read_csr = pcnet32_wio_read_csr,
+ .write_csr = pcnet32_wio_write_csr,
+ .read_bcr = pcnet32_wio_read_bcr,
+ .write_bcr = pcnet32_wio_write_bcr,
+ .read_rap = pcnet32_wio_read_rap,
+ .write_rap = pcnet32_wio_write_rap,
+ .reset = pcnet32_wio_reset
};
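/*
 * All chip access is indirect: the register index is written to RAP and
 * the data then moves through RDP (CSRs) or BDP (BCRs). Usage sketch,
 * reading the 32-bit chip version out of CSR88/CSR89 the way the probe
 * code below does (ioaddr is the card's base I/O address):
 *
 *	struct pcnet32_access *a = &pcnet32_wio;
 *	u32 ver = a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr, 89) << 16);
 */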
-static u16 pcnet32_dwio_read_csr (unsigned long addr, int index)
+static u16 pcnet32_dwio_read_csr(unsigned long addr, int index)
{
- outl (index, addr+PCNET32_DWIO_RAP);
- return (inl (addr+PCNET32_DWIO_RDP) & 0xffff);
+ outl(index, addr + PCNET32_DWIO_RAP);
+ return (inl(addr + PCNET32_DWIO_RDP) & 0xffff);
}
-static void pcnet32_dwio_write_csr (unsigned long addr, int index, u16 val)
+static void pcnet32_dwio_write_csr(unsigned long addr, int index, u16 val)
{
- outl (index, addr+PCNET32_DWIO_RAP);
- outl (val, addr+PCNET32_DWIO_RDP);
+ outl(index, addr + PCNET32_DWIO_RAP);
+ outl(val, addr + PCNET32_DWIO_RDP);
}
-static u16 pcnet32_dwio_read_bcr (unsigned long addr, int index)
+static u16 pcnet32_dwio_read_bcr(unsigned long addr, int index)
{
- outl (index, addr+PCNET32_DWIO_RAP);
- return (inl (addr+PCNET32_DWIO_BDP) & 0xffff);
+ outl(index, addr + PCNET32_DWIO_RAP);
+ return (inl(addr + PCNET32_DWIO_BDP) & 0xffff);
}
-static void pcnet32_dwio_write_bcr (unsigned long addr, int index, u16 val)
+static void pcnet32_dwio_write_bcr(unsigned long addr, int index, u16 val)
{
- outl (index, addr+PCNET32_DWIO_RAP);
- outl (val, addr+PCNET32_DWIO_BDP);
+ outl(index, addr + PCNET32_DWIO_RAP);
+ outl(val, addr + PCNET32_DWIO_BDP);
}
-static u16 pcnet32_dwio_read_rap (unsigned long addr)
+static u16 pcnet32_dwio_read_rap(unsigned long addr)
{
- return (inl (addr+PCNET32_DWIO_RAP) & 0xffff);
+ return (inl(addr + PCNET32_DWIO_RAP) & 0xffff);
}
-static void pcnet32_dwio_write_rap (unsigned long addr, u16 val)
+static void pcnet32_dwio_write_rap(unsigned long addr, u16 val)
{
- outl (val, addr+PCNET32_DWIO_RAP);
+ outl(val, addr + PCNET32_DWIO_RAP);
}
-static void pcnet32_dwio_reset (unsigned long addr)
+static void pcnet32_dwio_reset(unsigned long addr)
{
- inl (addr+PCNET32_DWIO_RESET);
+ inl(addr + PCNET32_DWIO_RESET);
}
-static int pcnet32_dwio_check (unsigned long addr)
+static int pcnet32_dwio_check(unsigned long addr)
{
- outl (88, addr+PCNET32_DWIO_RAP);
- return ((inl (addr+PCNET32_DWIO_RAP) & 0xffff) == 88);
+ outl(88, addr + PCNET32_DWIO_RAP);
+ return ((inl(addr + PCNET32_DWIO_RAP) & 0xffff) == 88);
}
static struct pcnet32_access pcnet32_dwio = {
- .read_csr = pcnet32_dwio_read_csr,
- .write_csr = pcnet32_dwio_write_csr,
- .read_bcr = pcnet32_dwio_read_bcr,
- .write_bcr = pcnet32_dwio_write_bcr,
- .read_rap = pcnet32_dwio_read_rap,
- .write_rap = pcnet32_dwio_write_rap,
- .reset = pcnet32_dwio_reset
+ .read_csr = pcnet32_dwio_read_csr,
+ .write_csr = pcnet32_dwio_write_csr,
+ .read_bcr = pcnet32_dwio_read_bcr,
+ .write_bcr = pcnet32_dwio_write_bcr,
+ .read_rap = pcnet32_dwio_read_rap,
+ .write_rap = pcnet32_dwio_write_rap,
+ .reset = pcnet32_dwio_reset
};
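/*
 * The dwio table reaches the same registers through 32-bit ports, with
 * only the low 16 bits significant. Detection sketch, as performed in
 * pcnet32_probe1 (after reset, CSR0 reads 4 with the STOP bit set, and
 * the 16-bit check must come first or some older chips fail):
 *
 *	pcnet32_wio_reset(ioaddr);
 *	if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) {
 *		a = &pcnet32_wio;
 *	} else {
 *		pcnet32_dwio_reset(ioaddr);
 *		if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 &&
 *		    pcnet32_dwio_check(ioaddr))
 *			a = &pcnet32_dwio;
 *	}
 */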
#ifdef CONFIG_NET_POLL_CONTROLLER
static void pcnet32_poll_controller(struct net_device *dev)
{
- disable_irq(dev->irq);
- pcnet32_interrupt(0, dev, NULL);
- enable_irq(dev->irq);
+ disable_irq(dev->irq);
+ pcnet32_interrupt(0, dev, NULL);
+ enable_irq(dev->irq);
}
#endif
-
static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long flags;
- int r = -EOPNOTSUPP;
-
- if (lp->mii) {
- spin_lock_irqsave(&lp->lock, flags);
- mii_ethtool_gset(&lp->mii_if, cmd);
- spin_unlock_irqrestore(&lp->lock, flags);
- r = 0;
- }
- return r;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long flags;
+ int r = -EOPNOTSUPP;
+
+ if (lp->mii) {
+ spin_lock_irqsave(&lp->lock, flags);
+ mii_ethtool_gset(&lp->mii_if, cmd);
+ spin_unlock_irqrestore(&lp->lock, flags);
+ r = 0;
+ }
+ return r;
}
static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long flags;
- int r = -EOPNOTSUPP;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long flags;
+ int r = -EOPNOTSUPP;
- if (lp->mii) {
- spin_lock_irqsave(&lp->lock, flags);
- r = mii_ethtool_sset(&lp->mii_if, cmd);
- spin_unlock_irqrestore(&lp->lock, flags);
- }
- return r;
+ if (lp->mii) {
+ spin_lock_irqsave(&lp->lock, flags);
+ r = mii_ethtool_sset(&lp->mii_if, cmd);
+ spin_unlock_irqrestore(&lp->lock, flags);
+ }
+ return r;
}
-static void pcnet32_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
+static void pcnet32_get_drvinfo(struct net_device *dev,
+ struct ethtool_drvinfo *info)
{
- struct pcnet32_private *lp = dev->priv;
-
- strcpy (info->driver, DRV_NAME);
- strcpy (info->version, DRV_VERSION);
- if (lp->pci_dev)
- strcpy (info->bus_info, pci_name(lp->pci_dev));
- else
- sprintf(info->bus_info, "VLB 0x%lx", dev->base_addr);
+ struct pcnet32_private *lp = dev->priv;
+
+ strcpy(info->driver, DRV_NAME);
+ strcpy(info->version, DRV_VERSION);
+ if (lp->pci_dev)
+ strcpy(info->bus_info, pci_name(lp->pci_dev));
+ else
+ sprintf(info->bus_info, "VLB 0x%lx", dev->base_addr);
}
static u32 pcnet32_get_link(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long flags;
- int r;
-
- spin_lock_irqsave(&lp->lock, flags);
- if (lp->mii) {
- r = mii_link_ok(&lp->mii_if);
- } else {
- ulong ioaddr = dev->base_addr; /* card base I/O address */
- r = (lp->a.read_bcr(ioaddr, 4) != 0xc0);
- }
- spin_unlock_irqrestore(&lp->lock, flags);
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long flags;
+ int r;
- return r;
+ spin_lock_irqsave(&lp->lock, flags);
+ if (lp->mii) {
+ r = mii_link_ok(&lp->mii_if);
+ } else {
+ ulong ioaddr = dev->base_addr; /* card base I/O address */
+ r = (lp->a.read_bcr(ioaddr, 4) != 0xc0);
+ }
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return r;
}
static u32 pcnet32_get_msglevel(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- return lp->msg_enable;
+ struct pcnet32_private *lp = dev->priv;
+ return lp->msg_enable;
}
static void pcnet32_set_msglevel(struct net_device *dev, u32 value)
{
- struct pcnet32_private *lp = dev->priv;
- lp->msg_enable = value;
+ struct pcnet32_private *lp = dev->priv;
+ lp->msg_enable = value;
}
static int pcnet32_nway_reset(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long flags;
- int r = -EOPNOTSUPP;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long flags;
+ int r = -EOPNOTSUPP;
- if (lp->mii) {
- spin_lock_irqsave(&lp->lock, flags);
- r = mii_nway_restart(&lp->mii_if);
- spin_unlock_irqrestore(&lp->lock, flags);
- }
- return r;
+ if (lp->mii) {
+ spin_lock_irqsave(&lp->lock, flags);
+ r = mii_nway_restart(&lp->mii_if);
+ spin_unlock_irqrestore(&lp->lock, flags);
+ }
+ return r;
}
-static void pcnet32_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ering)
+static void pcnet32_get_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *ering)
{
- struct pcnet32_private *lp = dev->priv;
+ struct pcnet32_private *lp = dev->priv;
- ering->tx_max_pending = TX_MAX_RING_SIZE - 1;
- ering->tx_pending = lp->tx_ring_size - 1;
- ering->rx_max_pending = RX_MAX_RING_SIZE - 1;
- ering->rx_pending = lp->rx_ring_size - 1;
+ ering->tx_max_pending = TX_MAX_RING_SIZE - 1;
+ ering->tx_pending = lp->tx_ring_size - 1;
+ ering->rx_max_pending = RX_MAX_RING_SIZE - 1;
+ ering->rx_pending = lp->rx_ring_size - 1;
}
-static int pcnet32_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ering)
+static int pcnet32_set_ringparam(struct net_device *dev,
+ struct ethtool_ringparam *ering)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long flags;
- int i;
-
- if (ering->rx_mini_pending || ering->rx_jumbo_pending)
- return -EINVAL;
-
- if (netif_running(dev))
- pcnet32_close(dev);
-
- spin_lock_irqsave(&lp->lock, flags);
- pcnet32_free_ring(dev);
- lp->tx_ring_size = min(ering->tx_pending, (unsigned int) TX_MAX_RING_SIZE);
- lp->rx_ring_size = min(ering->rx_pending, (unsigned int) RX_MAX_RING_SIZE);
-
- /* set the minimum ring size to 4, to allow the loopback test to work
- * unchanged.
- */
- for (i = 2; i <= PCNET32_LOG_MAX_TX_BUFFERS; i++) {
- if (lp->tx_ring_size <= (1 << i))
- break;
- }
- lp->tx_ring_size = (1 << i);
- lp->tx_mod_mask = lp->tx_ring_size - 1;
- lp->tx_len_bits = (i << 12);
-
- for (i = 2; i <= PCNET32_LOG_MAX_RX_BUFFERS; i++) {
- if (lp->rx_ring_size <= (1 << i))
- break;
- }
- lp->rx_ring_size = (1 << i);
- lp->rx_mod_mask = lp->rx_ring_size - 1;
- lp->rx_len_bits = (i << 4);
-
- if (pcnet32_alloc_ring(dev, dev->name)) {
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long flags;
+ int i;
+
+ if (ering->rx_mini_pending || ering->rx_jumbo_pending)
+ return -EINVAL;
+
+ if (netif_running(dev))
+ pcnet32_close(dev);
+
+ spin_lock_irqsave(&lp->lock, flags);
pcnet32_free_ring(dev);
- spin_unlock_irqrestore(&lp->lock, flags);
- return -ENOMEM;
- }
+ lp->tx_ring_size =
+ min(ering->tx_pending, (unsigned int)TX_MAX_RING_SIZE);
+ lp->rx_ring_size =
+ min(ering->rx_pending, (unsigned int)RX_MAX_RING_SIZE);
+
+ /* set the minimum ring size to 4, to allow the loopback test to work
+ * unchanged.
+ */
+ for (i = 2; i <= PCNET32_LOG_MAX_TX_BUFFERS; i++) {
+ if (lp->tx_ring_size <= (1 << i))
+ break;
+ }
+ lp->tx_ring_size = (1 << i);
+ lp->tx_mod_mask = lp->tx_ring_size - 1;
+ lp->tx_len_bits = (i << 12);
- spin_unlock_irqrestore(&lp->lock, flags);
+ for (i = 2; i <= PCNET32_LOG_MAX_RX_BUFFERS; i++) {
+ if (lp->rx_ring_size <= (1 << i))
+ break;
+ }
+ lp->rx_ring_size = (1 << i);
+ lp->rx_mod_mask = lp->rx_ring_size - 1;
+ lp->rx_len_bits = (i << 4);
+
+ if (pcnet32_alloc_ring(dev, dev->name)) {
+ pcnet32_free_ring(dev);
+ spin_unlock_irqrestore(&lp->lock, flags);
+ return -ENOMEM;
+ }
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk(KERN_INFO PFX "%s: Ring Param Settings: RX: %d, TX: %d\n",
- dev->name, lp->rx_ring_size, lp->tx_ring_size);
+ spin_unlock_irqrestore(&lp->lock, flags);
- if (netif_running(dev))
- pcnet32_open(dev);
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk(KERN_INFO PFX
+ "%s: Ring Param Settings: RX: %d, TX: %d\n", dev->name,
+ lp->rx_ring_size, lp->tx_ring_size);
- return 0;
+ if (netif_running(dev))
+ pcnet32_open(dev);
+
+ return 0;
}
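/*
 * The two loops above round the requested count up to the next power of
 * two, with i starting at 2 to enforce the minimum ring size of 4; the
 * min() clamp beforehand guarantees the request never exceeds
 * 1 << PCNET32_LOG_MAX_*_BUFFERS, so the loop always exits via break.
 * The same rounding as a standalone sketch; pcnet32_round_ring and
 * log_max are illustrative names, not part of the driver:
 */
#if 0	/* illustration only */
static unsigned int pcnet32_round_ring(unsigned int requested, int log_max)
{
	int i;

	for (i = 2; i <= log_max; i++)	/* i = 2 gives the minimum of 4 */
		if (requested <= (1U << i))
			break;
	return 1U << i;			/* e.g. 5 -> 8, 16 -> 16 */
}
#endif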
-static void pcnet32_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+static void pcnet32_get_strings(struct net_device *dev, u32 stringset,
+ u8 * data)
{
- memcpy(data, pcnet32_gstrings_test, sizeof(pcnet32_gstrings_test));
+ memcpy(data, pcnet32_gstrings_test, sizeof(pcnet32_gstrings_test));
}
static int pcnet32_self_test_count(struct net_device *dev)
{
- return PCNET32_TEST_LEN;
+ return PCNET32_TEST_LEN;
}
static void pcnet32_ethtool_test(struct net_device *dev,
- struct ethtool_test *test, u64 *data)
+ struct ethtool_test *test, u64 * data)
{
- struct pcnet32_private *lp = dev->priv;
- int rc;
-
- if (test->flags == ETH_TEST_FL_OFFLINE) {
- rc = pcnet32_loopback_test(dev, data);
- if (rc) {
- if (netif_msg_hw(lp))
- printk(KERN_DEBUG "%s: Loopback test failed.\n", dev->name);
- test->flags |= ETH_TEST_FL_FAILED;
+ struct pcnet32_private *lp = dev->priv;
+ int rc;
+
+ if (test->flags == ETH_TEST_FL_OFFLINE) {
+ rc = pcnet32_loopback_test(dev, data);
+ if (rc) {
+ if (netif_msg_hw(lp))
+ printk(KERN_DEBUG "%s: Loopback test failed.\n",
+ dev->name);
+ test->flags |= ETH_TEST_FL_FAILED;
+ } else if (netif_msg_hw(lp))
+ printk(KERN_DEBUG "%s: Loopback test passed.\n",
+ dev->name);
} else if (netif_msg_hw(lp))
- printk(KERN_DEBUG "%s: Loopback test passed.\n", dev->name);
- } else if (netif_msg_hw(lp))
- printk(KERN_DEBUG "%s: No tests to run (specify 'Offline' on ethtool).", dev->name);
-} /* end pcnet32_ethtool_test */
+ printk(KERN_DEBUG
+ "%s: No tests to run (specify 'Offline' on ethtool).",
+ dev->name);
+} /* end pcnet32_ethtool_test */
-static int pcnet32_loopback_test(struct net_device *dev, uint64_t *data1)
+static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1)
{
- struct pcnet32_private *lp = dev->priv;
- struct pcnet32_access *a = &lp->a; /* access to registers */
- ulong ioaddr = dev->base_addr; /* card base I/O address */
- struct sk_buff *skb; /* sk buff */
- int x, i; /* counters */
- int numbuffs = 4; /* number of TX/RX buffers and descs */
- u16 status = 0x8300; /* TX ring status */
- u16 teststatus; /* test of ring status */
- int rc; /* return code */
- int size; /* size of packets */
- unsigned char *packet; /* source packet data */
- static const int data_len = 60; /* length of source packets */
- unsigned long flags;
- unsigned long ticks;
-
- *data1 = 1; /* status of test, default to fail */
- rc = 1; /* default to fail */
-
- if (netif_running(dev))
- pcnet32_close(dev);
-
- spin_lock_irqsave(&lp->lock, flags);
-
- /* Reset the PCNET32 */
- lp->a.reset (ioaddr);
-
- /* switch pcnet32 to 32bit mode */
- lp->a.write_bcr (ioaddr, 20, 2);
-
- lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7);
- lp->init_block.filter[0] = 0;
- lp->init_block.filter[1] = 0;
-
- /* purge & init rings but don't actually restart */
- pcnet32_restart(dev, 0x0000);
-
- lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */
-
- /* Initialize Transmit buffers. */
- size = data_len + 15;
- for (x=0; x<numbuffs; x++) {
- if (!(skb = dev_alloc_skb(size))) {
- if (netif_msg_hw(lp))
- printk(KERN_DEBUG "%s: Cannot allocate skb at line: %d!\n",
- dev->name, __LINE__);
- goto clean_up;
- } else {
- packet = skb->data;
- skb_put(skb, size); /* create space for data */
- lp->tx_skbuff[x] = skb;
- lp->tx_ring[x].length = le16_to_cpu(-skb->len);
- lp->tx_ring[x].misc = 0;
-
- /* put DA and SA into the skb */
- for (i=0; i<6; i++)
- *packet++ = dev->dev_addr[i];
- for (i=0; i<6; i++)
- *packet++ = dev->dev_addr[i];
- /* type */
- *packet++ = 0x08;
- *packet++ = 0x06;
- /* packet number */
- *packet++ = x;
- /* fill packet with data */
- for (i=0; i<data_len; i++)
- *packet++ = i;
-
- lp->tx_dma_addr[x] = pci_map_single(lp->pci_dev, skb->data,
- skb->len, PCI_DMA_TODEVICE);
- lp->tx_ring[x].base = (u32)le32_to_cpu(lp->tx_dma_addr[x]);
- wmb(); /* Make sure owner changes after all others are visible */
- lp->tx_ring[x].status = le16_to_cpu(status);
- }
- }
-
- x = a->read_bcr(ioaddr, 32); /* set internal loopback in BSR32 */
- x = x | 0x0002;
- a->write_bcr(ioaddr, 32, x);
-
- lp->a.write_csr (ioaddr, 15, 0x0044); /* set int loopback in CSR15 */
-
- teststatus = le16_to_cpu(0x8000);
- lp->a.write_csr(ioaddr, 0, 0x0002); /* Set STRT bit */
-
- /* Check status of descriptors */
- for (x=0; x<numbuffs; x++) {
- ticks = 0;
- rmb();
- while ((lp->rx_ring[x].status & teststatus) && (ticks < 200)) {
- spin_unlock_irqrestore(&lp->lock, flags);
- mdelay(1);
- spin_lock_irqsave(&lp->lock, flags);
- rmb();
- ticks++;
- }
- if (ticks == 200) {
- if (netif_msg_hw(lp))
- printk("%s: Desc %d failed to reset!\n",dev->name,x);
- break;
- }
- }
-
- lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */
- wmb();
- if (netif_msg_hw(lp) && netif_msg_pktdata(lp)) {
- printk(KERN_DEBUG "%s: RX loopback packets:\n", dev->name);
-
- for (x=0; x<numbuffs; x++) {
- printk(KERN_DEBUG "%s: Packet %d:\n", dev->name, x);
- skb = lp->rx_skbuff[x];
- for (i=0; i<size; i++) {
- printk("%02x ", *(skb->data+i));
- }
- printk("\n");
- }
- }
-
- x = 0;
- rc = 0;
- while (x<numbuffs && !rc) {
- skb = lp->rx_skbuff[x];
- packet = lp->tx_skbuff[x]->data;
- for (i=0; i<size; i++) {
- if (*(skb->data+i) != packet[i]) {
- if (netif_msg_hw(lp))
- printk(KERN_DEBUG "%s: Error in compare! %2x - %02x %02x\n",
- dev->name, i, *(skb->data+i), packet[i]);
- rc = 1;
- break;
- }
+ struct pcnet32_private *lp = dev->priv;
+ struct pcnet32_access *a = &lp->a; /* access to registers */
+ ulong ioaddr = dev->base_addr; /* card base I/O address */
+ struct sk_buff *skb; /* sk buff */
+ int x, i; /* counters */
+ int numbuffs = 4; /* number of TX/RX buffers and descs */
+ u16 status = 0x8300; /* TX ring status */
+ u16 teststatus; /* test of ring status */
+ int rc; /* return code */
+ int size; /* size of packets */
+ unsigned char *packet; /* source packet data */
+ static const int data_len = 60; /* length of source packets */
+ unsigned long flags;
+ unsigned long ticks;
+
+ *data1 = 1; /* status of test, default to fail */
+ rc = 1; /* default to fail */
+
+ if (netif_running(dev))
+ pcnet32_close(dev);
+
+ spin_lock_irqsave(&lp->lock, flags);
+
+ /* Reset the PCNET32 */
+ lp->a.reset(ioaddr);
+
+ /* switch pcnet32 to 32bit mode */
+ lp->a.write_bcr(ioaddr, 20, 2);
+
+ lp->init_block.mode =
+ le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7);
+ lp->init_block.filter[0] = 0;
+ lp->init_block.filter[1] = 0;
+
+ /* purge & init rings but don't actually restart */
+ pcnet32_restart(dev, 0x0000);
+
+ lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */
+
+ /* Initialize Transmit buffers. */
+ size = data_len + 15;
+ for (x = 0; x < numbuffs; x++) {
+ if (!(skb = dev_alloc_skb(size))) {
+ if (netif_msg_hw(lp))
+ printk(KERN_DEBUG
+ "%s: Cannot allocate skb at line: %d!\n",
+ dev->name, __LINE__);
+ goto clean_up;
+ } else {
+ packet = skb->data;
+ skb_put(skb, size); /* create space for data */
+ lp->tx_skbuff[x] = skb;
+ lp->tx_ring[x].length = le16_to_cpu(-skb->len);
+ lp->tx_ring[x].misc = 0;
+
+ /* put DA and SA into the skb */
+ for (i = 0; i < 6; i++)
+ *packet++ = dev->dev_addr[i];
+ for (i = 0; i < 6; i++)
+ *packet++ = dev->dev_addr[i];
+ /* type */
+ *packet++ = 0x08;
+ *packet++ = 0x06;
+ /* packet number */
+ *packet++ = x;
+ /* fill packet with data */
+ for (i = 0; i < data_len; i++)
+ *packet++ = i;
+
+ lp->tx_dma_addr[x] =
+ pci_map_single(lp->pci_dev, skb->data, skb->len,
+ PCI_DMA_TODEVICE);
+ lp->tx_ring[x].base =
+ (u32) le32_to_cpu(lp->tx_dma_addr[x]);
+ wmb(); /* Make sure owner changes after all others are visible */
+ lp->tx_ring[x].status = le16_to_cpu(status);
+ }
+ }
+
+ x = a->read_bcr(ioaddr, 32); /* set internal loopback in BSR32 */
+ x = x | 0x0002;
+ a->write_bcr(ioaddr, 32, x);
+
+ lp->a.write_csr(ioaddr, 15, 0x0044); /* set int loopback in CSR15 */
+
+ teststatus = le16_to_cpu(0x8000);
+ lp->a.write_csr(ioaddr, 0, 0x0002); /* Set STRT bit */
+
+ /* Check status of descriptors */
+ for (x = 0; x < numbuffs; x++) {
+ ticks = 0;
+ rmb();
+ while ((lp->rx_ring[x].status & teststatus) && (ticks < 200)) {
+ spin_unlock_irqrestore(&lp->lock, flags);
+ mdelay(1);
+ spin_lock_irqsave(&lp->lock, flags);
+ rmb();
+ ticks++;
+ }
+ if (ticks == 200) {
+ if (netif_msg_hw(lp))
+ printk("%s: Desc %d failed to reset!\n",
+ dev->name, x);
+ break;
+ }
+ }
+
+ lp->a.write_csr(ioaddr, 0, 0x0004); /* Set STOP bit */
+ wmb();
+ if (netif_msg_hw(lp) && netif_msg_pktdata(lp)) {
+ printk(KERN_DEBUG "%s: RX loopback packets:\n", dev->name);
+
+ for (x = 0; x < numbuffs; x++) {
+ printk(KERN_DEBUG "%s: Packet %d:\n", dev->name, x);
+ skb = lp->rx_skbuff[x];
+ for (i = 0; i < size; i++) {
+ printk("%02x ", *(skb->data + i));
+ }
+ printk("\n");
+ }
+ }
+
+ x = 0;
+ rc = 0;
+ while (x < numbuffs && !rc) {
+ skb = lp->rx_skbuff[x];
+ packet = lp->tx_skbuff[x]->data;
+ for (i = 0; i < size; i++) {
+ if (*(skb->data + i) != packet[i]) {
+ if (netif_msg_hw(lp))
+ printk(KERN_DEBUG
+ "%s: Error in compare! %2x - %02x %02x\n",
+ dev->name, i, *(skb->data + i),
+ packet[i]);
+ rc = 1;
+ break;
+ }
+ }
+ x++;
+ }
+ if (!rc) {
+ *data1 = 0;
}
- x++;
- }
- if (!rc) {
- *data1 = 0;
- }
-clean_up:
- pcnet32_purge_tx_ring(dev);
- x = a->read_csr(ioaddr, 15) & 0xFFFF;
- a->write_csr(ioaddr, 15, (x & ~0x0044)); /* reset bits 6 and 2 */
+ clean_up:
+ pcnet32_purge_tx_ring(dev);
+ x = a->read_csr(ioaddr, 15) & 0xFFFF;
+ a->write_csr(ioaddr, 15, (x & ~0x0044)); /* reset bits 6 and 2 */
- x = a->read_bcr(ioaddr, 32); /* reset internal loopback */
- x = x & ~0x0002;
- a->write_bcr(ioaddr, 32, x);
+ x = a->read_bcr(ioaddr, 32); /* reset internal loopback */
+ x = x & ~0x0002;
+ a->write_bcr(ioaddr, 32, x);
- spin_unlock_irqrestore(&lp->lock, flags);
+ spin_unlock_irqrestore(&lp->lock, flags);
- if (netif_running(dev)) {
- pcnet32_open(dev);
- } else {
- lp->a.write_bcr (ioaddr, 20, 4); /* return to 16bit mode */
- }
+ if (netif_running(dev)) {
+ pcnet32_open(dev);
+ } else {
+ lp->a.write_bcr(ioaddr, 20, 4); /* return to 16bit mode */
+ }
- return(rc);
-} /* end pcnet32_loopback_test */
+ return (rc);
+} /* end pcnet32_loopback_test */
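/*
 * Shape of the loopback frames built above (size = data_len + 15 = 75):
 *
 *	bytes  0..5	destination MAC, set to dev->dev_addr
 *	bytes  6..11	source MAC, also dev->dev_addr
 *	bytes 12..13	type field 0x0806
 *	byte  14	buffer number x (0..numbuffs-1)
 *	bytes 15..74	counting pattern 0, 1, 2, ... data_len-1
 *
 * With internal loopback selected (BCR32 bit 1 plus CSR15 = 0x0044) each
 * frame should come back unchanged, so the compare loop fails the test
 * on the first mismatching byte.
 */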
static void pcnet32_led_blink_callback(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- struct pcnet32_access *a = &lp->a;
- ulong ioaddr = dev->base_addr;
- unsigned long flags;
- int i;
-
- spin_lock_irqsave(&lp->lock, flags);
- for (i=4; i<8; i++) {
- a->write_bcr(ioaddr, i, a->read_bcr(ioaddr, i) ^ 0x4000);
- }
- spin_unlock_irqrestore(&lp->lock, flags);
-
- mod_timer(&lp->blink_timer, PCNET32_BLINK_TIMEOUT);
+ struct pcnet32_private *lp = dev->priv;
+ struct pcnet32_access *a = &lp->a;
+ ulong ioaddr = dev->base_addr;
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&lp->lock, flags);
+ for (i = 4; i < 8; i++) {
+ a->write_bcr(ioaddr, i, a->read_bcr(ioaddr, i) ^ 0x4000);
+ }
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ mod_timer(&lp->blink_timer, PCNET32_BLINK_TIMEOUT);
}
static int pcnet32_phys_id(struct net_device *dev, u32 data)
{
- struct pcnet32_private *lp = dev->priv;
- struct pcnet32_access *a = &lp->a;
- ulong ioaddr = dev->base_addr;
- unsigned long flags;
- int i, regs[4];
-
- if (!lp->blink_timer.function) {
- init_timer(&lp->blink_timer);
- lp->blink_timer.function = (void *) pcnet32_led_blink_callback;
- lp->blink_timer.data = (unsigned long) dev;
- }
-
- /* Save the current value of the bcrs */
- spin_lock_irqsave(&lp->lock, flags);
- for (i=4; i<8; i++) {
- regs[i-4] = a->read_bcr(ioaddr, i);
- }
- spin_unlock_irqrestore(&lp->lock, flags);
-
- mod_timer(&lp->blink_timer, jiffies);
- set_current_state(TASK_INTERRUPTIBLE);
-
- if ((!data) || (data > (u32)(MAX_SCHEDULE_TIMEOUT / HZ)))
- data = (u32)(MAX_SCHEDULE_TIMEOUT / HZ);
-
- msleep_interruptible(data * 1000);
- del_timer_sync(&lp->blink_timer);
-
- /* Restore the original value of the bcrs */
- spin_lock_irqsave(&lp->lock, flags);
- for (i=4; i<8; i++) {
- a->write_bcr(ioaddr, i, regs[i-4]);
- }
- spin_unlock_irqrestore(&lp->lock, flags);
-
- return 0;
+ struct pcnet32_private *lp = dev->priv;
+ struct pcnet32_access *a = &lp->a;
+ ulong ioaddr = dev->base_addr;
+ unsigned long flags;
+ int i, regs[4];
+
+ if (!lp->blink_timer.function) {
+ init_timer(&lp->blink_timer);
+ lp->blink_timer.function = (void *)pcnet32_led_blink_callback;
+ lp->blink_timer.data = (unsigned long)dev;
+ }
+
+ /* Save the current value of the bcrs */
+ spin_lock_irqsave(&lp->lock, flags);
+ for (i = 4; i < 8; i++) {
+ regs[i - 4] = a->read_bcr(ioaddr, i);
+ }
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ mod_timer(&lp->blink_timer, jiffies);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if ((!data) || (data > (u32) (MAX_SCHEDULE_TIMEOUT / HZ)))
+ data = (u32) (MAX_SCHEDULE_TIMEOUT / HZ);
+
+ msleep_interruptible(data * 1000);
+ del_timer_sync(&lp->blink_timer);
+
+ /* Restore the original value of the bcrs */
+ spin_lock_irqsave(&lp->lock, flags);
+ for (i = 4; i < 8; i++) {
+ a->write_bcr(ioaddr, i, regs[i - 4]);
+ }
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return 0;
}
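/*
 * BCR4-BCR7 are the chip's LED control registers; XORing 0x4000 in the
 * blink callback above toggles bit 14 of each, inverting the LED drive
 * so the LEDs visibly blink. 'data' is the blink time in seconds as
 * passed by ethtool, clamped so msleep_interruptible() stays in range,
 * and the saved register values restore normal LED behaviour afterwards.
 */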
+#define PCNET32_REGS_PER_PHY 32
+#define PCNET32_MAX_PHYS 32
static int pcnet32_get_regs_len(struct net_device *dev)
{
- return(PCNET32_NUM_REGS * sizeof(u16));
+ struct pcnet32_private *lp = dev->priv;
+ int j = lp->phycount * PCNET32_REGS_PER_PHY;
+
+ return ((PCNET32_NUM_REGS + j) * sizeof(u16));
}
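/*
 * Worked example: with one PHY found (phycount == 1) the reported dump
 * length is (PCNET32_NUM_REGS + 32) * sizeof(u16); every additional PHY
 * on the MII bus adds another 32 u16 slots, matching the per-PHY loop
 * in pcnet32_get_regs() below.
 */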
static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs,
- void *ptr)
+ void *ptr)
{
- int i, csr0;
- u16 *buff = ptr;
- struct pcnet32_private *lp = dev->priv;
- struct pcnet32_access *a = &lp->a;
- ulong ioaddr = dev->base_addr;
- int ticks;
- unsigned long flags;
-
- spin_lock_irqsave(&lp->lock, flags);
-
- csr0 = a->read_csr(ioaddr, 0);
- if (!(csr0 & 0x0004)) { /* If not stopped */
- /* set SUSPEND (SPND) - CSR5 bit 0 */
- a->write_csr(ioaddr, 5, 0x0001);
-
- /* poll waiting for bit to be set */
- ticks = 0;
- while (!(a->read_csr(ioaddr, 5) & 0x0001)) {
- spin_unlock_irqrestore(&lp->lock, flags);
- mdelay(1);
- spin_lock_irqsave(&lp->lock, flags);
- ticks++;
- if (ticks > 200) {
- if (netif_msg_hw(lp))
- printk(KERN_DEBUG "%s: Error getting into suspend!\n",
- dev->name);
- break;
- }
- }
- }
+ int i, csr0;
+ u16 *buff = ptr;
+ struct pcnet32_private *lp = dev->priv;
+ struct pcnet32_access *a = &lp->a;
+ ulong ioaddr = dev->base_addr;
+ int ticks;
+ unsigned long flags;
- /* read address PROM */
- for (i=0; i<16; i += 2)
- *buff++ = inw(ioaddr + i);
+ spin_lock_irqsave(&lp->lock, flags);
- /* read control and status registers */
- for (i=0; i<90; i++) {
- *buff++ = a->read_csr(ioaddr, i);
- }
+ csr0 = a->read_csr(ioaddr, 0);
+ if (!(csr0 & 0x0004)) { /* If not stopped */
+ /* set SUSPEND (SPND) - CSR5 bit 0 */
+ a->write_csr(ioaddr, 5, 0x0001);
+
+ /* poll waiting for bit to be set */
+ ticks = 0;
+ while (!(a->read_csr(ioaddr, 5) & 0x0001)) {
+ spin_unlock_irqrestore(&lp->lock, flags);
+ mdelay(1);
+ spin_lock_irqsave(&lp->lock, flags);
+ ticks++;
+ if (ticks > 200) {
+ if (netif_msg_hw(lp))
+ printk(KERN_DEBUG
+ "%s: Error getting into suspend!\n",
+ dev->name);
+ break;
+ }
+ }
+ }
- *buff++ = a->read_csr(ioaddr, 112);
- *buff++ = a->read_csr(ioaddr, 114);
+ /* read address PROM */
+ for (i = 0; i < 16; i += 2)
+ *buff++ = inw(ioaddr + i);
- /* read bus configuration registers */
- for (i=0; i<30; i++) {
- *buff++ = a->read_bcr(ioaddr, i);
- }
- *buff++ = 0; /* skip bcr30 so as not to hang 79C976 */
- for (i=31; i<36; i++) {
- *buff++ = a->read_bcr(ioaddr, i);
- }
+ /* read control and status registers */
+ for (i = 0; i < 90; i++) {
+ *buff++ = a->read_csr(ioaddr, i);
+ }
+
+ *buff++ = a->read_csr(ioaddr, 112);
+ *buff++ = a->read_csr(ioaddr, 114);
- /* read mii phy registers */
- if (lp->mii) {
- for (i=0; i<32; i++) {
- lp->a.write_bcr(ioaddr, 33, ((lp->mii_if.phy_id) << 5) | i);
- *buff++ = lp->a.read_bcr(ioaddr, 34);
+ /* read bus configuration registers */
+ for (i = 0; i < 30; i++) {
+ *buff++ = a->read_bcr(ioaddr, i);
+ }
+ *buff++ = 0; /* skip bcr30 so as not to hang 79C976 */
+ for (i = 31; i < 36; i++) {
+ *buff++ = a->read_bcr(ioaddr, i);
}
- }
- if (!(csr0 & 0x0004)) { /* If not stopped */
- /* clear SUSPEND (SPND) - CSR5 bit 0 */
- a->write_csr(ioaddr, 5, 0x0000);
- }
+ /* read mii phy registers */
+ if (lp->mii) {
+ int j;
+ for (j = 0; j < PCNET32_MAX_PHYS; j++) {
+ if (lp->phymask & (1 << j)) {
+ for (i = 0; i < PCNET32_REGS_PER_PHY; i++) {
+ lp->a.write_bcr(ioaddr, 33,
+ (j << 5) | i);
+ *buff++ = lp->a.read_bcr(ioaddr, 34);
+ }
+ }
+ }
+ }
- i = buff - (u16 *)ptr;
- for (; i < PCNET32_NUM_REGS; i++)
- *buff++ = 0;
+ if (!(csr0 & 0x0004)) { /* If not stopped */
+ /* clear SUSPEND (SPND) - CSR5 bit 0 */
+ a->write_csr(ioaddr, 5, 0x0000);
+ }
- spin_unlock_irqrestore(&lp->lock, flags);
+ spin_unlock_irqrestore(&lp->lock, flags);
}
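/*
 * Dump layout produced above, in order: 8 words of address PROM,
 * CSR0-89, CSR112, CSR114, BCR0-29, one zero placeholder (BCR30 is
 * skipped so a 79C976 does not hang), BCR31-35, then 32 registers for
 * each PHY flagged in phymask. The CSR5 SPND (suspend) dance around the
 * reads keeps the chip quiescent so the snapshot is self-consistent;
 * suspend is only entered and left if the chip was actually running.
 */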
static struct ethtool_ops pcnet32_ethtool_ops = {
- .get_settings = pcnet32_get_settings,
- .set_settings = pcnet32_set_settings,
- .get_drvinfo = pcnet32_get_drvinfo,
- .get_msglevel = pcnet32_get_msglevel,
- .set_msglevel = pcnet32_set_msglevel,
- .nway_reset = pcnet32_nway_reset,
- .get_link = pcnet32_get_link,
- .get_ringparam = pcnet32_get_ringparam,
- .set_ringparam = pcnet32_set_ringparam,
- .get_tx_csum = ethtool_op_get_tx_csum,
- .get_sg = ethtool_op_get_sg,
- .get_tso = ethtool_op_get_tso,
- .get_strings = pcnet32_get_strings,
- .self_test_count = pcnet32_self_test_count,
- .self_test = pcnet32_ethtool_test,
- .phys_id = pcnet32_phys_id,
- .get_regs_len = pcnet32_get_regs_len,
- .get_regs = pcnet32_get_regs,
- .get_perm_addr = ethtool_op_get_perm_addr,
+ .get_settings = pcnet32_get_settings,
+ .set_settings = pcnet32_set_settings,
+ .get_drvinfo = pcnet32_get_drvinfo,
+ .get_msglevel = pcnet32_get_msglevel,
+ .set_msglevel = pcnet32_set_msglevel,
+ .nway_reset = pcnet32_nway_reset,
+ .get_link = pcnet32_get_link,
+ .get_ringparam = pcnet32_get_ringparam,
+ .set_ringparam = pcnet32_set_ringparam,
+ .get_tx_csum = ethtool_op_get_tx_csum,
+ .get_sg = ethtool_op_get_sg,
+ .get_tso = ethtool_op_get_tso,
+ .get_strings = pcnet32_get_strings,
+ .self_test_count = pcnet32_self_test_count,
+ .self_test = pcnet32_ethtool_test,
+ .phys_id = pcnet32_phys_id,
+ .get_regs_len = pcnet32_get_regs_len,
+ .get_regs = pcnet32_get_regs,
+ .get_perm_addr = ethtool_op_get_perm_addr,
};
/* only probes for non-PCI devices, the rest are handled by
* pci_register_driver via pcnet32_probe_pci */
-static void __devinit
-pcnet32_probe_vlbus(void)
+static void __devinit pcnet32_probe_vlbus(void)
{
- unsigned int *port, ioaddr;
-
- /* search for PCnet32 VLB cards at known addresses */
- for (port = pcnet32_portlist; (ioaddr = *port); port++) {
- if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_vlbus")) {
- /* check if there is really a pcnet chip on that ioaddr */
- if ((inb(ioaddr + 14) == 0x57) && (inb(ioaddr + 15) == 0x57)) {
- pcnet32_probe1(ioaddr, 0, NULL);
- } else {
- release_region(ioaddr, PCNET32_TOTAL_SIZE);
- }
- }
- }
+ unsigned int *port, ioaddr;
+
+ /* search for PCnet32 VLB cards at known addresses */
+ for (port = pcnet32_portlist; (ioaddr = *port); port++) {
+ if (request_region
+ (ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_vlbus")) {
+ /* check if there is really a pcnet chip on that ioaddr */
+ if ((inb(ioaddr + 14) == 0x57)
+ && (inb(ioaddr + 15) == 0x57)) {
+ pcnet32_probe1(ioaddr, 0, NULL);
+ } else {
+ release_region(ioaddr, PCNET32_TOTAL_SIZE);
+ }
+ }
+ }
}
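/*
 * The 0x57/0x57 test above is a cheap signature check: offsets 14 and
 * 15 of the station address PROM hold the ASCII bytes 'W' 'W' on AMD
 * LANCE-family boards, so a match at a known VLB port is taken to mean
 * a PCnet chip probably lives there before pcnet32_probe1 does the real
 * identification.
 */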
-
static int __devinit
pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
{
- unsigned long ioaddr;
- int err;
-
- err = pci_enable_device(pdev);
- if (err < 0) {
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_ERR PFX "failed to enable device -- err=%d\n", err);
- return err;
- }
- pci_set_master(pdev);
+ unsigned long ioaddr;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err < 0) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_ERR PFX
+ "failed to enable device -- err=%d\n", err);
+ return err;
+ }
+ pci_set_master(pdev);
+
+ ioaddr = pci_resource_start(pdev, 0);
+ if (!ioaddr) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_ERR PFX
+ "card has no PCI IO resources, aborting\n");
+ return -ENODEV;
+ }
- ioaddr = pci_resource_start (pdev, 0);
- if (!ioaddr) {
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk (KERN_ERR PFX "card has no PCI IO resources, aborting\n");
- return -ENODEV;
- }
+ if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_ERR PFX
+ "architecture does not support 32bit PCI busmaster DMA\n");
+ return -ENODEV;
+ }
+ if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci") ==
+ NULL) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_ERR PFX
+ "io address range already allocated\n");
+ return -EBUSY;
+ }
- if (!pci_dma_supported(pdev, PCNET32_DMA_MASK)) {
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_ERR PFX "architecture does not support 32bit PCI busmaster DMA\n");
- return -ENODEV;
- }
- if (request_region(ioaddr, PCNET32_TOTAL_SIZE, "pcnet32_probe_pci") == NULL) {
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_ERR PFX "io address range already allocated\n");
- return -EBUSY;
- }
-
- err = pcnet32_probe1(ioaddr, 1, pdev);
- if (err < 0) {
- pci_disable_device(pdev);
- }
- return err;
+ err = pcnet32_probe1(ioaddr, 1, pdev);
+ if (err < 0) {
+ pci_disable_device(pdev);
+ }
+ return err;
}
-
/* pcnet32_probe1
* Called from both pcnet32_probe_vlbus and pcnet_probe_pci.
* pdev will be NULL when called from pcnet32_probe_vlbus.
@@ -1107,630 +1028,764 @@ pcnet32_probe_pci(struct pci_dev *pdev, const struct pci_device_id *ent)
static int __devinit
pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
{
- struct pcnet32_private *lp;
- dma_addr_t lp_dma_addr;
- int i, media;
- int fdx, mii, fset, dxsuflo;
- int chip_version;
- char *chipname;
- struct net_device *dev;
- struct pcnet32_access *a = NULL;
- u8 promaddr[6];
- int ret = -ENODEV;
-
- /* reset the chip */
- pcnet32_wio_reset(ioaddr);
-
- /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */
- if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) {
- a = &pcnet32_wio;
- } else {
- pcnet32_dwio_reset(ioaddr);
- if (pcnet32_dwio_read_csr(ioaddr, 0) == 4 && pcnet32_dwio_check(ioaddr)) {
- a = &pcnet32_dwio;
- } else
- goto err_release_region;
- }
-
- chip_version = a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr,89) << 16);
- if ((pcnet32_debug & NETIF_MSG_PROBE) && (pcnet32_debug & NETIF_MSG_HW))
- printk(KERN_INFO " PCnet chip version is %#x.\n", chip_version);
- if ((chip_version & 0xfff) != 0x003) {
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_INFO PFX "Unsupported chip version.\n");
- goto err_release_region;
- }
-
- /* initialize variables */
- fdx = mii = fset = dxsuflo = 0;
- chip_version = (chip_version >> 12) & 0xffff;
-
- switch (chip_version) {
- case 0x2420:
- chipname = "PCnet/PCI 79C970"; /* PCI */
- break;
- case 0x2430:
- if (shared)
- chipname = "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */
- else
- chipname = "PCnet/32 79C965"; /* 486/VL bus */
- break;
- case 0x2621:
- chipname = "PCnet/PCI II 79C970A"; /* PCI */
- fdx = 1;
- break;
- case 0x2623:
- chipname = "PCnet/FAST 79C971"; /* PCI */
- fdx = 1; mii = 1; fset = 1;
- break;
- case 0x2624:
- chipname = "PCnet/FAST+ 79C972"; /* PCI */
- fdx = 1; mii = 1; fset = 1;
- break;
- case 0x2625:
- chipname = "PCnet/FAST III 79C973"; /* PCI */
- fdx = 1; mii = 1;
- break;
- case 0x2626:
- chipname = "PCnet/Home 79C978"; /* PCI */
- fdx = 1;
+ struct pcnet32_private *lp;
+ dma_addr_t lp_dma_addr;
+ int i, media;
+ int fdx, mii, fset, dxsuflo;
+ int chip_version;
+ char *chipname;
+ struct net_device *dev;
+ struct pcnet32_access *a = NULL;
+ u8 promaddr[6];
+ int ret = -ENODEV;
+
+ /* reset the chip */
+ pcnet32_wio_reset(ioaddr);
+
+ /* NOTE: 16-bit check is first, otherwise some older PCnet chips fail */
+ if (pcnet32_wio_read_csr(ioaddr, 0) == 4 && pcnet32_wio_check(ioaddr)) {
+ a = &pcnet32_wio;
+ } else {
+ pcnet32_dwio_reset(ioaddr);
+ if (pcnet32_dwio_read_csr(ioaddr, 0) == 4
+ && pcnet32_dwio_check(ioaddr)) {
+ a = &pcnet32_dwio;
+ } else
+ goto err_release_region;
+ }
+
+ chip_version =
+ a->read_csr(ioaddr, 88) | (a->read_csr(ioaddr, 89) << 16);
+ if ((pcnet32_debug & NETIF_MSG_PROBE) && (pcnet32_debug & NETIF_MSG_HW))
+ printk(KERN_INFO " PCnet chip version is %#x.\n",
+ chip_version);
+ if ((chip_version & 0xfff) != 0x003) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_INFO PFX "Unsupported chip version.\n");
+ goto err_release_region;
+ }
+
+ /* initialize variables */
+ fdx = mii = fset = dxsuflo = 0;
+ chip_version = (chip_version >> 12) & 0xffff;
+
+ switch (chip_version) {
+ case 0x2420:
+ chipname = "PCnet/PCI 79C970"; /* PCI */
+ break;
+ case 0x2430:
+ if (shared)
+ chipname = "PCnet/PCI 79C970"; /* 970 gives the wrong chip id back */
+ else
+ chipname = "PCnet/32 79C965"; /* 486/VL bus */
+ break;
+ case 0x2621:
+ chipname = "PCnet/PCI II 79C970A"; /* PCI */
+ fdx = 1;
+ break;
+ case 0x2623:
+ chipname = "PCnet/FAST 79C971"; /* PCI */
+ fdx = 1;
+ mii = 1;
+ fset = 1;
+ break;
+ case 0x2624:
+ chipname = "PCnet/FAST+ 79C972"; /* PCI */
+ fdx = 1;
+ mii = 1;
+ fset = 1;
+ break;
+ case 0x2625:
+ chipname = "PCnet/FAST III 79C973"; /* PCI */
+ fdx = 1;
+ mii = 1;
+ break;
+ case 0x2626:
+ chipname = "PCnet/Home 79C978"; /* PCI */
+ fdx = 1;
+ /*
+ * This is based on specs published at www.amd.com. This section
+ * assumes that a card with a 79C978 wants to go into standard
+ * ethernet mode. The 79C978 can also go into 1Mb HomePNA mode,
+ * and the module option homepna=1 can select this instead.
+ */
+ media = a->read_bcr(ioaddr, 49);
+ media &= ~3; /* default to 10Mb ethernet */
+ if (cards_found < MAX_UNITS && homepna[cards_found])
+ media |= 1; /* switch to home wiring mode */
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_DEBUG PFX "media set to %sMbit mode.\n",
+ (media & 1) ? "1" : "10");
+ a->write_bcr(ioaddr, 49, media);
+ break;
+ case 0x2627:
+ chipname = "PCnet/FAST III 79C975"; /* PCI */
+ fdx = 1;
+ mii = 1;
+ break;
+ case 0x2628:
+ chipname = "PCnet/PRO 79C976";
+ fdx = 1;
+ mii = 1;
+ break;
+ default:
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_INFO PFX
+ "PCnet version %#x, no PCnet32 chip.\n",
+ chip_version);
+ goto err_release_region;
+ }
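	/*
	 * Worked example of the decoding above: a 79C970A reads back
	 * 0x2621003 from CSR88/89 (ignoring silicon-revision bits in the
	 * uppermost nibble). The low 12 bits are the 0x003 part-ID
	 * signature checked earlier; after the (>> 12) & 0xffff shift,
	 * chip_version is 0x2621, which this switch maps to
	 * "PCnet/PCI II 79C970A".
	 */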
+
/*
- * This is based on specs published at www.amd.com. This section
- * assumes that a card with a 79C978 wants to go into standard
- * ethernet mode. The 79C978 can also go into 1Mb HomePNA mode,
- * and the module option homepna=1 can select this instead.
+ * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit
+ * starting until the packet is loaded. Strike one for reliability, lose
+ * one for latency - although on PCI this isnt a big loss. Older chips
+ * have FIFO's smaller than a packet, so you can't do this.
+ * Turn on BCR18:BurstRdEn and BCR18:BurstWrEn.
*/
- media = a->read_bcr(ioaddr, 49);
- media &= ~3; /* default to 10Mb ethernet */
- if (cards_found < MAX_UNITS && homepna[cards_found])
- media |= 1; /* switch to home wiring mode */
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_DEBUG PFX "media set to %sMbit mode.\n",
- (media & 1) ? "1" : "10");
- a->write_bcr(ioaddr, 49, media);
- break;
- case 0x2627:
- chipname = "PCnet/FAST III 79C975"; /* PCI */
- fdx = 1; mii = 1;
- break;
- case 0x2628:
- chipname = "PCnet/PRO 79C976";
- fdx = 1; mii = 1;
- break;
- default:
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_INFO PFX "PCnet version %#x, no PCnet32 chip.\n",
- chip_version);
- goto err_release_region;
- }
-
- /*
- * On selected chips turn on the BCR18:NOUFLO bit. This stops transmit
- * starting until the packet is loaded. Strike one for reliability, lose
- * one for latency - although on PCI this isnt a big loss. Older chips
- * have FIFO's smaller than a packet, so you can't do this.
- * Turn on BCR18:BurstRdEn and BCR18:BurstWrEn.
- */
-
- if (fset) {
- a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0860));
- a->write_csr(ioaddr, 80, (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00);
- dxsuflo = 1;
- }
-
- dev = alloc_etherdev(0);
- if (!dev) {
+
+ if (fset) {
+ a->write_bcr(ioaddr, 18, (a->read_bcr(ioaddr, 18) | 0x0860));
+ a->write_csr(ioaddr, 80,
+ (a->read_csr(ioaddr, 80) & 0x0C00) | 0x0c00);
+ dxsuflo = 1;
+ }
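	/*
	 * Bit arithmetic above: BCR18 | 0x0860 sets NOUFLO (bit 11),
	 * BurstRdEn (bit 6) and BurstWrEn (bit 5). Note that the CSR80
	 * expression (x & 0x0C00) | 0x0c00 always evaluates to 0x0c00:
	 * it sets XMTSP (bits 10-11) to 3, the "~220 bytes" transmit
	 * start point shown in the probe printout, and clears every
	 * other CSR80 bit. That quirk predates this patch (see the
	 * removed lines above) and is preserved verbatim here.
	 */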
+
+ dev = alloc_etherdev(0);
+ if (!dev) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_ERR PFX "Memory allocation failed.\n");
+ ret = -ENOMEM;
+ goto err_release_region;
+ }
+ SET_NETDEV_DEV(dev, &pdev->dev);
+
if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_ERR PFX "Memory allocation failed.\n");
- ret = -ENOMEM;
- goto err_release_region;
- }
- SET_NETDEV_DEV(dev, &pdev->dev);
-
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr);
-
- /* In most chips, after a chip reset, the ethernet address is read from the
- * station address PROM at the base address and programmed into the
- * "Physical Address Registers" CSR12-14.
- * As a precautionary measure, we read the PROM values and complain if
- * they disagree with the CSRs. Either way, we use the CSR values, and
- * double check that they are valid.
- */
- for (i = 0; i < 3; i++) {
- unsigned int val;
- val = a->read_csr(ioaddr, i+12) & 0x0ffff;
- /* There may be endianness issues here. */
- dev->dev_addr[2*i] = val & 0x0ff;
- dev->dev_addr[2*i+1] = (val >> 8) & 0x0ff;
- }
-
- /* read PROM address and compare with CSR address */
- for (i = 0; i < 6; i++)
- promaddr[i] = inb(ioaddr + i);
-
- if (memcmp(promaddr, dev->dev_addr, 6)
- || !is_valid_ether_addr(dev->dev_addr)) {
- if (is_valid_ether_addr(promaddr)) {
- if (pcnet32_debug & NETIF_MSG_PROBE) {
- printk(" warning: CSR address invalid,\n");
- printk(KERN_INFO " using instead PROM address of");
- }
- memcpy(dev->dev_addr, promaddr, 6);
- }
- }
- memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
-
- /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */
- if (!is_valid_ether_addr(dev->perm_addr))
- memset(dev->dev_addr, 0, sizeof(dev->dev_addr));
-
- if (pcnet32_debug & NETIF_MSG_PROBE) {
+ printk(KERN_INFO PFX "%s at %#3lx,", chipname, ioaddr);
+
+ /* In most chips, after a chip reset, the ethernet address is read from the
+ * station address PROM at the base address and programmed into the
+ * "Physical Address Registers" CSR12-14.
+ * As a precautionary measure, we read the PROM values and complain if
+ * they disagree with the CSRs. Either way, we use the CSR values, and
+ * double check that they are valid.
+ */
+ for (i = 0; i < 3; i++) {
+ unsigned int val;
+ val = a->read_csr(ioaddr, i + 12) & 0x0ffff;
+ /* There may be endianness issues here. */
+ dev->dev_addr[2 * i] = val & 0x0ff;
+ dev->dev_addr[2 * i + 1] = (val >> 8) & 0x0ff;
+ }
+
+ /* read PROM address and compare with CSR address */
for (i = 0; i < 6; i++)
- printk(" %2.2x", dev->dev_addr[i]);
-
- /* Version 0x2623 and 0x2624 */
- if (((chip_version + 1) & 0xfffe) == 0x2624) {
- i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */
- printk("\n" KERN_INFO " tx_start_pt(0x%04x):",i);
- switch(i>>10) {
- case 0: printk(" 20 bytes,"); break;
- case 1: printk(" 64 bytes,"); break;
- case 2: printk(" 128 bytes,"); break;
- case 3: printk("~220 bytes,"); break;
- }
- i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */
- printk(" BCR18(%x):",i&0xffff);
- if (i & (1<<5)) printk("BurstWrEn ");
- if (i & (1<<6)) printk("BurstRdEn ");
- if (i & (1<<7)) printk("DWordIO ");
- if (i & (1<<11)) printk("NoUFlow ");
- i = a->read_bcr(ioaddr, 25);
- printk("\n" KERN_INFO " SRAMSIZE=0x%04x,",i<<8);
- i = a->read_bcr(ioaddr, 26);
- printk(" SRAM_BND=0x%04x,",i<<8);
- i = a->read_bcr(ioaddr, 27);
- if (i & (1<<14)) printk("LowLatRx");
- }
- }
-
- dev->base_addr = ioaddr;
- /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */
- if ((lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr)) == NULL) {
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_ERR PFX "Consistent memory allocation failed.\n");
- ret = -ENOMEM;
- goto err_free_netdev;
- }
-
- memset(lp, 0, sizeof(*lp));
- lp->dma_addr = lp_dma_addr;
- lp->pci_dev = pdev;
-
- spin_lock_init(&lp->lock);
-
- SET_MODULE_OWNER(dev);
- SET_NETDEV_DEV(dev, &pdev->dev);
- dev->priv = lp;
- lp->name = chipname;
- lp->shared_irq = shared;
- lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */
- lp->rx_ring_size = RX_RING_SIZE; /* default rx ring size */
- lp->tx_mod_mask = lp->tx_ring_size - 1;
- lp->rx_mod_mask = lp->rx_ring_size - 1;
- lp->tx_len_bits = (PCNET32_LOG_TX_BUFFERS << 12);
- lp->rx_len_bits = (PCNET32_LOG_RX_BUFFERS << 4);
- lp->mii_if.full_duplex = fdx;
- lp->mii_if.phy_id_mask = 0x1f;
- lp->mii_if.reg_num_mask = 0x1f;
- lp->dxsuflo = dxsuflo;
- lp->mii = mii;
- lp->msg_enable = pcnet32_debug;
- if ((cards_found >= MAX_UNITS) || (options[cards_found] > sizeof(options_mapping)))
- lp->options = PCNET32_PORT_ASEL;
- else
- lp->options = options_mapping[options[cards_found]];
- lp->mii_if.dev = dev;
- lp->mii_if.mdio_read = mdio_read;
- lp->mii_if.mdio_write = mdio_write;
-
- if (fdx && !(lp->options & PCNET32_PORT_ASEL) &&
- ((cards_found>=MAX_UNITS) || full_duplex[cards_found]))
- lp->options |= PCNET32_PORT_FD;
-
- if (!a) {
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_ERR PFX "No access methods\n");
- ret = -ENODEV;
- goto err_free_consistent;
- }
- lp->a = *a;
-
- /* prior to register_netdev, dev->name is not yet correct */
- if (pcnet32_alloc_ring(dev, pci_name(lp->pci_dev))) {
- ret = -ENOMEM;
- goto err_free_ring;
- }
- /* detect special T1/E1 WAN card by checking for MAC address */
- if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0
+ promaddr[i] = inb(ioaddr + i);
+
+ if (memcmp(promaddr, dev->dev_addr, 6)
+ || !is_valid_ether_addr(dev->dev_addr)) {
+ if (is_valid_ether_addr(promaddr)) {
+ if (pcnet32_debug & NETIF_MSG_PROBE) {
+ printk(" warning: CSR address invalid,\n");
+ printk(KERN_INFO
+ " using instead PROM address of");
+ }
+ memcpy(dev->dev_addr, promaddr, 6);
+ }
+ }
+ memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
+
+ /* if the ethernet address is not valid, force to 00:00:00:00:00:00 */
+ if (!is_valid_ether_addr(dev->perm_addr))
+ memset(dev->dev_addr, 0, sizeof(dev->dev_addr));
+
+ if (pcnet32_debug & NETIF_MSG_PROBE) {
+ for (i = 0; i < 6; i++)
+ printk(" %2.2x", dev->dev_addr[i]);
+
+ /* Version 0x2623 and 0x2624 */
+ if (((chip_version + 1) & 0xfffe) == 0x2624) {
+ i = a->read_csr(ioaddr, 80) & 0x0C00; /* Check tx_start_pt */
+ printk("\n" KERN_INFO " tx_start_pt(0x%04x):", i);
+ switch (i >> 10) {
+ case 0:
+ printk(" 20 bytes,");
+ break;
+ case 1:
+ printk(" 64 bytes,");
+ break;
+ case 2:
+ printk(" 128 bytes,");
+ break;
+ case 3:
+ printk("~220 bytes,");
+ break;
+ }
+ i = a->read_bcr(ioaddr, 18); /* Check Burst/Bus control */
+ printk(" BCR18(%x):", i & 0xffff);
+ if (i & (1 << 5))
+ printk("BurstWrEn ");
+ if (i & (1 << 6))
+ printk("BurstRdEn ");
+ if (i & (1 << 7))
+ printk("DWordIO ");
+ if (i & (1 << 11))
+ printk("NoUFlow ");
+ i = a->read_bcr(ioaddr, 25);
+ printk("\n" KERN_INFO " SRAMSIZE=0x%04x,", i << 8);
+ i = a->read_bcr(ioaddr, 26);
+ printk(" SRAM_BND=0x%04x,", i << 8);
+ i = a->read_bcr(ioaddr, 27);
+ if (i & (1 << 14))
+ printk("LowLatRx");
+ }
+ }
+
+ dev->base_addr = ioaddr;
+ /* pci_alloc_consistent returns page-aligned memory, so we do not have to check the alignment */
+	lp = pci_alloc_consistent(pdev, sizeof(*lp), &lp_dma_addr);
+	if (lp == NULL) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_ERR PFX
+ "Consistent memory allocation failed.\n");
+ ret = -ENOMEM;
+ goto err_free_netdev;
+ }
+
+ memset(lp, 0, sizeof(*lp));
+ lp->dma_addr = lp_dma_addr;
+ lp->pci_dev = pdev;
+
+ spin_lock_init(&lp->lock);
+
+ SET_MODULE_OWNER(dev);
+ SET_NETDEV_DEV(dev, &pdev->dev);
+ dev->priv = lp;
+ lp->name = chipname;
+ lp->shared_irq = shared;
+ lp->tx_ring_size = TX_RING_SIZE; /* default tx ring size */
+ lp->rx_ring_size = RX_RING_SIZE; /* default rx ring size */
+ lp->tx_mod_mask = lp->tx_ring_size - 1;
+ lp->rx_mod_mask = lp->rx_ring_size - 1;
+ lp->tx_len_bits = (PCNET32_LOG_TX_BUFFERS << 12);
+ lp->rx_len_bits = (PCNET32_LOG_RX_BUFFERS << 4);
+ lp->mii_if.full_duplex = fdx;
+ lp->mii_if.phy_id_mask = 0x1f;
+ lp->mii_if.reg_num_mask = 0x1f;
+ lp->dxsuflo = dxsuflo;
+ lp->mii = mii;
+ lp->msg_enable = pcnet32_debug;
+ if ((cards_found >= MAX_UNITS)
+ || (options[cards_found] > sizeof(options_mapping)))
+ lp->options = PCNET32_PORT_ASEL;
+ else
+ lp->options = options_mapping[options[cards_found]];
+ lp->mii_if.dev = dev;
+ lp->mii_if.mdio_read = mdio_read;
+ lp->mii_if.mdio_write = mdio_write;
+
+ if (fdx && !(lp->options & PCNET32_PORT_ASEL) &&
+ ((cards_found >= MAX_UNITS) || full_duplex[cards_found]))
+ lp->options |= PCNET32_PORT_FD;
+
+ if (!a) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_ERR PFX "No access methods\n");
+ ret = -ENODEV;
+ goto err_free_consistent;
+ }
+ lp->a = *a;
+
+ /* prior to register_netdev, dev->name is not yet correct */
+ if (pcnet32_alloc_ring(dev, pci_name(lp->pci_dev))) {
+ ret = -ENOMEM;
+ goto err_free_ring;
+ }
+ /* detect special T1/E1 WAN card by checking for MAC address */
+ if (dev->dev_addr[0] == 0x00 && dev->dev_addr[1] == 0xe0
&& dev->dev_addr[2] == 0x75)
- lp->options = PCNET32_PORT_FD | PCNET32_PORT_GPSI;
-
- lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */
- lp->init_block.tlen_rlen = le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits);
- for (i = 0; i < 6; i++)
- lp->init_block.phys_addr[i] = dev->dev_addr[i];
- lp->init_block.filter[0] = 0x00000000;
- lp->init_block.filter[1] = 0x00000000;
- lp->init_block.rx_ring = (u32)le32_to_cpu(lp->rx_ring_dma_addr);
- lp->init_block.tx_ring = (u32)le32_to_cpu(lp->tx_ring_dma_addr);
-
- /* switch pcnet32 to 32bit mode */
- a->write_bcr(ioaddr, 20, 2);
-
- a->write_csr(ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private,
- init_block)) & 0xffff);
- a->write_csr(ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private,
- init_block)) >> 16);
-
- if (pdev) { /* use the IRQ provided by PCI */
- dev->irq = pdev->irq;
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(" assigned IRQ %d.\n", dev->irq);
- } else {
- unsigned long irq_mask = probe_irq_on();
+ lp->options = PCNET32_PORT_FD | PCNET32_PORT_GPSI;
- /*
- * To auto-IRQ we enable the initialization-done and DMA error
- * interrupts. For ISA boards we get a DMA error, but VLB and PCI
- * boards will work.
- */
- /* Trigger an initialization just for the interrupt. */
- a->write_csr (ioaddr, 0, 0x41);
- mdelay (1);
+ lp->init_block.mode = le16_to_cpu(0x0003); /* Disable Rx and Tx. */
+ lp->init_block.tlen_rlen =
+ le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits);
+ for (i = 0; i < 6; i++)
+ lp->init_block.phys_addr[i] = dev->dev_addr[i];
+ lp->init_block.filter[0] = 0x00000000;
+ lp->init_block.filter[1] = 0x00000000;
+ lp->init_block.rx_ring = (u32) le32_to_cpu(lp->rx_ring_dma_addr);
+ lp->init_block.tx_ring = (u32) le32_to_cpu(lp->tx_ring_dma_addr);
+
+ /* switch pcnet32 to 32bit mode */
+ a->write_bcr(ioaddr, 20, 2);
+
+ a->write_csr(ioaddr, 1, (lp->dma_addr + offsetof(struct pcnet32_private,
+ init_block)) & 0xffff);
+ a->write_csr(ioaddr, 2, (lp->dma_addr + offsetof(struct pcnet32_private,
+ init_block)) >> 16);
+
+ if (pdev) { /* use the IRQ provided by PCI */
+ dev->irq = pdev->irq;
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(" assigned IRQ %d.\n", dev->irq);
+ } else {
+ unsigned long irq_mask = probe_irq_on();
+
+ /*
+ * To auto-IRQ we enable the initialization-done and DMA error
+ * interrupts. For ISA boards we get a DMA error, but VLB and PCI
+ * boards will work.
+ */
+ /* Trigger an initialization just for the interrupt. */
+ a->write_csr(ioaddr, 0, 0x41);
+ mdelay(1);
+
+ dev->irq = probe_irq_off(irq_mask);
+ if (!dev->irq) {
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(", failed to detect IRQ line.\n");
+ ret = -ENODEV;
+ goto err_free_ring;
+ }
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(", probed IRQ %d.\n", dev->irq);
+ }
- dev->irq = probe_irq_off (irq_mask);
- if (!dev->irq) {
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(", failed to detect IRQ line.\n");
- ret = -ENODEV;
- goto err_free_ring;
+ /* Set the mii phy_id so that we can query the link state */
+ if (lp->mii) {
+ /* lp->phycount and lp->phymask are set to 0 by memset above */
+
+ lp->mii_if.phy_id = ((lp->a.read_bcr(ioaddr, 33)) >> 5) & 0x1f;
+ /* scan for PHYs */
+ for (i = 0; i < PCNET32_MAX_PHYS; i++) {
+ unsigned short id1, id2;
+
+ id1 = mdio_read(dev, i, MII_PHYSID1);
+ if (id1 == 0xffff)
+ continue;
+ id2 = mdio_read(dev, i, MII_PHYSID2);
+ if (id2 == 0xffff)
+ continue;
+ if (i == 31 && ((chip_version + 1) & 0xfffe) == 0x2624)
+ continue; /* 79C971 & 79C972 have phantom phy at id 31 */
+ lp->phycount++;
+ lp->phymask |= (1 << i);
+ lp->mii_if.phy_id = i;
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_INFO PFX
+ "Found PHY %04x:%04x at address %d.\n",
+ id1, id2, i);
+ }
+ lp->a.write_bcr(ioaddr, 33, (lp->mii_if.phy_id) << 5);
+ if (lp->phycount > 1) {
+ lp->options |= PCNET32_PORT_MII;
+ }
}
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(", probed IRQ %d.\n", dev->irq);
- }
-
- /* Set the mii phy_id so that we can query the link state */
- if (lp->mii)
- lp->mii_if.phy_id = ((lp->a.read_bcr (ioaddr, 33)) >> 5) & 0x1f;
-
- init_timer (&lp->watchdog_timer);
- lp->watchdog_timer.data = (unsigned long) dev;
- lp->watchdog_timer.function = (void *) &pcnet32_watchdog;
-
- /* The PCNET32-specific entries in the device structure. */
- dev->open = &pcnet32_open;
- dev->hard_start_xmit = &pcnet32_start_xmit;
- dev->stop = &pcnet32_close;
- dev->get_stats = &pcnet32_get_stats;
- dev->set_multicast_list = &pcnet32_set_multicast_list;
- dev->do_ioctl = &pcnet32_ioctl;
- dev->ethtool_ops = &pcnet32_ethtool_ops;
- dev->tx_timeout = pcnet32_tx_timeout;
- dev->watchdog_timeo = (5*HZ);
+
+ init_timer(&lp->watchdog_timer);
+ lp->watchdog_timer.data = (unsigned long)dev;
+ lp->watchdog_timer.function = (void *)&pcnet32_watchdog;
+
+ /* The PCNET32-specific entries in the device structure. */
+ dev->open = &pcnet32_open;
+ dev->hard_start_xmit = &pcnet32_start_xmit;
+ dev->stop = &pcnet32_close;
+ dev->get_stats = &pcnet32_get_stats;
+ dev->set_multicast_list = &pcnet32_set_multicast_list;
+ dev->do_ioctl = &pcnet32_ioctl;
+ dev->ethtool_ops = &pcnet32_ethtool_ops;
+ dev->tx_timeout = pcnet32_tx_timeout;
+ dev->watchdog_timeo = (5 * HZ);
#ifdef CONFIG_NET_POLL_CONTROLLER
- dev->poll_controller = pcnet32_poll_controller;
+ dev->poll_controller = pcnet32_poll_controller;
#endif
- /* Fill in the generic fields of the device structure. */
- if (register_netdev(dev))
- goto err_free_ring;
-
- if (pdev) {
- pci_set_drvdata(pdev, dev);
- } else {
- lp->next = pcnet32_dev;
- pcnet32_dev = dev;
- }
-
- if (pcnet32_debug & NETIF_MSG_PROBE)
- printk(KERN_INFO "%s: registered as %s\n", dev->name, lp->name);
- cards_found++;
-
- /* enable LED writes */
- a->write_bcr(ioaddr, 2, a->read_bcr(ioaddr, 2) | 0x1000);
-
- return 0;
-
-err_free_ring:
- pcnet32_free_ring(dev);
-err_free_consistent:
- pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
-err_free_netdev:
- free_netdev(dev);
-err_release_region:
- release_region(ioaddr, PCNET32_TOTAL_SIZE);
- return ret;
-}
+ /* Fill in the generic fields of the device structure. */
+ if (register_netdev(dev))
+ goto err_free_ring;
+
+ if (pdev) {
+ pci_set_drvdata(pdev, dev);
+ } else {
+ lp->next = pcnet32_dev;
+ pcnet32_dev = dev;
+ }
+ if (pcnet32_debug & NETIF_MSG_PROBE)
+ printk(KERN_INFO "%s: registered as %s\n", dev->name, lp->name);
+ cards_found++;
+
+ /* enable LED writes */
+ a->write_bcr(ioaddr, 2, a->read_bcr(ioaddr, 2) | 0x1000);
+
+ return 0;
+
+ err_free_ring:
+ pcnet32_free_ring(dev);
+ err_free_consistent:
+ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
+ err_free_netdev:
+ free_netdev(dev);
+ err_release_region:
+ release_region(ioaddr, PCNET32_TOTAL_SIZE);
+ return ret;
+}
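
The probe path above unpacks the station address from three 16-bit CSRs
(CSR12-14), low byte first, which is where the "endianness issues" note
comes from. A minimal standalone sketch of that unpacking, with made-up
CSR contents standing in for a->read_csr():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical CSR12-14 contents; on real hardware these come from
 * a->read_csr(ioaddr, 12..14). */
static const uint16_t csr[3] = { 0x3412, 0x7856, 0xbc9a };

int main(void)
{
	uint8_t mac[6];
	int i;

	for (i = 0; i < 3; i++) {
		mac[2 * i] = csr[i] & 0xff;            /* low byte first */
		mac[2 * i + 1] = (csr[i] >> 8) & 0xff; /* then high byte */
	}
	printf("%02x:%02x:%02x:%02x:%02x:%02x\n",
	       mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
	return 0;	/* prints 12:34:56:78:9a:bc */
}
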
/* if any allocation fails, caller must also call pcnet32_free_ring */
static int pcnet32_alloc_ring(struct net_device *dev, char *name)
{
- struct pcnet32_private *lp = dev->priv;
+ struct pcnet32_private *lp = dev->priv;
- lp->tx_ring = pci_alloc_consistent(lp->pci_dev,
- sizeof(struct pcnet32_tx_head) * lp->tx_ring_size,
- &lp->tx_ring_dma_addr);
- if (lp->tx_ring == NULL) {
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk("\n" KERN_ERR PFX "%s: Consistent memory allocation failed.\n",
- name);
- return -ENOMEM;
- }
-
- lp->rx_ring = pci_alloc_consistent(lp->pci_dev,
- sizeof(struct pcnet32_rx_head) * lp->rx_ring_size,
- &lp->rx_ring_dma_addr);
- if (lp->rx_ring == NULL) {
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk("\n" KERN_ERR PFX "%s: Consistent memory allocation failed.\n",
- name);
- return -ENOMEM;
- }
-
- lp->tx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->tx_ring_size,
- GFP_ATOMIC);
- if (!lp->tx_dma_addr) {
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name);
- return -ENOMEM;
- }
- memset(lp->tx_dma_addr, 0, sizeof(dma_addr_t) * lp->tx_ring_size);
-
- lp->rx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->rx_ring_size,
- GFP_ATOMIC);
- if (!lp->rx_dma_addr) {
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name);
- return -ENOMEM;
- }
- memset(lp->rx_dma_addr, 0, sizeof(dma_addr_t) * lp->rx_ring_size);
-
- lp->tx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->tx_ring_size,
- GFP_ATOMIC);
- if (!lp->tx_skbuff) {
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name);
- return -ENOMEM;
- }
- memset(lp->tx_skbuff, 0, sizeof(struct sk_buff *) * lp->tx_ring_size);
-
- lp->rx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->rx_ring_size,
- GFP_ATOMIC);
- if (!lp->rx_skbuff) {
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk("\n" KERN_ERR PFX "%s: Memory allocation failed.\n", name);
- return -ENOMEM;
- }
- memset(lp->rx_skbuff, 0, sizeof(struct sk_buff *) * lp->rx_ring_size);
+ lp->tx_ring = pci_alloc_consistent(lp->pci_dev,
+ sizeof(struct pcnet32_tx_head) *
+ lp->tx_ring_size,
+ &lp->tx_ring_dma_addr);
+ if (lp->tx_ring == NULL) {
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk("\n" KERN_ERR PFX
+ "%s: Consistent memory allocation failed.\n",
+ name);
+ return -ENOMEM;
+ }
- return 0;
-}
+ lp->rx_ring = pci_alloc_consistent(lp->pci_dev,
+ sizeof(struct pcnet32_rx_head) *
+ lp->rx_ring_size,
+ &lp->rx_ring_dma_addr);
+ if (lp->rx_ring == NULL) {
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk("\n" KERN_ERR PFX
+ "%s: Consistent memory allocation failed.\n",
+ name);
+ return -ENOMEM;
+ }
+ lp->tx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->tx_ring_size,
+ GFP_ATOMIC);
+ if (!lp->tx_dma_addr) {
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk("\n" KERN_ERR PFX
+ "%s: Memory allocation failed.\n", name);
+ return -ENOMEM;
+ }
+ memset(lp->tx_dma_addr, 0, sizeof(dma_addr_t) * lp->tx_ring_size);
+
+ lp->rx_dma_addr = kmalloc(sizeof(dma_addr_t) * lp->rx_ring_size,
+ GFP_ATOMIC);
+ if (!lp->rx_dma_addr) {
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk("\n" KERN_ERR PFX
+ "%s: Memory allocation failed.\n", name);
+ return -ENOMEM;
+ }
+ memset(lp->rx_dma_addr, 0, sizeof(dma_addr_t) * lp->rx_ring_size);
+
+ lp->tx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->tx_ring_size,
+ GFP_ATOMIC);
+ if (!lp->tx_skbuff) {
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk("\n" KERN_ERR PFX
+ "%s: Memory allocation failed.\n", name);
+ return -ENOMEM;
+ }
+ memset(lp->tx_skbuff, 0, sizeof(struct sk_buff *) * lp->tx_ring_size);
+
+ lp->rx_skbuff = kmalloc(sizeof(struct sk_buff *) * lp->rx_ring_size,
+ GFP_ATOMIC);
+ if (!lp->rx_skbuff) {
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk("\n" KERN_ERR PFX
+ "%s: Memory allocation failed.\n", name);
+ return -ENOMEM;
+ }
+ memset(lp->rx_skbuff, 0, sizeof(struct sk_buff *) * lp->rx_ring_size);
+
+ return 0;
+}
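
The probe code derives tx_mod_mask/rx_mod_mask as ring size minus one,
and the rings allocated here are indexed through those masks; that only
yields a valid modulo because the ring sizes are powers of two. A small
userspace sketch of the masked-index idiom, with an invented RING_SIZE:

#include <stdio.h>

#define RING_SIZE 16              /* must be a power of two */
#define RING_MASK (RING_SIZE - 1) /* 0x0f: cheap modulo */

int main(void)
{
	unsigned int cur;

	/* cur_tx/cur_rx style counters grow without bound; masking
	 * folds them back into the ring, even across wraparound. */
	for (cur = 14; cur < 18; cur++)
		printf("counter %u -> entry %u\n", cur, cur & RING_MASK);
	return 0;
}
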
static void pcnet32_free_ring(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
+ struct pcnet32_private *lp = dev->priv;
- kfree(lp->tx_skbuff);
- lp->tx_skbuff = NULL;
+ kfree(lp->tx_skbuff);
+ lp->tx_skbuff = NULL;
- kfree(lp->rx_skbuff);
- lp->rx_skbuff = NULL;
+ kfree(lp->rx_skbuff);
+ lp->rx_skbuff = NULL;
- kfree(lp->tx_dma_addr);
- lp->tx_dma_addr = NULL;
+ kfree(lp->tx_dma_addr);
+ lp->tx_dma_addr = NULL;
- kfree(lp->rx_dma_addr);
- lp->rx_dma_addr = NULL;
+ kfree(lp->rx_dma_addr);
+ lp->rx_dma_addr = NULL;
- if (lp->tx_ring) {
- pci_free_consistent(lp->pci_dev, sizeof(struct pcnet32_tx_head) * lp->tx_ring_size,
- lp->tx_ring, lp->tx_ring_dma_addr);
- lp->tx_ring = NULL;
- }
+ if (lp->tx_ring) {
+ pci_free_consistent(lp->pci_dev,
+ sizeof(struct pcnet32_tx_head) *
+ lp->tx_ring_size, lp->tx_ring,
+ lp->tx_ring_dma_addr);
+ lp->tx_ring = NULL;
+ }
- if (lp->rx_ring) {
- pci_free_consistent(lp->pci_dev, sizeof(struct pcnet32_rx_head) * lp->rx_ring_size,
- lp->rx_ring, lp->rx_ring_dma_addr);
- lp->rx_ring = NULL;
- }
+ if (lp->rx_ring) {
+ pci_free_consistent(lp->pci_dev,
+ sizeof(struct pcnet32_rx_head) *
+ lp->rx_ring_size, lp->rx_ring,
+ lp->rx_ring_dma_addr);
+ lp->rx_ring = NULL;
+ }
}
-
-static int
-pcnet32_open(struct net_device *dev)
+static int pcnet32_open(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long ioaddr = dev->base_addr;
- u16 val;
- int i;
- int rc;
- unsigned long flags;
-
- if (request_irq(dev->irq, &pcnet32_interrupt,
- lp->shared_irq ? SA_SHIRQ : 0, dev->name, (void *)dev)) {
- return -EAGAIN;
- }
-
- spin_lock_irqsave(&lp->lock, flags);
- /* Check for a valid station address */
- if (!is_valid_ether_addr(dev->dev_addr)) {
- rc = -EINVAL;
- goto err_free_irq;
- }
-
- /* Reset the PCNET32 */
- lp->a.reset (ioaddr);
-
- /* switch pcnet32 to 32bit mode */
- lp->a.write_bcr (ioaddr, 20, 2);
-
- if (netif_msg_ifup(lp))
- printk(KERN_DEBUG "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n",
- dev->name, dev->irq,
- (u32) (lp->tx_ring_dma_addr),
- (u32) (lp->rx_ring_dma_addr),
- (u32) (lp->dma_addr + offsetof(struct pcnet32_private, init_block)));
-
- /* set/reset autoselect bit */
- val = lp->a.read_bcr (ioaddr, 2) & ~2;
- if (lp->options & PCNET32_PORT_ASEL)
- val |= 2;
- lp->a.write_bcr (ioaddr, 2, val);
-
- /* handle full duplex setting */
- if (lp->mii_if.full_duplex) {
- val = lp->a.read_bcr (ioaddr, 9) & ~3;
- if (lp->options & PCNET32_PORT_FD) {
- val |= 1;
- if (lp->options == (PCNET32_PORT_FD | PCNET32_PORT_AUI))
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long ioaddr = dev->base_addr;
+ u16 val;
+ int i;
+ int rc;
+ unsigned long flags;
+
+ if (request_irq(dev->irq, &pcnet32_interrupt,
+ lp->shared_irq ? SA_SHIRQ : 0, dev->name,
+ (void *)dev)) {
+ return -EAGAIN;
+ }
+
+ spin_lock_irqsave(&lp->lock, flags);
+ /* Check for a valid station address */
+ if (!is_valid_ether_addr(dev->dev_addr)) {
+ rc = -EINVAL;
+ goto err_free_irq;
+ }
+
+ /* Reset the PCNET32 */
+ lp->a.reset(ioaddr);
+
+ /* switch pcnet32 to 32bit mode */
+ lp->a.write_bcr(ioaddr, 20, 2);
+
+ if (netif_msg_ifup(lp))
+ printk(KERN_DEBUG
+ "%s: pcnet32_open() irq %d tx/rx rings %#x/%#x init %#x.\n",
+ dev->name, dev->irq, (u32) (lp->tx_ring_dma_addr),
+ (u32) (lp->rx_ring_dma_addr),
+ (u32) (lp->dma_addr +
+ offsetof(struct pcnet32_private, init_block)));
+
+ /* set/reset autoselect bit */
+ val = lp->a.read_bcr(ioaddr, 2) & ~2;
+ if (lp->options & PCNET32_PORT_ASEL)
val |= 2;
- } else if (lp->options & PCNET32_PORT_ASEL) {
- /* workaround of xSeries250, turn on for 79C975 only */
- i = ((lp->a.read_csr(ioaddr, 88) |
- (lp->a.read_csr(ioaddr,89) << 16)) >> 12) & 0xffff;
- if (i == 0x2627)
- val |= 3;
- }
- lp->a.write_bcr (ioaddr, 9, val);
- }
-
- /* set/reset GPSI bit in test register */
- val = lp->a.read_csr (ioaddr, 124) & ~0x10;
- if ((lp->options & PCNET32_PORT_PORTSEL) == PCNET32_PORT_GPSI)
- val |= 0x10;
- lp->a.write_csr (ioaddr, 124, val);
-
- /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */
- if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT &&
+ lp->a.write_bcr(ioaddr, 2, val);
+
+ /* handle full duplex setting */
+ if (lp->mii_if.full_duplex) {
+ val = lp->a.read_bcr(ioaddr, 9) & ~3;
+ if (lp->options & PCNET32_PORT_FD) {
+ val |= 1;
+ if (lp->options == (PCNET32_PORT_FD | PCNET32_PORT_AUI))
+ val |= 2;
+ } else if (lp->options & PCNET32_PORT_ASEL) {
+ /* workaround of xSeries250, turn on for 79C975 only */
+			i = ((lp->a.read_csr(ioaddr, 88) |
+			      (lp->a.read_csr(ioaddr, 89) << 16)) >> 12) & 0xffff;
+ if (i == 0x2627)
+ val |= 3;
+ }
+ lp->a.write_bcr(ioaddr, 9, val);
+ }
+
+ /* set/reset GPSI bit in test register */
+ val = lp->a.read_csr(ioaddr, 124) & ~0x10;
+ if ((lp->options & PCNET32_PORT_PORTSEL) == PCNET32_PORT_GPSI)
+ val |= 0x10;
+ lp->a.write_csr(ioaddr, 124, val);
+
+ /* Allied Telesyn AT 2700/2701 FX are 100Mbit only and do not negotiate */
+ if (lp->pci_dev->subsystem_vendor == PCI_VENDOR_ID_AT &&
(lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2700FX ||
lp->pci_dev->subsystem_device == PCI_SUBDEVICE_ID_AT_2701FX)) {
- if (lp->options & PCNET32_PORT_ASEL) {
- lp->options = PCNET32_PORT_FD | PCNET32_PORT_100;
- if (netif_msg_link(lp))
- printk(KERN_DEBUG "%s: Setting 100Mb-Full Duplex.\n",
- dev->name);
- }
- }
- {
- /*
- * 24 Jun 2004 according AMD, in order to change the PHY,
- * DANAS (or DISPM for 79C976) must be set; then select the speed,
- * duplex, and/or enable auto negotiation, and clear DANAS
- */
- if (lp->mii && !(lp->options & PCNET32_PORT_ASEL)) {
- lp->a.write_bcr(ioaddr, 32,
- lp->a.read_bcr(ioaddr, 32) | 0x0080);
- /* disable Auto Negotiation, set 10Mpbs, HD */
- val = lp->a.read_bcr(ioaddr, 32) & ~0xb8;
- if (lp->options & PCNET32_PORT_FD)
- val |= 0x10;
- if (lp->options & PCNET32_PORT_100)
- val |= 0x08;
- lp->a.write_bcr (ioaddr, 32, val);
+ if (lp->options & PCNET32_PORT_ASEL) {
+ lp->options = PCNET32_PORT_FD | PCNET32_PORT_100;
+ if (netif_msg_link(lp))
+ printk(KERN_DEBUG
+ "%s: Setting 100Mb-Full Duplex.\n",
+ dev->name);
+ }
+ }
+ if (lp->phycount < 2) {
+ /*
+		 * 24 Jun 2004: according to AMD, in order to change the PHY,
+ * DANAS (or DISPM for 79C976) must be set; then select the speed,
+ * duplex, and/or enable auto negotiation, and clear DANAS
+ */
+ if (lp->mii && !(lp->options & PCNET32_PORT_ASEL)) {
+ lp->a.write_bcr(ioaddr, 32,
+ lp->a.read_bcr(ioaddr, 32) | 0x0080);
+			/* disable Auto Negotiation, set 10Mbps, HD */
+ val = lp->a.read_bcr(ioaddr, 32) & ~0xb8;
+ if (lp->options & PCNET32_PORT_FD)
+ val |= 0x10;
+ if (lp->options & PCNET32_PORT_100)
+ val |= 0x08;
+ lp->a.write_bcr(ioaddr, 32, val);
+ } else {
+ if (lp->options & PCNET32_PORT_ASEL) {
+ lp->a.write_bcr(ioaddr, 32,
+						lp->a.read_bcr(ioaddr, 32) | 0x0080);
+ /* enable auto negotiate, setup, disable fd */
+ val = lp->a.read_bcr(ioaddr, 32) & ~0x98;
+ val |= 0x20;
+ lp->a.write_bcr(ioaddr, 32, val);
+ }
+ }
} else {
- if (lp->options & PCNET32_PORT_ASEL) {
- lp->a.write_bcr(ioaddr, 32,
- lp->a.read_bcr(ioaddr, 32) | 0x0080);
- /* enable auto negotiate, setup, disable fd */
- val = lp->a.read_bcr(ioaddr, 32) & ~0x98;
- val |= 0x20;
- lp->a.write_bcr(ioaddr, 32, val);
- }
+ int first_phy = -1;
+ u16 bmcr;
+ u32 bcr9;
+ struct ethtool_cmd ecmd;
+
+ /*
+		 * There is really no good way to handle multiple PHYs
+		 * other than turning off all of the automatics
+ */
+ val = lp->a.read_bcr(ioaddr, 2);
+ lp->a.write_bcr(ioaddr, 2, val & ~2);
+ val = lp->a.read_bcr(ioaddr, 32);
+ lp->a.write_bcr(ioaddr, 32, val & ~(1 << 7)); /* stop MII manager */
+
+ if (!(lp->options & PCNET32_PORT_ASEL)) {
+ /* setup ecmd */
+ ecmd.port = PORT_MII;
+ ecmd.transceiver = XCVR_INTERNAL;
+ ecmd.autoneg = AUTONEG_DISABLE;
+			ecmd.speed =
+			    lp->options & PCNET32_PORT_100 ? SPEED_100 : SPEED_10;
+ bcr9 = lp->a.read_bcr(ioaddr, 9);
+
+ if (lp->options & PCNET32_PORT_FD) {
+ ecmd.duplex = DUPLEX_FULL;
+ bcr9 |= (1 << 0);
+ } else {
+ ecmd.duplex = DUPLEX_HALF;
+				bcr9 &= ~(1 << 0);	/* half duplex: clear, not set, the bit */
+ }
+ lp->a.write_bcr(ioaddr, 9, bcr9);
+ }
+
+ for (i = 0; i < PCNET32_MAX_PHYS; i++) {
+ if (lp->phymask & (1 << i)) {
+ /* isolate all but the first PHY */
+ bmcr = mdio_read(dev, i, MII_BMCR);
+ if (first_phy == -1) {
+ first_phy = i;
+ mdio_write(dev, i, MII_BMCR,
+ bmcr & ~BMCR_ISOLATE);
+ } else {
+ mdio_write(dev, i, MII_BMCR,
+ bmcr | BMCR_ISOLATE);
+ }
+ /* use mii_ethtool_sset to setup PHY */
+ lp->mii_if.phy_id = i;
+ ecmd.phy_address = i;
+ if (lp->options & PCNET32_PORT_ASEL) {
+ mii_ethtool_gset(&lp->mii_if, &ecmd);
+ ecmd.autoneg = AUTONEG_ENABLE;
+ }
+ mii_ethtool_sset(&lp->mii_if, &ecmd);
+ }
+ }
+ lp->mii_if.phy_id = first_phy;
+ if (netif_msg_link(lp))
+ printk(KERN_INFO "%s: Using PHY number %d.\n",
+ dev->name, first_phy);
}
- }
#ifdef DO_DXSUFLO
- if (lp->dxsuflo) { /* Disable transmit stop on underflow */
- val = lp->a.read_csr (ioaddr, 3);
- val |= 0x40;
- lp->a.write_csr (ioaddr, 3, val);
- }
+ if (lp->dxsuflo) { /* Disable transmit stop on underflow */
+ val = lp->a.read_csr(ioaddr, 3);
+ val |= 0x40;
+ lp->a.write_csr(ioaddr, 3, val);
+ }
#endif
- lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7);
- pcnet32_load_multicast(dev);
-
- if (pcnet32_init_ring(dev)) {
- rc = -ENOMEM;
- goto err_free_ring;
- }
-
- /* Re-initialize the PCNET32, and start it when done. */
- lp->a.write_csr (ioaddr, 1, (lp->dma_addr +
- offsetof(struct pcnet32_private, init_block)) & 0xffff);
- lp->a.write_csr (ioaddr, 2, (lp->dma_addr +
- offsetof(struct pcnet32_private, init_block)) >> 16);
-
- lp->a.write_csr (ioaddr, 4, 0x0915);
- lp->a.write_csr (ioaddr, 0, 0x0001);
-
- netif_start_queue(dev);
-
- /* If we have mii, print the link status and start the watchdog */
- if (lp->mii) {
- mii_check_media (&lp->mii_if, netif_msg_link(lp), 1);
- mod_timer (&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT);
- }
-
- i = 0;
- while (i++ < 100)
- if (lp->a.read_csr (ioaddr, 0) & 0x0100)
- break;
- /*
- * We used to clear the InitDone bit, 0x0100, here but Mark Stockton
- * reports that doing so triggers a bug in the '974.
- */
- lp->a.write_csr (ioaddr, 0, 0x0042);
-
- if (netif_msg_ifup(lp))
- printk(KERN_DEBUG "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n",
- dev->name, i, (u32) (lp->dma_addr +
- offsetof(struct pcnet32_private, init_block)),
- lp->a.read_csr(ioaddr, 0));
-
- spin_unlock_irqrestore(&lp->lock, flags);
-
- return 0; /* Always succeed */
-
-err_free_ring:
- /* free any allocated skbuffs */
- for (i = 0; i < lp->rx_ring_size; i++) {
- lp->rx_ring[i].status = 0;
- if (lp->rx_skbuff[i]) {
- pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], PKT_BUF_SZ-2,
- PCI_DMA_FROMDEVICE);
- dev_kfree_skb(lp->rx_skbuff[i]);
- }
- lp->rx_skbuff[i] = NULL;
- lp->rx_dma_addr[i] = 0;
- }
-
- pcnet32_free_ring(dev);
-
- /*
- * Switch back to 16bit mode to avoid problems with dumb
- * DOS packet driver after a warm reboot
- */
- lp->a.write_bcr (ioaddr, 20, 4);
-
-err_free_irq:
- spin_unlock_irqrestore(&lp->lock, flags);
- free_irq(dev->irq, dev);
- return rc;
+ lp->init_block.mode =
+ le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7);
+ pcnet32_load_multicast(dev);
+
+ if (pcnet32_init_ring(dev)) {
+ rc = -ENOMEM;
+ goto err_free_ring;
+ }
+
+ /* Re-initialize the PCNET32, and start it when done. */
+ lp->a.write_csr(ioaddr, 1, (lp->dma_addr +
+ offsetof(struct pcnet32_private,
+ init_block)) & 0xffff);
+ lp->a.write_csr(ioaddr, 2,
+ (lp->dma_addr +
+ offsetof(struct pcnet32_private, init_block)) >> 16);
+
+ lp->a.write_csr(ioaddr, 4, 0x0915);
+ lp->a.write_csr(ioaddr, 0, 0x0001);
+
+ netif_start_queue(dev);
+
+ /* Print the link status and start the watchdog */
+ pcnet32_check_media(dev, 1);
+ mod_timer(&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT);
+
+ i = 0;
+ while (i++ < 100)
+ if (lp->a.read_csr(ioaddr, 0) & 0x0100)
+ break;
+ /*
+ * We used to clear the InitDone bit, 0x0100, here but Mark Stockton
+ * reports that doing so triggers a bug in the '974.
+ */
+ lp->a.write_csr(ioaddr, 0, 0x0042);
+
+ if (netif_msg_ifup(lp))
+ printk(KERN_DEBUG
+ "%s: pcnet32 open after %d ticks, init block %#x csr0 %4.4x.\n",
+ dev->name, i,
+ (u32) (lp->dma_addr +
+ offsetof(struct pcnet32_private, init_block)),
+ lp->a.read_csr(ioaddr, 0));
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return 0; /* Always succeed */
+
+ err_free_ring:
+ /* free any allocated skbuffs */
+ for (i = 0; i < lp->rx_ring_size; i++) {
+ lp->rx_ring[i].status = 0;
+ if (lp->rx_skbuff[i]) {
+ pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i],
+ PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(lp->rx_skbuff[i]);
+ }
+ lp->rx_skbuff[i] = NULL;
+ lp->rx_dma_addr[i] = 0;
+ }
+
+ pcnet32_free_ring(dev);
+
+ /*
+ * Switch back to 16bit mode to avoid problems with dumb
+ * DOS packet driver after a warm reboot
+ */
+ lp->a.write_bcr(ioaddr, 20, 4);
+
+ err_free_irq:
+ spin_unlock_irqrestore(&lp->lock, flags);
+ free_irq(dev->irq, dev);
+ return rc;
}
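
The multi-PHY branch of pcnet32_open keeps exactly one PHY active by
clearing BMCR_ISOLATE on the first PHY found and setting it on every
other one. A compressed sketch of that selection loop, with stubbed
mdio helpers and a hypothetical phymask standing in for the probe
results (the real driver uses mdio_read/mdio_write and MII_BMCR):

#include <stdio.h>

#define MAX_PHYS     32
#define BMCR_ISOLATE 0x0400	/* same bit the MII spec defines */

static unsigned short regs[MAX_PHYS];	/* fake BMCR registers */
static unsigned short mdio_read(int phy) { return regs[phy]; }
static void mdio_write(int phy, unsigned short v) { regs[phy] = v; }

int main(void)
{
	unsigned int phymask = (1 << 3) | (1 << 17); /* pretend two PHYs found */
	int first_phy = -1, i;

	for (i = 0; i < MAX_PHYS; i++) {
		if (!(phymask & (1 << i)))
			continue;
		if (first_phy == -1) {
			first_phy = i;	/* un-isolate the first PHY */
			mdio_write(i, mdio_read(i) & ~BMCR_ISOLATE);
		} else {	/* isolate all the others */
			mdio_write(i, mdio_read(i) | BMCR_ISOLATE);
		}
	}
	printf("active PHY: %d\n", first_phy);	/* 3; PHY 17 is isolated */
	return 0;
}
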
/*
@@ -1746,727 +1801,893 @@ err_free_irq:
* restarting the chip, but I'm too lazy to do so right now. dplatt@3do.com
*/
-static void
-pcnet32_purge_tx_ring(struct net_device *dev)
+static void pcnet32_purge_tx_ring(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- int i;
-
- for (i = 0; i < lp->tx_ring_size; i++) {
- lp->tx_ring[i].status = 0; /* CPU owns buffer */
- wmb(); /* Make sure adapter sees owner change */
- if (lp->tx_skbuff[i]) {
- pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i],
- lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE);
- dev_kfree_skb_any(lp->tx_skbuff[i]);
- }
- lp->tx_skbuff[i] = NULL;
- lp->tx_dma_addr[i] = 0;
- }
-}
+ struct pcnet32_private *lp = dev->priv;
+ int i;
+ for (i = 0; i < lp->tx_ring_size; i++) {
+ lp->tx_ring[i].status = 0; /* CPU owns buffer */
+ wmb(); /* Make sure adapter sees owner change */
+ if (lp->tx_skbuff[i]) {
+ pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i],
+ lp->tx_skbuff[i]->len,
+ PCI_DMA_TODEVICE);
+ dev_kfree_skb_any(lp->tx_skbuff[i]);
+ }
+ lp->tx_skbuff[i] = NULL;
+ lp->tx_dma_addr[i] = 0;
+ }
+}
/* Initialize the PCNET32 Rx and Tx rings. */
-static int
-pcnet32_init_ring(struct net_device *dev)
+static int pcnet32_init_ring(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- int i;
-
- lp->tx_full = 0;
- lp->cur_rx = lp->cur_tx = 0;
- lp->dirty_rx = lp->dirty_tx = 0;
-
- for (i = 0; i < lp->rx_ring_size; i++) {
- struct sk_buff *rx_skbuff = lp->rx_skbuff[i];
- if (rx_skbuff == NULL) {
- if (!(rx_skbuff = lp->rx_skbuff[i] = dev_alloc_skb (PKT_BUF_SZ))) {
- /* there is not much, we can do at this point */
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk(KERN_ERR "%s: pcnet32_init_ring dev_alloc_skb failed.\n",
- dev->name);
- return -1;
- }
- skb_reserve (rx_skbuff, 2);
- }
-
- rmb();
- if (lp->rx_dma_addr[i] == 0)
- lp->rx_dma_addr[i] = pci_map_single(lp->pci_dev, rx_skbuff->data,
- PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE);
- lp->rx_ring[i].base = (u32)le32_to_cpu(lp->rx_dma_addr[i]);
- lp->rx_ring[i].buf_length = le16_to_cpu(2-PKT_BUF_SZ);
- wmb(); /* Make sure owner changes after all others are visible */
- lp->rx_ring[i].status = le16_to_cpu(0x8000);
- }
- /* The Tx buffer address is filled in as needed, but we do need to clear
- * the upper ownership bit. */
- for (i = 0; i < lp->tx_ring_size; i++) {
- lp->tx_ring[i].status = 0; /* CPU owns buffer */
- wmb(); /* Make sure adapter sees owner change */
- lp->tx_ring[i].base = 0;
- lp->tx_dma_addr[i] = 0;
- }
-
- lp->init_block.tlen_rlen = le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits);
- for (i = 0; i < 6; i++)
- lp->init_block.phys_addr[i] = dev->dev_addr[i];
- lp->init_block.rx_ring = (u32)le32_to_cpu(lp->rx_ring_dma_addr);
- lp->init_block.tx_ring = (u32)le32_to_cpu(lp->tx_ring_dma_addr);
- wmb(); /* Make sure all changes are visible */
- return 0;
+ struct pcnet32_private *lp = dev->priv;
+ int i;
+
+ lp->tx_full = 0;
+ lp->cur_rx = lp->cur_tx = 0;
+ lp->dirty_rx = lp->dirty_tx = 0;
+
+ for (i = 0; i < lp->rx_ring_size; i++) {
+ struct sk_buff *rx_skbuff = lp->rx_skbuff[i];
+ if (rx_skbuff == NULL) {
+			if (!(rx_skbuff = lp->rx_skbuff[i] =
+			      dev_alloc_skb(PKT_BUF_SZ))) {
+				/* there is not much we can do at this point */
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk(KERN_ERR
+ "%s: pcnet32_init_ring dev_alloc_skb failed.\n",
+ dev->name);
+ return -1;
+ }
+ skb_reserve(rx_skbuff, 2);
+ }
+
+ rmb();
+ if (lp->rx_dma_addr[i] == 0)
+ lp->rx_dma_addr[i] =
+ pci_map_single(lp->pci_dev, rx_skbuff->data,
+ PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE);
+ lp->rx_ring[i].base = (u32) le32_to_cpu(lp->rx_dma_addr[i]);
+ lp->rx_ring[i].buf_length = le16_to_cpu(2 - PKT_BUF_SZ);
+ wmb(); /* Make sure owner changes after all others are visible */
+ lp->rx_ring[i].status = le16_to_cpu(0x8000);
+ }
+ /* The Tx buffer address is filled in as needed, but we do need to clear
+ * the upper ownership bit. */
+ for (i = 0; i < lp->tx_ring_size; i++) {
+ lp->tx_ring[i].status = 0; /* CPU owns buffer */
+ wmb(); /* Make sure adapter sees owner change */
+ lp->tx_ring[i].base = 0;
+ lp->tx_dma_addr[i] = 0;
+ }
+
+ lp->init_block.tlen_rlen =
+ le16_to_cpu(lp->tx_len_bits | lp->rx_len_bits);
+ for (i = 0; i < 6; i++)
+ lp->init_block.phys_addr[i] = dev->dev_addr[i];
+ lp->init_block.rx_ring = (u32) le32_to_cpu(lp->rx_ring_dma_addr);
+ lp->init_block.tx_ring = (u32) le32_to_cpu(lp->tx_ring_dma_addr);
+ wmb(); /* Make sure all changes are visible */
+ return 0;
}
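
pcnet32_init_ring is careful to fill in every descriptor field, issue
wmb(), and only then set the OWN bit (0x8000), so the chip can never
see a half-written descriptor. A sketch of the same handoff protocol,
with a GCC-style compiler barrier standing in for the kernel's wmb()
(a real driver needs the genuine memory barrier):

#include <stdint.h>

/* Userspace stand-in for wmb(); compiler barrier only. */
#define barrier() __asm__ __volatile__("" ::: "memory")

struct rx_desc {
	uint32_t base;		/* DMA address of the buffer */
	uint16_t buf_length;	/* negative buffer size, two's complement */
	uint16_t status;	/* bit 15 = OWN: 1 means device owns it */
};

static void hand_to_device(struct rx_desc *d, uint32_t dma, uint16_t len)
{
	d->base = dma;
	d->buf_length = (uint16_t)(-(int16_t)len);	/* e.g. 2 - PKT_BUF_SZ */
	barrier();		/* all fields visible before ownership flips */
	d->status = 0x8000;	/* now, and only now, the device may DMA */
}

int main(void)
{
	struct rx_desc d = { 0 };
	hand_to_device(&d, 0x12340000u, 1536);
	return d.status == 0x8000 ? 0 : 1;
}
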
/* the pcnet32 has been issued a stop or reset. Wait for the stop bit
* then flush the pending transmit operations, re-initialize the ring,
* and tell the chip to initialize.
*/
-static void
-pcnet32_restart(struct net_device *dev, unsigned int csr0_bits)
+static void pcnet32_restart(struct net_device *dev, unsigned int csr0_bits)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long ioaddr = dev->base_addr;
- int i;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long ioaddr = dev->base_addr;
+ int i;
- /* wait for stop */
- for (i=0; i<100; i++)
- if (lp->a.read_csr(ioaddr, 0) & 0x0004)
- break;
+ /* wait for stop */
+ for (i = 0; i < 100; i++)
+ if (lp->a.read_csr(ioaddr, 0) & 0x0004)
+ break;
- if (i >= 100 && netif_msg_drv(lp))
- printk(KERN_ERR "%s: pcnet32_restart timed out waiting for stop.\n",
- dev->name);
+ if (i >= 100 && netif_msg_drv(lp))
+ printk(KERN_ERR
+ "%s: pcnet32_restart timed out waiting for stop.\n",
+ dev->name);
- pcnet32_purge_tx_ring(dev);
- if (pcnet32_init_ring(dev))
- return;
+ pcnet32_purge_tx_ring(dev);
+ if (pcnet32_init_ring(dev))
+ return;
- /* ReInit Ring */
- lp->a.write_csr (ioaddr, 0, 1);
- i = 0;
- while (i++ < 1000)
- if (lp->a.read_csr (ioaddr, 0) & 0x0100)
- break;
+ /* ReInit Ring */
+ lp->a.write_csr(ioaddr, 0, 1);
+ i = 0;
+ while (i++ < 1000)
+ if (lp->a.read_csr(ioaddr, 0) & 0x0100)
+ break;
- lp->a.write_csr (ioaddr, 0, csr0_bits);
+ lp->a.write_csr(ioaddr, 0, csr0_bits);
}
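
Both waits in pcnet32_restart are bounded polls: spin a fixed number of
times on a CSR0 bit instead of blocking forever on hardware that may
never respond. A sketch of that pattern against a simulated register:

#include <stdio.h>

static unsigned int fake_csr0;	/* simulated status register */

static unsigned int read_csr0(void)
{
	static int reads;
	if (++reads == 3)	/* pretend the bit comes up on the 3rd read */
		fake_csr0 |= 0x0100;
	return fake_csr0;
}

int main(void)
{
	int i;

	/* bounded poll: give up after 1000 tries instead of hanging */
	for (i = 0; i < 1000; i++)
		if (read_csr0() & 0x0100)
			break;
	if (i >= 1000)
		fprintf(stderr, "timed out waiting for init done\n");
	else
		printf("init done after %d polls\n", i + 1);
	return 0;
}
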
-
-static void
-pcnet32_tx_timeout (struct net_device *dev)
+static void pcnet32_tx_timeout(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long ioaddr = dev->base_addr, flags;
-
- spin_lock_irqsave(&lp->lock, flags);
- /* Transmitter timeout, serious problems. */
- if (pcnet32_debug & NETIF_MSG_DRV)
- printk(KERN_ERR "%s: transmit timed out, status %4.4x, resetting.\n",
- dev->name, lp->a.read_csr(ioaddr, 0));
- lp->a.write_csr (ioaddr, 0, 0x0004);
- lp->stats.tx_errors++;
- if (netif_msg_tx_err(lp)) {
- int i;
- printk(KERN_DEBUG " Ring data dump: dirty_tx %d cur_tx %d%s cur_rx %d.",
- lp->dirty_tx, lp->cur_tx, lp->tx_full ? " (full)" : "",
- lp->cur_rx);
- for (i = 0 ; i < lp->rx_ring_size; i++)
- printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ",
- le32_to_cpu(lp->rx_ring[i].base),
- (-le16_to_cpu(lp->rx_ring[i].buf_length)) & 0xffff,
- le32_to_cpu(lp->rx_ring[i].msg_length),
- le16_to_cpu(lp->rx_ring[i].status));
- for (i = 0 ; i < lp->tx_ring_size; i++)
- printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ",
- le32_to_cpu(lp->tx_ring[i].base),
- (-le16_to_cpu(lp->tx_ring[i].length)) & 0xffff,
- le32_to_cpu(lp->tx_ring[i].misc),
- le16_to_cpu(lp->tx_ring[i].status));
- printk("\n");
- }
- pcnet32_restart(dev, 0x0042);
-
- dev->trans_start = jiffies;
- netif_wake_queue(dev);
-
- spin_unlock_irqrestore(&lp->lock, flags);
-}
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long ioaddr = dev->base_addr, flags;
+
+ spin_lock_irqsave(&lp->lock, flags);
+ /* Transmitter timeout, serious problems. */
+ if (pcnet32_debug & NETIF_MSG_DRV)
+ printk(KERN_ERR
+ "%s: transmit timed out, status %4.4x, resetting.\n",
+ dev->name, lp->a.read_csr(ioaddr, 0));
+ lp->a.write_csr(ioaddr, 0, 0x0004);
+ lp->stats.tx_errors++;
+ if (netif_msg_tx_err(lp)) {
+ int i;
+ printk(KERN_DEBUG
+ " Ring data dump: dirty_tx %d cur_tx %d%s cur_rx %d.",
+ lp->dirty_tx, lp->cur_tx, lp->tx_full ? " (full)" : "",
+ lp->cur_rx);
+ for (i = 0; i < lp->rx_ring_size; i++)
+ printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ",
+ le32_to_cpu(lp->rx_ring[i].base),
+ (-le16_to_cpu(lp->rx_ring[i].buf_length)) &
+ 0xffff, le32_to_cpu(lp->rx_ring[i].msg_length),
+ le16_to_cpu(lp->rx_ring[i].status));
+ for (i = 0; i < lp->tx_ring_size; i++)
+ printk("%s %08x %04x %08x %04x", i & 1 ? "" : "\n ",
+ le32_to_cpu(lp->tx_ring[i].base),
+ (-le16_to_cpu(lp->tx_ring[i].length)) & 0xffff,
+ le32_to_cpu(lp->tx_ring[i].misc),
+ le16_to_cpu(lp->tx_ring[i].status));
+ printk("\n");
+ }
+ pcnet32_restart(dev, 0x0042);
+
+ dev->trans_start = jiffies;
+ netif_wake_queue(dev);
+ spin_unlock_irqrestore(&lp->lock, flags);
+}
-static int
-pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static int pcnet32_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long ioaddr = dev->base_addr;
- u16 status;
- int entry;
- unsigned long flags;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long ioaddr = dev->base_addr;
+ u16 status;
+ int entry;
+ unsigned long flags;
- spin_lock_irqsave(&lp->lock, flags);
+ spin_lock_irqsave(&lp->lock, flags);
- if (netif_msg_tx_queued(lp)) {
- printk(KERN_DEBUG "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n",
- dev->name, lp->a.read_csr(ioaddr, 0));
- }
+ if (netif_msg_tx_queued(lp)) {
+ printk(KERN_DEBUG
+ "%s: pcnet32_start_xmit() called, csr0 %4.4x.\n",
+ dev->name, lp->a.read_csr(ioaddr, 0));
+ }
- /* Default status -- will not enable Successful-TxDone
- * interrupt when that option is available to us.
- */
- status = 0x8300;
+ /* Default status -- will not enable Successful-TxDone
+ * interrupt when that option is available to us.
+ */
+ status = 0x8300;
- /* Fill in a Tx ring entry */
+ /* Fill in a Tx ring entry */
- /* Mask to ring buffer boundary. */
- entry = lp->cur_tx & lp->tx_mod_mask;
+ /* Mask to ring buffer boundary. */
+ entry = lp->cur_tx & lp->tx_mod_mask;
- /* Caution: the write order is important here, set the status
- * with the "ownership" bits last. */
+ /* Caution: the write order is important here, set the status
+ * with the "ownership" bits last. */
- lp->tx_ring[entry].length = le16_to_cpu(-skb->len);
+ lp->tx_ring[entry].length = le16_to_cpu(-skb->len);
- lp->tx_ring[entry].misc = 0x00000000;
+ lp->tx_ring[entry].misc = 0x00000000;
- lp->tx_skbuff[entry] = skb;
- lp->tx_dma_addr[entry] = pci_map_single(lp->pci_dev, skb->data, skb->len,
- PCI_DMA_TODEVICE);
- lp->tx_ring[entry].base = (u32)le32_to_cpu(lp->tx_dma_addr[entry]);
- wmb(); /* Make sure owner changes after all others are visible */
- lp->tx_ring[entry].status = le16_to_cpu(status);
+ lp->tx_skbuff[entry] = skb;
+ lp->tx_dma_addr[entry] =
+ pci_map_single(lp->pci_dev, skb->data, skb->len, PCI_DMA_TODEVICE);
+ lp->tx_ring[entry].base = (u32) le32_to_cpu(lp->tx_dma_addr[entry]);
+ wmb(); /* Make sure owner changes after all others are visible */
+ lp->tx_ring[entry].status = le16_to_cpu(status);
- lp->cur_tx++;
- lp->stats.tx_bytes += skb->len;
+ lp->cur_tx++;
+ lp->stats.tx_bytes += skb->len;
- /* Trigger an immediate send poll. */
- lp->a.write_csr (ioaddr, 0, 0x0048);
+ /* Trigger an immediate send poll. */
+ lp->a.write_csr(ioaddr, 0, 0x0048);
- dev->trans_start = jiffies;
+ dev->trans_start = jiffies;
- if (lp->tx_ring[(entry+1) & lp->tx_mod_mask].base != 0) {
- lp->tx_full = 1;
- netif_stop_queue(dev);
- }
- spin_unlock_irqrestore(&lp->lock, flags);
- return 0;
+ if (lp->tx_ring[(entry + 1) & lp->tx_mod_mask].base != 0) {
+ lp->tx_full = 1;
+ netif_stop_queue(dev);
+ }
+ spin_unlock_irqrestore(&lp->lock, flags);
+ return 0;
}
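
The transmit path stores the buffer size as a 16-bit two's-complement
negative value (length = -skb->len), which is why the ring dump in
pcnet32_tx_timeout decodes it as (-le16_to_cpu(length)) & 0xffff. A
standalone round trip of that encoding:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int pkt_len = 1514;	/* a full-size Ethernet frame */
	uint16_t desc_len = (uint16_t)(-(int)pkt_len);	/* descriptor field */
	unsigned int decoded = (-(int16_t)desc_len) & 0xffff;	/* dump decode */

	printf("stored 0x%04x, decoded %u\n", desc_len, decoded);
	return decoded == pkt_len ? 0 : 1;	/* round-trips to 1514 */
}
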
/* The PCNET32 interrupt handler. */
static irqreturn_t
-pcnet32_interrupt(int irq, void *dev_id, struct pt_regs * regs)
+pcnet32_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
- struct net_device *dev = dev_id;
- struct pcnet32_private *lp;
- unsigned long ioaddr;
- u16 csr0,rap;
- int boguscnt = max_interrupt_work;
- int must_restart;
-
- if (!dev) {
- if (pcnet32_debug & NETIF_MSG_INTR)
- printk (KERN_DEBUG "%s(): irq %d for unknown device\n",
- __FUNCTION__, irq);
- return IRQ_NONE;
- }
-
- ioaddr = dev->base_addr;
- lp = dev->priv;
-
- spin_lock(&lp->lock);
-
- rap = lp->a.read_rap(ioaddr);
- while ((csr0 = lp->a.read_csr (ioaddr, 0)) & 0x8f00 && --boguscnt >= 0) {
- if (csr0 == 0xffff) {
- break; /* PCMCIA remove happened */
+ struct net_device *dev = dev_id;
+ struct pcnet32_private *lp;
+ unsigned long ioaddr;
+ u16 csr0, rap;
+ int boguscnt = max_interrupt_work;
+ int must_restart;
+
+ if (!dev) {
+ if (pcnet32_debug & NETIF_MSG_INTR)
+ printk(KERN_DEBUG "%s(): irq %d for unknown device\n",
+ __FUNCTION__, irq);
+ return IRQ_NONE;
}
- /* Acknowledge all of the current interrupt sources ASAP. */
- lp->a.write_csr (ioaddr, 0, csr0 & ~0x004f);
- must_restart = 0;
+ ioaddr = dev->base_addr;
+ lp = dev->priv;
- if (netif_msg_intr(lp))
- printk(KERN_DEBUG "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n",
- dev->name, csr0, lp->a.read_csr (ioaddr, 0));
-
- if (csr0 & 0x0400) /* Rx interrupt */
- pcnet32_rx(dev);
-
- if (csr0 & 0x0200) { /* Tx-done interrupt */
- unsigned int dirty_tx = lp->dirty_tx;
- int delta;
-
- while (dirty_tx != lp->cur_tx) {
- int entry = dirty_tx & lp->tx_mod_mask;
- int status = (short)le16_to_cpu(lp->tx_ring[entry].status);
-
- if (status < 0)
- break; /* It still hasn't been Txed */
-
- lp->tx_ring[entry].base = 0;
-
- if (status & 0x4000) {
- /* There was an major error, log it. */
- int err_status = le32_to_cpu(lp->tx_ring[entry].misc);
- lp->stats.tx_errors++;
- if (netif_msg_tx_err(lp))
- printk(KERN_ERR "%s: Tx error status=%04x err_status=%08x\n",
- dev->name, status, err_status);
- if (err_status & 0x04000000) lp->stats.tx_aborted_errors++;
- if (err_status & 0x08000000) lp->stats.tx_carrier_errors++;
- if (err_status & 0x10000000) lp->stats.tx_window_errors++;
+ spin_lock(&lp->lock);
+
+ rap = lp->a.read_rap(ioaddr);
+ while ((csr0 = lp->a.read_csr(ioaddr, 0)) & 0x8f00 && --boguscnt >= 0) {
+ if (csr0 == 0xffff) {
+ break; /* PCMCIA remove happened */
+ }
+ /* Acknowledge all of the current interrupt sources ASAP. */
+ lp->a.write_csr(ioaddr, 0, csr0 & ~0x004f);
+
+ must_restart = 0;
+
+ if (netif_msg_intr(lp))
+ printk(KERN_DEBUG
+ "%s: interrupt csr0=%#2.2x new csr=%#2.2x.\n",
+ dev->name, csr0, lp->a.read_csr(ioaddr, 0));
+
+ if (csr0 & 0x0400) /* Rx interrupt */
+ pcnet32_rx(dev);
+
+ if (csr0 & 0x0200) { /* Tx-done interrupt */
+ unsigned int dirty_tx = lp->dirty_tx;
+ int delta;
+
+ while (dirty_tx != lp->cur_tx) {
+ int entry = dirty_tx & lp->tx_mod_mask;
+				int status =
+				    (short)le16_to_cpu(lp->tx_ring[entry].status);
+
+ if (status < 0)
+ break; /* It still hasn't been Txed */
+
+ lp->tx_ring[entry].base = 0;
+
+ if (status & 0x4000) {
+					/* There was a major error; log it. */
+					int err_status =
+					    le32_to_cpu(lp->tx_ring[entry].misc);
+ lp->stats.tx_errors++;
+ if (netif_msg_tx_err(lp))
+ printk(KERN_ERR
+ "%s: Tx error status=%04x err_status=%08x\n",
+ dev->name, status,
+ err_status);
+ if (err_status & 0x04000000)
+ lp->stats.tx_aborted_errors++;
+ if (err_status & 0x08000000)
+ lp->stats.tx_carrier_errors++;
+ if (err_status & 0x10000000)
+ lp->stats.tx_window_errors++;
#ifndef DO_DXSUFLO
- if (err_status & 0x40000000) {
- lp->stats.tx_fifo_errors++;
- /* Ackk! On FIFO errors the Tx unit is turned off! */
- /* Remove this verbosity later! */
- if (netif_msg_tx_err(lp))
- printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n",
- dev->name, csr0);
- must_restart = 1;
- }
+ if (err_status & 0x40000000) {
+ lp->stats.tx_fifo_errors++;
+ /* Ackk! On FIFO errors the Tx unit is turned off! */
+ /* Remove this verbosity later! */
+ if (netif_msg_tx_err(lp))
+ printk(KERN_ERR
+ "%s: Tx FIFO error! CSR0=%4.4x\n",
+ dev->name, csr0);
+ must_restart = 1;
+ }
#else
- if (err_status & 0x40000000) {
- lp->stats.tx_fifo_errors++;
- if (! lp->dxsuflo) { /* If controller doesn't recover ... */
- /* Ackk! On FIFO errors the Tx unit is turned off! */
- /* Remove this verbosity later! */
- if (netif_msg_tx_err(lp))
- printk(KERN_ERR "%s: Tx FIFO error! CSR0=%4.4x\n",
- dev->name, csr0);
- must_restart = 1;
- }
- }
+ if (err_status & 0x40000000) {
+ lp->stats.tx_fifo_errors++;
+ if (!lp->dxsuflo) { /* If controller doesn't recover ... */
+ /* Ackk! On FIFO errors the Tx unit is turned off! */
+ /* Remove this verbosity later! */
+						if (netif_msg_tx_err(lp))
+							printk(KERN_ERR
+							       "%s: Tx FIFO error! CSR0=%4.4x\n",
+							       dev->name, csr0);
+ must_restart = 1;
+ }
+ }
#endif
- } else {
- if (status & 0x1800)
- lp->stats.collisions++;
- lp->stats.tx_packets++;
+ } else {
+ if (status & 0x1800)
+ lp->stats.collisions++;
+ lp->stats.tx_packets++;
+ }
+
+ /* We must free the original skb */
+ if (lp->tx_skbuff[entry]) {
+ pci_unmap_single(lp->pci_dev,
+ lp->tx_dma_addr[entry],
+							 lp->tx_skbuff[entry]->len,
+							 PCI_DMA_TODEVICE);
+ dev_kfree_skb_irq(lp->tx_skbuff[entry]);
+ lp->tx_skbuff[entry] = NULL;
+ lp->tx_dma_addr[entry] = 0;
+ }
+ dirty_tx++;
+ }
+
+ delta =
+ (lp->cur_tx - dirty_tx) & (lp->tx_mod_mask +
+ lp->tx_ring_size);
+ if (delta > lp->tx_ring_size) {
+ if (netif_msg_drv(lp))
+ printk(KERN_ERR
+ "%s: out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
+ dev->name, dirty_tx, lp->cur_tx,
+ lp->tx_full);
+ dirty_tx += lp->tx_ring_size;
+ delta -= lp->tx_ring_size;
+ }
+
+ if (lp->tx_full &&
+ netif_queue_stopped(dev) &&
+ delta < lp->tx_ring_size - 2) {
+ /* The ring is no longer full, clear tbusy. */
+ lp->tx_full = 0;
+ netif_wake_queue(dev);
+ }
+ lp->dirty_tx = dirty_tx;
+ }
+
+ /* Log misc errors. */
+ if (csr0 & 0x4000)
+ lp->stats.tx_errors++; /* Tx babble. */
+ if (csr0 & 0x1000) {
+ /*
+ * this happens when our receive ring is full. This shouldn't
+ * be a problem as we will see normal rx interrupts for the frames
+ * in the receive ring. But there are some PCI chipsets (I can
+			 * reproduce this on SP3G with Intel saturn chipset) which
+			 * sometimes have problems and will fill up the receive ring with
+ * error descriptors. In this situation we don't get a rx
+ * interrupt, but a missed frame interrupt sooner or later.
+ * So we try to clean up our receive ring here.
+ */
+ pcnet32_rx(dev);
+ lp->stats.rx_errors++; /* Missed a Rx frame. */
+ }
+ if (csr0 & 0x0800) {
+ if (netif_msg_drv(lp))
+ printk(KERN_ERR
+ "%s: Bus master arbitration failure, status %4.4x.\n",
+ dev->name, csr0);
+ /* unlike for the lance, there is no restart needed */
}
- /* We must free the original skb */
- if (lp->tx_skbuff[entry]) {
- pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[entry],
- lp->tx_skbuff[entry]->len, PCI_DMA_TODEVICE);
- dev_kfree_skb_irq(lp->tx_skbuff[entry]);
- lp->tx_skbuff[entry] = NULL;
- lp->tx_dma_addr[entry] = 0;
+ if (must_restart) {
+ /* reset the chip to clear the error condition, then restart */
+ lp->a.reset(ioaddr);
+ lp->a.write_csr(ioaddr, 4, 0x0915);
+ pcnet32_restart(dev, 0x0002);
+ netif_wake_queue(dev);
}
- dirty_tx++;
- }
-
- delta = (lp->cur_tx - dirty_tx) & (lp->tx_mod_mask + lp->tx_ring_size);
- if (delta > lp->tx_ring_size) {
- if (netif_msg_drv(lp))
- printk(KERN_ERR "%s: out-of-sync dirty pointer, %d vs. %d, full=%d.\n",
- dev->name, dirty_tx, lp->cur_tx, lp->tx_full);
- dirty_tx += lp->tx_ring_size;
- delta -= lp->tx_ring_size;
- }
-
- if (lp->tx_full &&
- netif_queue_stopped(dev) &&
- delta < lp->tx_ring_size - 2) {
- /* The ring is no longer full, clear tbusy. */
- lp->tx_full = 0;
- netif_wake_queue (dev);
- }
- lp->dirty_tx = dirty_tx;
- }
-
- /* Log misc errors. */
- if (csr0 & 0x4000) lp->stats.tx_errors++; /* Tx babble. */
- if (csr0 & 0x1000) {
- /*
- * this happens when our receive ring is full. This shouldn't
- * be a problem as we will see normal rx interrupts for the frames
- * in the receive ring. But there are some PCI chipsets (I can
- * reproduce this on SP3G with Intel saturn chipset) which have
- * sometimes problems and will fill up the receive ring with
- * error descriptors. In this situation we don't get a rx
- * interrupt, but a missed frame interrupt sooner or later.
- * So we try to clean up our receive ring here.
- */
- pcnet32_rx(dev);
- lp->stats.rx_errors++; /* Missed a Rx frame. */
- }
- if (csr0 & 0x0800) {
- if (netif_msg_drv(lp))
- printk(KERN_ERR "%s: Bus master arbitration failure, status %4.4x.\n",
- dev->name, csr0);
- /* unlike for the lance, there is no restart needed */
- }
-
- if (must_restart) {
- /* reset the chip to clear the error condition, then restart */
- lp->a.reset(ioaddr);
- lp->a.write_csr(ioaddr, 4, 0x0915);
- pcnet32_restart(dev, 0x0002);
- netif_wake_queue(dev);
- }
- }
-
- /* Set interrupt enable. */
- lp->a.write_csr (ioaddr, 0, 0x0040);
- lp->a.write_rap (ioaddr,rap);
-
- if (netif_msg_intr(lp))
- printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n",
- dev->name, lp->a.read_csr (ioaddr, 0));
-
- spin_unlock(&lp->lock);
-
- return IRQ_HANDLED;
+ }
+
+ /* Set interrupt enable. */
+ lp->a.write_csr(ioaddr, 0, 0x0040);
+ lp->a.write_rap(ioaddr, rap);
+
+ if (netif_msg_intr(lp))
+ printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n",
+ dev->name, lp->a.read_csr(ioaddr, 0));
+
+ spin_unlock(&lp->lock);
+
+ return IRQ_HANDLED;
}
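
The Tx-reclaim logic masks the counter difference with
(tx_mod_mask + tx_ring_size), i.e. 2 * size - 1, so a dirty pointer
that has fallen a whole ring behind shows up as an out-of-range delta
instead of silently wrapping. A numeric sketch, assuming a 16-entry
ring:

#include <stdio.h>

#define RING_SIZE 16
#define RING_MASK (RING_SIZE - 1)

int main(void)
{
	/* driver counters grow monotonically */
	unsigned int cur_tx = 40, dirty_tx = 22; /* 18 apart: out of sync */
	unsigned int delta = (cur_tx - dirty_tx) & (RING_MASK + RING_SIZE);

	if (delta > RING_SIZE) {
		printf("out-of-sync dirty pointer, delta %u\n", delta);
		dirty_tx += RING_SIZE;	/* resynchronize as the driver does */
		delta -= RING_SIZE;
	}
	printf("reclaimable entries: %u\n", delta);	/* 2 */
	return 0;
}
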
-static int
-pcnet32_rx(struct net_device *dev)
+static int pcnet32_rx(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- int entry = lp->cur_rx & lp->rx_mod_mask;
- int boguscnt = lp->rx_ring_size / 2;
-
- /* If we own the next entry, it's a new packet. Send it up. */
- while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) {
- int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8;
-
- if (status != 0x03) { /* There was an error. */
- /*
- * There is a tricky error noted by John Murphy,
- * <murf@perftech.com> to Russ Nelson: Even with full-sized
- * buffers it's possible for a jabber packet to use two
- * buffers, with only the last correctly noting the error.
- */
- if (status & 0x01) /* Only count a general error at the */
- lp->stats.rx_errors++; /* end of a packet.*/
- if (status & 0x20) lp->stats.rx_frame_errors++;
- if (status & 0x10) lp->stats.rx_over_errors++;
- if (status & 0x08) lp->stats.rx_crc_errors++;
- if (status & 0x04) lp->stats.rx_fifo_errors++;
- lp->rx_ring[entry].status &= le16_to_cpu(0x03ff);
- } else {
- /* Malloc up new buffer, compatible with net-2e. */
- short pkt_len = (le32_to_cpu(lp->rx_ring[entry].msg_length) & 0xfff)-4;
- struct sk_buff *skb;
-
- /* Discard oversize frames. */
- if (unlikely(pkt_len > PKT_BUF_SZ - 2)) {
- if (netif_msg_drv(lp))
- printk(KERN_ERR "%s: Impossible packet size %d!\n",
- dev->name, pkt_len);
- lp->stats.rx_errors++;
- } else if (pkt_len < 60) {
- if (netif_msg_rx_err(lp))
- printk(KERN_ERR "%s: Runt packet!\n", dev->name);
- lp->stats.rx_errors++;
- } else {
- int rx_in_place = 0;
-
- if (pkt_len > rx_copybreak) {
- struct sk_buff *newskb;
-
- if ((newskb = dev_alloc_skb(PKT_BUF_SZ))) {
- skb_reserve (newskb, 2);
- skb = lp->rx_skbuff[entry];
- pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[entry],
- PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE);
- skb_put (skb, pkt_len);
- lp->rx_skbuff[entry] = newskb;
- newskb->dev = dev;
- lp->rx_dma_addr[entry] =
- pci_map_single(lp->pci_dev, newskb->data,
- PKT_BUF_SZ-2, PCI_DMA_FROMDEVICE);
- lp->rx_ring[entry].base = le32_to_cpu(lp->rx_dma_addr[entry]);
- rx_in_place = 1;
- } else
- skb = NULL;
+ struct pcnet32_private *lp = dev->priv;
+ int entry = lp->cur_rx & lp->rx_mod_mask;
+ int boguscnt = lp->rx_ring_size / 2;
+
+ /* If we own the next entry, it's a new packet. Send it up. */
+ while ((short)le16_to_cpu(lp->rx_ring[entry].status) >= 0) {
+ int status = (short)le16_to_cpu(lp->rx_ring[entry].status) >> 8;
+
+ if (status != 0x03) { /* There was an error. */
+ /*
+ * There is a tricky error noted by John Murphy,
+ * <murf@perftech.com> to Russ Nelson: Even with full-sized
+ * buffers it's possible for a jabber packet to use two
+ * buffers, with only the last correctly noting the error.
+ */
+ if (status & 0x01) /* Only count a general error at the */
+ lp->stats.rx_errors++; /* end of a packet. */
+ if (status & 0x20)
+ lp->stats.rx_frame_errors++;
+ if (status & 0x10)
+ lp->stats.rx_over_errors++;
+ if (status & 0x08)
+ lp->stats.rx_crc_errors++;
+ if (status & 0x04)
+ lp->stats.rx_fifo_errors++;
+ lp->rx_ring[entry].status &= le16_to_cpu(0x03ff);
} else {
- skb = dev_alloc_skb(pkt_len+2);
- }
-
- if (skb == NULL) {
- int i;
- if (netif_msg_drv(lp))
- printk(KERN_ERR "%s: Memory squeeze, deferring packet.\n",
- dev->name);
- for (i = 0; i < lp->rx_ring_size; i++)
- if ((short)le16_to_cpu(lp->rx_ring[(entry+i)
- & lp->rx_mod_mask].status) < 0)
- break;
-
- if (i > lp->rx_ring_size -2) {
- lp->stats.rx_dropped++;
- lp->rx_ring[entry].status |= le16_to_cpu(0x8000);
- wmb(); /* Make sure adapter sees owner change */
- lp->cur_rx++;
- }
- break;
- }
- skb->dev = dev;
- if (!rx_in_place) {
- skb_reserve(skb,2); /* 16 byte align */
- skb_put(skb,pkt_len); /* Make room */
- pci_dma_sync_single_for_cpu(lp->pci_dev,
- lp->rx_dma_addr[entry],
- PKT_BUF_SZ-2,
- PCI_DMA_FROMDEVICE);
- eth_copy_and_sum(skb,
- (unsigned char *)(lp->rx_skbuff[entry]->data),
- pkt_len,0);
- pci_dma_sync_single_for_device(lp->pci_dev,
- lp->rx_dma_addr[entry],
- PKT_BUF_SZ-2,
- PCI_DMA_FROMDEVICE);
+ /* Malloc up new buffer, compatible with net-2e. */
+ short pkt_len =
+ (le32_to_cpu(lp->rx_ring[entry].msg_length) & 0xfff)
+ - 4;
+ struct sk_buff *skb;
+
+ /* Discard oversize frames. */
+ if (unlikely(pkt_len > PKT_BUF_SZ - 2)) {
+ if (netif_msg_drv(lp))
+ printk(KERN_ERR
+ "%s: Impossible packet size %d!\n",
+ dev->name, pkt_len);
+ lp->stats.rx_errors++;
+ } else if (pkt_len < 60) {
+ if (netif_msg_rx_err(lp))
+ printk(KERN_ERR "%s: Runt packet!\n",
+ dev->name);
+ lp->stats.rx_errors++;
+ } else {
+ int rx_in_place = 0;
+
+ if (pkt_len > rx_copybreak) {
+ struct sk_buff *newskb;
+
+					if ((newskb = dev_alloc_skb(PKT_BUF_SZ))) {
+						skb_reserve(newskb, 2);
+						skb = lp->rx_skbuff[entry];
+						pci_unmap_single(lp->pci_dev,
+								 lp->rx_dma_addr[entry],
+								 PKT_BUF_SZ - 2,
+								 PCI_DMA_FROMDEVICE);
+						skb_put(skb, pkt_len);
+						lp->rx_skbuff[entry] = newskb;
+						newskb->dev = dev;
+						lp->rx_dma_addr[entry] =
+						    pci_map_single(lp->pci_dev,
+								   newskb->data,
+								   PKT_BUF_SZ - 2,
+								   PCI_DMA_FROMDEVICE);
+						lp->rx_ring[entry].base =
+						    le32_to_cpu(lp->rx_dma_addr[entry]);
+						rx_in_place = 1;
+ } else
+ skb = NULL;
+ } else {
+ skb = dev_alloc_skb(pkt_len + 2);
+ }
+
+ if (skb == NULL) {
+ int i;
+ if (netif_msg_drv(lp))
+ printk(KERN_ERR
+ "%s: Memory squeeze, deferring packet.\n",
+ dev->name);
+ for (i = 0; i < lp->rx_ring_size; i++)
+					if ((short)le16_to_cpu(
+						lp->rx_ring[(entry + i) &
+							    lp->rx_mod_mask].status) < 0)
+						break;
+
+ if (i > lp->rx_ring_size - 2) {
+ lp->stats.rx_dropped++;
+ lp->rx_ring[entry].status |=
+ le16_to_cpu(0x8000);
+ wmb(); /* Make sure adapter sees owner change */
+ lp->cur_rx++;
+ }
+ break;
+ }
+ skb->dev = dev;
+ if (!rx_in_place) {
+ skb_reserve(skb, 2); /* 16 byte align */
+ skb_put(skb, pkt_len); /* Make room */
+					pci_dma_sync_single_for_cpu(lp->pci_dev,
+								    lp->rx_dma_addr[entry],
+								    PKT_BUF_SZ - 2,
+								    PCI_DMA_FROMDEVICE);
+					eth_copy_and_sum(skb,
+							 (unsigned char *)
+							 lp->rx_skbuff[entry]->data,
+							 pkt_len, 0);
+					pci_dma_sync_single_for_device(lp->pci_dev,
+								       lp->rx_dma_addr[entry],
+								       PKT_BUF_SZ - 2,
+								       PCI_DMA_FROMDEVICE);
+ }
+ lp->stats.rx_bytes += skb->len;
+ skb->protocol = eth_type_trans(skb, dev);
+ netif_rx(skb);
+ dev->last_rx = jiffies;
+ lp->stats.rx_packets++;
+ }
}
- lp->stats.rx_bytes += skb->len;
- skb->protocol=eth_type_trans(skb,dev);
- netif_rx(skb);
- dev->last_rx = jiffies;
- lp->stats.rx_packets++;
- }
+ /*
+ * The docs say that the buffer length isn't touched, but Andrew Boyd
+ * of QNX reports that some revs of the 79C965 clear it.
+ */
+ lp->rx_ring[entry].buf_length = le16_to_cpu(2 - PKT_BUF_SZ);
+ wmb(); /* Make sure owner changes after all others are visible */
+ lp->rx_ring[entry].status |= le16_to_cpu(0x8000);
+ entry = (++lp->cur_rx) & lp->rx_mod_mask;
+ if (--boguscnt <= 0)
+ break; /* don't stay in loop forever */
}
- /*
- * The docs say that the buffer length isn't touched, but Andrew Boyd
- * of QNX reports that some revs of the 79C965 clear it.
- */
- lp->rx_ring[entry].buf_length = le16_to_cpu(2-PKT_BUF_SZ);
- wmb(); /* Make sure owner changes after all others are visible */
- lp->rx_ring[entry].status |= le16_to_cpu(0x8000);
- entry = (++lp->cur_rx) & lp->rx_mod_mask;
- if (--boguscnt <= 0) break; /* don't stay in loop forever */
- }
-
- return 0;
+
+ return 0;
}
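
pcnet32_rx applies the usual rx_copybreak trade-off: frames at or below
the threshold are copied into a freshly sized skb so the mapped DMA
buffer can be reused, while larger frames are passed up whole and a new
buffer is mapped in their place. The decision itself reduces to one
comparison; a sketch with an invented threshold value:

#include <stdio.h>

#define RX_COPYBREAK 200	/* hypothetical threshold; tunable in the driver */

/* Returns 1 if the frame should be copied, 0 if buffers are flipped. */
static int use_copy(unsigned int pkt_len)
{
	return pkt_len <= RX_COPYBREAK;
}

int main(void)
{
	unsigned int sizes[] = { 64, 200, 201, 1514 };
	unsigned int i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("%4u bytes -> %s\n", sizes[i],
		       use_copy(sizes[i]) ? "copy into small skb"
					  : "flip buffers, map a new one");
	return 0;
}
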
-static int
-pcnet32_close(struct net_device *dev)
+static int pcnet32_close(struct net_device *dev)
{
- unsigned long ioaddr = dev->base_addr;
- struct pcnet32_private *lp = dev->priv;
- int i;
- unsigned long flags;
+ unsigned long ioaddr = dev->base_addr;
+ struct pcnet32_private *lp = dev->priv;
+ int i;
+ unsigned long flags;
- del_timer_sync(&lp->watchdog_timer);
+ del_timer_sync(&lp->watchdog_timer);
- netif_stop_queue(dev);
+ netif_stop_queue(dev);
- spin_lock_irqsave(&lp->lock, flags);
+ spin_lock_irqsave(&lp->lock, flags);
- lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112);
+ lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112);
- if (netif_msg_ifdown(lp))
- printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n",
- dev->name, lp->a.read_csr (ioaddr, 0));
+ if (netif_msg_ifdown(lp))
+ printk(KERN_DEBUG
+ "%s: Shutting down ethercard, status was %2.2x.\n",
+ dev->name, lp->a.read_csr(ioaddr, 0));
- /* We stop the PCNET32 here -- it occasionally polls memory if we don't. */
- lp->a.write_csr (ioaddr, 0, 0x0004);
+ /* We stop the PCNET32 here -- it occasionally polls memory if we don't. */
+ lp->a.write_csr(ioaddr, 0, 0x0004);
- /*
- * Switch back to 16bit mode to avoid problems with dumb
- * DOS packet driver after a warm reboot
- */
- lp->a.write_bcr (ioaddr, 20, 4);
+ /*
+ * Switch back to 16bit mode to avoid problems with dumb
+ * DOS packet driver after a warm reboot
+ */
+ lp->a.write_bcr(ioaddr, 20, 4);
- spin_unlock_irqrestore(&lp->lock, flags);
+ spin_unlock_irqrestore(&lp->lock, flags);
- free_irq(dev->irq, dev);
+ free_irq(dev->irq, dev);
- spin_lock_irqsave(&lp->lock, flags);
+ spin_lock_irqsave(&lp->lock, flags);
- /* free all allocated skbuffs */
- for (i = 0; i < lp->rx_ring_size; i++) {
- lp->rx_ring[i].status = 0;
- wmb(); /* Make sure adapter sees owner change */
- if (lp->rx_skbuff[i]) {
- pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i], PKT_BUF_SZ-2,
- PCI_DMA_FROMDEVICE);
- dev_kfree_skb(lp->rx_skbuff[i]);
+ /* free all allocated skbuffs */
+ for (i = 0; i < lp->rx_ring_size; i++) {
+ lp->rx_ring[i].status = 0;
+ wmb(); /* Make sure adapter sees owner change */
+ if (lp->rx_skbuff[i]) {
+ pci_unmap_single(lp->pci_dev, lp->rx_dma_addr[i],
+ PKT_BUF_SZ - 2, PCI_DMA_FROMDEVICE);
+ dev_kfree_skb(lp->rx_skbuff[i]);
+ }
+ lp->rx_skbuff[i] = NULL;
+ lp->rx_dma_addr[i] = 0;
}
- lp->rx_skbuff[i] = NULL;
- lp->rx_dma_addr[i] = 0;
- }
- for (i = 0; i < lp->tx_ring_size; i++) {
- lp->tx_ring[i].status = 0; /* CPU owns buffer */
- wmb(); /* Make sure adapter sees owner change */
- if (lp->tx_skbuff[i]) {
- pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i],
- lp->tx_skbuff[i]->len, PCI_DMA_TODEVICE);
- dev_kfree_skb(lp->tx_skbuff[i]);
+ for (i = 0; i < lp->tx_ring_size; i++) {
+ lp->tx_ring[i].status = 0; /* CPU owns buffer */
+ wmb(); /* Make sure adapter sees owner change */
+ if (lp->tx_skbuff[i]) {
+ pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[i],
+ lp->tx_skbuff[i]->len,
+ PCI_DMA_TODEVICE);
+ dev_kfree_skb(lp->tx_skbuff[i]);
+ }
+ lp->tx_skbuff[i] = NULL;
+ lp->tx_dma_addr[i] = 0;
}
- lp->tx_skbuff[i] = NULL;
- lp->tx_dma_addr[i] = 0;
- }
- spin_unlock_irqrestore(&lp->lock, flags);
+ spin_unlock_irqrestore(&lp->lock, flags);
- return 0;
+ return 0;
}
-static struct net_device_stats *
-pcnet32_get_stats(struct net_device *dev)
+static struct net_device_stats *pcnet32_get_stats(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long ioaddr = dev->base_addr;
- u16 saved_addr;
- unsigned long flags;
-
- spin_lock_irqsave(&lp->lock, flags);
- saved_addr = lp->a.read_rap(ioaddr);
- lp->stats.rx_missed_errors = lp->a.read_csr (ioaddr, 112);
- lp->a.write_rap(ioaddr, saved_addr);
- spin_unlock_irqrestore(&lp->lock, flags);
-
- return &lp->stats;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long ioaddr = dev->base_addr;
+ u16 saved_addr;
+ unsigned long flags;
+
+ spin_lock_irqsave(&lp->lock, flags);
+ saved_addr = lp->a.read_rap(ioaddr);
+ lp->stats.rx_missed_errors = lp->a.read_csr(ioaddr, 112);
+ lp->a.write_rap(ioaddr, saved_addr);
+ spin_unlock_irqrestore(&lp->lock, flags);
+
+ return &lp->stats;
}
/* taken from the sunlance driver, which it took from the depca driver */
-static void pcnet32_load_multicast (struct net_device *dev)
+static void pcnet32_load_multicast(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- volatile struct pcnet32_init_block *ib = &lp->init_block;
- volatile u16 *mcast_table = (u16 *)&ib->filter;
- struct dev_mc_list *dmi=dev->mc_list;
- char *addrs;
- int i;
- u32 crc;
-
- /* set all multicast bits */
- if (dev->flags & IFF_ALLMULTI) {
- ib->filter[0] = 0xffffffff;
- ib->filter[1] = 0xffffffff;
+ struct pcnet32_private *lp = dev->priv;
+ volatile struct pcnet32_init_block *ib = &lp->init_block;
+ volatile u16 *mcast_table = (u16 *)&ib->filter;
+ struct dev_mc_list *dmi = dev->mc_list;
+ char *addrs;
+ int i;
+ u32 crc;
+
+ /* set all multicast bits */
+ if (dev->flags & IFF_ALLMULTI) {
+ ib->filter[0] = 0xffffffff;
+ ib->filter[1] = 0xffffffff;
+ return;
+ }
+ /* clear the multicast filter */
+ ib->filter[0] = 0;
+ ib->filter[1] = 0;
+
+ /* Add addresses */
+ for (i = 0; i < dev->mc_count; i++) {
+ addrs = dmi->dmi_addr;
+ dmi = dmi->next;
+
+ /* multicast address? */
+ if (!(*addrs & 1))
+ continue;
+
+ crc = ether_crc_le(6, addrs);
+ crc = crc >> 26;
+ mcast_table[crc >> 4] =
+ le16_to_cpu(le16_to_cpu(mcast_table[crc >> 4]) |
+ (1 << (crc & 0xf)));
+ }
return;
- }
- /* clear the multicast filter */
- ib->filter[0] = 0;
- ib->filter[1] = 0;
-
- /* Add addresses */
- for (i = 0; i < dev->mc_count; i++) {
- addrs = dmi->dmi_addr;
- dmi = dmi->next;
-
- /* multicast address? */
- if (!(*addrs & 1))
- continue;
-
- crc = ether_crc_le(6, addrs);
- crc = crc >> 26;
- mcast_table [crc >> 4] = le16_to_cpu(
- le16_to_cpu(mcast_table [crc >> 4]) | (1 << (crc & 0xf)));
- }
- return;
}
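The hash above is the standard LANCE logical-address filter: a little-endian CRC-32 of the 6-byte address whose top 6 bits select one of 64 filter bits. The same selection as a standalone helper (name illustrative):

/* Sketch: which of the 64 logical-address filter bits an address hits. */
static inline unsigned int pcnet32_crc_bit(const char *addr)
{
	u32 crc = ether_crc_le(6, addr);

	return crc >> 26;	/* top 6 bits: word (n >> 4), bit (n & 0xf) */
}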
-
/*
* Set or clear the multicast filter for this adaptor.
*/
static void pcnet32_set_multicast_list(struct net_device *dev)
{
- unsigned long ioaddr = dev->base_addr, flags;
- struct pcnet32_private *lp = dev->priv;
-
- spin_lock_irqsave(&lp->lock, flags);
- if (dev->flags&IFF_PROMISC) {
- /* Log any net taps. */
- if (netif_msg_hw(lp))
- printk(KERN_INFO "%s: Promiscuous mode enabled.\n", dev->name);
- lp->init_block.mode = le16_to_cpu(0x8000 | (lp->options & PCNET32_PORT_PORTSEL) << 7);
- } else {
- lp->init_block.mode = le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7);
- pcnet32_load_multicast (dev);
- }
-
- lp->a.write_csr (ioaddr, 0, 0x0004); /* Temporarily stop the lance. */
- pcnet32_restart(dev, 0x0042); /* Resume normal operation */
- netif_wake_queue(dev);
-
- spin_unlock_irqrestore(&lp->lock, flags);
+ unsigned long ioaddr = dev->base_addr, flags;
+ struct pcnet32_private *lp = dev->priv;
+
+ spin_lock_irqsave(&lp->lock, flags);
+ if (dev->flags & IFF_PROMISC) {
+ /* Log any net taps. */
+ if (netif_msg_hw(lp))
+ printk(KERN_INFO "%s: Promiscuous mode enabled.\n",
+ dev->name);
+ lp->init_block.mode =
+ le16_to_cpu(0x8000 | (lp->options & PCNET32_PORT_PORTSEL) << 7);
+ } else {
+ lp->init_block.mode =
+ le16_to_cpu((lp->options & PCNET32_PORT_PORTSEL) << 7);
+ pcnet32_load_multicast(dev);
+ }
+
+ lp->a.write_csr(ioaddr, 0, 0x0004); /* Temporarily stop the lance. */
+ pcnet32_restart(dev, 0x0042); /* Resume normal operation */
+ netif_wake_queue(dev);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
}
/* This routine assumes that the lp->lock is held */
static int mdio_read(struct net_device *dev, int phy_id, int reg_num)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long ioaddr = dev->base_addr;
- u16 val_out;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long ioaddr = dev->base_addr;
+ u16 val_out;
- if (!lp->mii)
- return 0;
+ if (!lp->mii)
+ return 0;
- lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f));
- val_out = lp->a.read_bcr(ioaddr, 34);
+ lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f));
+ val_out = lp->a.read_bcr(ioaddr, 34);
- return val_out;
+ return val_out;
}
/* This routine assumes that the lp->lock is held */
static void mdio_write(struct net_device *dev, int phy_id, int reg_num, int val)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long ioaddr = dev->base_addr;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long ioaddr = dev->base_addr;
- if (!lp->mii)
- return;
+ if (!lp->mii)
+ return;
- lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f));
- lp->a.write_bcr(ioaddr, 34, val);
+ lp->a.write_bcr(ioaddr, 33, ((phy_id & 0x1f) << 5) | (reg_num & 0x1f));
+ lp->a.write_bcr(ioaddr, 34, val);
}
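mdio_read() and mdio_write() funnel through the same register pair: BCR33 carries the (phy_id, reg) address and BCR34 is the data window, which is why both assume lp->lock is held across the two accesses. A hedged read-modify-write built on them (helper name is illustrative):

/* Sketch: flip BMCR bits via the helpers above; caller holds lp->lock. */
static void mdio_rmw_bmcr(struct net_device *dev, int phy_id,
			  u16 set, u16 clear)
{
	int bmcr = mdio_read(dev, phy_id, MII_BMCR);

	mdio_write(dev, phy_id, MII_BMCR, (bmcr & ~clear) | set);
}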
static int pcnet32_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
{
- struct pcnet32_private *lp = dev->priv;
- int rc;
- unsigned long flags;
+ struct pcnet32_private *lp = dev->priv;
+ int rc;
+ unsigned long flags;
+
+ /* SIOC[GS]MIIxxx ioctls */
+ if (lp->mii) {
+ spin_lock_irqsave(&lp->lock, flags);
+ rc = generic_mii_ioctl(&lp->mii_if, if_mii(rq), cmd, NULL);
+ spin_unlock_irqrestore(&lp->lock, flags);
+ } else {
+ rc = -EOPNOTSUPP;
+ }
+
+ return rc;
+}
+
+static int pcnet32_check_otherphy(struct net_device *dev)
+{
+ struct pcnet32_private *lp = dev->priv;
+ struct mii_if_info mii = lp->mii_if;
+ u16 bmcr;
+ int i;
- /* SIOC[GS]MIIxxx ioctls */
- if (lp->mii) {
- spin_lock_irqsave(&lp->lock, flags);
- rc = generic_mii_ioctl(&lp->mii_if, if_mii(rq), cmd, NULL);
- spin_unlock_irqrestore(&lp->lock, flags);
- } else {
- rc = -EOPNOTSUPP;
- }
+ for (i = 0; i < PCNET32_MAX_PHYS; i++) {
+ if (i == lp->mii_if.phy_id)
+ continue; /* skip active phy */
+ if (lp->phymask & (1 << i)) {
+ mii.phy_id = i;
+ if (mii_link_ok(&mii)) {
+ /* found PHY with active link */
+ if (netif_msg_link(lp))
+ printk(KERN_INFO
+ "%s: Using PHY number %d.\n",
+ dev->name, i);
+
+ /* isolate inactive phy */
+ bmcr = mdio_read(dev, lp->mii_if.phy_id, MII_BMCR);
+ mdio_write(dev, lp->mii_if.phy_id, MII_BMCR,
+ bmcr | BMCR_ISOLATE);
+
+ /* de-isolate new phy */
+ bmcr = mdio_read(dev, i, MII_BMCR);
+ mdio_write(dev, i, MII_BMCR,
+ bmcr & ~BMCR_ISOLATE);
+
+ /* set new phy address */
+ lp->mii_if.phy_id = i;
+ return 1;
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * Show the status of the media. Similar to mii_check_media however it
+ * correctly shows the link speed for all (tested) pcnet32 variants.
+ * Devices with no mii just report link state without speed.
+ *
+ * Caller is assumed to hold and release the lp->lock.
+ */
- return rc;
+static void pcnet32_check_media(struct net_device *dev, int verbose)
+{
+ struct pcnet32_private *lp = dev->priv;
+ int curr_link;
+ int prev_link = netif_carrier_ok(dev) ? 1 : 0;
+ u32 bcr9;
+
+ if (lp->mii) {
+ curr_link = mii_link_ok(&lp->mii_if);
+ } else {
+ ulong ioaddr = dev->base_addr; /* card base I/O address */
+ curr_link = (lp->a.read_bcr(ioaddr, 4) != 0xc0);
+ }
+ if (!curr_link) {
+ if (prev_link || verbose) {
+ netif_carrier_off(dev);
+ if (netif_msg_link(lp))
+ printk(KERN_INFO "%s: link down\n", dev->name);
+ }
+ if (lp->phycount > 1) {
+ curr_link = pcnet32_check_otherphy(dev);
+ prev_link = 0;
+ }
+ } else if (verbose || !prev_link) {
+ netif_carrier_on(dev);
+ if (lp->mii) {
+ if (netif_msg_link(lp)) {
+ struct ethtool_cmd ecmd;
+ mii_ethtool_gset(&lp->mii_if, &ecmd);
+ printk(KERN_INFO
+ "%s: link up, %sMbps, %s-duplex\n",
+ dev->name,
+ (ecmd.speed == SPEED_100) ? "100" : "10",
+ (ecmd.duplex == DUPLEX_FULL) ? "full" : "half");
+ }
+ bcr9 = lp->a.read_bcr(dev->base_addr, 9);
+ if ((bcr9 & (1 << 0)) != lp->mii_if.full_duplex) {
+ if (lp->mii_if.full_duplex)
+ bcr9 |= (1 << 0);
+ else
+ bcr9 &= ~(1 << 0);
+ lp->a.write_bcr(dev->base_addr, 9, bcr9);
+ }
+ } else {
+ if (netif_msg_link(lp))
+ printk(KERN_INFO "%s: link up\n", dev->name);
+ }
+ }
}
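Because pcnet32_check_media() never takes lp->lock itself, every caller has to bracket it the way the watchdog below does. A minimal sketch of a process-context caller (pcnet32_force_link_check is hypothetical):

/* Sketch: safe process-context call into pcnet32_check_media(). */
static void pcnet32_force_link_check(struct net_device *dev)
{
	struct pcnet32_private *lp = dev->priv;
	unsigned long flags;

	spin_lock_irqsave(&lp->lock, flags);
	pcnet32_check_media(dev, 1);	/* verbose: log state unconditionally */
	spin_unlock_irqrestore(&lp->lock, flags);
}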
+/*
+ * Check for loss of link and link establishment.
+ * Cannot use mii_check_media because it does nothing if the mode is forced.
+ */
+
static void pcnet32_watchdog(struct net_device *dev)
{
- struct pcnet32_private *lp = dev->priv;
- unsigned long flags;
+ struct pcnet32_private *lp = dev->priv;
+ unsigned long flags;
- /* Print the link status if it has changed */
- if (lp->mii) {
+ /* Print the link status if it has changed */
spin_lock_irqsave(&lp->lock, flags);
- mii_check_media (&lp->mii_if, netif_msg_link(lp), 0);
+ pcnet32_check_media(dev, 0);
spin_unlock_irqrestore(&lp->lock, flags);
- }
- mod_timer (&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT);
+ mod_timer(&(lp->watchdog_timer), PCNET32_WATCHDOG_TIMEOUT);
}
static void __devexit pcnet32_remove_one(struct pci_dev *pdev)
{
- struct net_device *dev = pci_get_drvdata(pdev);
-
- if (dev) {
- struct pcnet32_private *lp = dev->priv;
-
- unregister_netdev(dev);
- pcnet32_free_ring(dev);
- release_region(dev->base_addr, PCNET32_TOTAL_SIZE);
- pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
- free_netdev(dev);
- pci_disable_device(pdev);
- pci_set_drvdata(pdev, NULL);
- }
+ struct net_device *dev = pci_get_drvdata(pdev);
+
+ if (dev) {
+ struct pcnet32_private *lp = dev->priv;
+
+ unregister_netdev(dev);
+ pcnet32_free_ring(dev);
+ release_region(dev->base_addr, PCNET32_TOTAL_SIZE);
+ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
+ free_netdev(dev);
+ pci_disable_device(pdev);
+ pci_set_drvdata(pdev, NULL);
+ }
}
static struct pci_driver pcnet32_driver = {
- .name = DRV_NAME,
- .probe = pcnet32_probe_pci,
- .remove = __devexit_p(pcnet32_remove_one),
- .id_table = pcnet32_pci_tbl,
+ .name = DRV_NAME,
+ .probe = pcnet32_probe_pci,
+ .remove = __devexit_p(pcnet32_remove_one),
+ .id_table = pcnet32_pci_tbl,
};
/* An additional parameter that may be passed in... */
@@ -2477,9 +2698,11 @@ static int pcnet32_have_pci;
module_param(debug, int, 0);
MODULE_PARM_DESC(debug, DRV_NAME " debug level");
module_param(max_interrupt_work, int, 0);
-MODULE_PARM_DESC(max_interrupt_work, DRV_NAME " maximum events handled per interrupt");
+MODULE_PARM_DESC(max_interrupt_work,
+ DRV_NAME " maximum events handled per interrupt");
module_param(rx_copybreak, int, 0);
-MODULE_PARM_DESC(rx_copybreak, DRV_NAME " copy breakpoint for copy-only-tiny-frames");
+MODULE_PARM_DESC(rx_copybreak,
+ DRV_NAME " copy breakpoint for copy-only-tiny-frames");
module_param(tx_start_pt, int, 0);
MODULE_PARM_DESC(tx_start_pt, DRV_NAME " transmit start point (0-3)");
module_param(pcnet32vlb, int, 0);
@@ -2490,7 +2713,9 @@ module_param_array(full_duplex, int, NULL, 0);
MODULE_PARM_DESC(full_duplex, DRV_NAME " full duplex setting(s) (1)");
/* Module Parameter for HomePNA cards added by Patrick Simmons, 2004 */
module_param_array(homepna, int, NULL, 0);
-MODULE_PARM_DESC(homepna, DRV_NAME " mode for 79C978 cards (1 for HomePNA, 0 for Ethernet, default Ethernet");
+MODULE_PARM_DESC(homepna,
+ DRV_NAME
+ " mode for 79C978 cards (1 for HomePNA, 0 for Ethernet, default Ethernet");
MODULE_AUTHOR("Thomas Bogendoerfer");
MODULE_DESCRIPTION("Driver for PCnet32 and PCnetPCI based ethercards");
@@ -2500,44 +2725,44 @@ MODULE_LICENSE("GPL");
static int __init pcnet32_init_module(void)
{
- printk(KERN_INFO "%s", version);
+ printk(KERN_INFO "%s", version);
- pcnet32_debug = netif_msg_init(debug, PCNET32_MSG_DEFAULT);
+ pcnet32_debug = netif_msg_init(debug, PCNET32_MSG_DEFAULT);
- if ((tx_start_pt >= 0) && (tx_start_pt <= 3))
- tx_start = tx_start_pt;
+ if ((tx_start_pt >= 0) && (tx_start_pt <= 3))
+ tx_start = tx_start_pt;
- /* find the PCI devices */
- if (!pci_module_init(&pcnet32_driver))
- pcnet32_have_pci = 1;
+ /* find the PCI devices */
+ if (!pci_module_init(&pcnet32_driver))
+ pcnet32_have_pci = 1;
- /* should we find any remaining VLbus devices ? */
- if (pcnet32vlb)
- pcnet32_probe_vlbus();
+ /* should we find any remaining VLbus devices ? */
+ if (pcnet32vlb)
+ pcnet32_probe_vlbus();
- if (cards_found && (pcnet32_debug & NETIF_MSG_PROBE))
- printk(KERN_INFO PFX "%d cards_found.\n", cards_found);
+ if (cards_found && (pcnet32_debug & NETIF_MSG_PROBE))
+ printk(KERN_INFO PFX "%d cards_found.\n", cards_found);
- return (pcnet32_have_pci + cards_found) ? 0 : -ENODEV;
+ return (pcnet32_have_pci + cards_found) ? 0 : -ENODEV;
}
static void __exit pcnet32_cleanup_module(void)
{
- struct net_device *next_dev;
-
- while (pcnet32_dev) {
- struct pcnet32_private *lp = pcnet32_dev->priv;
- next_dev = lp->next;
- unregister_netdev(pcnet32_dev);
- pcnet32_free_ring(pcnet32_dev);
- release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE);
- pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
- free_netdev(pcnet32_dev);
- pcnet32_dev = next_dev;
- }
+ struct net_device *next_dev;
+
+ while (pcnet32_dev) {
+ struct pcnet32_private *lp = pcnet32_dev->priv;
+ next_dev = lp->next;
+ unregister_netdev(pcnet32_dev);
+ pcnet32_free_ring(pcnet32_dev);
+ release_region(pcnet32_dev->base_addr, PCNET32_TOTAL_SIZE);
+ pci_free_consistent(lp->pci_dev, sizeof(*lp), lp, lp->dma_addr);
+ free_netdev(pcnet32_dev);
+ pcnet32_dev = next_dev;
+ }
- if (pcnet32_have_pci)
- pci_unregister_driver(&pcnet32_driver);
+ if (pcnet32_have_pci)
+ pci_unregister_driver(&pcnet32_driver);
}
module_init(pcnet32_init_module);
diff --git a/drivers/net/skfp/fplustm.c b/drivers/net/skfp/fplustm.c
index a4b2b6975d6..0784f558ca9 100644
--- a/drivers/net/skfp/fplustm.c
+++ b/drivers/net/skfp/fplustm.c
@@ -549,12 +549,12 @@ void formac_tx_restart(struct s_smc *smc)
static void enable_formac(struct s_smc *smc)
{
/* set formac IMSK : 0 enables irq */
- outpw(FM_A(FM_IMSK1U),~mac_imsk1u) ;
- outpw(FM_A(FM_IMSK1L),~mac_imsk1l) ;
- outpw(FM_A(FM_IMSK2U),~mac_imsk2u) ;
- outpw(FM_A(FM_IMSK2L),~mac_imsk2l) ;
- outpw(FM_A(FM_IMSK3U),~mac_imsk3u) ;
- outpw(FM_A(FM_IMSK3L),~mac_imsk3l) ;
+ outpw(FM_A(FM_IMSK1U),(unsigned short)~mac_imsk1u);
+ outpw(FM_A(FM_IMSK1L),(unsigned short)~mac_imsk1l);
+ outpw(FM_A(FM_IMSK2U),(unsigned short)~mac_imsk2u);
+ outpw(FM_A(FM_IMSK2L),(unsigned short)~mac_imsk2l);
+ outpw(FM_A(FM_IMSK3U),(unsigned short)~mac_imsk3u);
+ outpw(FM_A(FM_IMSK3L),(unsigned short)~mac_imsk3l);
}
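The casts matter because `~` promotes the u16 masks to int first, so ~mac_imsk1u is a negative 32-bit value and passing it to outpw() truncates implicitly (and draws compiler warnings). The effect in isolation:

/* Sketch: why ~mask needs the (unsigned short) cast before outpw(). */
static unsigned short fm_inverted(unsigned short mask)
{
	/* ~mask is an int, e.g. 0xffffff00 for mask 0x00ff; the cast
	 * keeps only the low 16 bits the FORMAC register takes. */
	return (unsigned short)~mask;
}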
#if 0 /* Removed because the driver should use the ASICs TX complete IRQ. */
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 25e028b7ce4..4eda81d41b1 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -44,7 +44,7 @@
#include "skge.h"
#define DRV_NAME "skge"
-#define DRV_VERSION "1.3"
+#define DRV_VERSION "1.4"
#define PFX DRV_NAME " "
#define DEFAULT_TX_RING_SIZE 128
@@ -104,7 +104,6 @@ static const int txqaddr[] = { Q_XA1, Q_XA2 };
static const int rxqaddr[] = { Q_R1, Q_R2 };
static const u32 rxirqmask[] = { IS_R1_F, IS_R2_F };
static const u32 txirqmask[] = { IS_XA1_F, IS_XA2_F };
-static const u32 portirqmask[] = { IS_PORT_1, IS_PORT_2 };
static int skge_get_regs_len(struct net_device *dev)
{
@@ -728,19 +727,18 @@ static struct ethtool_ops skge_ethtool_ops = {
* Allocate ring elements and chain them together
* One-to-one association of board descriptors with ring elements
*/
-static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u64 base)
+static int skge_ring_alloc(struct skge_ring *ring, void *vaddr, u32 base)
{
struct skge_tx_desc *d;
struct skge_element *e;
int i;
- ring->start = kmalloc(sizeof(*e)*ring->count, GFP_KERNEL);
+ ring->start = kcalloc(ring->count, sizeof(*e), GFP_KERNEL);
if (!ring->start)
return -ENOMEM;
for (i = 0, e = ring->start, d = vaddr; i < ring->count; i++, e++, d++) {
e->desc = d;
- e->skb = NULL;
if (i == ring->count - 1) {
e->next = ring->start;
d->next_offset = base;
@@ -2169,27 +2167,31 @@ static int skge_up(struct net_device *dev)
if (!skge->mem)
return -ENOMEM;
+ BUG_ON(skge->dma & 7);
+
+ if ((u64)skge->dma >> 32 != ((u64) skge->dma + skge->mem_size) >> 32) {
+ printk(KERN_ERR PFX "pci_alloc_consistent region crosses 4G boundary\n");
+ err = -EINVAL;
+ goto free_pci_mem;
+ }
+
memset(skge->mem, 0, skge->mem_size);
- if ((err = skge_ring_alloc(&skge->rx_ring, skge->mem, skge->dma)))
+ err = skge_ring_alloc(&skge->rx_ring, skge->mem, skge->dma);
+ if (err)
goto free_pci_mem;
err = skge_rx_fill(skge);
if (err)
goto free_rx_ring;
- if ((err = skge_ring_alloc(&skge->tx_ring, skge->mem + rx_size,
- skge->dma + rx_size)))
+ err = skge_ring_alloc(&skge->tx_ring, skge->mem + rx_size,
+ skge->dma + rx_size);
+ if (err)
goto free_rx_ring;
skge->tx_avail = skge->tx_ring.count - 1;
- /* Enable IRQ from port */
- spin_lock_irq(&hw->hw_lock);
- hw->intr_mask |= portirqmask[port];
- skge_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock_irq(&hw->hw_lock);
-
/* Initialize MAC */
spin_lock_bh(&hw->phy_lock);
if (hw->chip_id == CHIP_ID_GENESIS)
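The boundary test added to skge_up() exists because the ring descriptors hold 32-bit next_offset values, so a coherent block straddling a 4GB address boundary cannot be linked correctly. The predicate in isolation (name illustrative):

/* Sketch: true if [dma, dma + size) fits in one 4GB window. */
static inline int skge_dma_in_4g(u64 dma, size_t size)
{
	return (dma >> 32) == ((dma + size) >> 32);
}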
@@ -2246,11 +2248,6 @@ static int skge_down(struct net_device *dev)
else
yukon_stop(skge);
- spin_lock_irq(&hw->hw_lock);
- hw->intr_mask &= ~portirqmask[skge->port];
- skge_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock_irq(&hw->hw_lock);
-
/* Stop transmitter */
skge_write8(hw, Q_ADDR(txqaddr[port], Q_CSR), CSR_STOP);
skge_write32(hw, RB_ADDR(txqaddr[port], RB_CTRL),
@@ -2307,18 +2304,15 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
int i;
u32 control, len;
u64 map;
- unsigned long flags;
skb = skb_padto(skb, ETH_ZLEN);
if (!skb)
return NETDEV_TX_OK;
- local_irq_save(flags);
if (!spin_trylock(&skge->tx_lock)) {
- /* Collision - tell upper layer to requeue */
- local_irq_restore(flags);
- return NETDEV_TX_LOCKED;
- }
+ /* Collision - tell upper layer to requeue */
+ return NETDEV_TX_LOCKED;
+ }
if (unlikely(skge->tx_avail < skb_shinfo(skb)->nr_frags +1)) {
if (!netif_queue_stopped(dev)) {
@@ -2327,7 +2321,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
printk(KERN_WARNING PFX "%s: ring full when queue awake!\n",
dev->name);
}
- spin_unlock_irqrestore(&skge->tx_lock, flags);
+ spin_unlock(&skge->tx_lock);
return NETDEV_TX_BUSY;
}
@@ -2402,8 +2396,10 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev)
netif_stop_queue(dev);
}
+ mmiowb();
+ spin_unlock(&skge->tx_lock);
+
dev->trans_start = jiffies;
- spin_unlock_irqrestore(&skge->tx_lock, flags);
return NETDEV_TX_OK;
}
@@ -2416,7 +2412,7 @@ static inline void skge_tx_free(struct skge_hw *hw, struct skge_element *e)
pci_unmap_addr(e, mapaddr),
pci_unmap_len(e, maplen),
PCI_DMA_TODEVICE);
- dev_kfree_skb_any(e->skb);
+ dev_kfree_skb(e->skb);
e->skb = NULL;
} else {
pci_unmap_page(hw->pdev,
@@ -2430,15 +2426,14 @@ static void skge_tx_clean(struct skge_port *skge)
{
struct skge_ring *ring = &skge->tx_ring;
struct skge_element *e;
- unsigned long flags;
- spin_lock_irqsave(&skge->tx_lock, flags);
+ spin_lock_bh(&skge->tx_lock);
for (e = ring->to_clean; e != ring->to_use; e = e->next) {
++skge->tx_avail;
skge_tx_free(skge->hw, e);
}
ring->to_clean = e;
- spin_unlock_irqrestore(&skge->tx_lock, flags);
+ spin_unlock_bh(&skge->tx_lock);
}
static void skge_tx_timeout(struct net_device *dev)
@@ -2663,6 +2658,37 @@ resubmit:
return NULL;
}
+static void skge_tx_done(struct skge_port *skge)
+{
+ struct skge_ring *ring = &skge->tx_ring;
+ struct skge_element *e;
+
+ spin_lock(&skge->tx_lock);
+ for (e = ring->to_clean; prefetch(e->next), e != ring->to_use; e = e->next) {
+ struct skge_tx_desc *td = e->desc;
+ u32 control;
+
+ rmb();
+ control = td->control;
+ if (control & BMU_OWN)
+ break;
+
+ if (unlikely(netif_msg_tx_done(skge)))
+ printk(KERN_DEBUG PFX "%s: tx done slot %td status 0x%x\n",
+ skge->netdev->name, e - ring->start, td->status);
+
+ skge_tx_free(skge->hw, e);
+ e->skb = NULL;
+ ++skge->tx_avail;
+ }
+ ring->to_clean = e;
+ skge_write8(skge->hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_IRQ_CL_F);
+
+ if (skge->tx_avail > MAX_SKB_FRAGS + 1)
+ netif_wake_queue(skge->netdev);
+
+ spin_unlock(&skge->tx_lock);
+}
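skge_tx_done() trusts the descriptor ownership protocol: the BMU clears BMU_OWN when a slot completes, and the rmb() keeps the control read ordered ahead of the fields the loop then frees. A reduced sketch of that reclaim walk, assuming the ring layout above:

/* Sketch: reclaim tx slots until one the hardware still owns. */
static void skge_reclaim_tx(struct skge_port *skge)
{
	struct skge_ring *ring = &skge->tx_ring;
	struct skge_element *e;

	spin_lock(&skge->tx_lock);
	for (e = ring->to_clean; e != ring->to_use; e = e->next) {
		struct skge_tx_desc *td = e->desc;

		rmb();	/* read descriptor before trusting its fields */
		if (td->control & BMU_OWN)
			break;	/* hardware still transmitting this slot */
		skge_tx_free(skge->hw, e);
		++skge->tx_avail;
	}
	ring->to_clean = e;
	spin_unlock(&skge->tx_lock);
}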
static int skge_poll(struct net_device *dev, int *budget)
{
@@ -2670,8 +2696,10 @@ static int skge_poll(struct net_device *dev, int *budget)
struct skge_hw *hw = skge->hw;
struct skge_ring *ring = &skge->rx_ring;
struct skge_element *e;
- unsigned int to_do = min(dev->quota, *budget);
- unsigned int work_done = 0;
+ int to_do = min(dev->quota, *budget);
+ int work_done = 0;
+
+ skge_tx_done(skge);
for (e = ring->to_clean; prefetch(e->next), work_done < to_do; e = e->next) {
struct skge_rx_desc *rd = e->desc;
@@ -2683,8 +2711,8 @@ static int skge_poll(struct net_device *dev, int *budget)
if (control & BMU_OWN)
break;
- skb = skge_rx_get(skge, e, control, rd->status,
- le16_to_cpu(rd->csum2));
+ skb = skge_rx_get(skge, e, control, rd->status,
+ le16_to_cpu(rd->csum2));
if (likely(skb)) {
dev->last_rx = jiffies;
netif_receive_skb(skb);
@@ -2705,49 +2733,15 @@ static int skge_poll(struct net_device *dev, int *budget)
if (work_done >= to_do)
return 1; /* not done */
- spin_lock_irq(&hw->hw_lock);
- __netif_rx_complete(dev);
- hw->intr_mask |= portirqmask[skge->port];
+ netif_rx_complete(dev);
+ mmiowb();
+
+ hw->intr_mask |= skge->port == 0 ? (IS_R1_F|IS_XA1_F) : (IS_R2_F|IS_XA2_F);
skge_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock_irq(&hw->hw_lock);
return 0;
}
-static inline void skge_tx_intr(struct net_device *dev)
-{
- struct skge_port *skge = netdev_priv(dev);
- struct skge_hw *hw = skge->hw;
- struct skge_ring *ring = &skge->tx_ring;
- struct skge_element *e;
-
- spin_lock(&skge->tx_lock);
- for (e = ring->to_clean; prefetch(e->next), e != ring->to_use; e = e->next) {
- struct skge_tx_desc *td = e->desc;
- u32 control;
-
- rmb();
- control = td->control;
- if (control & BMU_OWN)
- break;
-
- if (unlikely(netif_msg_tx_done(skge)))
- printk(KERN_DEBUG PFX "%s: tx done slot %td status 0x%x\n",
- dev->name, e - ring->start, td->status);
-
- skge_tx_free(hw, e);
- e->skb = NULL;
- ++skge->tx_avail;
- }
- ring->to_clean = e;
- skge_write8(hw, Q_ADDR(txqaddr[skge->port], Q_CSR), CSR_IRQ_CL_F);
-
- if (skge->tx_avail > MAX_SKB_FRAGS + 1)
- netif_wake_queue(dev);
-
- spin_unlock(&skge->tx_lock);
-}
-
/* Parity errors seem to happen when Genesis is connected to a switch
* with no other ports present. Heartbeat error??
*/
@@ -2770,17 +2764,6 @@ static void skge_mac_parity(struct skge_hw *hw, int port)
? GMF_CLI_TX_FC : GMF_CLI_TX_PE);
}
-static void skge_pci_clear(struct skge_hw *hw)
-{
- u16 status;
-
- pci_read_config_word(hw->pdev, PCI_STATUS, &status);
- skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON);
- pci_write_config_word(hw->pdev, PCI_STATUS,
- status | PCI_STATUS_ERROR_BITS);
- skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
-}
-
static void skge_mac_intr(struct skge_hw *hw, int port)
{
if (hw->chip_id == CHIP_ID_GENESIS)
@@ -2822,23 +2805,39 @@ static void skge_error_irq(struct skge_hw *hw)
if (hwstatus & IS_M2_PAR_ERR)
skge_mac_parity(hw, 1);
- if (hwstatus & IS_R1_PAR_ERR)
+ if (hwstatus & IS_R1_PAR_ERR) {
+ printk(KERN_ERR PFX "%s: receive queue parity error\n",
+ hw->dev[0]->name);
skge_write32(hw, B0_R1_CSR, CSR_IRQ_CL_P);
+ }
- if (hwstatus & IS_R2_PAR_ERR)
+ if (hwstatus & IS_R2_PAR_ERR) {
+ printk(KERN_ERR PFX "%s: receive queue parity error\n",
+ hw->dev[1]->name);
skge_write32(hw, B0_R2_CSR, CSR_IRQ_CL_P);
+ }
if (hwstatus & (IS_IRQ_MST_ERR|IS_IRQ_STAT)) {
- printk(KERN_ERR PFX "hardware error detected (status 0x%x)\n",
- hwstatus);
+ u16 pci_status, pci_cmd;
+
+ pci_read_config_word(hw->pdev, PCI_COMMAND, &pci_cmd);
+ pci_read_config_word(hw->pdev, PCI_STATUS, &pci_status);
- skge_pci_clear(hw);
+ printk(KERN_ERR PFX "%s: PCI error cmd=%#x status=%#x\n",
+ pci_name(hw->pdev), pci_cmd, pci_status);
+
+ /* Write the error bits back to clear them. */
+ pci_status &= PCI_STATUS_ERROR_BITS;
+ skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON);
+ pci_write_config_word(hw->pdev, PCI_COMMAND,
+ pci_cmd | PCI_COMMAND_SERR | PCI_COMMAND_PARITY);
+ pci_write_config_word(hw->pdev, PCI_STATUS, pci_status);
+ skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
/* if error still set then just ignore it */
hwstatus = skge_read32(hw, B0_HWE_ISRC);
if (hwstatus & IS_IRQ_STAT) {
- pr_debug("IRQ status %x: still set ignoring hardware errors\n",
- hwstatus);
+ printk(KERN_INFO PFX "unable to clear error (so ignoring them)\n");
hw->intr_mask &= ~IS_HW_ERR;
}
}
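PCI status error bits are write-one-to-clear, which is why the new code writes the masked status back instead of zero; the TST_CFG_WRITE_ON/OFF pair is the chip-specific unlock around config-space writes. The generic half of that pattern:

/* Sketch: clear latched PCI error bits (write-1-to-clear semantics);
 * PCI_STATUS_ERROR_BITS here is the driver's mask of the error flags. */
static void pci_clear_errors(struct pci_dev *pdev)
{
	u16 status;

	pci_read_config_word(pdev, PCI_STATUS, &status);
	pci_write_config_word(pdev, PCI_STATUS,
			      status & PCI_STATUS_ERROR_BITS);
}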
@@ -2855,12 +2854,11 @@ static void skge_extirq(unsigned long data)
int port;
spin_lock(&hw->phy_lock);
- for (port = 0; port < 2; port++) {
+ for (port = 0; port < hw->ports; port++) {
struct net_device *dev = hw->dev[port];
+ struct skge_port *skge = netdev_priv(dev);
- if (dev && netif_running(dev)) {
- struct skge_port *skge = netdev_priv(dev);
-
+ if (netif_running(dev)) {
if (hw->chip_id != CHIP_ID_GENESIS)
yukon_phy_intr(skge);
else
@@ -2869,38 +2867,39 @@ static void skge_extirq(unsigned long data)
}
spin_unlock(&hw->phy_lock);
- spin_lock_irq(&hw->hw_lock);
hw->intr_mask |= IS_EXT_REG;
skge_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock_irq(&hw->hw_lock);
}
static irqreturn_t skge_intr(int irq, void *dev_id, struct pt_regs *regs)
{
struct skge_hw *hw = dev_id;
- u32 status = skge_read32(hw, B0_SP_ISRC);
+ u32 status;
- if (status == 0 || status == ~0) /* hotplug or shared irq */
+ /* Reading this register masks IRQ */
+ status = skge_read32(hw, B0_SP_ISRC);
+ if (status == 0)
return IRQ_NONE;
- spin_lock(&hw->hw_lock);
- if (status & IS_R1_F) {
+ if (status & IS_EXT_REG) {
+ hw->intr_mask &= ~IS_EXT_REG;
+ tasklet_schedule(&hw->ext_tasklet);
+ }
+
+ if (status & (IS_R1_F|IS_XA1_F)) {
skge_write8(hw, Q_ADDR(Q_R1, Q_CSR), CSR_IRQ_CL_F);
- hw->intr_mask &= ~IS_R1_F;
+ hw->intr_mask &= ~(IS_R1_F|IS_XA1_F);
netif_rx_schedule(hw->dev[0]);
}
- if (status & IS_R2_F) {
+ if (status & (IS_R2_F|IS_XA2_F)) {
skge_write8(hw, Q_ADDR(Q_R2, Q_CSR), CSR_IRQ_CL_F);
- hw->intr_mask &= ~IS_R2_F;
+ hw->intr_mask &= ~(IS_R2_F|IS_XA2_F);
netif_rx_schedule(hw->dev[1]);
}
- if (status & IS_XA1_F)
- skge_tx_intr(hw->dev[0]);
-
- if (status & IS_XA2_F)
- skge_tx_intr(hw->dev[1]);
+ if (likely((status & hw->intr_mask) == 0))
+ return IRQ_HANDLED;
if (status & IS_PA_TO_RX1) {
struct skge_port *skge = netdev_priv(hw->dev[0]);
@@ -2929,13 +2928,7 @@ static irqreturn_t skge_intr(int irq, void *dev_id, struct pt_regs *regs)
if (status & IS_HW_ERR)
skge_error_irq(hw);
- if (status & IS_EXT_REG) {
- hw->intr_mask &= ~IS_EXT_REG;
- tasklet_schedule(&hw->ext_tasklet);
- }
-
skge_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock(&hw->hw_lock);
return IRQ_HANDLED;
}
@@ -3010,7 +3003,7 @@ static const char *skge_board_name(const struct skge_hw *hw)
static int skge_reset(struct skge_hw *hw)
{
u32 reg;
- u16 ctst;
+ u16 ctst, pci_status;
u8 t8, mac_cfg, pmd_type, phy_type;
int i;
@@ -3021,8 +3014,13 @@ static int skge_reset(struct skge_hw *hw)
skge_write8(hw, B0_CTST, CS_RST_CLR);
/* clear PCI errors, if any */
- skge_pci_clear(hw);
+ skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON);
+ skge_write8(hw, B2_TST_CTRL2, 0);
+ pci_read_config_word(hw->pdev, PCI_STATUS, &pci_status);
+ pci_write_config_word(hw->pdev, PCI_STATUS,
+ pci_status | PCI_STATUS_ERROR_BITS);
+ skge_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF);
skge_write8(hw, B0_CTST, CS_MRST_CLR);
/* restore CLK_RUN bits (for Yukon-Lite) */
@@ -3081,7 +3079,10 @@ static int skge_reset(struct skge_hw *hw)
else
hw->ram_size = t8 * 4096;
- hw->intr_mask = IS_HW_ERR | IS_EXT_REG;
+ hw->intr_mask = IS_HW_ERR | IS_EXT_REG | IS_PORT_1;
+ if (hw->ports > 1)
+ hw->intr_mask |= IS_PORT_2;
+
if (hw->chip_id == CHIP_ID_GENESIS)
genesis_init(hw);
else {
@@ -3251,13 +3252,15 @@ static int __devinit skge_probe(struct pci_dev *pdev,
struct skge_hw *hw;
int err, using_dac = 0;
- if ((err = pci_enable_device(pdev))) {
+ err = pci_enable_device(pdev);
+ if (err) {
printk(KERN_ERR PFX "%s cannot enable PCI device\n",
pci_name(pdev));
goto err_out;
}
- if ((err = pci_request_regions(pdev, DRV_NAME))) {
+ err = pci_request_regions(pdev, DRV_NAME);
+ if (err) {
printk(KERN_ERR PFX "%s cannot obtain PCI resources\n",
pci_name(pdev));
goto err_out_disable_pdev;
@@ -3265,22 +3268,18 @@ static int __devinit skge_probe(struct pci_dev *pdev,
pci_set_master(pdev);
- if (sizeof(dma_addr_t) > sizeof(u32) &&
- !(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK))) {
+ if (!pci_set_dma_mask(pdev, DMA_64BIT_MASK)) {
using_dac = 1;
err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
- if (err < 0) {
- printk(KERN_ERR PFX "%s unable to obtain 64 bit DMA "
- "for consistent allocations\n", pci_name(pdev));
- goto err_out_free_regions;
- }
- } else {
- err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
- if (err) {
- printk(KERN_ERR PFX "%s no usable DMA configuration\n",
- pci_name(pdev));
- goto err_out_free_regions;
- }
+ } else if (!(err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) {
+ using_dac = 0;
+ err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ }
+
+ if (err) {
+ printk(KERN_ERR PFX "%s no usable DMA configuration\n",
+ pci_name(pdev));
+ goto err_out_free_regions;
}
#ifdef __BIG_ENDIAN
@@ -3304,7 +3303,6 @@ static int __devinit skge_probe(struct pci_dev *pdev,
hw->pdev = pdev;
spin_lock_init(&hw->phy_lock);
- spin_lock_init(&hw->hw_lock);
tasklet_init(&hw->ext_tasklet, skge_extirq, (unsigned long) hw);
hw->regs = ioremap_nocache(pci_resource_start(pdev, 0), 0x4000);
@@ -3314,7 +3312,8 @@ static int __devinit skge_probe(struct pci_dev *pdev,
goto err_out_free_hw;
}
- if ((err = request_irq(pdev->irq, skge_intr, SA_SHIRQ, DRV_NAME, hw))) {
+ err = request_irq(pdev->irq, skge_intr, SA_SHIRQ, DRV_NAME, hw);
+ if (err) {
printk(KERN_ERR PFX "%s: cannot assign irq %d\n",
pci_name(pdev), pdev->irq);
goto err_out_iounmap;
@@ -3332,7 +3331,8 @@ static int __devinit skge_probe(struct pci_dev *pdev,
if ((dev = skge_devinit(hw, 0, using_dac)) == NULL)
goto err_out_led_off;
- if ((err = register_netdev(dev))) {
+ err = register_netdev(dev);
+ if (err) {
printk(KERN_ERR PFX "%s: cannot register net device\n",
pci_name(pdev));
goto err_out_free_netdev;
@@ -3387,7 +3387,6 @@ static void __devexit skge_remove(struct pci_dev *pdev)
skge_write32(hw, B0_IMSK, 0);
skge_write16(hw, B0_LED, LED_STAT_OFF);
- skge_pci_clear(hw);
skge_write8(hw, B0_CTST, CS_RST_SET);
tasklet_kill(&hw->ext_tasklet);
diff --git a/drivers/net/skge.h b/drivers/net/skge.h
index 941f12a333b..2efdacc290e 100644
--- a/drivers/net/skge.h
+++ b/drivers/net/skge.h
@@ -2402,7 +2402,6 @@ struct skge_hw {
struct tasklet_struct ext_tasklet;
spinlock_t phy_lock;
- spinlock_t hw_lock;
};
enum {
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 73260364cba..f08fe6c884b 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -51,7 +51,7 @@
#include "sky2.h"
#define DRV_NAME "sky2"
-#define DRV_VERSION "0.15"
+#define DRV_VERSION "1.1"
#define PFX DRV_NAME " "
/*
@@ -61,10 +61,6 @@
* a receive requires one (or two if using 64 bit dma).
*/
-#define is_ec_a1(hw) \
- unlikely((hw)->chip_id == CHIP_ID_YUKON_EC && \
- (hw)->chip_rev == CHIP_REV_YU_EC_A1)
-
#define RX_LE_SIZE 512
#define RX_LE_BYTES (RX_LE_SIZE*sizeof(struct sky2_rx_le))
#define RX_MAX_PENDING (RX_LE_SIZE/2 - 2)
@@ -96,6 +92,10 @@ static int copybreak __read_mostly = 256;
module_param(copybreak, int, 0);
MODULE_PARM_DESC(copybreak, "Receive copy threshold");
+static int disable_msi = 0;
+module_param(disable_msi, int, 0);
+MODULE_PARM_DESC(disable_msi, "Disable Message Signaled Interrupt (MSI)");
+
static const struct pci_device_id sky2_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9000) },
{ PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, 0x9E00) },
@@ -504,9 +504,9 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port)
/* Force a renegotiation */
static void sky2_phy_reinit(struct sky2_port *sky2)
{
- down(&sky2->phy_sema);
+ spin_lock_bh(&sky2->phy_lock);
sky2_phy_init(sky2->hw, sky2->port);
- up(&sky2->phy_sema);
+ spin_unlock_bh(&sky2->phy_lock);
}
static void sky2_mac_init(struct sky2_hw *hw, unsigned port)
@@ -571,9 +571,9 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port)
sky2_read16(hw, SK_REG(port, GMAC_IRQ_SRC));
- down(&sky2->phy_sema);
+ spin_lock_bh(&sky2->phy_lock);
sky2_phy_init(hw, port);
- up(&sky2->phy_sema);
+ spin_unlock_bh(&sky2->phy_lock);
/* MIB clear */
reg = gma_read16(hw, port, GM_PHY_ADDR);
@@ -725,37 +725,11 @@ static inline struct sky2_tx_le *get_tx_le(struct sky2_port *sky2)
return le;
}
-/*
- * This is a workaround code taken from SysKonnect sk98lin driver
- * to deal with chip bug on Yukon EC rev 0 in the wraparound case.
- */
-static void sky2_put_idx(struct sky2_hw *hw, unsigned q,
- u16 idx, u16 *last, u16 size)
+/* Update chip's next pointer */
+static inline void sky2_put_idx(struct sky2_hw *hw, unsigned q, u16 idx)
{
wmb();
- if (is_ec_a1(hw) && idx < *last) {
- u16 hwget = sky2_read16(hw, Y2_QADDR(q, PREF_UNIT_GET_IDX));
-
- if (hwget == 0) {
- /* Start prefetching again */
- sky2_write8(hw, Y2_QADDR(q, PREF_UNIT_FIFO_WM), 0xe0);
- goto setnew;
- }
-
- if (hwget == size - 1) {
- /* set watermark to one list element */
- sky2_write8(hw, Y2_QADDR(q, PREF_UNIT_FIFO_WM), 8);
-
- /* set put index to first list element */
- sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), 0);
- } else /* have hardware go to end of list */
- sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX),
- size - 1);
- } else {
-setnew:
- sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx);
- }
- *last = idx;
+ sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx);
mmiowb();
}
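With the EC A1 workaround dropped, sky2_put_idx() is the plain producer doorbell: wmb() publishes the list elements before the put-index write, and mmiowb() fences the MMIO store against a later spin_unlock on platforms where posted writes can pass lock release. A typical caller, under the driver's tx_lock:

/* Sketch: publish freshly built tx list elements (tx_lock held). */
sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod);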
@@ -878,7 +852,7 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!netif_running(dev))
return -ENODEV; /* Phy still in reset */
- switch(cmd) {
+ switch (cmd) {
case SIOCGMIIPHY:
data->phy_id = PHY_ADDR_MARV;
@@ -886,9 +860,9 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
case SIOCGMIIREG: {
u16 val = 0;
- down(&sky2->phy_sema);
+ spin_lock_bh(&sky2->phy_lock);
err = __gm_phy_read(hw, sky2->port, data->reg_num & 0x1f, &val);
- up(&sky2->phy_sema);
+ spin_unlock_bh(&sky2->phy_lock);
data->val_out = val;
break;
@@ -898,10 +872,10 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!capable(CAP_NET_ADMIN))
return -EPERM;
- down(&sky2->phy_sema);
+ spin_lock_bh(&sky2->phy_lock);
err = gm_phy_write(hw, sky2->port, data->reg_num & 0x1f,
data->val_in);
- up(&sky2->phy_sema);
+ spin_unlock_bh(&sky2->phy_lock);
break;
}
return err;
@@ -1001,7 +975,6 @@ static int sky2_rx_start(struct sky2_port *sky2)
/* Tell chip about available buffers */
sky2_write16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX), sky2->rx_put);
- sky2->rx_last_put = sky2_read16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX));
return 0;
nomem:
sky2_rx_clean(sky2);
@@ -1014,7 +987,7 @@ static int sky2_up(struct net_device *dev)
struct sky2_port *sky2 = netdev_priv(dev);
struct sky2_hw *hw = sky2->hw;
unsigned port = sky2->port;
- u32 ramsize, rxspace;
+ u32 ramsize, rxspace, imask;
int err = -ENOMEM;
if (netif_msg_ifup(sky2))
@@ -1079,10 +1052,10 @@ static int sky2_up(struct net_device *dev)
goto err_out;
/* Enable interrupts from phy/mac for port */
- spin_lock_irq(&hw->hw_lock);
- hw->intr_mask |= (port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2;
- sky2_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock_irq(&hw->hw_lock);
+ imask = sky2_read32(hw, B0_IMSK);
+ imask |= (port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2;
+ sky2_write32(hw, B0_IMSK, imask);
+
return 0;
err_out:
@@ -1299,8 +1272,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev)
netif_stop_queue(dev);
}
- sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod,
- &sky2->tx_last_put, TX_RING_SIZE);
+ sky2_put_idx(hw, txqaddr[sky2->port], sky2->tx_prod);
out_unlock:
spin_unlock(&sky2->tx_lock);
@@ -1332,7 +1304,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
struct tx_ring_info *re = sky2->tx_ring + put;
struct sk_buff *skb = re->skb;
- nxt = re->idx;
+ nxt = re->idx;
BUG_ON(nxt >= TX_RING_SIZE);
prefetch(sky2->tx_ring + nxt);
@@ -1348,7 +1320,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
struct tx_ring_info *fre;
fre = sky2->tx_ring + (put + i + 1) % TX_RING_SIZE;
pci_unmap_page(pdev, pci_unmap_addr(fre, mapaddr),
- skb_shinfo(skb)->frags[i].size,
+ skb_shinfo(skb)->frags[i].size,
PCI_DMA_TODEVICE);
}
@@ -1356,7 +1328,7 @@ static void sky2_tx_complete(struct sky2_port *sky2, u16 done)
}
sky2->tx_cons = put;
- if (netif_queue_stopped(dev) && tx_avail(sky2) > MAX_SKB_TX_LE)
+ if (tx_avail(sky2) > MAX_SKB_TX_LE)
netif_wake_queue(dev);
}
@@ -1375,6 +1347,7 @@ static int sky2_down(struct net_device *dev)
struct sky2_hw *hw = sky2->hw;
unsigned port = sky2->port;
u16 ctrl;
+ u32 imask;
/* Never really got started! */
if (!sky2->tx_le)
@@ -1386,14 +1359,6 @@ static int sky2_down(struct net_device *dev)
/* Stop more packets from being queued */
netif_stop_queue(dev);
- /* Disable port IRQ */
- spin_lock_irq(&hw->hw_lock);
- hw->intr_mask &= ~((sky2->port == 0) ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2);
- sky2_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock_irq(&hw->hw_lock);
-
- flush_scheduled_work();
-
sky2_phy_reset(hw, port);
/* Stop transmitter */
@@ -1437,6 +1402,11 @@ static int sky2_down(struct net_device *dev)
sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET);
sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET);
+ /* Disable port IRQ */
+ imask = sky2_read32(hw, B0_IMSK);
+ imask &= ~((sky2->port == 0) ? Y2_IS_PORT_1 : Y2_IS_PORT_2);
+ sky2_write32(hw, B0_IMSK, imask);
+
/* turn off LED's */
sky2_write16(hw, B0_Y2LED, LED_STAT_OFF);
@@ -1631,20 +1601,19 @@ static int sky2_autoneg_done(struct sky2_port *sky2, u16 aux)
return 0;
}
-/*
- * Interrupt from PHY are handled outside of interrupt context
- * because accessing phy registers requires spin wait which might
- * cause excess interrupt latency.
- */
-static void sky2_phy_task(void *arg)
+/* Interrupt from PHY */
+static void sky2_phy_intr(struct sky2_hw *hw, unsigned port)
{
- struct sky2_port *sky2 = arg;
- struct sky2_hw *hw = sky2->hw;
+ struct net_device *dev = hw->dev[port];
+ struct sky2_port *sky2 = netdev_priv(dev);
u16 istatus, phystat;
- down(&sky2->phy_sema);
- istatus = gm_phy_read(hw, sky2->port, PHY_MARV_INT_STAT);
- phystat = gm_phy_read(hw, sky2->port, PHY_MARV_PHY_STAT);
+ spin_lock(&sky2->phy_lock);
+ istatus = gm_phy_read(hw, port, PHY_MARV_INT_STAT);
+ phystat = gm_phy_read(hw, port, PHY_MARV_PHY_STAT);
+
+ if (!netif_running(dev))
+ goto out;
if (netif_msg_intr(sky2))
printk(KERN_INFO PFX "%s: phy interrupt status 0x%x 0x%x\n",
@@ -1670,12 +1639,7 @@ static void sky2_phy_task(void *arg)
sky2_link_down(sky2);
}
out:
- up(&sky2->phy_sema);
-
- spin_lock_irq(&hw->hw_lock);
- hw->intr_mask |= (sky2->port == 0) ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2;
- sky2_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock_irq(&hw->hw_lock);
+ spin_unlock(&sky2->phy_lock);
}
@@ -1687,31 +1651,40 @@ static void sky2_tx_timeout(struct net_device *dev)
struct sky2_port *sky2 = netdev_priv(dev);
struct sky2_hw *hw = sky2->hw;
unsigned txq = txqaddr[sky2->port];
- u16 ridx;
-
- /* Maybe we just missed an status interrupt */
- spin_lock(&sky2->tx_lock);
- ridx = sky2_read16(hw,
- sky2->port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX);
- sky2_tx_complete(sky2, ridx);
- spin_unlock(&sky2->tx_lock);
-
- if (!netif_queue_stopped(dev)) {
- if (net_ratelimit())
- pr_info(PFX "transmit interrupt missed? recovered\n");
- return;
- }
+ u16 report, done;
if (netif_msg_timer(sky2))
printk(KERN_ERR PFX "%s: tx timeout\n", dev->name);
- sky2_write32(hw, Q_ADDR(txq, Q_CSR), BMU_STOP);
- sky2_write32(hw, Y2_QADDR(txq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET);
+ report = sky2_read16(hw, sky2->port == 0 ? STAT_TXA1_RIDX : STAT_TXA2_RIDX);
+ done = sky2_read16(hw, Q_ADDR(txq, Q_DONE));
- sky2_tx_clean(sky2);
+ printk(KERN_DEBUG PFX "%s: transmit ring %u .. %u report=%u done=%u\n",
+ dev->name,
+ sky2->tx_cons, sky2->tx_prod, report, done);
- sky2_qset(hw, txq);
- sky2_prefetch_init(hw, txq, sky2->tx_le_map, TX_RING_SIZE - 1);
+ if (report != done) {
+ printk(KERN_INFO PFX "status burst pending (irq moderation?)\n");
+
+ sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP);
+ sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START);
+ } else if (report != sky2->tx_cons) {
+ printk(KERN_INFO PFX "status report lost?\n");
+
+ spin_lock_bh(&sky2->tx_lock);
+ sky2_tx_complete(sky2, report);
+ spin_unlock_bh(&sky2->tx_lock);
+ } else {
+ printk(KERN_INFO PFX "hardware hung? flushing\n");
+
+ sky2_write32(hw, Q_ADDR(txq, Q_CSR), BMU_STOP);
+ sky2_write32(hw, Y2_QADDR(txq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET);
+
+ sky2_tx_clean(sky2);
+
+ sky2_qset(hw, txq);
+ sky2_prefetch_init(hw, txq, sky2->tx_le_map, TX_RING_SIZE - 1);
+ }
}
@@ -1730,6 +1703,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu)
struct sky2_hw *hw = sky2->hw;
int err;
u16 ctl, mode;
+ u32 imask;
if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
return -EINVAL;
@@ -1742,12 +1716,15 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}
+ imask = sky2_read32(hw, B0_IMSK);
sky2_write32(hw, B0_IMSK, 0);
dev->trans_start = jiffies; /* prevent tx timeout */
netif_stop_queue(dev);
netif_poll_disable(hw->dev[0]);
+ synchronize_irq(hw->pdev->irq);
+
ctl = gma_read16(hw, sky2->port, GM_GP_CTRL);
gma_write16(hw, sky2->port, GM_GP_CTRL, ctl & ~GM_GPCR_RX_ENA);
sky2_rx_stop(sky2);
@@ -1766,7 +1743,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu)
sky2_write8(hw, RB_ADDR(rxqaddr[sky2->port], RB_CTRL), RB_ENA_OP_MD);
err = sky2_rx_start(sky2);
- sky2_write32(hw, B0_IMSK, hw->intr_mask);
+ sky2_write32(hw, B0_IMSK, imask);
if (err)
dev_close(dev);
@@ -1843,8 +1820,7 @@ resubmit:
sky2_rx_add(sky2, re->mapaddr);
/* Tell receiver about new buffers. */
- sky2_put_idx(sky2->hw, rxqaddr[sky2->port], sky2->rx_put,
- &sky2->rx_last_put, RX_LE_SIZE);
+ sky2_put_idx(sky2->hw, rxqaddr[sky2->port], sky2->rx_put);
return skb;
@@ -1871,76 +1847,51 @@ error:
goto resubmit;
}
-/*
- * Check for transmit complete
- */
-#define TX_NO_STATUS 0xffff
-
-static void sky2_tx_check(struct sky2_hw *hw, int port, u16 last)
+/* Transmit complete */
+static inline void sky2_tx_done(struct net_device *dev, u16 last)
{
- if (last != TX_NO_STATUS) {
- struct net_device *dev = hw->dev[port];
- if (dev && netif_running(dev)) {
- struct sky2_port *sky2 = netdev_priv(dev);
+ struct sky2_port *sky2 = netdev_priv(dev);
- spin_lock(&sky2->tx_lock);
- sky2_tx_complete(sky2, last);
- spin_unlock(&sky2->tx_lock);
- }
+ if (netif_running(dev)) {
+ spin_lock(&sky2->tx_lock);
+ sky2_tx_complete(sky2, last);
+ spin_unlock(&sky2->tx_lock);
}
}
-/*
- * Both ports share the same status interrupt, therefore there is only
- * one poll routine.
- */
-static int sky2_poll(struct net_device *dev0, int *budget)
+/* Process status response ring */
+static int sky2_status_intr(struct sky2_hw *hw, int to_do)
{
- struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw;
- unsigned int to_do = min(dev0->quota, *budget);
- unsigned int work_done = 0;
- u16 hwidx;
- u16 tx_done[2] = { TX_NO_STATUS, TX_NO_STATUS };
-
- sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ);
-
- /*
- * Kick the STAT_LEV_TIMER_CTRL timer.
- * This fixes my hangs on Yukon-EC (0xb6) rev 1.
- * The if clause is there to start the timer only if it has been
- * configured correctly and not been disabled via ethtool.
- */
- if (sky2_read8(hw, STAT_LEV_TIMER_CTRL) == TIM_START) {
- sky2_write8(hw, STAT_LEV_TIMER_CTRL, TIM_STOP);
- sky2_write8(hw, STAT_LEV_TIMER_CTRL, TIM_START);
- }
+ int work_done = 0;
- hwidx = sky2_read16(hw, STAT_PUT_IDX);
- BUG_ON(hwidx >= STATUS_RING_SIZE);
rmb();
- while (hwidx != hw->st_idx) {
+ for (;;) {
struct sky2_status_le *le = hw->st_le + hw->st_idx;
struct net_device *dev;
struct sky2_port *sky2;
struct sk_buff *skb;
u32 status;
u16 length;
+ u8 link, opcode;
+
+ opcode = le->opcode;
+ if (!opcode)
+ break;
+ opcode &= ~HW_OWNER;
- le = hw->st_le + hw->st_idx;
hw->st_idx = (hw->st_idx + 1) % STATUS_RING_SIZE;
- prefetch(hw->st_le + hw->st_idx);
+ le->opcode = 0;
- BUG_ON(le->link >= 2);
- dev = hw->dev[le->link];
- if (dev == NULL || !netif_running(dev))
- continue;
+ link = le->link;
+ BUG_ON(link >= 2);
+ dev = hw->dev[link];
sky2 = netdev_priv(dev);
- status = le32_to_cpu(le->status);
- length = le16_to_cpu(le->length);
+ length = le->length;
+ status = le->status;
- switch (le->opcode & ~HW_OWNER) {
+ switch (opcode) {
case OP_RXSTAT:
skb = sky2_receive(sky2, length, status);
if (!skb)
@@ -1980,42 +1931,23 @@ static int sky2_poll(struct net_device *dev0, int *budget)
case OP_TXINDEXLE:
/* TX index reports status for both ports */
- tx_done[0] = status & 0xffff;
- tx_done[1] = ((status >> 24) & 0xff)
- | (u16)(length & 0xf) << 8;
+ sky2_tx_done(hw->dev[0], status & 0xffff);
+ if (hw->dev[1])
+ sky2_tx_done(hw->dev[1],
+ ((status >> 24) & 0xff)
+ | (u16)(length & 0xf) << 8);
break;
default:
if (net_ratelimit())
printk(KERN_WARNING PFX
- "unknown status opcode 0x%x\n", le->opcode);
+ "unknown status opcode 0x%x\n", opcode);
break;
}
}
exit_loop:
- sky2_tx_check(hw, 0, tx_done[0]);
- sky2_tx_check(hw, 1, tx_done[1]);
-
- if (sky2_read8(hw, STAT_TX_TIMER_CTRL) == TIM_START) {
- sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP);
- sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START);
- }
-
- if (likely(work_done < to_do)) {
- spin_lock_irq(&hw->hw_lock);
- __netif_rx_complete(dev0);
-
- hw->intr_mask |= Y2_IS_STAT_BMU;
- sky2_write32(hw, B0_IMSK, hw->intr_mask);
- spin_unlock_irq(&hw->hw_lock);
-
- return 0;
- } else {
- *budget -= work_done;
- dev0->quota -= work_done;
- return 1;
- }
+ return work_done;
}
static void sky2_hw_error(struct sky2_hw *hw, unsigned port, u32 status)
@@ -2134,57 +2066,97 @@ static void sky2_mac_intr(struct sky2_hw *hw, unsigned port)
}
}
-static void sky2_phy_intr(struct sky2_hw *hw, unsigned port)
+/* This should never happen; it is a fatal situation */
+static void sky2_descriptor_error(struct sky2_hw *hw, unsigned port,
+ const char *rxtx, u32 mask)
{
struct net_device *dev = hw->dev[port];
struct sky2_port *sky2 = netdev_priv(dev);
+ u32 imask;
+
+ printk(KERN_ERR PFX "%s: %s descriptor error (hardware problem)\n",
+ dev ? dev->name : "<not registered>", rxtx);
- hw->intr_mask &= ~(port == 0 ? Y2_IS_IRQ_PHY1 : Y2_IS_IRQ_PHY2);
- sky2_write32(hw, B0_IMSK, hw->intr_mask);
+ imask = sky2_read32(hw, B0_IMSK);
+ imask &= ~mask;
+ sky2_write32(hw, B0_IMSK, imask);
- schedule_work(&sky2->phy_task);
+ if (dev) {
+ spin_lock(&sky2->phy_lock);
+ sky2_link_down(sky2);
+ spin_unlock(&sky2->phy_lock);
+ }
}
-static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs)
+static int sky2_poll(struct net_device *dev0, int *budget)
{
- struct sky2_hw *hw = dev_id;
- struct net_device *dev0 = hw->dev[0];
- u32 status;
+ struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw;
+ int work_limit = min(dev0->quota, *budget);
+ int work_done = 0;
+ u32 status = sky2_read32(hw, B0_Y2_SP_EISR);
- status = sky2_read32(hw, B0_Y2_SP_ISRC2);
- if (status == 0 || status == ~0)
- return IRQ_NONE;
+ if (unlikely(status & ~Y2_IS_STAT_BMU)) {
+ if (status & Y2_IS_HW_ERR)
+ sky2_hw_intr(hw);
- spin_lock(&hw->hw_lock);
- if (status & Y2_IS_HW_ERR)
- sky2_hw_intr(hw);
+ if (status & Y2_IS_IRQ_PHY1)
+ sky2_phy_intr(hw, 0);
- /* Do NAPI for Rx and Tx status */
- if (status & Y2_IS_STAT_BMU) {
- hw->intr_mask &= ~Y2_IS_STAT_BMU;
- sky2_write32(hw, B0_IMSK, hw->intr_mask);
+ if (status & Y2_IS_IRQ_PHY2)
+ sky2_phy_intr(hw, 1);
- if (likely(__netif_rx_schedule_prep(dev0))) {
- prefetch(&hw->st_le[hw->st_idx]);
- __netif_rx_schedule(dev0);
- }
+ if (status & Y2_IS_IRQ_MAC1)
+ sky2_mac_intr(hw, 0);
+
+ if (status & Y2_IS_IRQ_MAC2)
+ sky2_mac_intr(hw, 1);
+
+ if (status & Y2_IS_CHK_RX1)
+ sky2_descriptor_error(hw, 0, "receive", Y2_IS_CHK_RX1);
+
+ if (status & Y2_IS_CHK_RX2)
+ sky2_descriptor_error(hw, 1, "receive", Y2_IS_CHK_RX2);
+
+ if (status & Y2_IS_CHK_TXA1)
+ sky2_descriptor_error(hw, 0, "transmit", Y2_IS_CHK_TXA1);
+
+ if (status & Y2_IS_CHK_TXA2)
+ sky2_descriptor_error(hw, 1, "transmit", Y2_IS_CHK_TXA2);
}
- if (status & Y2_IS_IRQ_PHY1)
- sky2_phy_intr(hw, 0);
+ if (status & Y2_IS_STAT_BMU) {
+ work_done = sky2_status_intr(hw, work_limit);
+ *budget -= work_done;
+ dev0->quota -= work_done;
+
+ if (work_done >= work_limit)
+ return 1;
- if (status & Y2_IS_IRQ_PHY2)
- sky2_phy_intr(hw, 1);
+ sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ);
+ }
- if (status & Y2_IS_IRQ_MAC1)
- sky2_mac_intr(hw, 0);
+ netif_rx_complete(dev0);
- if (status & Y2_IS_IRQ_MAC2)
- sky2_mac_intr(hw, 1);
+ status = sky2_read32(hw, B0_Y2_SP_LISR);
+ return 0;
+}
- sky2_write32(hw, B0_Y2_SP_ICR, 2);
+static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct sky2_hw *hw = dev_id;
+ struct net_device *dev0 = hw->dev[0];
+ u32 status;
- spin_unlock(&hw->hw_lock);
+ /* Reading this register masks interrupts as a side effect */
+ status = sky2_read32(hw, B0_Y2_SP_ISRC2);
+ if (status == 0 || status == ~0)
+ return IRQ_NONE;
+
+ prefetch(&hw->st_le[hw->st_idx]);
+ if (likely(__netif_rx_schedule_prep(dev0)))
+ __netif_rx_schedule(dev0);
+ else
+ printk(KERN_DEBUG PFX "irq race detected\n");
return IRQ_HANDLED;
}
@@ -2238,6 +2210,23 @@ static int sky2_reset(struct sky2_hw *hw)
return -EOPNOTSUPP;
}
+ hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4;
+
+ /* This rev is really old, and requires untested workarounds */
+ if (hw->chip_id == CHIP_ID_YUKON_EC && hw->chip_rev == CHIP_REV_YU_EC_A1) {
+ printk(KERN_ERR PFX "%s: unsupported revision Yukon-%s (0x%x) rev %d\n",
+ pci_name(hw->pdev), yukon2_name[hw->chip_id - CHIP_ID_YUKON_XL],
+ hw->chip_id, hw->chip_rev);
+ return -EOPNOTSUPP;
+ }
+
+ /* This chip is new and not tested yet */
+ if (hw->chip_id == CHIP_ID_YUKON_EC_U) {
+ pr_info(PFX "%s: is a version of Yukon 2 chipset that has not been tested yet.\n",
+ pci_name(hw->pdev));
+ pr_info("Please report success/failure to maintainer <shemminger@osdl.org>\n");
+ }
+
/* disable ASF */
if (hw->chip_id <= CHIP_ID_YUKON_EC) {
sky2_write8(hw, B28_Y2_ASF_STAT_CMD, Y2_ASF_RESET);
@@ -2258,7 +2247,7 @@ static int sky2_reset(struct sky2_hw *hw)
sky2_write8(hw, B0_CTST, CS_MRST_CLR);
/* clear any PEX errors */
- if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP))
+ if (pci_find_capability(hw->pdev, PCI_CAP_ID_EXP))
sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xffffffffUL);
@@ -2271,7 +2260,6 @@ static int sky2_reset(struct sky2_hw *hw)
if (!(sky2_read8(hw, B2_Y2_CLK_GATE) & Y2_STATUS_LNK2_INAC))
++hw->ports;
}
- hw->chip_rev = (sky2_read8(hw, B2_MAC_CFG) & CFG_CHIP_R_MSK) >> 4;
sky2_set_power_state(hw, PCI_D0);
@@ -2337,30 +2325,18 @@ static int sky2_reset(struct sky2_hw *hw)
/* Set the list last index */
sky2_write16(hw, STAT_LAST_IDX, STATUS_RING_SIZE - 1);
- /* These status setup values are copied from SysKonnect's driver */
- if (is_ec_a1(hw)) {
- /* WA for dev. #4.3 */
- sky2_write16(hw, STAT_TX_IDX_TH, 0xfff); /* Tx Threshold */
-
- /* set Status-FIFO watermark */
- sky2_write8(hw, STAT_FIFO_WM, 0x21); /* WA for dev. #4.18 */
+ sky2_write16(hw, STAT_TX_IDX_TH, 10);
+ sky2_write8(hw, STAT_FIFO_WM, 16);
- /* set Status-FIFO ISR watermark */
- sky2_write8(hw, STAT_FIFO_ISR_WM, 0x07); /* WA for dev. #4.18 */
- sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 10000));
- } else {
- sky2_write16(hw, STAT_TX_IDX_TH, 10);
- sky2_write8(hw, STAT_FIFO_WM, 16);
-
- /* set Status-FIFO ISR watermark */
- if (hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev == 0)
- sky2_write8(hw, STAT_FIFO_ISR_WM, 4);
- else
- sky2_write8(hw, STAT_FIFO_ISR_WM, 16);
+ /* set Status-FIFO ISR watermark */
+ if (hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev == 0)
+ sky2_write8(hw, STAT_FIFO_ISR_WM, 4);
+ else
+ sky2_write8(hw, STAT_FIFO_ISR_WM, 16);
- sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 1000));
- sky2_write32(hw, STAT_ISR_TIMER_INI, sky2_us2clk(hw, 7));
- }
+ sky2_write32(hw, STAT_TX_TIMER_INI, sky2_us2clk(hw, 1000));
+ sky2_write32(hw, STAT_ISR_TIMER_INI, sky2_us2clk(hw, 20));
+ sky2_write32(hw, STAT_LEV_TIMER_INI, sky2_us2clk(hw, 100));
/* enable status unit */
sky2_write32(hw, STAT_CTRL, SC_STAT_OP_ON);
@@ -2743,7 +2719,7 @@ static int sky2_phys_id(struct net_device *dev, u32 data)
ms = data * 1000;
/* save initial values */
- down(&sky2->phy_sema);
+ spin_lock_bh(&sky2->phy_lock);
if (hw->chip_id == CHIP_ID_YUKON_XL) {
u16 pg = gm_phy_read(hw, port, PHY_MARV_EXT_ADR);
gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 3);
@@ -2759,9 +2735,9 @@ static int sky2_phys_id(struct net_device *dev, u32 data)
sky2_led(hw, port, onoff);
onoff = !onoff;
- up(&sky2->phy_sema);
+ spin_unlock_bh(&sky2->phy_lock);
interrupted = msleep_interruptible(250);
- down(&sky2->phy_sema);
+ spin_lock_bh(&sky2->phy_lock);
ms -= 250;
}
@@ -2776,7 +2752,7 @@ static int sky2_phys_id(struct net_device *dev, u32 data)
gm_phy_write(hw, port, PHY_MARV_LED_CTRL, ledctrl);
gm_phy_write(hw, port, PHY_MARV_LED_OVER, ledover);
}
- up(&sky2->phy_sema);
+ spin_unlock_bh(&sky2->phy_lock);
return 0;
}
@@ -2806,38 +2782,6 @@ static int sky2_set_pauseparam(struct net_device *dev,
return err;
}
-#ifdef CONFIG_PM
-static void sky2_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
-{
- struct sky2_port *sky2 = netdev_priv(dev);
-
- wol->supported = WAKE_MAGIC;
- wol->wolopts = sky2->wol ? WAKE_MAGIC : 0;
-}
-
-static int sky2_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
-{
- struct sky2_port *sky2 = netdev_priv(dev);
- struct sky2_hw *hw = sky2->hw;
-
- if (wol->wolopts != WAKE_MAGIC && wol->wolopts != 0)
- return -EOPNOTSUPP;
-
- sky2->wol = wol->wolopts == WAKE_MAGIC;
-
- if (sky2->wol) {
- memcpy_toio(hw->regs + WOL_MAC_ADDR, dev->dev_addr, ETH_ALEN);
-
- sky2_write16(hw, WOL_CTRL_STAT,
- WOL_CTL_ENA_PME_ON_MAGIC_PKT |
- WOL_CTL_ENA_MAGIC_PKT_UNIT);
- } else
- sky2_write16(hw, WOL_CTRL_STAT, WOL_CTL_DEFAULT);
-
- return 0;
-}
-#endif
-
static int sky2_get_coalesce(struct net_device *dev,
struct ethtool_coalesce *ecmd)
{
@@ -2878,19 +2822,11 @@ static int sky2_set_coalesce(struct net_device *dev,
{
struct sky2_port *sky2 = netdev_priv(dev);
struct sky2_hw *hw = sky2->hw;
- const u32 tmin = sky2_clk2us(hw, 1);
- const u32 tmax = 5000;
-
- if (ecmd->tx_coalesce_usecs != 0 &&
- (ecmd->tx_coalesce_usecs < tmin || ecmd->tx_coalesce_usecs > tmax))
- return -EINVAL;
-
- if (ecmd->rx_coalesce_usecs != 0 &&
- (ecmd->rx_coalesce_usecs < tmin || ecmd->rx_coalesce_usecs > tmax))
- return -EINVAL;
+ const u32 tmax = sky2_clk2us(hw, 0x0ffffff);
- if (ecmd->rx_coalesce_usecs_irq != 0 &&
- (ecmd->rx_coalesce_usecs_irq < tmin || ecmd->rx_coalesce_usecs_irq > tmax))
+ if (ecmd->tx_coalesce_usecs > tmax ||
+ ecmd->rx_coalesce_usecs > tmax ||
+ ecmd->rx_coalesce_usecs_irq > tmax)
return -EINVAL;
if (ecmd->tx_max_coalesced_frames >= TX_RING_SIZE-1)
@@ -3025,10 +2961,6 @@ static struct ethtool_ops sky2_ethtool_ops = {
.set_ringparam = sky2_set_ringparam,
.get_pauseparam = sky2_get_pauseparam,
.set_pauseparam = sky2_set_pauseparam,
-#ifdef CONFIG_PM
- .get_wol = sky2_get_wol,
- .set_wol = sky2_set_wol,
-#endif
.phys_id = sky2_phys_id,
.get_stats_count = sky2_get_stats_count,
.get_ethtool_stats = sky2_get_ethtool_stats,
@@ -3082,16 +3014,15 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw,
sky2->speed = -1;
sky2->advertising = sky2_supported_modes(hw);
- /* Receive checksum disabled for Yukon XL
+ /* Receive checksum disabled for Yukon XL
* because of observed problems with incorrect
* values when multiple packets are received in one interrupt
*/
sky2->rx_csum = (hw->chip_id != CHIP_ID_YUKON_XL);
- INIT_WORK(&sky2->phy_task, sky2_phy_task, sky2);
- init_MUTEX(&sky2->phy_sema);
+ spin_lock_init(&sky2->phy_lock);
sky2->tx_pending = TX_DEF_PENDING;
- sky2->rx_pending = is_ec_a1(hw) ? 8 : RX_DEF_PENDING;
+ sky2->rx_pending = RX_DEF_PENDING;
sky2->rx_bufsize = sky2_buf_size(ETH_DATA_LEN);
hw->dev[port] = dev;
@@ -3133,6 +3064,66 @@ static void __devinit sky2_show_addr(struct net_device *dev)
dev->dev_addr[3], dev->dev_addr[4], dev->dev_addr[5]);
}
+/* Handle software interrupt used during MSI test */
+static irqreturn_t __devinit sky2_test_intr(int irq, void *dev_id,
+ struct pt_regs *regs)
+{
+ struct sky2_hw *hw = dev_id;
+ u32 status = sky2_read32(hw, B0_Y2_SP_ISRC2);
+
+ if (status == 0)
+ return IRQ_NONE;
+
+ if (status & Y2_IS_IRQ_SW) {
+ hw->msi_detected = 1;
+ wake_up(&hw->msi_wait);
+ sky2_write8(hw, B0_CTST, CS_CL_SW_IRQ);
+ }
+ sky2_write32(hw, B0_Y2_SP_ICR, 2);
+
+ return IRQ_HANDLED;
+}
+
+/* Test interrupt path by forcing a software IRQ */
+static int __devinit sky2_test_msi(struct sky2_hw *hw)
+{
+ struct pci_dev *pdev = hw->pdev;
+ int err;
+
+ sky2_write32(hw, B0_IMSK, Y2_IS_IRQ_SW);
+
+ err = request_irq(pdev->irq, sky2_test_intr, SA_SHIRQ, DRV_NAME, hw);
+ if (err) {
+ printk(KERN_ERR PFX "%s: cannot assign irq %d\n",
+ pci_name(pdev), pdev->irq);
+ return err;
+ }
+
+	init_waitqueue_head(&hw->msi_wait);
+
+ sky2_write8(hw, B0_CTST, CS_ST_SW_IRQ);
+ wmb();
+
+ wait_event_timeout(hw->msi_wait, hw->msi_detected, HZ/10);
+
+ if (!hw->msi_detected) {
+ /* MSI test failed, go back to INTx mode */
+ printk(KERN_WARNING PFX "%s: No interrupt was generated using MSI, "
+ "switching to INTx mode. Please report this failure to "
+ "the PCI maintainer and include system chipset information.\n",
+ pci_name(pdev));
+
+ err = -EOPNOTSUPP;
+ sky2_write8(hw, B0_CTST, CS_CL_SW_IRQ);
+ }
+
+ sky2_write32(hw, B0_IMSK, 0);
+
+ free_irq(pdev->irq, hw);
+
+ return err;
+}
+
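The MSI test above is a classic wait-queue handshake: the probe path arms a wait queue, forces the device to raise an interrupt, and sleeps with a timeout, while the handler sets a flag and wakes it. A stripped-down sketch of just the handshake, assuming example_fire_irq() stands in for the CS_ST_SW_IRQ write:

#include <linux/wait.h>
#include <linux/jiffies.h>
#include <linux/errno.h>

static int example_detected;
static DECLARE_WAIT_QUEUE_HEAD(example_wait);

/* interrupt-handler side: record the event and wake the waiter */
static void example_irq_seen(void)
{
	example_detected = 1;
	wake_up(&example_wait);
}

/* hypothetical: make the device assert its interrupt line */
static void example_fire_irq(void)
{
}

/* probe side: 0 if the interrupt arrived within 100ms, else fall back */
static int example_wait_for_irq(void)
{
	example_fire_irq();
	wait_event_timeout(example_wait, example_detected, HZ / 10);
	return example_detected ? 0 : -EOPNOTSUPP;
}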
static int __devinit sky2_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -3201,7 +3192,6 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
goto err_out_free_hw;
}
hw->pm_cap = pm_cap;
- spin_lock_init(&hw->hw_lock);
#ifdef __BIG_ENDIAN
/* byte swap descriptors in hardware */
@@ -3254,21 +3244,29 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
}
}
- err = request_irq(pdev->irq, sky2_intr, SA_SHIRQ, DRV_NAME, hw);
+ if (!disable_msi && pci_enable_msi(pdev) == 0) {
+ err = sky2_test_msi(hw);
+ if (err == -EOPNOTSUPP)
+ pci_disable_msi(pdev);
+ else if (err)
+ goto err_out_unregister;
+ }
+
+ err = request_irq(pdev->irq, sky2_intr, SA_SHIRQ, DRV_NAME, hw);
if (err) {
printk(KERN_ERR PFX "%s: cannot assign irq %d\n",
pci_name(pdev), pdev->irq);
goto err_out_unregister;
}
- hw->intr_mask = Y2_IS_BASE;
- sky2_write32(hw, B0_IMSK, hw->intr_mask);
+ sky2_write32(hw, B0_IMSK, Y2_IS_BASE);
pci_set_drvdata(pdev, hw);
return 0;
err_out_unregister:
+ pci_disable_msi(pdev);
if (dev1) {
unregister_netdev(dev1);
free_netdev(dev1);
@@ -3311,6 +3309,7 @@ static void __devexit sky2_remove(struct pci_dev *pdev)
sky2_read8(hw, B0_CTST);
free_irq(pdev->irq, hw);
+ pci_disable_msi(pdev);
pci_free_consistent(pdev, STATUS_LE_BYTES, hw->st_le, hw->st_dma);
pci_release_regions(pdev);
pci_disable_device(pdev);
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h
index dce955c76f3..d63cd5a1b71 100644
--- a/drivers/net/sky2.h
+++ b/drivers/net/sky2.h
@@ -278,13 +278,11 @@ enum {
Y2_IS_CHK_TXS1 = 1<<1, /* Descriptor error TXS 1 */
Y2_IS_CHK_TXA1 = 1<<0, /* Descriptor error TXA 1 */
- Y2_IS_BASE = Y2_IS_HW_ERR | Y2_IS_STAT_BMU |
- Y2_IS_POLL_CHK | Y2_IS_TWSI_RDY |
- Y2_IS_IRQ_SW | Y2_IS_TIMINT,
- Y2_IS_PORT_1 = Y2_IS_IRQ_PHY1 | Y2_IS_IRQ_MAC1 |
- Y2_IS_CHK_RX1 | Y2_IS_CHK_TXA1 | Y2_IS_CHK_TXS1,
- Y2_IS_PORT_2 = Y2_IS_IRQ_PHY2 | Y2_IS_IRQ_MAC2 |
- Y2_IS_CHK_RX2 | Y2_IS_CHK_TXA2 | Y2_IS_CHK_TXS2,
+ Y2_IS_BASE = Y2_IS_HW_ERR | Y2_IS_STAT_BMU,
+ Y2_IS_PORT_1 = Y2_IS_IRQ_PHY1 | Y2_IS_IRQ_MAC1
+ | Y2_IS_CHK_TXA1 | Y2_IS_CHK_RX1,
+ Y2_IS_PORT_2 = Y2_IS_IRQ_PHY2 | Y2_IS_IRQ_MAC2
+ | Y2_IS_CHK_TXA2 | Y2_IS_CHK_RX2,
};
/* B2_IRQM_HWE_MSK 32 bit IRQ Moderation HW Error Mask */
@@ -1832,6 +1830,7 @@ struct sky2_port {
struct net_device *netdev;
unsigned port;
u32 msg_enable;
+ spinlock_t phy_lock;
spinlock_t tx_lock ____cacheline_aligned_in_smp;
struct tx_ring_info *tx_ring;
@@ -1840,7 +1839,6 @@ struct sky2_port {
u16 tx_prod; /* next le to use */
u32 tx_addr64;
u16 tx_pending;
- u16 tx_last_put;
u16 tx_last_mss;
struct ring_info *rx_ring ____cacheline_aligned_in_smp;
@@ -1849,7 +1847,6 @@ struct sky2_port {
u16 rx_next; /* next re to check */
u16 rx_put; /* next le index to use */
u16 rx_pending;
- u16 rx_last_put;
u16 rx_bufsize;
#ifdef SKY2_VLAN_TAG_USED
u16 rx_tag;
@@ -1865,20 +1862,15 @@ struct sky2_port {
u8 rx_pause;
u8 tx_pause;
u8 rx_csum;
- u8 wol;
struct net_device_stats net_stats;
- struct work_struct phy_task;
- struct semaphore phy_sema;
};
struct sky2_hw {
void __iomem *regs;
struct pci_dev *pdev;
struct net_device *dev[2];
- spinlock_t hw_lock;
- u32 intr_mask;
int pm_cap;
u8 chip_id;
@@ -1889,6 +1881,8 @@ struct sky2_hw {
struct sky2_status_le *st_le;
u32 st_idx;
dma_addr_t st_dma;
+ int msi_detected;
+ wait_queue_head_t msi_wait;
};
/* Register accessor for memory mapped device */
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 75e9b3b910c..0e9833adf9f 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -215,15 +215,12 @@ struct smc_local {
spinlock_t lock;
-#ifdef SMC_CAN_USE_DATACS
- u32 __iomem *datacs;
-#endif
-
#ifdef SMC_USE_PXA_DMA
/* DMA needs the physical address of the chip */
u_long physaddr;
#endif
void __iomem *base;
+ void __iomem *datacs;
};
#if SMC_DEBUG > 0
@@ -2104,9 +2101,8 @@ static int smc_enable_device(struct platform_device *pdev)
* Set the appropriate byte/word mode.
*/
ecsr = readb(addr + (ECSR << SMC_IO_SHIFT)) & ~ECSR_IOIS8;
-#ifndef SMC_CAN_USE_16BIT
- ecsr |= ECSR_IOIS8;
-#endif
+ if (!SMC_CAN_USE_16BIT)
+ ecsr |= ECSR_IOIS8;
writeb(ecsr, addr + (ECSR << SMC_IO_SHIFT));
local_irq_restore(flags);
@@ -2143,40 +2139,39 @@ static void smc_release_attrib(struct platform_device *pdev)
release_mem_region(res->start, ATTRIB_SIZE);
}
-#ifdef SMC_CAN_USE_DATACS
-static void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev)
+static inline void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev)
{
- struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32");
- struct smc_local *lp = netdev_priv(ndev);
+ if (SMC_CAN_USE_DATACS) {
+ struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32");
+ struct smc_local *lp = netdev_priv(ndev);
- if (!res)
- return;
+ if (!res)
+ return;
- if(!request_mem_region(res->start, SMC_DATA_EXTENT, CARDNAME)) {
- printk(KERN_INFO "%s: failed to request datacs memory region.\n", CARDNAME);
- return;
- }
+		if (!request_mem_region(res->start, SMC_DATA_EXTENT, CARDNAME)) {
+ printk(KERN_INFO "%s: failed to request datacs memory region.\n", CARDNAME);
+ return;
+ }
- lp->datacs = ioremap(res->start, SMC_DATA_EXTENT);
+ lp->datacs = ioremap(res->start, SMC_DATA_EXTENT);
+ }
}
static void smc_release_datacs(struct platform_device *pdev, struct net_device *ndev)
{
- struct smc_local *lp = netdev_priv(ndev);
- struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32");
+ if (SMC_CAN_USE_DATACS) {
+ struct smc_local *lp = netdev_priv(ndev);
+ struct resource * res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "smc91x-data32");
- if (lp->datacs)
- iounmap(lp->datacs);
+ if (lp->datacs)
+ iounmap(lp->datacs);
- lp->datacs = NULL;
+ lp->datacs = NULL;
- if (res)
- release_mem_region(res->start, SMC_DATA_EXTENT);
+ if (res)
+ release_mem_region(res->start, SMC_DATA_EXTENT);
+ }
}
-#else
-static void smc_request_datacs(struct platform_device *pdev, struct net_device *ndev) {}
-static void smc_release_datacs(struct platform_device *pdev, struct net_device *ndev) {}
-#endif
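
The rewrite above replaces #ifdef SMC_CAN_USE_DATACS with an ordinary `if (SMC_CAN_USE_DATACS)`. With a compile-time constant condition the compiler discards the dead branch just as effectively, but the body stays visible to the parser, so it keeps being type-checked even on configurations that disable it. A tiny sketch of the idiom (names are hypothetical):

#define EXAMPLE_HAVE_FEATURE 0	/* set by the platform, 0 or 1 */

static void example_feature_setup(void)
{
	/* feature-specific work */
}

static void example_init(void)
{
	if (EXAMPLE_HAVE_FEATURE) {
		/* compiled out when the constant is 0, but unlike an
		 * #ifdef block it is still parsed and type-checked */
		example_feature_setup();
	}
}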
/*
* smc_init(void)
diff --git a/drivers/net/smc91x.h b/drivers/net/smc91x.h
index e0efd1964e7..e1be1af5120 100644
--- a/drivers/net/smc91x.h
+++ b/drivers/net/smc91x.h
@@ -275,7 +275,10 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
#define SMC_insw(a,r,p,l) readsw ((void*) ((a) + (r)), p, l)
#define SMC_outw(v,a,r) ({ writew ((v), (a) + (r)); LPD7A40X_IOBARRIER; })
-static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l)
+#define SMC_outsw LPD7A40X_SMC_outsw
+
+static inline void LPD7A40X_SMC_outsw(unsigned long a, int r,
+ unsigned char* p, int l)
{
unsigned short* ps = (unsigned short*) p;
while (l-- > 0) {
@@ -342,10 +345,6 @@ static inline void SMC_outsw (unsigned long a, int r, unsigned char* p, int l)
#endif
-#ifndef SMC_IRQ_FLAGS
-#define SMC_IRQ_FLAGS SA_TRIGGER_RISING
-#endif
-
#ifdef SMC_USE_PXA_DMA
/*
* Let's use the DMA engine on the XScale PXA2xx for RX packets. This is
@@ -441,10 +440,85 @@ smc_pxa_dma_irq(int dma, void *dummy, struct pt_regs *regs)
#endif /* SMC_USE_PXA_DMA */
-/* Because of bank switching, the LAN91x uses only 16 I/O ports */
+/*
+ * Everything a particular hardware setup needs should have been defined
+ * at this point. Add stubs for the undefined cases, mainly to avoid
+ * compilation warnings (the stubs are optimized away anyway) and to
+ * turn any buggy use of them into a loud failure.
+ */
+
+#if ! SMC_CAN_USE_32BIT
+#define SMC_inl(ioaddr, reg) ({ BUG(); 0; })
+#define SMC_outl(x, ioaddr, reg) BUG()
+#define SMC_insl(a, r, p, l) BUG()
+#define SMC_outsl(a, r, p, l) BUG()
+#endif
+
+#if !defined(SMC_insl) || !defined(SMC_outsl)
+#define SMC_insl(a, r, p, l) BUG()
+#define SMC_outsl(a, r, p, l) BUG()
+#endif
+
+#if ! SMC_CAN_USE_16BIT
+
+/*
+ * Any 16-bit access is performed with two 8-bit accesses if the hardware
+ * can't do it directly. Most registers are 16-bit so those are mandatory.
+ */
+#define SMC_outw(x, ioaddr, reg) \
+ do { \
+ unsigned int __val16 = (x); \
+ SMC_outb( __val16, ioaddr, reg ); \
+ SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\
+ } while (0)
+#define SMC_inw(ioaddr, reg) \
+ ({ \
+ unsigned int __val16; \
+ __val16 = SMC_inb( ioaddr, reg ); \
+ __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \
+ __val16; \
+ })
+
+#define SMC_insw(a, r, p, l) BUG()
+#define SMC_outsw(a, r, p, l) BUG()
+
+#endif
+
+#if !defined(SMC_insw) || !defined(SMC_outsw)
+#define SMC_insw(a, r, p, l) BUG()
+#define SMC_outsw(a, r, p, l) BUG()
+#endif
+
+#if ! SMC_CAN_USE_8BIT
+#define SMC_inb(ioaddr, reg) ({ BUG(); 0; })
+#define SMC_outb(x, ioaddr, reg) BUG()
+#define SMC_insb(a, r, p, l) BUG()
+#define SMC_outsb(a, r, p, l) BUG()
+#endif
+
+#if !defined(SMC_insb) || !defined(SMC_outsb)
+#define SMC_insb(a, r, p, l) BUG()
+#define SMC_outsb(a, r, p, l) BUG()
+#endif
+
+#ifndef SMC_CAN_USE_DATACS
+#define SMC_CAN_USE_DATACS 0
+#endif
+
#ifndef SMC_IO_SHIFT
#define SMC_IO_SHIFT 0
#endif
+
+#ifndef SMC_IRQ_FLAGS
+#define SMC_IRQ_FLAGS SA_TRIGGER_RISING
+#endif
+
+#ifndef SMC_INTERRUPT_PREAMBLE
+#define SMC_INTERRUPT_PREAMBLE
+#endif
+
+
+/* Because of bank switching, the LAN91x uses only 16 I/O ports */
#define SMC_IO_EXTENT (16 << SMC_IO_SHIFT)
#define SMC_DATA_EXTENT (4)
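
The stub block above means a platform header only has to define the accessors it actually supports; everything else collapses to BUG(). As a hedged illustration, a hypothetical platform limited to 16-bit bus cycles might provide:

/* illustrative only; real platform blocks live earlier in smc91x.h */
#define SMC_CAN_USE_8BIT	0
#define SMC_CAN_USE_16BIT	1
#define SMC_CAN_USE_32BIT	0

#define SMC_inw(a, r)		readw((a) + (r))
#define SMC_outw(v, a, r)	writew(v, (a) + (r))
#define SMC_insw(a, r, p, l)	readsw((a) + (r), p, l)
#define SMC_outsw(a, r, p, l)	writesw((a) + (r), p, l)

/* 8-bit and 32-bit accessors are deliberately left undefined: the
 * stub section turns any stray use into a loud BUG() instead of a
 * silent bad bus access. */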
@@ -817,6 +891,11 @@ static const char * chip_ids[ 16 ] = {
* Note: the following macros do *not* select the bank -- this must
* be done separately as needed in the main code. The SMC_REG() macro
* only uses the bank argument for debugging purposes (when enabled).
+ *
+ * Note: although inline functions would be safer, everything leading up
+ * to this should preferably stay a macro so that BUG() reports the line
+ * number in the core source code, since we're interested in the top
+ * call site, not in any inline function location.
*/
#if SMC_DEBUG > 0
@@ -834,62 +913,142 @@ static const char * chip_ids[ 16 ] = {
#define SMC_REG(reg, bank) (reg<<SMC_IO_SHIFT)
#endif
-#if SMC_CAN_USE_8BIT
-#define SMC_GET_PN() SMC_inb( ioaddr, PN_REG )
-#define SMC_SET_PN(x) SMC_outb( x, ioaddr, PN_REG )
-#define SMC_GET_AR() SMC_inb( ioaddr, AR_REG )
-#define SMC_GET_TXFIFO() SMC_inb( ioaddr, TXFIFO_REG )
-#define SMC_GET_RXFIFO() SMC_inb( ioaddr, RXFIFO_REG )
-#define SMC_GET_INT() SMC_inb( ioaddr, INT_REG )
-#define SMC_ACK_INT(x) SMC_outb( x, ioaddr, INT_REG )
-#define SMC_GET_INT_MASK() SMC_inb( ioaddr, IM_REG )
-#define SMC_SET_INT_MASK(x) SMC_outb( x, ioaddr, IM_REG )
-#else
-#define SMC_GET_PN() (SMC_inw( ioaddr, PN_REG ) & 0xFF)
-#define SMC_SET_PN(x) SMC_outw( x, ioaddr, PN_REG )
-#define SMC_GET_AR() (SMC_inw( ioaddr, PN_REG ) >> 8)
-#define SMC_GET_TXFIFO() (SMC_inw( ioaddr, TXFIFO_REG ) & 0xFF)
-#define SMC_GET_RXFIFO() (SMC_inw( ioaddr, TXFIFO_REG ) >> 8)
-#define SMC_GET_INT() (SMC_inw( ioaddr, INT_REG ) & 0xFF)
+/*
+ * Hack Alert: Some setups just can't write 8 or 16 bits reliably when not
+ * aligned to a 32 bit boundary. I tell you that does exist!
+ * Fortunately the affected register accesses can be easily worked around
+ * since we can write zeroes to the preceding 16 bits without adverse
+ * effects and use a 32-bit access.
+ *
+ * Enforce it on any 32-bit capable setup for now.
+ */
+#define SMC_MUST_ALIGN_WRITE SMC_CAN_USE_32BIT
+
+#define SMC_GET_PN() \
+ ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, PN_REG)) \
+ : (SMC_inw(ioaddr, PN_REG) & 0xFF) )
+
+#define SMC_SET_PN(x) \
+ do { \
+ if (SMC_MUST_ALIGN_WRITE) \
+ SMC_outl((x)<<16, ioaddr, SMC_REG(0, 2)); \
+ else if (SMC_CAN_USE_8BIT) \
+ SMC_outb(x, ioaddr, PN_REG); \
+ else \
+ SMC_outw(x, ioaddr, PN_REG); \
+ } while (0)
+
+#define SMC_GET_AR() \
+ ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, AR_REG)) \
+ : (SMC_inw(ioaddr, PN_REG) >> 8) )
+
+#define SMC_GET_TXFIFO() \
+ ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, TXFIFO_REG)) \
+ : (SMC_inw(ioaddr, TXFIFO_REG) & 0xFF) )
+
+#define SMC_GET_RXFIFO() \
+ ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, RXFIFO_REG)) \
+ : (SMC_inw(ioaddr, TXFIFO_REG) >> 8) )
+
+#define SMC_GET_INT() \
+ ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, INT_REG)) \
+ : (SMC_inw(ioaddr, INT_REG) & 0xFF) )
+
#define SMC_ACK_INT(x) \
do { \
- unsigned long __flags; \
- int __mask; \
- local_irq_save(__flags); \
- __mask = SMC_inw( ioaddr, INT_REG ) & ~0xff; \
- SMC_outw( __mask | (x), ioaddr, INT_REG ); \
- local_irq_restore(__flags); \
+ if (SMC_CAN_USE_8BIT) \
+ SMC_outb(x, ioaddr, INT_REG); \
+ else { \
+ unsigned long __flags; \
+ int __mask; \
+ local_irq_save(__flags); \
+ __mask = SMC_inw( ioaddr, INT_REG ) & ~0xff; \
+ SMC_outw( __mask | (x), ioaddr, INT_REG ); \
+ local_irq_restore(__flags); \
+ } \
+ } while (0)
+
+#define SMC_GET_INT_MASK() \
+ ( SMC_CAN_USE_8BIT ? (SMC_inb(ioaddr, IM_REG)) \
+ : (SMC_inw( ioaddr, INT_REG ) >> 8) )
+
+#define SMC_SET_INT_MASK(x) \
+ do { \
+ if (SMC_CAN_USE_8BIT) \
+ SMC_outb(x, ioaddr, IM_REG); \
+ else \
+ SMC_outw((x) << 8, ioaddr, INT_REG); \
+ } while (0)
+
+#define SMC_CURRENT_BANK() SMC_inw(ioaddr, BANK_SELECT)
+
+#define SMC_SELECT_BANK(x) \
+ do { \
+ if (SMC_MUST_ALIGN_WRITE) \
+ SMC_outl((x)<<16, ioaddr, 12<<SMC_IO_SHIFT); \
+ else \
+ SMC_outw(x, ioaddr, BANK_SELECT); \
+ } while (0)
+
+#define SMC_GET_BASE() SMC_inw(ioaddr, BASE_REG)
+
+#define SMC_SET_BASE(x) SMC_outw(x, ioaddr, BASE_REG)
+
+#define SMC_GET_CONFIG() SMC_inw(ioaddr, CONFIG_REG)
+
+#define SMC_SET_CONFIG(x) SMC_outw(x, ioaddr, CONFIG_REG)
+
+#define SMC_GET_COUNTER() SMC_inw(ioaddr, COUNTER_REG)
+
+#define SMC_GET_CTL() SMC_inw(ioaddr, CTL_REG)
+
+#define SMC_SET_CTL(x) SMC_outw(x, ioaddr, CTL_REG)
+
+#define SMC_GET_MII() SMC_inw(ioaddr, MII_REG)
+
+#define SMC_SET_MII(x) SMC_outw(x, ioaddr, MII_REG)
+
+#define SMC_GET_MIR() SMC_inw(ioaddr, MIR_REG)
+
+#define SMC_SET_MIR(x) SMC_outw(x, ioaddr, MIR_REG)
+
+#define SMC_GET_MMU_CMD() SMC_inw(ioaddr, MMU_CMD_REG)
+
+#define SMC_SET_MMU_CMD(x) SMC_outw(x, ioaddr, MMU_CMD_REG)
+
+#define SMC_GET_FIFO() SMC_inw(ioaddr, FIFO_REG)
+
+#define SMC_GET_PTR() SMC_inw(ioaddr, PTR_REG)
+
+#define SMC_SET_PTR(x) \
+ do { \
+ if (SMC_MUST_ALIGN_WRITE) \
+ SMC_outl((x)<<16, ioaddr, SMC_REG(4, 2)); \
+ else \
+ SMC_outw(x, ioaddr, PTR_REG); \
} while (0)
-#define SMC_GET_INT_MASK() (SMC_inw( ioaddr, INT_REG ) >> 8)
-#define SMC_SET_INT_MASK(x) SMC_outw( (x) << 8, ioaddr, INT_REG )
-#endif
-#define SMC_CURRENT_BANK() SMC_inw( ioaddr, BANK_SELECT )
-#define SMC_SELECT_BANK(x) SMC_outw( x, ioaddr, BANK_SELECT )
-#define SMC_GET_BASE() SMC_inw( ioaddr, BASE_REG )
-#define SMC_SET_BASE(x) SMC_outw( x, ioaddr, BASE_REG )
-#define SMC_GET_CONFIG() SMC_inw( ioaddr, CONFIG_REG )
-#define SMC_SET_CONFIG(x) SMC_outw( x, ioaddr, CONFIG_REG )
-#define SMC_GET_COUNTER() SMC_inw( ioaddr, COUNTER_REG )
-#define SMC_GET_CTL() SMC_inw( ioaddr, CTL_REG )
-#define SMC_SET_CTL(x) SMC_outw( x, ioaddr, CTL_REG )
-#define SMC_GET_MII() SMC_inw( ioaddr, MII_REG )
-#define SMC_SET_MII(x) SMC_outw( x, ioaddr, MII_REG )
-#define SMC_GET_MIR() SMC_inw( ioaddr, MIR_REG )
-#define SMC_SET_MIR(x) SMC_outw( x, ioaddr, MIR_REG )
-#define SMC_GET_MMU_CMD() SMC_inw( ioaddr, MMU_CMD_REG )
-#define SMC_SET_MMU_CMD(x) SMC_outw( x, ioaddr, MMU_CMD_REG )
-#define SMC_GET_FIFO() SMC_inw( ioaddr, FIFO_REG )
-#define SMC_GET_PTR() SMC_inw( ioaddr, PTR_REG )
-#define SMC_SET_PTR(x) SMC_outw( x, ioaddr, PTR_REG )
-#define SMC_GET_EPH_STATUS() SMC_inw( ioaddr, EPH_STATUS_REG )
-#define SMC_GET_RCR() SMC_inw( ioaddr, RCR_REG )
-#define SMC_SET_RCR(x) SMC_outw( x, ioaddr, RCR_REG )
-#define SMC_GET_REV() SMC_inw( ioaddr, REV_REG )
-#define SMC_GET_RPC() SMC_inw( ioaddr, RPC_REG )
-#define SMC_SET_RPC(x) SMC_outw( x, ioaddr, RPC_REG )
-#define SMC_GET_TCR() SMC_inw( ioaddr, TCR_REG )
-#define SMC_SET_TCR(x) SMC_outw( x, ioaddr, TCR_REG )
+#define SMC_GET_EPH_STATUS() SMC_inw(ioaddr, EPH_STATUS_REG)
+
+#define SMC_GET_RCR() SMC_inw(ioaddr, RCR_REG)
+
+#define SMC_SET_RCR(x) SMC_outw(x, ioaddr, RCR_REG)
+
+#define SMC_GET_REV() SMC_inw(ioaddr, REV_REG)
+
+#define SMC_GET_RPC() SMC_inw(ioaddr, RPC_REG)
+
+#define SMC_SET_RPC(x) \
+ do { \
+ if (SMC_MUST_ALIGN_WRITE) \
+ SMC_outl((x)<<16, ioaddr, SMC_REG(8, 0)); \
+ else \
+ SMC_outw(x, ioaddr, RPC_REG); \
+ } while (0)
+
+#define SMC_GET_TCR() SMC_inw(ioaddr, TCR_REG)
+
+#define SMC_SET_TCR(x) SMC_outw(x, ioaddr, TCR_REG)
#ifndef SMC_GET_MAC_ADDR
#define SMC_GET_MAC_ADDR(addr) \
@@ -920,151 +1079,84 @@ static const char * chip_ids[ 16 ] = {
SMC_outw( mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4 ); \
} while (0)
-#if SMC_CAN_USE_32BIT
-/*
- * Some setups just can't write 8 or 16 bits reliably when not aligned
- * to a 32 bit boundary. I tell you that exists!
- * We re-do the ones here that can be easily worked around if they can have
- * their low parts written to 0 without adverse effects.
- */
-#undef SMC_SELECT_BANK
-#define SMC_SELECT_BANK(x) SMC_outl( (x)<<16, ioaddr, 12<<SMC_IO_SHIFT )
-#undef SMC_SET_RPC
-#define SMC_SET_RPC(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(8, 0) )
-#undef SMC_SET_PN
-#define SMC_SET_PN(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(0, 2) )
-#undef SMC_SET_PTR
-#define SMC_SET_PTR(x) SMC_outl( (x)<<16, ioaddr, SMC_REG(4, 2) )
-#endif
-
-#if SMC_CAN_USE_32BIT
-#define SMC_PUT_PKT_HDR(status, length) \
- SMC_outl( (status) | (length) << 16, ioaddr, DATA_REG )
-#define SMC_GET_PKT_HDR(status, length) \
- do { \
- unsigned int __val = SMC_inl( ioaddr, DATA_REG ); \
- (status) = __val & 0xffff; \
- (length) = __val >> 16; \
- } while (0)
-#else
#define SMC_PUT_PKT_HDR(status, length) \
do { \
- SMC_outw( status, ioaddr, DATA_REG ); \
- SMC_outw( length, ioaddr, DATA_REG ); \
- } while (0)
-#define SMC_GET_PKT_HDR(status, length) \
- do { \
- (status) = SMC_inw( ioaddr, DATA_REG ); \
- (length) = SMC_inw( ioaddr, DATA_REG ); \
+ if (SMC_CAN_USE_32BIT) \
+ SMC_outl((status) | (length)<<16, ioaddr, DATA_REG); \
+ else { \
+ SMC_outw(status, ioaddr, DATA_REG); \
+ SMC_outw(length, ioaddr, DATA_REG); \
+ } \
} while (0)
-#endif
-#if SMC_CAN_USE_32BIT
-#define _SMC_PUSH_DATA(p, l) \
+#define SMC_GET_PKT_HDR(status, length) \
do { \
- char *__ptr = (p); \
- int __len = (l); \
- if (__len >= 2 && (unsigned long)__ptr & 2) { \
- __len -= 2; \
- SMC_outw( *(u16 *)__ptr, ioaddr, DATA_REG ); \
- __ptr += 2; \
- } \
- SMC_outsl( ioaddr, DATA_REG, __ptr, __len >> 2); \
- if (__len & 2) { \
- __ptr += (__len & ~3); \
- SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \
+ if (SMC_CAN_USE_32BIT) { \
+ unsigned int __val = SMC_inl(ioaddr, DATA_REG); \
+ (status) = __val & 0xffff; \
+ (length) = __val >> 16; \
+ } else { \
+ (status) = SMC_inw(ioaddr, DATA_REG); \
+ (length) = SMC_inw(ioaddr, DATA_REG); \
} \
} while (0)
-#define _SMC_PULL_DATA(p, l) \
- do { \
- char *__ptr = (p); \
- int __len = (l); \
- if ((unsigned long)__ptr & 2) { \
- /* \
- * We want 32bit alignment here. \
- * Since some buses perform a full 32bit \
- * fetch even for 16bit data we can't use \
- * SMC_inw() here. Back both source (on chip \
- * and destination) pointers of 2 bytes. \
- */ \
- __ptr -= 2; \
- __len += 2; \
- SMC_SET_PTR( 2|PTR_READ|PTR_RCV|PTR_AUTOINC ); \
- } \
- __len += 2; \
- SMC_insl( ioaddr, DATA_REG, __ptr, __len >> 2); \
- } while (0)
-#elif SMC_CAN_USE_16BIT
-#define _SMC_PUSH_DATA(p, l) SMC_outsw( ioaddr, DATA_REG, p, (l) >> 1 )
-#define _SMC_PULL_DATA(p, l) SMC_insw ( ioaddr, DATA_REG, p, (l) >> 1 )
-#elif SMC_CAN_USE_8BIT
-#define _SMC_PUSH_DATA(p, l) SMC_outsb( ioaddr, DATA_REG, p, l )
-#define _SMC_PULL_DATA(p, l) SMC_insb ( ioaddr, DATA_REG, p, l )
-#endif
-#if ! SMC_CAN_USE_16BIT
-#define SMC_outw(x, ioaddr, reg) \
+#define SMC_PUSH_DATA(p, l) \
do { \
- unsigned int __val16 = (x); \
- SMC_outb( __val16, ioaddr, reg ); \
- SMC_outb( __val16 >> 8, ioaddr, reg + (1 << SMC_IO_SHIFT));\
+ if (SMC_CAN_USE_32BIT) { \
+ void *__ptr = (p); \
+ int __len = (l); \
+ void *__ioaddr = ioaddr; \
+ if (__len >= 2 && (unsigned long)__ptr & 2) { \
+ __len -= 2; \
+ SMC_outw(*(u16 *)__ptr, ioaddr, DATA_REG); \
+ __ptr += 2; \
+ } \
+ if (SMC_CAN_USE_DATACS && lp->datacs) \
+ __ioaddr = lp->datacs; \
+ SMC_outsl(__ioaddr, DATA_REG, __ptr, __len>>2); \
+ if (__len & 2) { \
+ __ptr += (__len & ~3); \
+ SMC_outw(*((u16 *)__ptr), ioaddr, DATA_REG); \
+ } \
+ } else if (SMC_CAN_USE_16BIT) \
+ SMC_outsw(ioaddr, DATA_REG, p, (l) >> 1); \
+ else if (SMC_CAN_USE_8BIT) \
+ SMC_outsb(ioaddr, DATA_REG, p, l); \
} while (0)
-#define SMC_inw(ioaddr, reg) \
- ({ \
- unsigned int __val16; \
- __val16 = SMC_inb( ioaddr, reg ); \
- __val16 |= SMC_inb( ioaddr, reg + (1 << SMC_IO_SHIFT)) << 8; \
- __val16; \
- })
-#endif
-
-#ifdef SMC_CAN_USE_DATACS
-#define SMC_PUSH_DATA(p, l) \
- if ( lp->datacs ) { \
- unsigned char *__ptr = (p); \
- int __len = (l); \
- if (__len >= 2 && (unsigned long)__ptr & 2) { \
- __len -= 2; \
- SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \
- __ptr += 2; \
- } \
- outsl(lp->datacs, __ptr, __len >> 2); \
- if (__len & 2) { \
- __ptr += (__len & ~3); \
- SMC_outw( *((u16 *)__ptr), ioaddr, DATA_REG ); \
- } \
- } else { \
- _SMC_PUSH_DATA(p, l); \
- }
#define SMC_PULL_DATA(p, l) \
- if ( lp->datacs ) { \
- unsigned char *__ptr = (p); \
- int __len = (l); \
- if ((unsigned long)__ptr & 2) { \
- /* \
- * We want 32bit alignment here. \
- * Since some buses perform a full 32bit \
- * fetch even for 16bit data we can't use \
- * SMC_inw() here. Back both source (on chip \
- * and destination) pointers of 2 bytes. \
- */ \
- __ptr -= 2; \
+ do { \
+ if (SMC_CAN_USE_32BIT) { \
+ void *__ptr = (p); \
+ int __len = (l); \
+ void *__ioaddr = ioaddr; \
+ if ((unsigned long)__ptr & 2) { \
+ /* \
+ * We want 32bit alignment here. \
+ * Since some buses perform a full \
+ * 32bit fetch even for 16bit data \
+ * we can't use SMC_inw() here. \
+			 * Back both the source (on-chip) and \
+			 * destination pointers by 2 bytes. \
+			 * This is possible since the call to \
+			 * SMC_GET_PKT_HDR() already advanced \
+			 * the source pointer by 4 bytes, and \
+			 * the skb_reserve(skb, 2) advanced \
+			 * the destination pointer by 2 bytes. \
+ */ \
+ __ptr -= 2; \
+ __len += 2; \
+ SMC_SET_PTR(2|PTR_READ|PTR_RCV|PTR_AUTOINC); \
+ } \
+ if (SMC_CAN_USE_DATACS && lp->datacs) \
+ __ioaddr = lp->datacs; \
__len += 2; \
- SMC_SET_PTR( 2|PTR_READ|PTR_RCV|PTR_AUTOINC ); \
- } \
- __len += 2; \
- insl( lp->datacs, __ptr, __len >> 2); \
- } else { \
- _SMC_PULL_DATA(p, l); \
- }
-#else
-#define SMC_PUSH_DATA(p, l) _SMC_PUSH_DATA(p, l)
-#define SMC_PULL_DATA(p, l) _SMC_PULL_DATA(p, l)
-#endif
-
-#if !defined (SMC_INTERRUPT_PREAMBLE)
-# define SMC_INTERRUPT_PREAMBLE
-#endif
+ SMC_insl(__ioaddr, DATA_REG, __ptr, __len>>2); \
+ } else if (SMC_CAN_USE_16BIT) \
+ SMC_insw(ioaddr, DATA_REG, p, (l) >> 1); \
+ else if (SMC_CAN_USE_8BIT) \
+ SMC_insb(ioaddr, DATA_REG, p, l); \
+ } while (0)
#endif /* _SMC91X_H_ */
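
A worked example of the SMC_MUST_ALIGN_WRITE trick, under the assumptions that SMC_IO_SHIFT is 0 and the bus is little-endian: SMC_SELECT_BANK(x) issues a 32-bit store of x<<16 at offset 12, so bytes 12-13 (the preceding 16-bit register, which tolerates zeroes) receive 0 and bytes 14-15 (BANK_SELECT) receive x. A small host-side demonstration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	union { uint8_t b[4]; uint32_t w; } r;
	uint16_t bank = 0x3;

	/* the aligned 32-bit store SMC_outl((x)<<16, ...) boils down to */
	r.w = (uint32_t)bank << 16;

	/* on little-endian prints "00 00 03 00": zeroes land in the low
	 * halfword and the bank number in the high halfword, i.e. in
	 * the 16-bit register two bytes above the aligned base */
	printf("%02x %02x %02x %02x\n", r.b[0], r.b[1], r.b[2], r.b[3]);
	return 0;
}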
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index ff79e68b347..7b82ff090d4 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -3639,7 +3639,7 @@ iscsi_tcp_init(void)
taskcache = kmem_cache_create("iscsi_taskcache",
sizeof(struct iscsi_data_task), 0,
- SLAB_HWCACHE_ALIGN | SLAB_NO_REAP, NULL, NULL);
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
if (!taskcache)
return -ENOMEM;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index a8b05ce5de5..7405d0df95d 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1139,32 +1139,6 @@ sg_fasync(int fd, struct file *filp, int mode)
return (retval < 0) ? retval : 0;
}
-/* When startFinish==1 increments page counts for pages other than the
- first of scatter gather elements obtained from alloc_pages().
- When startFinish==0 decrements ... */
-static void
-sg_rb_correct4mmap(Sg_scatter_hold * rsv_schp, int startFinish)
-{
- struct scatterlist *sg = rsv_schp->buffer;
- struct page *page;
- int k, m;
-
- SCSI_LOG_TIMEOUT(3, printk("sg_rb_correct4mmap: startFinish=%d, scatg=%d\n",
- startFinish, rsv_schp->k_use_sg));
- /* N.B. correction _not_ applied to base page of each allocation */
- for (k = 0; k < rsv_schp->k_use_sg; ++k, ++sg) {
- for (m = PAGE_SIZE; m < sg->length; m += PAGE_SIZE) {
- page = sg->page;
- if (startFinish)
- get_page(page);
- else {
- if (page_count(page) > 0)
- __put_page(page);
- }
- }
- }
-}
-
static struct page *
sg_vma_nopage(struct vm_area_struct *vma, unsigned long addr, int *type)
{
@@ -1236,10 +1210,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
sa += len;
}
- if (0 == sfp->mmap_called) {
- sg_rb_correct4mmap(rsv_schp, 1); /* do only once per fd lifetime */
- sfp->mmap_called = 1;
- }
+ sfp->mmap_called = 1;
vma->vm_flags |= VM_RESERVED;
vma->vm_private_data = sfp;
vma->vm_ops = &sg_mmap_vm_ops;
@@ -2388,8 +2359,6 @@ __sg_remove_sfp(Sg_device * sdp, Sg_fd * sfp)
SCSI_LOG_TIMEOUT(6,
printk("__sg_remove_sfp: bufflen=%d, k_use_sg=%d\n",
(int) sfp->reserve.bufflen, (int) sfp->reserve.k_use_sg));
- if (sfp->mmap_called)
- sg_rb_correct4mmap(&sfp->reserve, 0); /* undo correction */
sg_remove_scat(&sfp->reserve);
}
sfp->parentdp = NULL;
@@ -2471,9 +2440,9 @@ sg_page_malloc(int rqSz, int lowDma, int *retSzp)
return resp;
if (lowDma)
- page_mask = GFP_ATOMIC | GFP_DMA | __GFP_NOWARN;
+ page_mask = GFP_ATOMIC | GFP_DMA | __GFP_COMP | __GFP_NOWARN;
else
- page_mask = GFP_ATOMIC | __GFP_NOWARN;
+ page_mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
for (order = 0, a_size = PAGE_SIZE; a_size < rqSz;
order++, a_size <<= 1) ;
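
The sg.c change works because __GFP_COMP makes the allocator hand back the higher-order block as one compound page: get_page()/put_page() on any constituent page then operate on the head page's refcount, which is what made the deleted sg_rb_correct4mmap() bookkeeping unnecessary. A minimal sketch of such an allocation (hypothetical helper, not from the patch):

#include <linux/gfp.h>
#include <linux/mm.h>

static void *example_alloc_compound(unsigned int order, int low_dma)
{
	gfp_t mask = GFP_ATOMIC | __GFP_COMP | __GFP_NOWARN;
	struct page *page;

	if (low_dma)
		mask |= GFP_DMA;

	/* one compound page: safe to mmap and refcount per-subpage */
	page = alloc_pages(mask, order);
	return page ? page_address(page) : NULL;
}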
diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 89e5413cc2a..c66ef96c71b 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -866,7 +866,7 @@ config SERIAL_M32R_PLDSIO
config SERIAL_TXX9
bool "TMPTX39XX/49XX SIO support"
- depends HAS_TXX9_SERIAL && BROKEN
+ depends HAS_TXX9_SERIAL
select SERIAL_CORE
default y
diff --git a/drivers/serial/serial_txx9.c b/drivers/serial/serial_txx9.c
index ee98a867bc6..141173efd46 100644
--- a/drivers/serial/serial_txx9.c
+++ b/drivers/serial/serial_txx9.c
@@ -33,6 +33,10 @@
* 1.02 Cleanup. (import 8250.c changes)
* 1.03 Fix low-latency mode. (import 8250.c changes)
* 1.04 Remove usage of deprecated functions, cleanup.
+ *	1.05	Stricter checks in verify_port. Cleanup.
+ *	1.06	Do not insert a char caused by a previous overrun.
+ *		Fix some spin_locks.
+ *		Do not call uart_add_one_port for absent ports.
*/
#include <linux/config.h>
@@ -57,7 +61,7 @@
#include <asm/io.h>
#include <asm/irq.h>
-static char *serial_version = "1.04";
+static char *serial_version = "1.06";
static char *serial_name = "TX39/49 Serial driver";
#define PASS_LIMIT 256
@@ -94,6 +98,8 @@ static char *serial_name = "TX39/49 Serial driver";
#define UART_NR 4
#endif
+#define HIGH_BITS_OFFSET ((sizeof(long)-sizeof(int))*8)
+
struct uart_txx9_port {
struct uart_port port;
@@ -210,7 +216,7 @@ static inline unsigned int sio_in(struct uart_txx9_port *up, int offset)
{
switch (up->port.iotype) {
default:
- return *(volatile u32 *)(up->port.membase + offset);
+ return __raw_readl(up->port.membase + offset);
case UPIO_PORT:
return inl(up->port.iobase + offset);
}
@@ -221,7 +227,7 @@ sio_out(struct uart_txx9_port *up, int offset, int value)
{
switch (up->port.iotype) {
default:
- *(volatile u32 *)(up->port.membase + offset) = value;
+ __raw_writel(value, up->port.membase + offset);
break;
case UPIO_PORT:
outl(value, up->port.iobase + offset);
@@ -259,34 +265,19 @@ sio_quot_set(struct uart_txx9_port *up, int quot)
static void serial_txx9_stop_tx(struct uart_port *port)
{
struct uart_txx9_port *up = (struct uart_txx9_port *)port;
- unsigned long flags;
-
- spin_lock_irqsave(&up->port.lock, flags);
sio_mask(up, TXX9_SIDICR, TXX9_SIDICR_TIE);
- spin_unlock_irqrestore(&up->port.lock, flags);
}
static void serial_txx9_start_tx(struct uart_port *port)
{
struct uart_txx9_port *up = (struct uart_txx9_port *)port;
- unsigned long flags;
-
- spin_lock_irqsave(&up->port.lock, flags);
sio_set(up, TXX9_SIDICR, TXX9_SIDICR_TIE);
- spin_unlock_irqrestore(&up->port.lock, flags);
}
static void serial_txx9_stop_rx(struct uart_port *port)
{
struct uart_txx9_port *up = (struct uart_txx9_port *)port;
- unsigned long flags;
-
- spin_lock_irqsave(&up->port.lock, flags);
up->port.read_status_mask &= ~TXX9_SIDISR_RDIS;
-#if 0
- sio_mask(up, TXX9_SIDICR, TXX9_SIDICR_RIE);
-#endif
- spin_unlock_irqrestore(&up->port.lock, flags);
}
static void serial_txx9_enable_ms(struct uart_port *port)
@@ -302,12 +293,16 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r
unsigned int disr = *status;
int max_count = 256;
char flag;
+ unsigned int next_ignore_status_mask;
do {
ch = sio_in(up, TXX9_SIRFIFO);
flag = TTY_NORMAL;
up->port.icount.rx++;
+		/* mask out the RFDN_MASK bit added by a previous overrun */
+ next_ignore_status_mask =
+ up->port.ignore_status_mask & ~TXX9_SIDISR_RFDN_MASK;
if (unlikely(disr & (TXX9_SIDISR_UBRK | TXX9_SIDISR_UPER |
TXX9_SIDISR_UFER | TXX9_SIDISR_UOER))) {
/*
@@ -328,8 +323,17 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r
up->port.icount.parity++;
else if (disr & TXX9_SIDISR_UFER)
up->port.icount.frame++;
- if (disr & TXX9_SIDISR_UOER)
+ if (disr & TXX9_SIDISR_UOER) {
up->port.icount.overrun++;
+ /*
+				 * The receiver read buffer still holds
+				 * the char which caused the overrun.
+				 * Ignore the next char by temporarily
+				 * adding RFDN_MASK to ignore_status_mask.
+ */
+ next_ignore_status_mask |=
+ TXX9_SIDISR_RFDN_MASK;
+ }
/*
		 * Mask off conditions which should be ignored.
@@ -349,6 +353,7 @@ receive_chars(struct uart_txx9_port *up, unsigned int *status, struct pt_regs *r
uart_insert_char(&up->port, disr, TXX9_SIDISR_UOER, ch, flag);
ignore_char:
+ up->port.ignore_status_mask = next_ignore_status_mask;
disr = sio_in(up, TXX9_SIDISR);
} while (!(disr & TXX9_SIDISR_UVALID) && (max_count-- > 0));
spin_unlock(&up->port.lock);
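
The overrun handling above is a one-shot mask: each loop iteration recomputes the ignore mask without RFDN_MASK, and only an overrun on the current character arms it again, so exactly one stale character gets dropped. A distilled, driver-agnostic sketch of the same logic (bit names are placeholders for the TXX9_SIDISR_* values):

/* returns the ignore mask to apply to the *next* character */
static unsigned int example_next_ignore_mask(unsigned int cur_mask,
					     unsigned int status,
					     unsigned int overrun_bit,
					     unsigned int stale_char_bit)
{
	/* disarm the one-shot bit set for the previous character */
	unsigned int next = cur_mask & ~stale_char_bit;

	/* the FIFO still holds the char that overran; arm the bit so
	 * that single char is ignored and no more */
	if (status & overrun_bit)
		next |= stale_char_bit;
	return next;
}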
@@ -450,14 +455,11 @@ static unsigned int serial_txx9_get_mctrl(struct uart_port *port)
static void serial_txx9_set_mctrl(struct uart_port *port, unsigned int mctrl)
{
struct uart_txx9_port *up = (struct uart_txx9_port *)port;
- unsigned long flags;
- spin_lock_irqsave(&up->port.lock, flags);
if (mctrl & TIOCM_RTS)
sio_mask(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC);
else
sio_set(up, TXX9_SIFLCR, TXX9_SIFLCR_RTSSC);
- spin_unlock_irqrestore(&up->port.lock, flags);
}
static void serial_txx9_break_ctl(struct uart_port *port, int break_state)
@@ -784,8 +786,14 @@ static void serial_txx9_config_port(struct uart_port *port, int uflags)
static int
serial_txx9_verify_port(struct uart_port *port, struct serial_struct *ser)
{
- if (ser->irq < 0 ||
- ser->baud_base < 9600 || ser->type != PORT_TXX9)
+ unsigned long new_port = ser->port;
+ if (HIGH_BITS_OFFSET)
+ new_port += (unsigned long)ser->port_high << HIGH_BITS_OFFSET;
+ if (ser->type != port->type ||
+ ser->irq != port->irq ||
+ ser->io_type != port->iotype ||
+ new_port != port->iobase ||
+ (unsigned long)ser->iomem_base != port->mapbase)
return -EINVAL;
return 0;
}
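
verify_port now reconstructs the full I/O address before comparing: struct serial_struct carries only a 32-bit port field, so on 64-bit kernels the upper half travels in port_high and is shifted back by HIGH_BITS_OFFSET (which evaluates to 0 on 32-bit, making the whole step a no-op). A hedged standalone sketch:

#include <linux/serial.h>

#define EXAMPLE_HIGH_BITS ((sizeof(long) - sizeof(int)) * 8)

static unsigned long example_full_port(const struct serial_struct *ser)
{
	unsigned long port = ser->port;

	if (EXAMPLE_HIGH_BITS)	/* 0 on 32-bit, 32 on 64-bit */
		port += (unsigned long)ser->port_high << EXAMPLE_HIGH_BITS;
	return port;
}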
@@ -827,7 +835,8 @@ static void __init serial_txx9_register_ports(struct uart_driver *drv)
up->port.line = i;
up->port.ops = &serial_txx9_pops;
- uart_add_one_port(drv, &up->port);
+ if (up->port.iobase || up->port.mapbase)
+ uart_add_one_port(drv, &up->port);
}
}
@@ -927,11 +936,6 @@ static int serial_txx9_console_setup(struct console *co, char *options)
return -ENODEV;
/*
- * Temporary fix.
- */
- spin_lock_init(&port->lock);
-
- /*
* Disable UART interrupts, set DTR and RTS high
* and set speed.
*/
@@ -1041,11 +1045,10 @@ static int __devinit serial_txx9_register_port(struct uart_port *port)
mutex_lock(&serial_txx9_mutex);
for (i = 0; i < UART_NR; i++) {
uart = &serial_txx9_ports[i];
- if (uart->port.type == PORT_UNKNOWN)
+ if (!(uart->port.iobase || uart->port.mapbase))
break;
}
if (i < UART_NR) {
- uart_remove_one_port(&serial_txx9_reg, &uart->port);
uart->port.iobase = port->iobase;
uart->port.membase = port->membase;
uart->port.irq = port->irq;
@@ -1080,9 +1083,8 @@ static void __devexit serial_txx9_unregister_port(int line)
uart->port.type = PORT_UNKNOWN;
uart->port.iobase = 0;
uart->port.mapbase = 0;
- uart->port.membase = 0;
+ uart->port.membase = NULL;
uart->port.dev = NULL;
- uart_add_one_port(&serial_txx9_reg, &uart->port);
mutex_unlock(&serial_txx9_mutex);
}
@@ -1198,8 +1200,11 @@ static void __exit serial_txx9_exit(void)
#ifdef ENABLE_SERIAL_TXX9_PCI
pci_unregister_driver(&serial_txx9_pci_driver);
#endif
- for (i = 0; i < UART_NR; i++)
- uart_remove_one_port(&serial_txx9_reg, &serial_txx9_ports[i].port);
+ for (i = 0; i < UART_NR; i++) {
+ struct uart_txx9_port *up = &serial_txx9_ports[i];
+ if (up->port.iobase || up->port.mapbase)
+ uart_remove_one_port(&serial_txx9_reg, &up->port);
+ }
uart_unregister_driver(&serial_txx9_reg);
}
diff --git a/drivers/serial/vr41xx_siu.c b/drivers/serial/vr41xx_siu.c
index d61494d185c..bd6294132c1 100644
--- a/drivers/serial/vr41xx_siu.c
+++ b/drivers/serial/vr41xx_siu.c
@@ -919,7 +919,7 @@ static struct uart_driver siu_uart_driver = {
.cons = SERIAL_VR41XX_CONSOLE,
};
-static int siu_probe(struct platform_device *dev)
+static int __devinit siu_probe(struct platform_device *dev)
{
struct uart_port *port;
int num, i, retval;
@@ -953,7 +953,7 @@ static int siu_probe(struct platform_device *dev)
return 0;
}
-static int siu_remove(struct platform_device *dev)
+static int __devexit siu_remove(struct platform_device *dev)
{
struct uart_port *port;
int i;
@@ -1006,21 +1006,28 @@ static struct platform_device *siu_platform_device;
static struct platform_driver siu_device_driver = {
.probe = siu_probe,
- .remove = siu_remove,
+ .remove = __devexit_p(siu_remove),
.suspend = siu_suspend,
.resume = siu_resume,
.driver = {
.name = "SIU",
+ .owner = THIS_MODULE,
},
};
-static int __devinit vr41xx_siu_init(void)
+static int __init vr41xx_siu_init(void)
{
int retval;
- siu_platform_device = platform_device_register_simple("SIU", -1, NULL, 0);
- if (IS_ERR(siu_platform_device))
- return PTR_ERR(siu_platform_device);
+ siu_platform_device = platform_device_alloc("SIU", -1);
+ if (!siu_platform_device)
+ return -ENOMEM;
+
+ retval = platform_device_add(siu_platform_device);
+ if (retval < 0) {
+ platform_device_put(siu_platform_device);
+ return retval;
+ }
retval = platform_driver_register(&siu_device_driver);
if (retval < 0)
@@ -1029,10 +1036,9 @@ static int __devinit vr41xx_siu_init(void)
return retval;
}
-static void __devexit vr41xx_siu_exit(void)
+static void __exit vr41xx_siu_exit(void)
{
platform_driver_unregister(&siu_device_driver);
-
platform_device_unregister(siu_platform_device);
}
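
The init path above moves from platform_device_register_simple() to the two-step alloc/add sequence, whose point is error handling: a device that failed platform_device_add() was never registered, so it must be released with platform_device_put(), not platform_device_unregister(). The general shape, as a sketch:

#include <linux/init.h>
#include <linux/platform_device.h>

static struct platform_device *example_pdev;

static int __init example_init(void)
{
	int err;

	example_pdev = platform_device_alloc("example", -1);
	if (!example_pdev)
		return -ENOMEM;

	err = platform_device_add(example_pdev);
	if (err) {
		/* add failed: drop the reference, don't unregister */
		platform_device_put(example_pdev);
		return err;
	}
	return 0;
}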
diff --git a/drivers/sn/ioc4.c b/drivers/sn/ioc4.c
index ea75b3d0612..67140a5804f 100644
--- a/drivers/sn/ioc4.c
+++ b/drivers/sn/ioc4.c
@@ -31,7 +31,7 @@
#include <linux/ioc4.h>
#include <linux/mmtimer.h>
#include <linux/rtc.h>
-#include <linux/rwsem.h>
+#include <linux/mutex.h>
#include <asm/sn/addrs.h>
#include <asm/sn/clksupport.h>
#include <asm/sn/shub_mmr.h>
@@ -54,11 +54,10 @@
* Submodule management *
************************/
-static LIST_HEAD(ioc4_devices);
-static DECLARE_RWSEM(ioc4_devices_rwsem);
+static DEFINE_MUTEX(ioc4_mutex);
+static LIST_HEAD(ioc4_devices);
static LIST_HEAD(ioc4_submodules);
-static DECLARE_RWSEM(ioc4_submodules_rwsem);
/* Register an IOC4 submodule */
int
@@ -66,15 +65,13 @@ ioc4_register_submodule(struct ioc4_submodule *is)
{
struct ioc4_driver_data *idd;
- down_write(&ioc4_submodules_rwsem);
+ mutex_lock(&ioc4_mutex);
list_add(&is->is_list, &ioc4_submodules);
- up_write(&ioc4_submodules_rwsem);
/* Initialize submodule for each IOC4 */
if (!is->is_probe)
- return 0;
+ goto out;
- down_read(&ioc4_devices_rwsem);
list_for_each_entry(idd, &ioc4_devices, idd_list) {
if (is->is_probe(idd)) {
printk(KERN_WARNING
@@ -84,8 +81,8 @@ ioc4_register_submodule(struct ioc4_submodule *is)
pci_name(idd->idd_pdev));
}
}
- up_read(&ioc4_devices_rwsem);
-
+ out:
+ mutex_unlock(&ioc4_mutex);
return 0;
}
@@ -95,15 +92,13 @@ ioc4_unregister_submodule(struct ioc4_submodule *is)
{
struct ioc4_driver_data *idd;
- down_write(&ioc4_submodules_rwsem);
+ mutex_lock(&ioc4_mutex);
list_del(&is->is_list);
- up_write(&ioc4_submodules_rwsem);
/* Remove submodule for each IOC4 */
if (!is->is_remove)
- return;
+ goto out;
- down_read(&ioc4_devices_rwsem);
list_for_each_entry(idd, &ioc4_devices, idd_list) {
if (is->is_remove(idd)) {
printk(KERN_WARNING
@@ -113,7 +108,8 @@ ioc4_unregister_submodule(struct ioc4_submodule *is)
pci_name(idd->idd_pdev));
}
}
- up_read(&ioc4_devices_rwsem);
+ out:
+ mutex_unlock(&ioc4_mutex);
}
/*********************
@@ -312,12 +308,11 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
/* Track PCI-device specific data */
idd->idd_serial_data = NULL;
pci_set_drvdata(idd->idd_pdev, idd);
- down_write(&ioc4_devices_rwsem);
+
+ mutex_lock(&ioc4_mutex);
list_add(&idd->idd_list, &ioc4_devices);
- up_write(&ioc4_devices_rwsem);
/* Add this IOC4 to all submodules */
- down_read(&ioc4_submodules_rwsem);
list_for_each_entry(is, &ioc4_submodules, is_list) {
if (is->is_probe && is->is_probe(idd)) {
printk(KERN_WARNING
@@ -327,7 +322,7 @@ ioc4_probe(struct pci_dev *pdev, const struct pci_device_id *pci_id)
pci_name(idd->idd_pdev));
}
}
- up_read(&ioc4_submodules_rwsem);
+ mutex_unlock(&ioc4_mutex);
return 0;
@@ -351,7 +346,7 @@ ioc4_remove(struct pci_dev *pdev)
idd = pci_get_drvdata(pdev);
/* Remove this IOC4 from all submodules */
- down_read(&ioc4_submodules_rwsem);
+ mutex_lock(&ioc4_mutex);
list_for_each_entry(is, &ioc4_submodules, is_list) {
if (is->is_remove && is->is_remove(idd)) {
printk(KERN_WARNING
@@ -361,7 +356,7 @@ ioc4_remove(struct pci_dev *pdev)
pci_name(idd->idd_pdev));
}
}
- up_read(&ioc4_submodules_rwsem);
+ mutex_unlock(&ioc4_mutex);
/* Release resources */
iounmap(idd->idd_misc_regs);
@@ -377,9 +372,9 @@ ioc4_remove(struct pci_dev *pdev)
pci_disable_device(pdev);
/* Remove and free driver data */
- down_write(&ioc4_devices_rwsem);
+ mutex_lock(&ioc4_mutex);
list_del(&idd->idd_list);
- up_write(&ioc4_devices_rwsem);
+ mutex_unlock(&ioc4_mutex);
kfree(idd);
}
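
ioc4 collapses two rwsems into one mutex: every walk or mutation of either list now happens under the same lock, which removes any lock-ordering question between the submodule and device lists at the cost of read-side concurrency. The resulting pattern, sketched with hypothetical names:

#include <linux/mutex.h>
#include <linux/list.h>

static DEFINE_MUTEX(example_mutex);
static LIST_HEAD(example_devices);
static LIST_HEAD(example_submodules);

struct example_submodule {
	struct list_head list;
};

static void example_register_submodule(struct example_submodule *sub)
{
	mutex_lock(&example_mutex);
	list_add(&sub->list, &example_submodules);
	/* still holding the lock, it is safe to walk example_devices
	 * and probe each existing device before anyone else can
	 * change either list */
	mutex_unlock(&example_mutex);
}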
diff --git a/drivers/video/acornfb.c b/drivers/video/acornfb.c
index b058273527b..76448d6ae89 100644
--- a/drivers/video/acornfb.c
+++ b/drivers/video/acornfb.c
@@ -1269,7 +1269,7 @@ free_unused_pages(unsigned int virtual_start, unsigned int virtual_end)
*/
page = virt_to_page(virtual_start);
ClearPageReserved(page);
- set_page_count(page, 1);
+ init_page_count(page);
free_page(virtual_start);
virtual_start += PAGE_SIZE;
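
init_page_count() is the new helper for the one legitimate remaining use of directly setting a page's refcount: giving a freshly unreserved page a count of 1 before handing it back to the allocator. The acornfb loop above reduces to this pattern (sketch, not from the patch):

#include <linux/mm.h>

static void example_free_reserved_page(unsigned long addr)
{
	struct page *page = virt_to_page((void *)addr);

	ClearPageReserved(page);
	init_page_count(page);	/* replaces set_page_count(page, 1) */
	free_page(addr);
}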
diff --git a/drivers/video/i810/i810_main.c b/drivers/video/i810/i810_main.c
index d8467c03b49..788297e9d59 100644
--- a/drivers/video/i810/i810_main.c
+++ b/drivers/video/i810/i810_main.c
@@ -1508,7 +1508,7 @@ static int i810fb_cursor(struct fb_info *info, struct fb_cursor *cursor)
int size = ((cursor->image.width + 7) >> 3) *
cursor->image.height;
int i;
- u8 *data = kmalloc(64 * 8, GFP_KERNEL);
+ u8 *data = kmalloc(64 * 8, GFP_ATOMIC);
if (data == NULL)
return -ENOMEM;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 3ad8455f857..651a9e14d9a 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -614,6 +614,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
sb = dir->i_sb;
v9ses = v9fs_inode2v9ses(dir);
+ dentry->d_op = &v9fs_dentry_operations;
dirfid = v9fs_fid_lookup(dentry->d_parent);
if (!dirfid) {
@@ -681,8 +682,6 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
goto FreeFcall;
fid->qid = fcall->params.rstat.stat.qid;
-
- dentry->d_op = &v9fs_dentry_operations;
v9fs_stat2inode(&fcall->params.rstat.stat, inode, inode->i_sb);
d_add(dentry, inode);
diff --git a/fs/buffer.c b/fs/buffer.c
index a9b39940200..1d3683d496f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3051,68 +3051,6 @@ asmlinkage long sys_bdflush(int func, long data)
}
/*
- * Migration function for pages with buffers. This function can only be used
- * if the underlying filesystem guarantees that no other references to "page"
- * exist.
- */
-#ifdef CONFIG_MIGRATION
-int buffer_migrate_page(struct page *newpage, struct page *page)
-{
- struct address_space *mapping = page->mapping;
- struct buffer_head *bh, *head;
- int rc;
-
- if (!mapping)
- return -EAGAIN;
-
- if (!page_has_buffers(page))
- return migrate_page(newpage, page);
-
- head = page_buffers(page);
-
- rc = migrate_page_remove_references(newpage, page, 3);
- if (rc)
- return rc;
-
- bh = head;
- do {
- get_bh(bh);
- lock_buffer(bh);
- bh = bh->b_this_page;
-
- } while (bh != head);
-
- ClearPagePrivate(page);
- set_page_private(newpage, page_private(page));
- set_page_private(page, 0);
- put_page(page);
- get_page(newpage);
-
- bh = head;
- do {
- set_bh_page(bh, newpage, bh_offset(bh));
- bh = bh->b_this_page;
-
- } while (bh != head);
-
- SetPagePrivate(newpage);
-
- migrate_page_copy(newpage, page);
-
- bh = head;
- do {
- unlock_buffer(bh);
- put_bh(bh);
- bh = bh->b_this_page;
-
- } while (bh != head);
-
- return 0;
-}
-EXPORT_SYMBOL(buffer_migrate_page);
-#endif
-
-/*
* Buffer-head allocation
*/
static kmem_cache_t *bh_cachep;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index b3519528994..25fa8bba8cb 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -56,48 +56,10 @@ static void huge_pagevec_release(struct pagevec *pvec)
pagevec_reinit(pvec);
}
-/*
- * huge_pages_needed tries to determine the number of new huge pages that
- * will be required to fully populate this VMA. This will be equal to
- * the size of the VMA in huge pages minus the number of huge pages
- * (covered by this VMA) that are found in the page cache.
- *
- * Result is in bytes to be compatible with is_hugepage_mem_enough()
- */
-static unsigned long
-huge_pages_needed(struct address_space *mapping, struct vm_area_struct *vma)
-{
- int i;
- struct pagevec pvec;
- unsigned long start = vma->vm_start;
- unsigned long end = vma->vm_end;
- unsigned long hugepages = (end - start) >> HPAGE_SHIFT;
- pgoff_t next = vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT);
- pgoff_t endpg = next + hugepages;
-
- pagevec_init(&pvec, 0);
- while (next < endpg) {
- if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE))
- break;
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- if (page->index > next)
- next = page->index;
- if (page->index >= endpg)
- break;
- next++;
- hugepages--;
- }
- huge_pagevec_release(&pvec);
- }
- return hugepages << HPAGE_SHIFT;
-}
-
static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct inode *inode = file->f_dentry->d_inode;
- struct address_space *mapping = inode->i_mapping;
- unsigned long bytes;
+ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
loff_t len, vma_len;
int ret;
@@ -113,10 +75,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
if (vma->vm_end - vma->vm_start < HPAGE_SIZE)
return -EINVAL;
- bytes = huge_pages_needed(mapping, vma);
- if (!is_hugepage_mem_enough(bytes))
- return -ENOMEM;
-
vma_len = (loff_t)(vma->vm_end - vma->vm_start);
mutex_lock(&inode->i_mutex);
@@ -129,6 +87,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
if (!(vma->vm_flags & VM_WRITE) && len > inode->i_size)
goto out;
+ if (vma->vm_flags & VM_MAYSHARE)
+ if (hugetlb_extend_reservation(info, len >> HPAGE_SHIFT) != 0)
+ goto out;
+
ret = 0;
hugetlb_prefault_arch_hook(vma->vm_mm);
if (inode->i_size < len)
@@ -227,13 +189,18 @@ static void truncate_huge_page(struct page *page)
put_page(page);
}
-static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
+static void truncate_hugepages(struct inode *inode, loff_t lstart)
{
+ struct address_space *mapping = &inode->i_data;
const pgoff_t start = lstart >> HPAGE_SHIFT;
struct pagevec pvec;
pgoff_t next;
int i;
+ hugetlb_truncate_reservation(HUGETLBFS_I(inode),
+ lstart >> HPAGE_SHIFT);
+ if (!mapping->nrpages)
+ return;
pagevec_init(&pvec, 0);
next = start;
while (1) {
@@ -262,8 +229,7 @@ static void truncate_hugepages(struct address_space *mapping, loff_t lstart)
static void hugetlbfs_delete_inode(struct inode *inode)
{
- if (inode->i_data.nrpages)
- truncate_hugepages(&inode->i_data, 0);
+ truncate_hugepages(inode, 0);
clear_inode(inode);
}
@@ -296,8 +262,7 @@ static void hugetlbfs_forget_inode(struct inode *inode)
inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
- if (inode->i_data.nrpages)
- truncate_hugepages(&inode->i_data, 0);
+ truncate_hugepages(inode, 0);
clear_inode(inode);
destroy_inode(inode);
}
@@ -356,7 +321,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
if (!prio_tree_empty(&mapping->i_mmap))
hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
spin_unlock(&mapping->i_mmap_lock);
- truncate_hugepages(mapping, offset);
+ truncate_hugepages(inode, offset);
return 0;
}
@@ -573,6 +538,7 @@ static struct inode *hugetlbfs_alloc_inode(struct super_block *sb)
hugetlbfs_inc_free_inodes(sbinfo);
return NULL;
}
+ p->prereserved_hpages = 0;
return &p->vfs_inode;
}
@@ -771,21 +737,6 @@ static struct file_system_type hugetlbfs_fs_type = {
static struct vfsmount *hugetlbfs_vfsmount;
-/*
- * Return the next identifier for a shm file
- */
-static unsigned long hugetlbfs_counter(void)
-{
- static DEFINE_SPINLOCK(lock);
- static unsigned long counter;
- unsigned long ret;
-
- spin_lock(&lock);
- ret = ++counter;
- spin_unlock(&lock);
- return ret;
-}
-
static int can_do_hugetlb_shm(void)
{
return likely(capable(CAP_IPC_LOCK) ||
@@ -801,18 +752,16 @@ struct file *hugetlb_zero_setup(size_t size)
struct dentry *dentry, *root;
struct qstr quick_string;
char buf[16];
+ static atomic_t counter;
if (!can_do_hugetlb_shm())
return ERR_PTR(-EPERM);
- if (!is_hugepage_mem_enough(size))
- return ERR_PTR(-ENOMEM);
-
if (!user_shm_lock(size, current->user))
return ERR_PTR(-ENOMEM);
root = hugetlbfs_vfsmount->mnt_root;
- snprintf(buf, 16, "%lu", hugetlbfs_counter());
+ snprintf(buf, 16, "%u", atomic_inc_return(&counter));
quick_string.name = buf;
quick_string.len = strlen(quick_string.name);
quick_string.hash = 0;
@@ -831,6 +780,11 @@ struct file *hugetlb_zero_setup(size_t size)
if (!inode)
goto out_file;
+ error = -ENOMEM;
+ if (hugetlb_extend_reservation(HUGETLBFS_I(inode),
+ size >> HPAGE_SHIFT) != 0)
+ goto out_inode;
+
d_instantiate(dentry, inode);
inode->i_size = size;
inode->i_nlink = 0;
@@ -841,6 +795,8 @@ struct file *hugetlb_zero_setup(size_t size)
file->f_mode = FMODE_WRITE | FMODE_READ;
return file;
+out_inode:
+ iput(inode);
out_file:
put_filp(file);
out_dentry:
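
The shm-file naming switch above replaces a spinlock-guarded counter with atomic_inc_return(), which yields the same unique, monotonically increasing id with less machinery. A minimal sketch:

#include <asm/atomic.h>

static atomic_t example_counter = ATOMIC_INIT(0);

/* unique id without a dedicated spinlock */
static unsigned int example_next_id(void)
{
	return atomic_inc_return(&example_counter);
}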
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 8dd3aafec49..09e1c57a86a 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -959,7 +959,7 @@ static int ocfs2_initialize_mem_caches(void)
ocfs2_lock_cache = kmem_cache_create("ocfs2_lock",
sizeof(struct ocfs2_journal_lock),
0,
- SLAB_NO_REAP|SLAB_HWCACHE_ALIGN,
+ SLAB_HWCACHE_ALIGN,
NULL, NULL);
if (!ocfs2_lock_cache)
return -ENOMEM;
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 3f810acd0bf..b1ca234068f 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -87,8 +87,7 @@ static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
xpages = 1UL << order;
npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
- for (loop = 0; loop < npages; loop++)
- set_page_count(pages + loop, 1);
+ split_page(pages, order);
/* trim off any pages we don't actually require */
for (loop = npages; loop < xpages; loop++)
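
split_page() replaces ramfs-nommu's hand-rolled set_page_count() loop: it converts one order-N allocation into 2^N independently refcounted order-0 pages, so the unused tail pages can be returned to the allocator one by one. A minimal sketch:

#include <linux/gfp.h>
#include <linux/mm.h>

static struct page *example_alloc_splittable(unsigned int order)
{
	struct page *pages = alloc_pages(GFP_KERNEL, order);

	if (!pages)
		return NULL;

	/* each of the 2^order constituent pages can now be
	 * __free_page()d individually */
	split_page(pages, order);
	return pages;
}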
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index bfb4f2917bb..8cdfa415165 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -29,6 +29,7 @@
#include <linux/blkdev.h>
#include <linux/hash.h>
#include <linux/kthread.h>
+#include <linux/migrate.h>
#include "xfs_linux.h"
STATIC kmem_zone_t *xfs_buf_zone;
diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h
index 55059abf9c9..20f52395421 100644
--- a/include/asm-i386/acpi.h
+++ b/include/asm-i386/acpi.h
@@ -103,6 +103,12 @@ __acpi_release_global_lock (unsigned int *lock)
:"=r"(n_hi), "=r"(n_lo) \
:"0"(n_hi), "1"(n_lo))
+#ifdef CONFIG_X86_IO_APIC
+extern void check_acpi_pci(void);
+#else
+static inline void check_acpi_pci(void) { }
+#endif
+
#ifdef CONFIG_ACPI
extern int acpi_lapic;
extern int acpi_ioapic;
@@ -128,8 +134,6 @@ extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
extern int skip_ioapic_setup;
extern int acpi_skip_timer_override;
-extern void check_acpi_pci(void);
-
static inline void disable_ioapic_setup(void)
{
skip_ioapic_setup = 1;
@@ -142,8 +146,6 @@ static inline int ioapic_setup_disabled(void)
#else
static inline void disable_ioapic_setup(void) { }
-static inline void check_acpi_pci(void) { }
-
#endif
static inline void acpi_noirq_set(void) { acpi_noirq = 1; }
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index 088a945bf26..ee056c41a9f 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -219,13 +219,12 @@ extern unsigned long pg0[];
* The following only work if pte_present() is true.
* Undefined behaviour if not..
*/
-#define __LARGE_PTE (_PAGE_PSE | _PAGE_PRESENT)
static inline int pte_user(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
static inline int pte_read(pte_t pte) { return (pte).pte_low & _PAGE_USER; }
static inline int pte_dirty(pte_t pte) { return (pte).pte_low & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return (pte).pte_low & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return (pte).pte_low & _PAGE_RW; }
-static inline int pte_huge(pte_t pte) { return ((pte).pte_low & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte) { return (pte).pte_low & _PAGE_PSE; }
/*
* The following only works if pte_present() is not true.
@@ -242,7 +241,7 @@ static inline pte_t pte_mkexec(pte_t pte) { (pte).pte_low |= _PAGE_USER; return
static inline pte_t pte_mkdirty(pte_t pte) { (pte).pte_low |= _PAGE_DIRTY; return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { (pte).pte_low |= _PAGE_ACCESSED; return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { (pte).pte_low |= _PAGE_RW; return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= __LARGE_PTE; return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) { (pte).pte_low |= _PAGE_PSE; return pte; }
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
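
A worked example of why testing _PAGE_PSE alone matters, assuming the i386 bit values _PAGE_PRESENT = 0x001 and _PAGE_PSE = 0x080. Consider a huge pte whose present bit is temporarily clear, i.e. pte_low == 0x080:

/* old test: (0x080 & 0x081) == 0x081  ->  false ("not huge") */
/* new test:  0x080 & 0x080            ->  true  ("huge")     */

static inline int example_old_pte_huge(unsigned long pte_low)
{
	return (pte_low & 0x081) == 0x081;	/* _PAGE_PSE | _PAGE_PRESENT */
}

static inline int example_new_pte_huge(unsigned long pte_low)
{
	return pte_low & 0x080;			/* _PAGE_PSE only */
}

The huge-page property is thus tracked independently of presence, presumably so a not-yet-present huge pte is still recognized as huge.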
diff --git a/include/asm-ia64/intel_intrin.h b/include/asm-ia64/intel_intrin.h
index a7122d85017..d069b6acddc 100644
--- a/include/asm-ia64/intel_intrin.h
+++ b/include/asm-ia64/intel_intrin.h
@@ -5,113 +5,10 @@
*
* Copyright (C) 2002,2003 Jun Nakajima <jun.nakajima@intel.com>
* Copyright (C) 2002,2003 Suresh Siddha <suresh.b.siddha@intel.com>
+ * Copyright (C) 2005,2006 Hongjiu Lu <hongjiu.lu@intel.com>
*
*/
-#include <asm/types.h>
-
-void __lfetch(int lfhint, void *y);
-void __lfetch_excl(int lfhint, void *y);
-void __lfetch_fault(int lfhint, void *y);
-void __lfetch_fault_excl(int lfhint, void *y);
-
-/* In the following, whichFloatReg should be an integer from 0-127 */
-void __ldfs(const int whichFloatReg, void *src);
-void __ldfd(const int whichFloatReg, void *src);
-void __ldfe(const int whichFloatReg, void *src);
-void __ldf8(const int whichFloatReg, void *src);
-void __ldf_fill(const int whichFloatReg, void *src);
-void __stfs(void *dst, const int whichFloatReg);
-void __stfd(void *dst, const int whichFloatReg);
-void __stfe(void *dst, const int whichFloatReg);
-void __stf8(void *dst, const int whichFloatReg);
-void __stf_spill(void *dst, const int whichFloatReg);
-
-void __st1_rel(void *dst, const __s8 value);
-void __st2_rel(void *dst, const __s16 value);
-void __st4_rel(void *dst, const __s32 value);
-void __st8_rel(void *dst, const __s64 value);
-__u8 __ld1_acq(void *src);
-__u16 __ld2_acq(void *src);
-__u32 __ld4_acq(void *src);
-__u64 __ld8_acq(void *src);
-
-__u64 __fetchadd4_acq(__u32 *addend, const int increment);
-__u64 __fetchadd4_rel(__u32 *addend, const int increment);
-__u64 __fetchadd8_acq(__u64 *addend, const int increment);
-__u64 __fetchadd8_rel(__u64 *addend, const int increment);
-
-__u64 __getf_exp(double d);
-
-/* OS Related Itanium(R) Intrinsics */
-
-/* The names to use for whichReg and whichIndReg below come from
- the include file asm/ia64regs.h */
-
-__u64 __getIndReg(const int whichIndReg, __s64 index);
-__u64 __getReg(const int whichReg);
-
-void __setIndReg(const int whichIndReg, __s64 index, __u64 value);
-void __setReg(const int whichReg, __u64 value);
-
-void __mf(void);
-void __mfa(void);
-void __synci(void);
-void __itcd(__s64 pa);
-void __itci(__s64 pa);
-void __itrd(__s64 whichTransReg, __s64 pa);
-void __itri(__s64 whichTransReg, __s64 pa);
-void __ptce(__s64 va);
-void __ptcl(__s64 va, __s64 pagesz);
-void __ptcg(__s64 va, __s64 pagesz);
-void __ptcga(__s64 va, __s64 pagesz);
-void __ptri(__s64 va, __s64 pagesz);
-void __ptrd(__s64 va, __s64 pagesz);
-void __invala (void);
-void __invala_gr(const int whichGeneralReg /* 0-127 */ );
-void __invala_fr(const int whichFloatReg /* 0-127 */ );
-void __nop(const int);
-void __fc(__u64 *addr);
-void __sum(int mask);
-void __rum(int mask);
-void __ssm(int mask);
-void __rsm(int mask);
-__u64 __thash(__s64);
-__u64 __ttag(__s64);
-__s64 __tpa(__s64);
-
-/* Intrinsics for implementing get/put_user macros */
-void __st_user(const char *tableName, __u64 addr, char size, char relocType, __u64 val);
-void __ld_user(const char *tableName, __u64 addr, char size, char relocType);
-
-/* This intrinsic does not generate code, it creates a barrier across which
- * the compiler will not schedule data access instructions.
- */
-void __memory_barrier(void);
-
-void __isrlz(void);
-void __dsrlz(void);
-
-__u64 _m64_mux1(__u64 a, const int n);
-__u64 __thash(__u64);
-
-/* Lock and Atomic Operation Related Intrinsics */
-__u64 _InterlockedExchange8(volatile __u8 *trgt, __u8 value);
-__u64 _InterlockedExchange16(volatile __u16 *trgt, __u16 value);
-__s64 _InterlockedExchange(volatile __u32 *trgt, __u32 value);
-__s64 _InterlockedExchange64(volatile __u64 *trgt, __u64 value);
-
-__u64 _InterlockedCompareExchange8_rel(volatile __u8 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange8_acq(volatile __u8 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange16_rel(volatile __u16 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange16_acq(volatile __u16 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange_rel(volatile __u32 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange_acq(volatile __u32 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange64_rel(volatile __u64 *dest, __u64 xchg, __u64 comp);
-__u64 _InterlockedCompareExchange64_acq(volatile __u64 *dest, __u64 xchg, __u64 comp);
-
-__s64 _m64_dep_mi(const int v, __s64 s, const int p, const int len);
-__s64 _m64_shrp(__s64 a, __s64 b, const int count);
-__s64 _m64_popcnt(__s64 a);
+#include <ia64intrin.h>
#define ia64_barrier() __memory_barrier()
@@ -122,15 +19,16 @@ __s64 _m64_popcnt(__s64 a);
#define ia64_getreg __getReg
#define ia64_setreg __setReg
-#define ia64_hint(x)
+#define ia64_hint __hint
+#define ia64_hint_pause __hint_pause
-#define ia64_mux1_brcst 0
-#define ia64_mux1_mix 8
-#define ia64_mux1_shuf 9
-#define ia64_mux1_alt 10
-#define ia64_mux1_rev 11
+#define ia64_mux1_brcst _m64_mux1_brcst
+#define ia64_mux1_mix _m64_mux1_mix
+#define ia64_mux1_shuf _m64_mux1_shuf
+#define ia64_mux1_alt _m64_mux1_alt
+#define ia64_mux1_rev _m64_mux1_rev
-#define ia64_mux1 _m64_mux1
+#define ia64_mux1(x,v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v)))
#define ia64_popcnt _m64_popcnt
#define ia64_getf_exp __getf_exp
#define ia64_shrp _m64_shrp
@@ -158,7 +56,7 @@ __s64 _m64_popcnt(__s64 a);
#define ia64_stf8 __stf8
#define ia64_stf_spill __stf_spill
-#define ia64_mf __mf
+#define ia64_mf __mf
#define ia64_mfa __mfa
#define ia64_fetchadd4_acq __fetchadd4_acq
@@ -234,10 +132,10 @@ __s64 _m64_popcnt(__s64 a);
/* Values for lfhint in __lfetch and __lfetch_fault */
-#define ia64_lfhint_none 0
-#define ia64_lfhint_nt1 1
-#define ia64_lfhint_nt2 2
-#define ia64_lfhint_nta 3
+#define ia64_lfhint_none __lfhint_none
+#define ia64_lfhint_nt1 __lfhint_nt1
+#define ia64_lfhint_nt2 __lfhint_nt2
+#define ia64_lfhint_nta __lfhint_nta
#define ia64_lfetch __lfetch
#define ia64_lfetch_excl __lfetch_excl
@@ -254,4 +152,6 @@ do { \
} \
} while (0)
+#define __builtin_trap() __break(0)
+
#endif /* _ASM_IA64_INTEL_INTRIN_H */
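With the hand-maintained prototypes gone, the header defers to the compiler's own <ia64intrin.h>, and the only mapping that still needs glue is ia64_mux1, which converts between the kernel's plain 64-bit integers and the __m64 operand type of the intrinsic. A minimal sketch of that wrapper pattern follows; the m64_t type and the _m64_mux1 body are stand-ins, only the converter names come from the hunk above.

    /* Sketch of the conversion wrapper behind the new ia64_mux1 mapping.
     * m64_t stands in for the compiler's __m64; the real _m64_mux1 is an
     * intrinsic that permutes the bytes of its operand.
     */
    typedef struct { unsigned long long v; } m64_t;

    static m64_t _m_from_int64(unsigned long long x) { m64_t m = { x }; return m; }
    static unsigned long long _m_to_int64(m64_t m) { return m.v; }

    static m64_t _m64_mux1(m64_t a, const int variant)
    {
    	(void)variant;	/* stand-in body; the real intrinsic permutes 'a' */
    	return a;
    }

    #define ia64_mux1(x, v) _m_to_int64(_m64_mux1(_m_from_int64(x), (v)))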
diff --git a/include/asm-ia64/machvec.h b/include/asm-ia64/machvec.h
index ca5ea994d68..c3e4ed8a3e1 100644
--- a/include/asm-ia64/machvec.h
+++ b/include/asm-ia64/machvec.h
@@ -20,6 +20,7 @@ struct scatterlist;
struct page;
struct mm_struct;
struct pci_bus;
+struct task_struct;
typedef void ia64_mv_setup_t (char **);
typedef void ia64_mv_cpu_init_t (void);
@@ -34,6 +35,7 @@ typedef int ia64_mv_pci_legacy_read_t (struct pci_bus *, u16 port, u32 *val,
u8 size);
typedef int ia64_mv_pci_legacy_write_t (struct pci_bus *, u16 port, u32 val,
u8 size);
+typedef void ia64_mv_migrate_t(struct task_struct * task);
/* DMA-mapping interface: */
typedef void ia64_mv_dma_init (void);
@@ -85,6 +87,11 @@ machvec_noop_mm (struct mm_struct *mm)
{
}
+static inline void
+machvec_noop_task (struct task_struct *task)
+{
+}
+
extern void machvec_setup (char **);
extern void machvec_timer_interrupt (int, void *, struct pt_regs *);
extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int);
@@ -146,6 +153,7 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
# define platform_readw_relaxed ia64_mv.readw_relaxed
# define platform_readl_relaxed ia64_mv.readl_relaxed
# define platform_readq_relaxed ia64_mv.readq_relaxed
+# define platform_migrate ia64_mv.migrate
# endif
/* __attribute__((__aligned__(16))) is required to make size of the
@@ -194,6 +202,7 @@ struct ia64_machine_vector {
ia64_mv_readw_relaxed_t *readw_relaxed;
ia64_mv_readl_relaxed_t *readl_relaxed;
ia64_mv_readq_relaxed_t *readq_relaxed;
+ ia64_mv_migrate_t *migrate;
} __attribute__((__aligned__(16))); /* align attrib? see above comment */
#define MACHVEC_INIT(name) \
@@ -238,6 +247,7 @@ struct ia64_machine_vector {
platform_readw_relaxed, \
platform_readl_relaxed, \
platform_readq_relaxed, \
+ platform_migrate, \
}
extern struct ia64_machine_vector ia64_mv;
@@ -386,5 +396,8 @@ extern ia64_mv_dma_supported swiotlb_dma_supported;
#ifndef platform_readq_relaxed
# define platform_readq_relaxed __ia64_readq_relaxed
#endif
+#ifndef platform_migrate
+# define platform_migrate machvec_noop_task
+#endif
#endif /* _ASM_IA64_MACHVEC_H */
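The migrate hook follows the machine-vector convention visible in this hunk: every slot has a no-op default, so generic code can invoke it unconditionally and only platforms that override it (SN2, below) do real work. A compressed sketch of the pattern, with names mirroring the diff:

    struct task_struct;

    /* Default: the platform needs no synchronization when a task moves. */
    static inline void machvec_noop_task(struct task_struct *task)
    {
    }

    #ifndef platform_migrate
    # define platform_migrate machvec_noop_task
    #endif

    /* Generic code can call the hook without testing for support; the
     * default expands to an empty inline and costs nothing. */
    static inline void migrate_notify(struct task_struct *task)
    {
    	platform_migrate(task);
    }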
diff --git a/include/asm-ia64/machvec_sn2.h b/include/asm-ia64/machvec_sn2.h
index 03d00faf03b..da1d43755af 100644
--- a/include/asm-ia64/machvec_sn2.h
+++ b/include/asm-ia64/machvec_sn2.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2002-2003, 2006 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (c) 2002-2003,2006 Silicon Graphics, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU General Public License
@@ -66,6 +66,7 @@ extern ia64_mv_dma_sync_single_for_device sn_dma_sync_single_for_device;
extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device;
extern ia64_mv_dma_mapping_error sn_dma_mapping_error;
extern ia64_mv_dma_supported sn_dma_supported;
+extern ia64_mv_migrate_t sn_migrate;
/*
* This stuff has dual use!
@@ -115,6 +116,7 @@ extern ia64_mv_dma_supported sn_dma_supported;
#define platform_dma_sync_sg_for_device sn_dma_sync_sg_for_device
#define platform_dma_mapping_error sn_dma_mapping_error
#define platform_dma_supported sn_dma_supported
+#define platform_migrate sn_migrate
#include <asm/sn/io.h>
diff --git a/include/asm-ia64/mca.h b/include/asm-ia64/mca.h
index c7d9c9ed38b..bfbbb8da79c 100644
--- a/include/asm-ia64/mca.h
+++ b/include/asm-ia64/mca.h
@@ -131,6 +131,8 @@ struct ia64_mca_cpu {
/* Array of physical addresses of each CPU's MCA area. */
extern unsigned long __per_cpu_mca[NR_CPUS];
+extern int cpe_vector;
+extern int ia64_cpe_irq;
extern void ia64_mca_init(void);
extern void ia64_mca_cpu_init(void *);
extern void ia64_os_mca_dispatch(void);
diff --git a/include/asm-ia64/mutex.h b/include/asm-ia64/mutex.h
index 458c1f7fbc1..5a3224f6af3 100644
--- a/include/asm-ia64/mutex.h
+++ b/include/asm-ia64/mutex.h
@@ -1,9 +1,92 @@
/*
- * Pull in the generic implementation for the mutex fastpath.
+ * ia64 implementation of the mutex fastpath.
*
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
+ * Copyright (C) 2006 Ken Chen <kenneth.w.chen@intel.com>
+ *
+ */
+
+#ifndef _ASM_MUTEX_H
+#define _ASM_MUTEX_H
+
+/**
+ * __mutex_fastpath_lock - try to take the lock by moving the count
+ * from 1 to a 0 value
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function MUST leave the value lower than
+ * 1 even when the "1" assertion wasn't true.
+ */
+static inline void
+__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
+{
+ if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
+ fail_fn(count);
+}
+
+/**
+ * __mutex_fastpath_lock_retval - try to take the lock by moving the count
+ * from 1 to a 0 value
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 1
+ *
+ * Change the count from 1 to a value lower than 1, and call <fail_fn> if
+ * it wasn't 1 originally. This function returns 0 if the fastpath succeeds,
+ * or anything the slow path function returns.
+ */
+static inline int
+__mutex_fastpath_lock_retval(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+ if (unlikely(ia64_fetchadd4_acq(count, -1) != 1))
+ return fail_fn(count);
+ return 0;
+}
+
+/**
+ * __mutex_fastpath_unlock - try to promote the count from 0 to 1
+ * @count: pointer of type atomic_t
+ * @fail_fn: function to call if the original value was not 0
+ *
+ * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
+ * In the failure case, this function is allowed to either set the value to
+ * 1, or to set it to a value lower than 1.
+ *
+ * If the implementation sets it to a value of lower than 1, then the
+ * __mutex_slowpath_needs_to_unlock() macro needs to return 1, it needs
+ * to return 0 otherwise.
+ */
+static inline void
+__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
+{
+ int ret = ia64_fetchadd4_rel(count, 1);
+ if (unlikely(ret < 0))
+ fail_fn(count);
+}
+
+#define __mutex_slowpath_needs_to_unlock() 1
+
+/**
+ * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
+ *
+ * @count: pointer of type atomic_t
+ * @fail_fn: fallback function
+ *
+ * Change the count from 1 to a value lower than 1, and return 0 (failure)
+ * if it wasn't 1 originally, or return 1 (success) otherwise. This function
+ * MUST leave the value lower than 1 even when the "1" assertion wasn't true.
+ * Additionally, if the value was < 0 originally, this function must not
+ * set it to 0 on failure.
+ *
+ * If the architecture has no effective trylock variant, it should call the
+ * <fail_fn> spinlock-based trylock variant unconditionally.
*/
+static inline int
+__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
+{
+ if (likely(cmpxchg_acq(count, 1, 0) == 1))
+ return 1;
+ return 0;
+}
-#include <asm-generic/mutex-dec.h>
+#endif
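The fastpaths above all rely on the mutex count protocol: 1 means unlocked, 0 locked with no waiters, and a negative value locked with possible waiters; ia64_fetchadd4_acq/_rel supply the acquire/release ordering. A user-space model of the same protocol in C11 atomics, as a sketch rather than the kernel code:

    #include <stdatomic.h>

    /* count: 1 = unlocked, 0 = locked, < 0 = locked with possible waiters */
    typedef struct { atomic_int count; } toy_mutex;

    static void toy_lock(toy_mutex *m, void (*slowpath)(toy_mutex *))
    {
    	/* fetch-and-add with acquire ordering, like ia64_fetchadd4_acq */
    	if (atomic_fetch_add_explicit(&m->count, -1, memory_order_acquire) != 1)
    		slowpath(m);	/* was already held: sleep in the slowpath */
    }

    static void toy_unlock(toy_mutex *m, void (*slowpath)(toy_mutex *))
    {
    	/* release ordering, like ia64_fetchadd4_rel; a negative previous
    	 * value means waiters may exist and one must be woken */
    	if (atomic_fetch_add_explicit(&m->count, 1, memory_order_release) < 0)
    		slowpath(m);
    }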
diff --git a/include/asm-ia64/page.h b/include/asm-ia64/page.h
index 5e6362a786b..3ab27333dae 100644
--- a/include/asm-ia64/page.h
+++ b/include/asm-ia64/page.h
@@ -57,6 +57,8 @@
# define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
# define ARCH_HAS_HUGEPAGE_ONLY_RANGE
+# define ARCH_HAS_PREPARE_HUGEPAGE_RANGE
+# define ARCH_HAS_HUGETLB_FREE_PGD_RANGE
#endif /* CONFIG_HUGETLB_PAGE */
#ifdef __ASSEMBLY__
diff --git a/include/asm-ia64/pgtable.h b/include/asm-ia64/pgtable.h
index e2560c58384..c0f8144f234 100644
--- a/include/asm-ia64/pgtable.h
+++ b/include/asm-ia64/pgtable.h
@@ -314,7 +314,7 @@ ia64_phys_addr_valid (unsigned long addr)
#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A))
#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D))
#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D))
-#define pte_mkhuge(pte) (__pte(pte_val(pte) | _PAGE_P))
+#define pte_mkhuge(pte) (__pte(pte_val(pte)))
/*
* Macro to a page protection value as "uncacheable". Note that "protection" is really a
@@ -505,9 +505,6 @@ extern struct page *zero_page_memmap_ptr;
#define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3))
#define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT)
#define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1))
-struct mmu_gather;
-void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
- unsigned long end, unsigned long floor, unsigned long ceiling);
#endif
/*
diff --git a/include/asm-ia64/processor.h b/include/asm-ia64/processor.h
index 23c8e1be191..128fefd8056 100644
--- a/include/asm-ia64/processor.h
+++ b/include/asm-ia64/processor.h
@@ -50,7 +50,8 @@
#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */
#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */
#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */
- /* bit 5 is currently unused */
+#define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration
+ sync at ctx sw */
#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */
#define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */
diff --git a/include/asm-ia64/signal.h b/include/asm-ia64/signal.h
index 608168d713d..5e328ed5d01 100644
--- a/include/asm-ia64/signal.h
+++ b/include/asm-ia64/signal.h
@@ -158,8 +158,6 @@ struct k_sigaction {
#define ptrace_signal_deliver(regs, cookie) do { } while (0)
-void set_sigdelayed(pid_t pid, int signo, int code, void __user *addr);
-
#endif /* __KERNEL__ */
# endif /* !__ASSEMBLY__ */
diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h
index 2c32e4b77b5..1d9efe54166 100644
--- a/include/asm-ia64/sn/addrs.h
+++ b/include/asm-ia64/sn/addrs.h
@@ -283,5 +283,13 @@
#define REMOTE_HUB_L(n, a) HUB_L(REMOTE_HUB_ADDR((n), (a)))
#define REMOTE_HUB_S(n, a, d) HUB_S(REMOTE_HUB_ADDR((n), (a)), (d))
+/*
+ * Coretalk address breakdown
+ */
+#define CTALK_NASID_SHFT 40
+#define CTALK_NASID_MASK (0x3FFFULL << CTALK_NASID_SHFT)
+#define CTALK_CID_SHFT 38
+#define CTALK_CID_MASK (0x3ULL << CTALK_CID_SHFT)
+#define CTALK_NODE_OFFSET 0x3FFFFFFFFF
#endif /* _ASM_IA64_SN_ADDRS_H */
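The new constants carve a Coretalk address into a 14-bit NASID at bit 40, a 2-bit chiplet ID at bit 38, and a 38-bit node offset. A sketch of extracting the fields; the helper names are illustrative, only the masks and shifts come from the header:

    static inline unsigned long ctalk_nasid(unsigned long ct_addr)
    {
    	return (ct_addr & CTALK_NASID_MASK) >> CTALK_NASID_SHFT;
    }

    static inline unsigned long ctalk_cid(unsigned long ct_addr)
    {
    	return (ct_addr & CTALK_CID_MASK) >> CTALK_CID_SHFT;
    }

    static inline unsigned long ctalk_offset(unsigned long ct_addr)
    {
    	return ct_addr & CTALK_NODE_OFFSET;
    }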
diff --git a/include/asm-ia64/sn/rw_mmr.h b/include/asm-ia64/sn/rw_mmr.h
index f40fd1a5510..2d78f4c5a45 100644
--- a/include/asm-ia64/sn/rw_mmr.h
+++ b/include/asm-ia64/sn/rw_mmr.h
@@ -3,15 +3,14 @@
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
- * Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved.
+ * Copyright (C) 2002-2006 Silicon Graphics, Inc. All Rights Reserved.
*/
#ifndef _ASM_IA64_SN_RW_MMR_H
#define _ASM_IA64_SN_RW_MMR_H
/*
- * This file contains macros used to access MMR registers via
- * uncached physical addresses.
+ * This file contains macros that access MMRs via uncached physical addresses.
* pio_phys_read_mmr - read an MMR
* pio_phys_write_mmr - write an MMR
* pio_atomic_phys_write_mmrs - atomically write 1 or 2 MMRs with psr.ic=0
@@ -22,53 +21,8 @@
*/
-extern inline long
-pio_phys_read_mmr(volatile long *mmr)
-{
- long val;
- asm volatile
- ("mov r2=psr;;"
- "rsm psr.i | psr.dt;;"
- "srlz.i;;"
- "ld8.acq %0=[%1];;"
- "mov psr.l=r2;;"
- "srlz.i;;"
- : "=r"(val)
- : "r"(mmr)
- : "r2");
- return val;
-}
-
-
-
-extern inline void
-pio_phys_write_mmr(volatile long *mmr, long val)
-{
- asm volatile
- ("mov r2=psr;;"
- "rsm psr.i | psr.dt;;"
- "srlz.i;;"
- "st8.rel [%0]=%1;;"
- "mov psr.l=r2;;"
- "srlz.i;;"
- :: "r"(mmr), "r"(val)
- : "r2", "memory");
-}
-
-extern inline void
-pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2)
-{
- asm volatile
- ("mov r2=psr;;"
- "rsm psr.i | psr.dt | psr.ic;;"
- "cmp.ne p9,p0=%2,r0;"
- "srlz.i;;"
- "st8.rel [%0]=%1;"
- "(p9) st8.rel [%2]=%3;;"
- "mov psr.l=r2;;"
- "srlz.i;;"
- :: "r"(mmr1), "r"(val1), "r"(mmr2), "r"(val2)
- : "p9", "r2", "memory");
-}
+extern long pio_phys_read_mmr(volatile long *mmr);
+extern void pio_phys_write_mmr(volatile long *mmr, long val);
+extern void pio_atomic_phys_write_mmrs(volatile long *mmr1, long val1, volatile long *mmr2, long val2);
#endif /* _ASM_IA64_SN_RW_MMR_H */
diff --git a/include/asm-ia64/sn/tioce.h b/include/asm-ia64/sn/tioce.h
index d4c990712ea..893468e1b41 100644
--- a/include/asm-ia64/sn/tioce.h
+++ b/include/asm-ia64/sn/tioce.h
@@ -11,7 +11,7 @@
/* CE ASIC part & mfgr information */
#define TIOCE_PART_NUM 0xCE00
-#define TIOCE_MFGR_NUM 0x36
+#define TIOCE_SRC_ID 0x01
#define TIOCE_REV_A 0x1
/* CE Virtual PPB Vendor/Device IDs */
@@ -20,7 +20,7 @@
/* CE Host Bridge Vendor/Device IDs */
#define CE_HOST_BRIDGE_VENDOR_ID 0x10a9
-#define CE_HOST_BRIDGE_DEVICE_ID 0x4003
+#define CE_HOST_BRIDGE_DEVICE_ID 0x4001
#define TIOCE_NUM_M40_ATES 4096
@@ -463,6 +463,25 @@ typedef volatile struct tioce {
u64 ce_end_of_struct; /* 0x044400 */
} tioce_t;
+/* ce_lsiX_gb_cfg1 register bit masks & shifts */
+#define CE_LSI_GB_CFG1_RXL0S_THS_SHFT 0
+#define CE_LSI_GB_CFG1_RXL0S_THS_MASK (0xffULL << 0)
+#define CE_LSI_GB_CFG1_RXL0S_SMP_SHFT 8
+#define CE_LSI_GB_CFG1_RXL0S_SMP_MASK (0xfULL << 8)
+#define CE_LSI_GB_CFG1_RXL0S_ADJ_SHFT 12
+#define CE_LSI_GB_CFG1_RXL0S_ADJ_MASK (0x7ULL << 12)
+#define CE_LSI_GB_CFG1_RXL0S_FLT_SHFT 15
+#define CE_LSI_GB_CFG1_RXL0S_FLT_MASK (0x1ULL << 15)
+#define CE_LSI_GB_CFG1_LPBK_SEL_SHFT 16
+#define CE_LSI_GB_CFG1_LPBK_SEL_MASK (0x3ULL << 16)
+#define CE_LSI_GB_CFG1_LPBK_EN_SHFT 18
+#define CE_LSI_GB_CFG1_LPBK_EN_MASK (0x1ULL << 18)
+#define CE_LSI_GB_CFG1_RVRS_LB_SHFT 19
+#define CE_LSI_GB_CFG1_RVRS_LB_MASK (0x1ULL << 19)
+#define CE_LSI_GB_CFG1_RVRS_CLK_SHFT 20
+#define CE_LSI_GB_CFG1_RVRS_CLK_MASK (0x3ULL << 20)
+#define CE_LSI_GB_CFG1_SLF_TS_SHFT 24
+#define CE_LSI_GB_CFG1_SLF_TS_MASK (0xfULL << 24)
/* ce_adm_int_mask/ce_adm_int_status register bit defines */
#define CE_ADM_INT_CE_ERROR_SHFT 0
@@ -592,6 +611,11 @@ typedef volatile struct tioce {
#define CE_URE_RD_MRG_ENABLE (0x1ULL << 0)
#define CE_URE_WRT_MRG_ENABLE1 (0x1ULL << 4)
#define CE_URE_WRT_MRG_ENABLE2 (0x1ULL << 5)
+#define CE_URE_WRT_MRG_TIMER_SHFT 12
+#define CE_URE_WRT_MRG_TIMER_MASK (0x7FFULL << CE_URE_WRT_MRG_TIMER_SHFT)
+#define CE_URE_WRT_MRG_TIMER(x) (((u64)(x) << \
+ CE_URE_WRT_MRG_TIMER_SHFT) & \
+ CE_URE_WRT_MRG_TIMER_MASK)
#define CE_URE_RSPQ_BYPASS_DISABLE (0x1ULL << 24)
#define CE_URE_UPS_DAT1_PAR_DISABLE (0x1ULL << 32)
#define CE_URE_UPS_HDR1_PAR_DISABLE (0x1ULL << 33)
@@ -653,8 +677,12 @@ typedef volatile struct tioce {
#define CE_URE_SI (0x1ULL << 0)
#define CE_URE_ELAL_SHFT 4
#define CE_URE_ELAL_MASK (0x7ULL << CE_URE_ELAL_SHFT)
+#define CE_URE_ELAL_SET(n) (((u64)(n) << CE_URE_ELAL_SHFT) & \
+ CE_URE_ELAL_MASK)
#define CE_URE_ELAL1_SHFT 8
#define CE_URE_ELAL1_MASK (0x7ULL << CE_URE_ELAL1_SHFT)
+#define CE_URE_ELAL1_SET(n) (((u64)(n) << CE_URE_ELAL1_SHFT) & \
+ CE_URE_ELAL1_MASK)
#define CE_URE_SCC (0x1ULL << 12)
#define CE_URE_PN1_SHFT 16
#define CE_URE_PN1_MASK (0xFFULL << CE_URE_PN1_SHFT)
@@ -675,8 +703,12 @@ typedef volatile struct tioce {
#define CE_URE_HPC (0x1ULL << 6)
#define CE_URE_SPLV_SHFT 7
#define CE_URE_SPLV_MASK (0xFFULL << CE_URE_SPLV_SHFT)
+#define CE_URE_SPLV_SET(n) (((u64)(n) << CE_URE_SPLV_SHFT) & \
+ CE_URE_SPLV_MASK)
#define CE_URE_SPLS_SHFT 15
#define CE_URE_SPLS_MASK (0x3ULL << CE_URE_SPLS_SHFT)
+#define CE_URE_SPLS_SET(n) (((u64)(n) << CE_URE_SPLS_SHFT) & \
+ CE_URE_SPLS_MASK)
#define CE_URE_PSN1_SHFT 19
#define CE_URE_PSN1_MASK (0x1FFFULL << CE_URE_PSN1_SHFT)
#define CE_URE_PSN2_SHFT 32
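The *_SET(n) helpers added here shift a field value into position and then mask it, so an oversized argument is truncated to its own field instead of spilling into a neighbour. A sketch of composing one register image; reading SPLV/SPLS as PCI Express slot power limit value/scale is an assumption from the names:

    /* Compose one CE upstream register value from its fields. An oversized
     * argument such as CE_URE_SPLV_SET(0x1ff) is silently clipped to the
     * 8-bit field by the mask.
     */
    static u64 ce_ure_example(void)
    {
    	return CE_URE_SPLV_SET(0x7f)	/* bits 14:7, slot power limit value */
    	     | CE_URE_SPLS_SET(1)	/* bits 16:15, slot power limit scale */
    	     | CE_URE_HPC;		/* hot-plug capable */
    }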
diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h
index df7f5f4f3cd..aa3b8ace903 100644
--- a/include/asm-ia64/sn/xpc.h
+++ b/include/asm-ia64/sn/xpc.h
@@ -1227,28 +1227,6 @@ xpc_map_bte_errors(bte_result_t error)
-static inline void *
-xpc_kmalloc_cacheline_aligned(size_t size, gfp_t flags, void **base)
-{
- /* see if kmalloc will give us cachline aligned memory by default */
- *base = kmalloc(size, flags);
- if (*base == NULL) {
- return NULL;
- }
- if ((u64) *base == L1_CACHE_ALIGN((u64) *base)) {
- return *base;
- }
- kfree(*base);
-
- /* nope, we'll have to do it ourselves */
- *base = kmalloc(size + L1_CACHE_BYTES, flags);
- if (*base == NULL) {
- return NULL;
- }
- return (void *) L1_CACHE_ALIGN((u64) *base);
-}
-
-
/*
* Check to see if there is any channel activity to/from the specified
* partition.
diff --git a/include/asm-ia64/system.h b/include/asm-ia64/system.h
index 06253871562..cd4233d66f1 100644
--- a/include/asm-ia64/system.h
+++ b/include/asm-ia64/system.h
@@ -244,6 +244,13 @@ extern void ia64_load_extra (struct task_struct *task);
__ia64_save_fpu((prev)->thread.fph); \
} \
__switch_to(prev, next, last); \
+ /* "next" in old context is "current" in new context */ \
+ if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \
+ (task_cpu(current) != \
+ task_thread_info(current)->last_cpu))) { \
+ platform_migrate(current); \
+ task_thread_info(current)->last_cpu = task_cpu(current); \
+ } \
} while (0)
#else
# define switch_to(prev,next,last) __switch_to(prev, next, last)
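The hook pairs the new IA64_THREAD_MIGRATION flag with the last_cpu field added to thread_info below: a flagged task triggers platform_migrate() only on its first run after moving to a different CPU, then rearms. A compact model of the once-per-CPU-change idiom:

    struct toy_thread {
    	unsigned int flags;
    	int last_cpu;
    };

    #define TOY_MIGRATION 0x1

    static void toy_note_schedule(struct toy_thread *t, int cpu,
    			      void (*migrate_sync)(struct toy_thread *))
    {
    	if ((t->flags & TOY_MIGRATION) && cpu != t->last_cpu) {
    		migrate_sync(t);	/* e.g. SN2 draining in-flight PIO */
    		t->last_cpu = cpu;	/* fire again only on the next move */
    	}
    }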
diff --git a/include/asm-ia64/thread_info.h b/include/asm-ia64/thread_info.h
index 1d6518fe1f0..56394a2c705 100644
--- a/include/asm-ia64/thread_info.h
+++ b/include/asm-ia64/thread_info.h
@@ -26,16 +26,10 @@ struct thread_info {
struct exec_domain *exec_domain;/* execution domain */
__u32 flags; /* thread_info flags (see TIF_*) */
__u32 cpu; /* current CPU */
+ __u32 last_cpu; /* Last CPU thread ran on */
mm_segment_t addr_limit; /* user-level address space limit */
int preempt_count; /* 0=preemptable, <0=BUG; will also serve as bh-counter */
struct restart_block restart_block;
- struct {
- int signo;
- int code;
- void __user *addr;
- unsigned long start_time;
- pid_t pid;
- } sigdelayed; /* Saved information for TIF_SIGDELAYED */
};
#define THREAD_SIZE KERNEL_STACK_SIZE
@@ -89,7 +83,6 @@ struct thread_info {
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
#define TIF_SYSCALL_TRACE 3 /* syscall trace active */
#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
-#define TIF_SIGDELAYED 5 /* signal delayed from MCA/INIT/NMI/PMI context */
#define TIF_POLLING_NRFLAG 16 /* true if poll_idle() is polling TIF_NEED_RESCHED */
#define TIF_MEMDIE 17
#define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */
@@ -101,13 +94,12 @@ struct thread_info {
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_SIGDELAYED (1 << TIF_SIGDELAYED)
#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_MCA_INIT (1 << TIF_MCA_INIT)
#define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED)
/* "work to do on user-return" bits */
-#define TIF_ALLWORK_MASK (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SIGDELAYED)
+#define TIF_ALLWORK_MASK (_TIF_NOTIFY_RESUME|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)
/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */
#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT))
diff --git a/include/asm-powerpc/pgtable.h b/include/asm-powerpc/pgtable.h
index e38931379a7..185ee15963a 100644
--- a/include/asm-powerpc/pgtable.h
+++ b/include/asm-powerpc/pgtable.h
@@ -468,11 +468,6 @@ extern pgd_t swapper_pg_dir[];
extern void paging_init(void);
-#ifdef CONFIG_HUGETLB_PAGE
-#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
- free_pgd_range(tlb, addr, end, floor, ceiling)
-#endif
-
/*
* This gets called at the end of handling a page fault, when
* the kernel has put a new PTE into the page table for the process.
diff --git a/include/asm-s390/pgalloc.h b/include/asm-s390/pgalloc.h
index 3417dd71ab4..e28aaf28e4a 100644
--- a/include/asm-s390/pgalloc.h
+++ b/include/asm-s390/pgalloc.h
@@ -158,11 +158,4 @@ static inline void pte_free(struct page *pte)
#define __pte_free_tlb(tlb,pte) tlb_remove_page(tlb,pte)
-/*
- * This establishes kernel virtual mappings (e.g., as a result of a
- * vmalloc call). Since s390-esame uses a separate kernel page table,
- * there is nothing to do here... :)
- */
-#define set_pgdir(addr,entry) do { } while(0)
-
#endif /* _S390_PGALLOC_H */
diff --git a/include/asm-sh64/pgalloc.h b/include/asm-sh64/pgalloc.h
index 678251ac1db..b29dd468817 100644
--- a/include/asm-sh64/pgalloc.h
+++ b/include/asm-sh64/pgalloc.h
@@ -167,22 +167,6 @@ static __inline__ void pmd_free(pmd_t *pmd)
extern int do_check_pgt_cache(int, int);
-static inline void set_pgdir(unsigned long address, pgd_t entry)
-{
- struct task_struct * p;
- pgd_t *pgd;
-
- read_lock(&tasklist_lock);
- for_each_process(p) {
- if (!p->mm)
- continue;
- *pgd_offset(p->mm,address) = entry;
- }
- read_unlock(&tasklist_lock);
- for (pgd = (pgd_t *)pgd_quicklist; pgd; pgd = (pgd_t *)*(unsigned long *)pgd)
- pgd[address >> PGDIR_SHIFT] = entry;
-}
-
#define pmd_populate_kernel(mm, pmd, pte) \
set_pmd(pmd, __pmd(_PAGE_TABLE + (unsigned long) (pte)))
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 715fd94cf57..a617d364d08 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -273,7 +273,7 @@ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_RW; }
static inline int pte_file(pte_t pte) { return pte_val(pte) & _PAGE_FILE; }
-static inline int pte_huge(pte_t pte) { return (pte_val(pte) & __LARGE_PTE) == __LARGE_PTE; }
+static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_PSE; }
static inline pte_t pte_rdprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
static inline pte_t pte_exprotect(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) & ~_PAGE_USER)); return pte; }
@@ -285,7 +285,7 @@ static inline pte_t pte_mkexec(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _
static inline pte_t pte_mkdirty(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_DIRTY)); return pte; }
static inline pte_t pte_mkyoung(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_ACCESSED)); return pte; }
static inline pte_t pte_mkwrite(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_RW)); return pte; }
-static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | __LARGE_PTE)); return pte; }
+static inline pte_t pte_mkhuge(pte_t pte) { set_pte(&pte, __pte(pte_val(pte) | _PAGE_PSE)); return pte; }
struct vm_area_struct;
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 68d82ad6b17..d6f1019625a 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -20,10 +20,7 @@ void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long)
int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
int hugetlb_report_meminfo(char *);
int hugetlb_report_node_meminfo(int, char *);
-int is_hugepage_mem_enough(size_t);
unsigned long hugetlb_total_pages(void);
-struct page *alloc_huge_page(struct vm_area_struct *, unsigned long);
-void free_huge_page(struct page *);
int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write_access);
@@ -39,18 +36,35 @@ struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
int write);
struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
pmd_t *pmd, int write);
-int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
int pmd_huge(pmd_t pmd);
+void hugetlb_change_protection(struct vm_area_struct *vma,
+ unsigned long address, unsigned long end, pgprot_t newprot);
#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define is_hugepage_only_range(mm, addr, len) 0
-#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
- do { } while (0)
+#endif
+
+#ifndef ARCH_HAS_HUGETLB_FREE_PGD_RANGE
+#define hugetlb_free_pgd_range free_pgd_range
+#else
+void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr,
+ unsigned long end, unsigned long floor,
+ unsigned long ceiling);
#endif
#ifndef ARCH_HAS_PREPARE_HUGEPAGE_RANGE
-#define prepare_hugepage_range(addr, len) \
- is_aligned_hugepage_range(addr, len)
+/*
+ * If the arch doesn't supply something else, assume that regions
+ * aligned to the hugepage size need no further preparation.
+ */
+static inline int prepare_hugepage_range(unsigned long addr, unsigned long len)
+{
+ if (len & ~HPAGE_MASK)
+ return -EINVAL;
+ if (addr & ~HPAGE_MASK)
+ return -EINVAL;
+ return 0;
+}
#else
int prepare_hugepage_range(unsigned long addr, unsigned long len);
#endif
@@ -87,20 +101,17 @@ static inline unsigned long hugetlb_total_pages(void)
#define copy_hugetlb_page_range(src, dst, vma) ({ BUG(); 0; })
#define hugetlb_prefault(mapping, vma) ({ BUG(); 0; })
#define unmap_hugepage_range(vma, start, end) BUG()
-#define is_hugepage_mem_enough(size) 0
#define hugetlb_report_meminfo(buf) 0
#define hugetlb_report_node_meminfo(n, buf) 0
#define follow_huge_pmd(mm, addr, pmd, write) NULL
-#define is_aligned_hugepage_range(addr, len) 0
#define prepare_hugepage_range(addr, len) (-EINVAL)
#define pmd_huge(x) 0
#define is_hugepage_only_range(mm, addr, len) 0
-#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
- do { } while (0)
-#define alloc_huge_page(vma, addr) ({ NULL; })
-#define free_huge_page(p) ({ (void)(p); BUG(); })
+#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; })
+#define hugetlb_change_protection(vma, address, end, newprot)
+
#ifndef HPAGE_MASK
#define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */
#define HPAGE_SIZE PAGE_SIZE
@@ -128,6 +139,8 @@ struct hugetlbfs_sb_info {
struct hugetlbfs_inode_info {
struct shared_policy policy;
+ /* Protected by the (global) hugetlb_lock */
+ unsigned long prereserved_hpages;
struct inode vfs_inode;
};
@@ -144,6 +157,10 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb)
extern struct file_operations hugetlbfs_file_operations;
extern struct vm_operations_struct hugetlb_vm_ops;
struct file *hugetlb_zero_setup(size_t);
+int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info,
+ unsigned long atleast_hpages);
+void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info,
+ unsigned long atmost_hpages);
int hugetlb_get_quota(struct address_space *mapping);
void hugetlb_put_quota(struct address_space *mapping);
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
new file mode 100644
index 00000000000..7d09962c3c0
--- /dev/null
+++ b/include/linux/migrate.h
@@ -0,0 +1,36 @@
+#ifndef _LINUX_MIGRATE_H
+#define _LINUX_MIGRATE_H
+
+#include <linux/config.h>
+#include <linux/mm.h>
+
+#ifdef CONFIG_MIGRATION
+extern int isolate_lru_page(struct page *p, struct list_head *pagelist);
+extern int putback_lru_pages(struct list_head *l);
+extern int migrate_page(struct page *, struct page *);
+extern void migrate_page_copy(struct page *, struct page *);
+extern int migrate_page_remove_references(struct page *, struct page *, int);
+extern int migrate_pages(struct list_head *l, struct list_head *t,
+ struct list_head *moved, struct list_head *failed);
+int migrate_pages_to(struct list_head *pagelist,
+ struct vm_area_struct *vma, int dest);
+extern int fail_migrate_page(struct page *, struct page *);
+
+extern int migrate_prep(void);
+
+#else
+
+static inline int isolate_lru_page(struct page *p, struct list_head *list)
+ { return -ENOSYS; }
+static inline int putback_lru_pages(struct list_head *l) { return 0; }
+static inline int migrate_pages(struct list_head *l, struct list_head *t,
+ struct list_head *moved, struct list_head *failed) { return -ENOSYS; }
+
+static inline int migrate_prep(void) { return -ENOSYS; }
+
+/* Possible settings for the migrate_page() method in address_operations */
+#define migrate_page NULL
+#define fail_migrate_page NULL
+
+#endif /* CONFIG_MIGRATION */
+#endif /* _LINUX_MIGRATE_H */
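A plausible call sequence for the consolidated interface, as a sketch only: the second list argument of migrate_pages() is assumed, from the prototype, to supply the target pages, and error handling is elided.

    static int move_one_page(struct page *page, struct list_head *newlist)
    {
    	LIST_HEAD(pagelist);
    	LIST_HEAD(moved);
    	LIST_HEAD(failed);

    	migrate_prep();			/* drain per-cpu LRU caches */
    	if (isolate_lru_page(page, &pagelist))
    		return -EBUSY;		/* page was not on an LRU list */
    	migrate_pages(&pagelist, newlist, &moved, &failed);
    	return putback_lru_pages(&failed); /* reinsert what could not move */
    }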
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 498ff8778fb..6aa016f1d3a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -286,43 +286,34 @@ struct page {
*
* Also, many kernel routines increase the page count before a critical
* routine so they can be sure the page doesn't go away from under them.
- *
- * Since 2.6.6 (approx), a free page has ->_count = -1. This is so that we
- * can use atomic_add_negative(-1, page->_count) to detect when the page
- * becomes free and so that we can also use atomic_inc_and_test to atomically
- * detect when we just tried to grab a ref on a page which some other CPU has
- * already deemed to be freeable.
- *
- * NO code should make assumptions about this internal detail! Use the provided
- * macros which retain the old rules: page_count(page) == 0 is a free page.
*/
/*
* Drop a ref, return true if the logical refcount fell to zero (the page has
* no users)
*/
-#define put_page_testzero(p) \
- ({ \
- BUG_ON(atomic_read(&(p)->_count) == -1);\
- atomic_add_negative(-1, &(p)->_count); \
- })
+static inline int put_page_testzero(struct page *page)
+{
+ BUG_ON(atomic_read(&page->_count) == 0);
+ return atomic_dec_and_test(&page->_count);
+}
/*
- * Grab a ref, return true if the page previously had a logical refcount of
- * zero. ie: returns true if we just grabbed an already-deemed-to-be-free page
+ * Try to grab a ref unless the page has a refcount of zero, return false if
+ * that is the case.
*/
-#define get_page_testone(p) atomic_inc_and_test(&(p)->_count)
-
-#define set_page_count(p,v) atomic_set(&(p)->_count, (v) - 1)
-#define __put_page(p) atomic_dec(&(p)->_count)
+static inline int get_page_unless_zero(struct page *page)
+{
+ return atomic_inc_not_zero(&page->_count);
+}
extern void FASTCALL(__page_cache_release(struct page *));
static inline int page_count(struct page *page)
{
- if (PageCompound(page))
+ if (unlikely(PageCompound(page)))
page = (struct page *)page_private(page);
- return atomic_read(&page->_count) + 1;
+ return atomic_read(&page->_count);
}
static inline void get_page(struct page *page)
@@ -332,8 +323,19 @@ static inline void get_page(struct page *page)
atomic_inc(&page->_count);
}
+/*
+ * Set up the page count before the page is freed into the page allocator
+ * for the first time (boot or memory hotplug)
+ */
+static inline void init_page_count(struct page *page)
+{
+ atomic_set(&page->_count, 1);
+}
+
void put_page(struct page *page);
+void split_page(struct page *page, unsigned int order);
+
/*
* Multiple processes may "see" the same page. E.g. for untouched
* mappings of /dev/null, all processes see the same page full of
@@ -1046,7 +1048,7 @@ int in_gate_area_no_task(unsigned long addr);
int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *,
void __user *, size_t *, loff_t *);
-int shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
unsigned long lru_pages);
void drop_pagecache(void);
void drop_slab(void);
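Dropping the old biased convention (free page == -1) is what lets get_page_unless_zero() be a single atomic_inc_not_zero(): a reference is taken only while at least one other reference exists, so a page whose count already reached zero is never revived. A user-space model of that primitive in C11 atomics:

    #include <stdatomic.h>

    /* Model of atomic_inc_not_zero(): increment only from a non-zero value. */
    static int get_ref_unless_zero(atomic_int *count)
    {
    	int old = atomic_load(count);

    	while (old != 0) {
    		/* on failure the CAS reloads 'old' and we retry */
    		if (atomic_compare_exchange_weak(count, &old, old + 1))
    			return 1;	/* reference taken */
    	}
    	return 0;			/* already zero: page is being freed */
    }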
diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h
index 8ac854f7f19..3b6723dfaff 100644
--- a/include/linux/mm_inline.h
+++ b/include/linux/mm_inline.h
@@ -32,7 +32,7 @@ del_page_from_lru(struct zone *zone, struct page *page)
{
list_del(&page->lru);
if (PageActive(page)) {
- ClearPageActive(page);
+ __ClearPageActive(page);
zone->nr_active--;
} else {
zone->nr_inactive--;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index d52999c4333..9ea629c02a4 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -86,8 +86,9 @@
* - The __xxx_page_state variants can be used safely when interrupts are
* disabled.
* - The __xxx_page_state variants can be used if the field is only
- * modified from process context, or only modified from interrupt context.
- * In this case, the field should be commented here.
+ * modified from process context and protected from preemption, or only
+ * modified from interrupt context. In this case, the field should be
+ * commented here.
*/
struct page_state {
unsigned long nr_dirty; /* Dirty writeable pages */
@@ -239,22 +240,19 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
#define __ClearPageDirty(page) __clear_bit(PG_dirty, &(page)->flags)
#define TestClearPageDirty(page) test_and_clear_bit(PG_dirty, &(page)->flags)
-#define SetPageLRU(page) set_bit(PG_lru, &(page)->flags)
#define PageLRU(page) test_bit(PG_lru, &(page)->flags)
-#define TestSetPageLRU(page) test_and_set_bit(PG_lru, &(page)->flags)
-#define TestClearPageLRU(page) test_and_clear_bit(PG_lru, &(page)->flags)
+#define SetPageLRU(page) set_bit(PG_lru, &(page)->flags)
+#define ClearPageLRU(page) clear_bit(PG_lru, &(page)->flags)
+#define __ClearPageLRU(page) __clear_bit(PG_lru, &(page)->flags)
#define PageActive(page) test_bit(PG_active, &(page)->flags)
#define SetPageActive(page) set_bit(PG_active, &(page)->flags)
#define ClearPageActive(page) clear_bit(PG_active, &(page)->flags)
-#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags)
-#define TestSetPageActive(page) test_and_set_bit(PG_active, &(page)->flags)
+#define __ClearPageActive(page) __clear_bit(PG_active, &(page)->flags)
#define PageSlab(page) test_bit(PG_slab, &(page)->flags)
-#define SetPageSlab(page) set_bit(PG_slab, &(page)->flags)
-#define ClearPageSlab(page) clear_bit(PG_slab, &(page)->flags)
-#define TestClearPageSlab(page) test_and_clear_bit(PG_slab, &(page)->flags)
-#define TestSetPageSlab(page) test_and_set_bit(PG_slab, &(page)->flags)
+#define __SetPageSlab(page) __set_bit(PG_slab, &(page)->flags)
+#define __ClearPageSlab(page) __clear_bit(PG_slab, &(page)->flags)
#ifdef CONFIG_HIGHMEM
#define PageHighMem(page) is_highmem(page_zone(page))
@@ -329,8 +327,8 @@ extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
#define TestClearPageReclaim(page) test_and_clear_bit(PG_reclaim, &(page)->flags)
#define PageCompound(page) test_bit(PG_compound, &(page)->flags)
-#define SetPageCompound(page) set_bit(PG_compound, &(page)->flags)
-#define ClearPageCompound(page) clear_bit(PG_compound, &(page)->flags)
+#define __SetPageCompound(page) __set_bit(PG_compound, &(page)->flags)
+#define __ClearPageCompound(page) __clear_bit(PG_compound, &(page)->flags)
#ifdef CONFIG_SWAP
#define PageSwapCache(page) test_bit(PG_swapcache, &(page)->flags)
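The replacements such as __SetPageSlab and __ClearPageCompound swap locked bitops for plain read-modify-write ones, which is only safe while no other CPU can reach the page (during initial setup or final teardown). A sketch of the difference; __sync_fetch_and_or is a GCC builtin used here only to model the atomic variant:

    /* Atomic variant: safe against concurrent updates of the same word. */
    static inline void set_bit_atomic(volatile unsigned long *word, int nr)
    {
    	__sync_fetch_and_or(word, 1UL << nr);
    }

    /* Non-atomic variant: a plain RMW that can lose a racing update, so
     * the caller must be the only owner of the flags word. */
    static inline void set_bit_plain(unsigned long *word, int nr)
    {
    	*word |= 1UL << nr;
    }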
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 0b2ba67ff13..b739ac1f7ca 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -11,8 +11,6 @@
#ifndef _LINUX_RTC_H_
#define _LINUX_RTC_H_
-#include <linux/interrupt.h>
-
/*
* The struct used to pass data via the following ioctl. Similar to the
* struct tm in <time.h>, but it needs to be here so that the kernel
@@ -95,6 +93,8 @@ struct rtc_pll_info {
#ifdef __KERNEL__
+#include <linux/interrupt.h>
+
typedef struct rtc_task {
void (*func)(void *private_data);
void *private_data;
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 8cf52939d0a..2b28c849d75 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -38,7 +38,6 @@ typedef struct kmem_cache kmem_cache_t;
#define SLAB_DEBUG_INITIAL 0x00000200UL /* Call constructor (as verifier) */
#define SLAB_RED_ZONE 0x00000400UL /* Red zone objs in a cache */
#define SLAB_POISON 0x00000800UL /* Poison objects */
-#define SLAB_NO_REAP 0x00001000UL /* never reap from the cache */
#define SLAB_HWCACHE_ALIGN 0x00002000UL /* align objs on a h/w cache lines */
#define SLAB_CACHE_DMA 0x00004000UL /* use GFP_DMA memory */
#define SLAB_MUST_HWCACHE_ALIGN 0x00008000UL /* force alignment */
@@ -118,7 +117,7 @@ extern void *kzalloc(size_t, gfp_t);
*/
static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
{
- if (n != 0 && size > INT_MAX / n)
+ if (n != 0 && size > ULONG_MAX / n)
return NULL;
return kzalloc(n * size, flags);
}
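The guard is widened to ULONG_MAX because the product n * size is computed in size_t, an unsigned long here; the test rejects exactly the products that would wrap, since n * size overflows precisely when size > ULONG_MAX / n. Stand-alone:

    #include <limits.h>
    #include <stddef.h>

    /* True when n * size cannot be represented in an unsigned long. */
    static int mul_would_overflow(size_t n, size_t size)
    {
    	return n != 0 && size > ULONG_MAX / n;
    }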
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 44153fdf73f..d699a16b0cb 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -52,23 +52,12 @@ extern void smp_cpus_done(unsigned int max_cpus);
/*
* Call a function on all other processors
*/
-extern int smp_call_function (void (*func) (void *info), void *info,
- int retry, int wait);
+int smp_call_function(void(*func)(void *info), void *info, int retry, int wait);
/*
* Call a function on all processors
*/
-static inline int on_each_cpu(void (*func) (void *info), void *info,
- int retry, int wait)
-{
- int ret = 0;
-
- preempt_disable();
- ret = smp_call_function(func, info, retry, wait);
- func(info);
- preempt_enable();
- return ret;
-}
+int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait);
#define MSG_ALL_BUT_SELF 0x8000 /* Assume <32768 CPU's */
#define MSG_ALL 0x8001
@@ -94,7 +83,13 @@ void smp_prepare_boot_cpu(void);
#define raw_smp_processor_id() 0
#define hard_smp_processor_id() 0
#define smp_call_function(func,info,retry,wait) ({ 0; })
-#define on_each_cpu(func,info,retry,wait) ({ func(info); 0; })
+#define on_each_cpu(func,info,retry,wait) \
+ ({ \
+ local_irq_disable(); \
+ func(info); \
+ local_irq_enable(); \
+ 0; \
+ })
static inline void smp_send_reschedule(int cpu) { }
#define num_booting_cpus() 1
#define smp_prepare_boot_cpu() do {} while (0)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index d572b19afb7..12415dd9445 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -172,9 +172,24 @@ extern int rotate_reclaimable_page(struct page *page);
extern void swap_setup(void);
/* linux/mm/vmscan.c */
-extern int try_to_free_pages(struct zone **, gfp_t);
-extern int shrink_all_memory(int);
+extern unsigned long try_to_free_pages(struct zone **, gfp_t);
+extern unsigned long shrink_all_memory(unsigned long nr_pages);
extern int vm_swappiness;
+extern int remove_mapping(struct address_space *mapping, struct page *page);
+
+/* possible outcome of pageout() */
+typedef enum {
+ /* failed to write page out, page is locked */
+ PAGE_KEEP,
+ /* move page to the active list, page is locked */
+ PAGE_ACTIVATE,
+ /* page has been sent to the disk successfully, page is unlocked */
+ PAGE_SUCCESS,
+ /* page is clean and locked */
+ PAGE_CLEAN,
+} pageout_t;
+
+extern pageout_t pageout(struct page *page, struct address_space *mapping);
#ifdef CONFIG_NUMA
extern int zone_reclaim_mode;
@@ -188,25 +203,6 @@ static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
}
#endif
-#ifdef CONFIG_MIGRATION
-extern int isolate_lru_page(struct page *p);
-extern int putback_lru_pages(struct list_head *l);
-extern int migrate_page(struct page *, struct page *);
-extern void migrate_page_copy(struct page *, struct page *);
-extern int migrate_page_remove_references(struct page *, struct page *, int);
-extern int migrate_pages(struct list_head *l, struct list_head *t,
- struct list_head *moved, struct list_head *failed);
-extern int fail_migrate_page(struct page *, struct page *);
-#else
-static inline int isolate_lru_page(struct page *p) { return -ENOSYS; }
-static inline int putback_lru_pages(struct list_head *l) { return 0; }
-static inline int migrate_pages(struct list_head *l, struct list_head *t,
- struct list_head *moved, struct list_head *failed) { return -ENOSYS; }
-/* Possible settings for the migrate_page() method in address_operations */
-#define migrate_page NULL
-#define fail_migrate_page NULL
-#endif
-
#ifdef CONFIG_MMU
/* linux/mm/shmem.c */
extern int shmem_unuse(swp_entry_t entry, struct page *page);
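Exporting pageout() with an explicit result type lets vmscan and the new migration path share the writeback step; each enumerator tells the caller what state the page is in afterwards. An illustrative dispatch, with placeholder handlers:

    static void reclaim_one(struct page *page, struct address_space *mapping)
    {
    	switch (pageout(page, mapping)) {
    	case PAGE_KEEP:		/* write failed; page is still locked */
    		break;
    	case PAGE_ACTIVATE:	/* not reclaimable now; promote it */
    		move_to_active_list(page);	/* placeholder */
    		break;
    	case PAGE_SUCCESS:	/* I/O submitted; page came back unlocked */
    		break;
    	case PAGE_CLEAN:	/* nothing to write; still locked */
    		try_to_free(page);		/* placeholder */
    		break;
    	}
    }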
diff --git a/kernel/fork.c b/kernel/fork.c
index b373322ca49..9bd7b65ee41 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1534,6 +1534,12 @@ asmlinkage long sys_unshare(unsigned long unshare_flags)
check_unshare_flags(&unshare_flags);
+ /* Return -EINVAL for all unsupported flags */
+ err = -EINVAL;
+ if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
+ CLONE_VM|CLONE_FILES|CLONE_SYSVSEM))
+ goto bad_unshare_out;
+
if ((err = unshare_thread(unshare_flags)))
goto bad_unshare_out;
if ((err = unshare_fs(unshare_flags, &new_fs)))
diff --git a/kernel/sched.c b/kernel/sched.c
index 4d46e90f59c..6b6e0d70eb3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -707,12 +707,6 @@ static int recalc_task_prio(task_t *p, unsigned long long now)
DEF_TIMESLICE);
} else {
/*
- * The lower the sleep avg a task has the more
- * rapidly it will rise with sleep time.
- */
- sleep_time *= (MAX_BONUS - CURRENT_BONUS(p)) ? : 1;
-
- /*
* Tasks waking from uninterruptible sleep are
* limited in their sleep_avg rise as they
* are likely to be waiting on I/O
diff --git a/kernel/softirq.c b/kernel/softirq.c
index ad3295cdded..ec8fed42a86 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -16,6 +16,7 @@
#include <linux/cpu.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
+#include <linux/smp.h>
#include <asm/irq.h>
/*
@@ -495,3 +496,22 @@ __init int spawn_ksoftirqd(void)
register_cpu_notifier(&cpu_nfb);
return 0;
}
+
+#ifdef CONFIG_SMP
+/*
+ * Call a function on all processors
+ */
+int on_each_cpu(void (*func) (void *info), void *info, int retry, int wait)
+{
+ int ret = 0;
+
+ preempt_disable();
+ ret = smp_call_function(func, info, retry, wait);
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ preempt_enable();
+ return ret;
+}
+EXPORT_SYMBOL(on_each_cpu);
+#endif
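Out of line, on_each_cpu() now also runs the local invocation with interrupts disabled, matching the UP stub above so handlers see irq-off context everywhere. A typical caller, as a sketch:

    /* Handler runs on every CPU with interrupts disabled; must not sleep. */
    static void flush_local_state(void *unused)
    {
    	/* per-cpu work goes here */
    }

    static void flush_all_cpus(void)
    {
    	on_each_cpu(flush_local_state, NULL, 1, 1);	/* retry=1, wait=1 */
    }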
diff --git a/lib/string.c b/lib/string.c
index 037a48acedb..b3c28a3f633 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -403,7 +403,6 @@ char *strpbrk(const char *cs, const char *ct)
}
return NULL;
}
-EXPORT_SYMBOL(strpbrk);
#endif
#ifndef __HAVE_ARCH_STRSEP
diff --git a/mm/Kconfig b/mm/Kconfig
index a9cb80ae640..bd80460360d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -137,5 +137,11 @@ config SPLIT_PTLOCK_CPUS
# support for page migration
#
config MIGRATION
+ bool "Page migration"
def_bool y if NUMA || SPARSEMEM || DISCONTIGMEM
depends on SWAP
+ help
+	  Allows the physical location of pages of a process to be changed
+	  while their virtual addresses stay the same. This is useful, for
+	  example, on NUMA systems to move pages nearer to the processors
+	  that access them.
diff --git a/mm/Makefile b/mm/Makefile
index 9aa03fa1dcc..f10c753dce6 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -22,3 +22,5 @@ obj-$(CONFIG_SLOB) += slob.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
+obj-$(CONFIG_MIGRATION) += migrate.o
+
diff --git a/mm/filemap.c b/mm/filemap.c
index 44da3d47699..e8f58f7dd7a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -30,6 +30,8 @@
#include <linux/security.h>
#include <linux/syscalls.h>
#include "filemap.h"
+#include "internal.h"
+
/*
* FIXME: remove all knowledge of the buffer layer from the core VM
*/
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 508707704d2..ebad6bbb350 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -13,24 +13,48 @@
#include <linux/pagemap.h>
#include <linux/mempolicy.h>
#include <linux/cpuset.h>
+#include <linux/mutex.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <linux/hugetlb.h>
+#include "internal.h"
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static unsigned long nr_huge_pages, free_huge_pages;
+static unsigned long nr_huge_pages, free_huge_pages, reserved_huge_pages;
unsigned long max_huge_pages;
static struct list_head hugepage_freelists[MAX_NUMNODES];
static unsigned int nr_huge_pages_node[MAX_NUMNODES];
static unsigned int free_huge_pages_node[MAX_NUMNODES];
-
/*
* Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
*/
static DEFINE_SPINLOCK(hugetlb_lock);
+static void clear_huge_page(struct page *page, unsigned long addr)
+{
+ int i;
+
+ might_sleep();
+ for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); i++) {
+ cond_resched();
+ clear_user_highpage(page + i, addr);
+ }
+}
+
+static void copy_huge_page(struct page *dst, struct page *src,
+ unsigned long addr)
+{
+ int i;
+
+ might_sleep();
+ for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++) {
+ cond_resched();
+ copy_user_highpage(dst + i, src + i, addr + i*PAGE_SIZE);
+ }
+}
+
static void enqueue_huge_page(struct page *page)
{
int nid = page_to_nid(page);
@@ -64,57 +88,176 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
return page;
}
-static struct page *alloc_fresh_huge_page(void)
+static void free_huge_page(struct page *page)
+{
+ BUG_ON(page_count(page));
+
+ INIT_LIST_HEAD(&page->lru);
+
+ spin_lock(&hugetlb_lock);
+ enqueue_huge_page(page);
+ spin_unlock(&hugetlb_lock);
+}
+
+static int alloc_fresh_huge_page(void)
{
static int nid = 0;
struct page *page;
page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP|__GFP_NOWARN,
HUGETLB_PAGE_ORDER);
- nid = (nid + 1) % num_online_nodes();
+ nid = next_node(nid, node_online_map);
+ if (nid == MAX_NUMNODES)
+ nid = first_node(node_online_map);
if (page) {
+ page[1].lru.next = (void *)free_huge_page; /* dtor */
spin_lock(&hugetlb_lock);
nr_huge_pages++;
nr_huge_pages_node[page_to_nid(page)]++;
spin_unlock(&hugetlb_lock);
+ put_page(page); /* free it into the hugepage allocator */
+ return 1;
}
- return page;
+ return 0;
}
-void free_huge_page(struct page *page)
+static struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr)
{
- BUG_ON(page_count(page));
+ struct inode *inode = vma->vm_file->f_dentry->d_inode;
+ struct page *page;
+ int use_reserve = 0;
+ unsigned long idx;
- INIT_LIST_HEAD(&page->lru);
- page[1].lru.next = NULL; /* reset dtor */
+ spin_lock(&hugetlb_lock);
+
+ if (vma->vm_flags & VM_MAYSHARE) {
+
+ /* idx = radix tree index, i.e. offset into file in
+ * HPAGE_SIZE units */
+ idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
+ + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
+
+ /* The hugetlbfs specific inode info stores the number
+ * of "guaranteed available" (huge) pages. That is,
+ * the first 'prereserved_hpages' pages of the inode
+ * are either already instantiated, or have been
+ * pre-reserved (by hugetlb_reserve_for_inode()). Here
+ * we're in the process of instantiating the page, so
+ * we use this to determine whether to draw from the
+ * pre-reserved pool or the truly free pool. */
+ if (idx < HUGETLBFS_I(inode)->prereserved_hpages)
+ use_reserve = 1;
+ }
+
+ if (!use_reserve) {
+ if (free_huge_pages <= reserved_huge_pages)
+ goto fail;
+ } else {
+ BUG_ON(reserved_huge_pages == 0);
+ reserved_huge_pages--;
+ }
+
+ page = dequeue_huge_page(vma, addr);
+ if (!page)
+ goto fail;
+
+ spin_unlock(&hugetlb_lock);
+ set_page_refcounted(page);
+ return page;
+
+ fail:
+ WARN_ON(use_reserve); /* reserved allocations shouldn't fail */
+ spin_unlock(&hugetlb_lock);
+ return NULL;
+}
+
+/* hugetlb_extend_reservation()
+ *
+ * Ensure that at least 'atleast' hugepages are, and will remain,
+ * available to instantiate the first 'atleast' pages of the given
+ * inode. If the inode doesn't already have this many pages reserved
+ * or instantiated, set aside some hugepages in the reserved pool to
+ * satisfy later faults (or fail now if there aren't enough, rather
+ * than getting the SIGBUS later).
+ */
+int hugetlb_extend_reservation(struct hugetlbfs_inode_info *info,
+ unsigned long atleast)
+{
+ struct inode *inode = &info->vfs_inode;
+ unsigned long change_in_reserve = 0;
+ int ret = 0;
spin_lock(&hugetlb_lock);
- enqueue_huge_page(page);
+ read_lock_irq(&inode->i_mapping->tree_lock);
+
+ if (info->prereserved_hpages >= atleast)
+ goto out;
+
+ /* Because we always call this on shared mappings, none of the
+ * pages beyond info->prereserved_hpages can have been
+ * instantiated, so we need to reserve all of them now. */
+ change_in_reserve = atleast - info->prereserved_hpages;
+
+ if ((reserved_huge_pages + change_in_reserve) > free_huge_pages) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ reserved_huge_pages += change_in_reserve;
+ info->prereserved_hpages = atleast;
+
+ out:
+ read_unlock_irq(&inode->i_mapping->tree_lock);
spin_unlock(&hugetlb_lock);
+
+ return ret;
}
-struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
+/* hugetlb_truncate_reservation()
+ *
+ * This returns pages reserved for the given inode to the general free
+ * hugepage pool. If the inode has any pages prereserved, but not
+ * instantiated, beyond offset (atmost << HPAGE_SIZE), then release
+ * them.
+ */
+void hugetlb_truncate_reservation(struct hugetlbfs_inode_info *info,
+ unsigned long atmost)
{
+ struct inode *inode = &info->vfs_inode;
+ struct address_space *mapping = inode->i_mapping;
+ unsigned long idx;
+ unsigned long change_in_reserve = 0;
struct page *page;
- int i;
spin_lock(&hugetlb_lock);
- page = dequeue_huge_page(vma, addr);
- if (!page) {
- spin_unlock(&hugetlb_lock);
- return NULL;
+ read_lock_irq(&inode->i_mapping->tree_lock);
+
+ if (info->prereserved_hpages <= atmost)
+ goto out;
+
+ /* Count pages which were reserved, but not instantiated, and
+ * which we can now release. */
+ for (idx = atmost; idx < info->prereserved_hpages; idx++) {
+ page = radix_tree_lookup(&mapping->page_tree, idx);
+ if (!page)
+ /* Pages which are already instantiated can't
+ * be unreserved (and in fact have already
+ * been removed from the reserved pool) */
+ change_in_reserve++;
}
+
+ BUG_ON(reserved_huge_pages < change_in_reserve);
+ reserved_huge_pages -= change_in_reserve;
+ info->prereserved_hpages = atmost;
+
+ out:
+ read_unlock_irq(&inode->i_mapping->tree_lock);
spin_unlock(&hugetlb_lock);
- set_page_count(page, 1);
- page[1].lru.next = (void *)free_huge_page; /* set dtor */
- for (i = 0; i < (HPAGE_SIZE/PAGE_SIZE); ++i)
- clear_user_highpage(&page[i], addr);
- return page;
}
static int __init hugetlb_init(void)
{
unsigned long i;
- struct page *page;
if (HPAGE_SHIFT == 0)
return 0;
@@ -123,12 +266,8 @@ static int __init hugetlb_init(void)
INIT_LIST_HEAD(&hugepage_freelists[i]);
for (i = 0; i < max_huge_pages; ++i) {
- page = alloc_fresh_huge_page();
- if (!page)
+ if (!alloc_fresh_huge_page())
break;
- spin_lock(&hugetlb_lock);
- enqueue_huge_page(page);
- spin_unlock(&hugetlb_lock);
}
max_huge_pages = free_huge_pages = nr_huge_pages = i;
printk("Total HugeTLB memory allocated, %ld\n", free_huge_pages);
@@ -154,9 +293,9 @@ static void update_and_free_page(struct page *page)
page[i].flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced |
1 << PG_dirty | 1 << PG_active | 1 << PG_reserved |
1 << PG_private | 1<< PG_writeback);
- set_page_count(&page[i], 0);
}
- set_page_count(page, 1);
+ page[1].lru.next = NULL;
+ set_page_refcounted(page);
__free_pages(page, HUGETLB_PAGE_ORDER);
}
@@ -188,12 +327,8 @@ static inline void try_to_free_low(unsigned long count)
static unsigned long set_max_huge_pages(unsigned long count)
{
while (count > nr_huge_pages) {
- struct page *page = alloc_fresh_huge_page();
- if (!page)
+ if (!alloc_fresh_huge_page())
return nr_huge_pages;
- spin_lock(&hugetlb_lock);
- enqueue_huge_page(page);
- spin_unlock(&hugetlb_lock);
}
if (count >= nr_huge_pages)
return nr_huge_pages;
@@ -225,9 +360,11 @@ int hugetlb_report_meminfo(char *buf)
return sprintf(buf,
"HugePages_Total: %5lu\n"
"HugePages_Free: %5lu\n"
+ "HugePages_Rsvd: %5lu\n"
"Hugepagesize: %5lu kB\n",
nr_huge_pages,
free_huge_pages,
+ reserved_huge_pages,
HPAGE_SIZE/1024);
}
@@ -240,11 +377,6 @@ int hugetlb_report_node_meminfo(int nid, char *buf)
nid, free_huge_pages_node[nid]);
}
-int is_hugepage_mem_enough(size_t size)
-{
- return (size + ~HPAGE_MASK)/HPAGE_SIZE <= free_huge_pages;
-}
-
/* Return the number of pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
@@ -374,7 +506,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *ptep, pte_t pte)
{
struct page *old_page, *new_page;
- int i, avoidcopy;
+ int avoidcopy;
old_page = pte_page(pte);
@@ -395,9 +527,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
}
spin_unlock(&mm->page_table_lock);
- for (i = 0; i < HPAGE_SIZE/PAGE_SIZE; i++)
- copy_user_highpage(new_page + i, old_page + i,
- address + i*PAGE_SIZE);
+ copy_huge_page(new_page, old_page, address);
spin_lock(&mm->page_table_lock);
ptep = huge_pte_offset(mm, address & HPAGE_MASK);
@@ -442,6 +572,7 @@ retry:
ret = VM_FAULT_OOM;
goto out;
}
+ clear_huge_page(page, address);
if (vma->vm_flags & VM_SHARED) {
int err;
@@ -496,14 +627,24 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pte_t *ptep;
pte_t entry;
int ret;
+ static DEFINE_MUTEX(hugetlb_instantiation_mutex);
ptep = huge_pte_alloc(mm, address);
if (!ptep)
return VM_FAULT_OOM;
+ /*
+ * Serialize hugepage allocation and instantiation, so that we don't
+ * get spurious allocation failures if two CPUs race to instantiate
+ * the same page in the page cache.
+ */
+ mutex_lock(&hugetlb_instantiation_mutex);
entry = *ptep;
- if (pte_none(entry))
- return hugetlb_no_page(mm, vma, address, ptep, write_access);
+ if (pte_none(entry)) {
+ ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
+ mutex_unlock(&hugetlb_instantiation_mutex);
+ return ret;
+ }
ret = VM_FAULT_MINOR;
@@ -513,6 +654,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (write_access && !pte_write(entry))
ret = hugetlb_cow(mm, vma, address, ptep, entry);
spin_unlock(&mm->page_table_lock);
+ mutex_unlock(&hugetlb_instantiation_mutex);
return ret;
}
@@ -521,10 +663,10 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
struct page **pages, struct vm_area_struct **vmas,
unsigned long *position, int *length, int i)
{
- unsigned long vpfn, vaddr = *position;
+ unsigned long pfn_offset;
+ unsigned long vaddr = *position;
int remainder = *length;
- vpfn = vaddr/PAGE_SIZE;
spin_lock(&mm->page_table_lock);
while (vaddr < vma->vm_end && remainder) {
pte_t *pte;
@@ -552,19 +694,28 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
break;
}
- if (pages) {
- page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
- get_page(page);
- pages[i] = page;
- }
+ pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
+ page = pte_page(*pte);
+same_page:
+ get_page(page);
+ if (pages)
+ pages[i] = page + pfn_offset;
if (vmas)
vmas[i] = vma;
vaddr += PAGE_SIZE;
- ++vpfn;
+ ++pfn_offset;
--remainder;
++i;
+ if (vaddr < vma->vm_end && remainder &&
+ pfn_offset < HPAGE_SIZE/PAGE_SIZE) {
+ /*
+ * We use pfn_offset to avoid touching the pageframes
+ * of this compound page.
+ */
+ goto same_page;
+ }
}
spin_unlock(&mm->page_table_lock);
*length = remainder;
@@ -572,3 +723,32 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
return i;
}
+
+void hugetlb_change_protection(struct vm_area_struct *vma,
+ unsigned long address, unsigned long end, pgprot_t newprot)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long start = address;
+ pte_t *ptep;
+ pte_t pte;
+
+ BUG_ON(address >= end);
+ flush_cache_range(vma, address, end);
+
+ spin_lock(&mm->page_table_lock);
+ for (; address < end; address += HPAGE_SIZE) {
+ ptep = huge_pte_offset(mm, address);
+ if (!ptep)
+ continue;
+ if (!pte_none(*ptep)) {
+ pte = huge_ptep_get_and_clear(mm, address, ptep);
+ pte = pte_mkhuge(pte_modify(pte, newprot));
+ set_huge_pte_at(mm, address, ptep, pte);
+ lazy_mmu_prot_update(pte);
+ }
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ flush_tlb_range(vma, start, end);
+}
+
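The follow_hugetlb_page() rework above derives the sub-page index straight
from the faulting address instead of carrying a separate vpfn counter. A
minimal user-space sketch of the same mask arithmetic, assuming 2 MB huge
pages (HPAGE_SHIFT = 21) over 4 kB base pages; the constants are
illustrative, not taken from the patch:

#include <stdio.h>

#define PAGE_SHIFT  12
#define HPAGE_SHIFT 21                        /* assumed: 2 MB huge pages */
#define HPAGE_SIZE  (1UL << HPAGE_SHIFT)
#define HPAGE_MASK  (~(HPAGE_SIZE - 1))

int main(void)
{
        unsigned long vaddr = 0x40123456UL;
        /* index of the 4 kB sub-page within its huge page */
        unsigned long pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;

        printf("sub-page %lu of %lu\n", pfn_offset, HPAGE_SIZE >> PAGE_SHIFT);
        return 0;                             /* prints "sub-page 291 of 512" */
}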
diff --git a/mm/internal.h b/mm/internal.h
index 17256bb2f4e..d20e3cc4aef 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -8,23 +8,33 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#ifndef __MM_INTERNAL_H
+#define __MM_INTERNAL_H
-static inline void set_page_refs(struct page *page, int order)
+#include <linux/mm.h>
+
+static inline void set_page_count(struct page *page, int v)
+{
+ atomic_set(&page->_count, v);
+}
+
+/*
+ * Turn a non-refcounted page (->_count == 0) into a refcounted page
+ * with a count of one.
+ */
+static inline void set_page_refcounted(struct page *page)
{
-#ifdef CONFIG_MMU
+ BUG_ON(PageCompound(page) && page_private(page) != (unsigned long)page);
+ BUG_ON(atomic_read(&page->_count));
set_page_count(page, 1);
-#else
- int i;
+}
- /*
- * We need to reference all the pages for this order, otherwise if
- * anyone accesses one of the pages with (get/put) it will be freed.
- * - eg: access_process_vm()
- */
- for (i = 0; i < (1 << order); i++)
- set_page_count(page + i, 1);
-#endif /* CONFIG_MMU */
+static inline void __put_page(struct page *page)
+{
+ atomic_dec(&page->_count);
}
extern void fastcall __init __free_pages_bootmem(struct page *page,
unsigned int order);
+
+#endif
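A rough user-space model of the refcount helpers added above; fake_page and
its _count field are stand-ins for struct page, and the real kernel versions
run under the page allocator's locking rules:

#include <assert.h>
#include <stdatomic.h>

struct fake_page { atomic_int _count; };      /* stand-in for struct page */

/* model of set_page_refcounted(): only the 0 -> 1 transition is legal */
static void set_page_refcounted(struct fake_page *page)
{
        assert(atomic_load(&page->_count) == 0);    /* mirrors the BUG_ON */
        atomic_store(&page->_count, 1);
}

/* model of __put_page(): drop a reference, without the freeing path */
static void __put_page(struct fake_page *page)
{
        atomic_fetch_sub(&page->_count, 1);
}

int main(void)
{
        struct fake_page p = { 0 };
        set_page_refcounted(&p);
        __put_page(&p);
        return atomic_load(&p._count);              /* back to 0 */
}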
diff --git a/mm/memory.c b/mm/memory.c
index 85e80a57db2..80c3fb370f9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -277,7 +277,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
anon_vma_unlink(vma);
unlink_file_vma(vma);
- if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
+ if (is_vm_hugetlb_page(vma)) {
hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
floor, next? next->vm_start: ceiling);
} else {
@@ -285,8 +285,7 @@ void free_pgtables(struct mmu_gather **tlb, struct vm_area_struct *vma,
* Optimization: gather nearby vmas into one call down
*/
while (next && next->vm_start <= vma->vm_end + PMD_SIZE
- && !is_hugepage_only_range(vma->vm_mm, next->vm_start,
- HPAGE_SIZE)) {
+ && !is_vm_hugetlb_page(next)) {
vma = next;
next = vma->vm_next;
anon_vma_unlink(vma);
@@ -388,7 +387,7 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
{
unsigned long pfn = pte_pfn(pte);
- if (vma->vm_flags & VM_PFNMAP) {
+ if (unlikely(vma->vm_flags & VM_PFNMAP)) {
unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
if (pfn == vma->vm_pgoff + off)
return NULL;
@@ -396,18 +395,12 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_
return NULL;
}
- /*
- * Add some anal sanity checks for now. Eventually,
- * we should just do "return pfn_to_page(pfn)", but
- * in the meantime we check that we get a valid pfn,
- * and that the resulting page looks ok.
- *
- * Remove this test eventually!
- */
+#ifdef CONFIG_DEBUG_VM
if (unlikely(!pfn_valid(pfn))) {
print_bad_pte(vma, pte, addr);
return NULL;
}
+#endif
/*
* NOTE! We still have PageReserved() pages in the page
@@ -1221,9 +1214,7 @@ out:
* The page has to be a nice clean _individual_ kernel allocation.
* If you allocate a compound page, you need to have marked it as
* such (__GFP_COMP), or manually just split the page up yourself
- * (which is mainly an issue of doing "set_page_count(page, 1)" for
- * each sub-page, and then freeing them one by one when you free
- * them rather than freeing it as a compound page).
+ * (see split_page()).
*
* NOTE! Traditionally this was done with "remap_pfn_range()" which
* took an arbitrary page protection parameter. This doesn't allow
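Both hunks above replace the range-based is_hugepage_only_range() test with
the per-vma predicate. For reference, is_vm_hugetlb_page() amounts to a flag
test along these lines (a sketch following include/linux/hugetlb.h of this
era; the no-hugetlb stub evaluates to 0):

#ifdef CONFIG_HUGETLB_PAGE
static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
{
        return vma->vm_flags & VM_HUGETLB;
}
#else
#define is_vm_hugetlb_page(vma)  0
#endif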
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index b21869a39f0..e93cc740c22 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -86,6 +86,7 @@
#include <linux/swap.h>
#include <linux/seq_file.h>
#include <linux/proc_fs.h>
+#include <linux/migrate.h>
#include <asm/tlbflush.h>
#include <asm/uaccess.h>
@@ -95,11 +96,8 @@
#define MPOL_MF_INVERT (MPOL_MF_INTERNAL << 1) /* Invert check for nodemask */
#define MPOL_MF_STATS (MPOL_MF_INTERNAL << 2) /* Gather statistics */
-/* The number of pages to migrate per call to migrate_pages() */
-#define MIGRATE_CHUNK_SIZE 256
-
-static kmem_cache_t *policy_cache;
-static kmem_cache_t *sn_cache;
+static struct kmem_cache *policy_cache;
+static struct kmem_cache *sn_cache;
#define PDprintk(fmt...)
@@ -331,17 +329,10 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
struct vm_area_struct *first, *vma, *prev;
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
- /* Must have swap device for migration */
- if (nr_swap_pages <= 0)
- return ERR_PTR(-ENODEV);
- /*
- * Clear the LRU lists so pages can be isolated.
- * Note that pages may be moved off the LRU after we have
- * drained them. Those pages will fail to migrate like other
- * pages that may be busy.
- */
- lru_add_drain_all();
+ err = migrate_prep();
+ if (err)
+ return ERR_PTR(err);
}
first = find_vma(mm, start);
@@ -550,92 +541,18 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
return err;
}
+#ifdef CONFIG_MIGRATION
/*
* page migration
*/
-
static void migrate_page_add(struct page *page, struct list_head *pagelist,
unsigned long flags)
{
/*
* Avoid migrating a page that is shared with others.
*/
- if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1) {
- if (isolate_lru_page(page))
- list_add_tail(&page->lru, pagelist);
- }
-}
-
-/*
- * Migrate the list 'pagelist' of pages to a certain destination.
- *
- * Specify destination with either non-NULL vma or dest_node >= 0
- * Return the number of pages not migrated or error code
- */
-static int migrate_pages_to(struct list_head *pagelist,
- struct vm_area_struct *vma, int dest)
-{
- LIST_HEAD(newlist);
- LIST_HEAD(moved);
- LIST_HEAD(failed);
- int err = 0;
- unsigned long offset = 0;
- int nr_pages;
- struct page *page;
- struct list_head *p;
-
-redo:
- nr_pages = 0;
- list_for_each(p, pagelist) {
- if (vma) {
- /*
- * The address passed to alloc_page_vma is used to
- * generate the proper interleave behavior. We fake
- * the address here by an increasing offset in order
- * to get the proper distribution of pages.
- *
- * No decision has been made as to which page
- * a certain old page is moved to so we cannot
- * specify the correct address.
- */
- page = alloc_page_vma(GFP_HIGHUSER, vma,
- offset + vma->vm_start);
- offset += PAGE_SIZE;
- }
- else
- page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
-
- if (!page) {
- err = -ENOMEM;
- goto out;
- }
- list_add_tail(&page->lru, &newlist);
- nr_pages++;
- if (nr_pages > MIGRATE_CHUNK_SIZE)
- break;
- }
- err = migrate_pages(pagelist, &newlist, &moved, &failed);
-
- putback_lru_pages(&moved); /* Call release pages instead ?? */
-
- if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
- goto redo;
-out:
- /* Return leftover allocated pages */
- while (!list_empty(&newlist)) {
- page = list_entry(newlist.next, struct page, lru);
- list_del(&page->lru);
- __free_page(page);
- }
- list_splice(&failed, pagelist);
- if (err < 0)
- return err;
-
- /* Calculate number of leftover pages */
- nr_pages = 0;
- list_for_each(p, pagelist)
- nr_pages++;
- return nr_pages;
+ if ((flags & MPOL_MF_MOVE_ALL) || page_mapcount(page) == 1)
+ isolate_lru_page(page, pagelist);
}
/*
@@ -742,8 +659,23 @@ int do_migrate_pages(struct mm_struct *mm,
if (err < 0)
return err;
return busy;
+
}
+#else
+
+static void migrate_page_add(struct page *page, struct list_head *pagelist,
+ unsigned long flags)
+{
+}
+
+int do_migrate_pages(struct mm_struct *mm,
+ const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
+{
+ return -ENOSYS;
+}
+#endif
+
long do_mbind(unsigned long start, unsigned long len,
unsigned long mode, nodemask_t *nmask, unsigned long flags)
{
@@ -808,6 +740,7 @@ long do_mbind(unsigned long start, unsigned long len,
if (!err && nr_failed && (flags & MPOL_MF_STRICT))
err = -EIO;
}
+
if (!list_empty(&pagelist))
putback_lru_pages(&pagelist);
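From user space, the migration path that do_mbind() now drives is reached
through mbind() with MPOL_MF_MOVE. A hedged example (error checks elided;
requires a kernel with this migration support and libnuma's header for the
mbind() wrapper):

#include <numaif.h>                   /* mbind(); link with -lnuma */
#include <sys/mman.h>

int main(void)
{
        size_t len = 1 << 20;
        void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        unsigned long nodemask = 1UL << 0;    /* bind to node 0 */

        /* rebind the range and ask the kernel to move misplaced pages */
        mbind(buf, len, MPOL_BIND, &nodemask, 8 * sizeof(nodemask),
              MPOL_MF_MOVE);
        munmap(buf, len);
        return 0;
}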
diff --git a/mm/mempool.c b/mm/mempool.c
index 1a99b80480d..f71893ed354 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -278,14 +278,14 @@ EXPORT_SYMBOL(mempool_free);
*/
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
- kmem_cache_t *mem = (kmem_cache_t *) pool_data;
+ struct kmem_cache *mem = pool_data;
return kmem_cache_alloc(mem, gfp_mask);
}
EXPORT_SYMBOL(mempool_alloc_slab);
void mempool_free_slab(void *element, void *pool_data)
{
- kmem_cache_t *mem = (kmem_cache_t *) pool_data;
+ struct kmem_cache *mem = pool_data;
kmem_cache_free(mem, element);
}
EXPORT_SYMBOL(mempool_free_slab);
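mempool_alloc_slab()/mempool_free_slab() are the stock callbacks for
slab-backed pools. Typical usage against this kernel's API looks like the
following sketch; io_unit and io_cache are hypothetical names, not part of
the patch:

#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/slab.h>

struct io_unit { int dummy; };            /* hypothetical object type */
static struct kmem_cache *io_cache;
static mempool_t *io_pool;

static int __init io_pool_init(void)
{
        io_cache = kmem_cache_create("io_unit", sizeof(struct io_unit),
                                     0, 0, NULL, NULL);
        if (!io_cache)
                return -ENOMEM;
        /* keep at least 4 objects allocatable even under memory pressure */
        io_pool = mempool_create(4, mempool_alloc_slab, mempool_free_slab,
                                 io_cache);
        return io_pool ? 0 : -ENOMEM;
}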
diff --git a/mm/migrate.c b/mm/migrate.c
new file mode 100644
index 00000000000..09f6e4aa87f
--- /dev/null
+++ b/mm/migrate.c
@@ -0,0 +1,655 @@
+/*
+ * Memory Migration functionality - linux/mm/migration.c
+ *
+ * Copyright (C) 2006 Silicon Graphics, Inc., Christoph Lameter
+ *
+ * Page migration was first developed in the context of the memory hotplug
+ * project. The main authors of the migration code are:
+ *
+ * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
+ * Hirokazu Takahashi <taka@valinux.co.jp>
+ * Dave Hansen <haveblue@us.ibm.com>
+ * Christoph Lameter <clameter@sgi.com>
+ */
+
+#include <linux/migrate.h>
+#include <linux/module.h>
+#include <linux/swap.h>
+#include <linux/pagemap.h>
+#include <linux/buffer_head.h> /* for try_to_release_page(),
+ buffer_heads_over_limit */
+#include <linux/mm_inline.h>
+#include <linux/pagevec.h>
+#include <linux/rmap.h>
+#include <linux/topology.h>
+#include <linux/cpu.h>
+#include <linux/cpuset.h>
+#include <linux/swapops.h>
+
+#include "internal.h"
+
+/* The maximum number of pages to take off the LRU for migration */
+#define MIGRATE_CHUNK_SIZE 256
+
+#define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
+
+/*
+ * Isolate one page from the LRU lists. If successful put it onto
+ * the indicated list with elevated page count.
+ *
+ * Result:
+ * -EBUSY: page not on LRU list
+ * 0: page removed from LRU list and added to the specified list.
+ */
+int isolate_lru_page(struct page *page, struct list_head *pagelist)
+{
+ int ret = -EBUSY;
+
+ if (PageLRU(page)) {
+ struct zone *zone = page_zone(page);
+
+ spin_lock_irq(&zone->lru_lock);
+ if (PageLRU(page)) {
+ ret = 0;
+ get_page(page);
+ ClearPageLRU(page);
+ if (PageActive(page))
+ del_page_from_active_list(zone, page);
+ else
+ del_page_from_inactive_list(zone, page);
+ list_add_tail(&page->lru, pagelist);
+ }
+ spin_unlock_irq(&zone->lru_lock);
+ }
+ return ret;
+}
+
+/*
+ * migrate_prep() needs to be called before we start compiling a list of
+ * pages to be migrated using isolate_lru_page(), and before we begin a
+ * series of calls to migrate_pages().
+ */
+int migrate_prep(void)
+{
+ /* Must have swap device for migration */
+ if (nr_swap_pages <= 0)
+ return -ENODEV;
+
+ /*
+ * Clear the LRU lists so pages can be isolated.
+ * Note that pages may be moved off the LRU after we have
+ * drained them. Those pages will fail to migrate like other
+ * pages that may be busy.
+ */
+ lru_add_drain_all();
+
+ return 0;
+}
+
+static inline void move_to_lru(struct page *page)
+{
+ list_del(&page->lru);
+ if (PageActive(page)) {
+ /*
+ * lru_cache_add_active checks that
+ * the PG_active bit is off.
+ */
+ ClearPageActive(page);
+ lru_cache_add_active(page);
+ } else {
+ lru_cache_add(page);
+ }
+ put_page(page);
+}
+
+/*
+ * Add isolated pages on the list back to the LRU.
+ *
+ * returns the number of pages put back.
+ */
+int putback_lru_pages(struct list_head *l)
+{
+ struct page *page;
+ struct page *page2;
+ int count = 0;
+
+ list_for_each_entry_safe(page, page2, l, lru) {
+ move_to_lru(page);
+ count++;
+ }
+ return count;
+}
+
+/*
+ * Non migratable page
+ */
+int fail_migrate_page(struct page *newpage, struct page *page)
+{
+ return -EIO;
+}
+EXPORT_SYMBOL(fail_migrate_page);
+
+/*
+ * swapout a single page
+ * page is locked upon entry, unlocked on exit
+ */
+static int swap_page(struct page *page)
+{
+ struct address_space *mapping = page_mapping(page);
+
+ if (page_mapped(page) && mapping)
+ if (try_to_unmap(page, 1) != SWAP_SUCCESS)
+ goto unlock_retry;
+
+ if (PageDirty(page)) {
+ /* Page is dirty, try to write it out here */
+ switch(pageout(page, mapping)) {
+ case PAGE_KEEP:
+ case PAGE_ACTIVATE:
+ goto unlock_retry;
+
+ case PAGE_SUCCESS:
+ goto retry;
+
+ case PAGE_CLEAN:
+ ; /* try to free the page below */
+ }
+ }
+
+ if (PagePrivate(page)) {
+ if (!try_to_release_page(page, GFP_KERNEL) ||
+ (!mapping && page_count(page) == 1))
+ goto unlock_retry;
+ }
+
+ if (remove_mapping(mapping, page)) {
+ /* Success */
+ unlock_page(page);
+ return 0;
+ }
+
+unlock_retry:
+ unlock_page(page);
+
+retry:
+ return -EAGAIN;
+}
+EXPORT_SYMBOL(swap_page);
+
+/*
+ * Remove references for a page and establish the new page with the correct
+ * basic settings to be able to stop accesses to the page.
+ */
+int migrate_page_remove_references(struct page *newpage,
+ struct page *page, int nr_refs)
+{
+ struct address_space *mapping = page_mapping(page);
+ struct page **radix_pointer;
+
+ /*
+ * Avoid doing any of the following work if the page count
+ * indicates that the page is in use or truncate has removed
+ * the page.
+ */
+ if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
+ return -EAGAIN;
+
+ /*
+ * Establish swap ptes for anonymous pages or destroy pte
+ * maps for files.
+ *
+ * In order to reestablish file backed mappings the fault handlers
+ * will take the radix tree_lock which may then be used to stop
+ * processes from accessing this page until the new page is ready.
+ *
+ * A process accessing via a swap pte (an anonymous page) will take a
+ * page_lock on the old page which will block the process until the
+ * migration attempt is complete. At that time the PageSwapCache bit
+ * will be examined. If the page was migrated then the PageSwapCache
+ * bit will be clear and the operation to retrieve the page will be
+ * retried which will find the new page in the radix tree. Then a new
+ * direct mapping may be generated based on the radix tree contents.
+ *
+ * If the page was not migrated then the PageSwapCache bit
+ * is still set and the operation may continue.
+ */
+ if (try_to_unmap(page, 1) == SWAP_FAIL)
+ /* A vma has VM_LOCKED set -> permanent failure */
+ return -EPERM;
+
+ /*
+ * Give up if we were unable to remove all mappings.
+ */
+ if (page_mapcount(page))
+ return -EAGAIN;
+
+ write_lock_irq(&mapping->tree_lock);
+
+ radix_pointer = (struct page **)radix_tree_lookup_slot(
+ &mapping->page_tree,
+ page_index(page));
+
+ if (!page_mapping(page) || page_count(page) != nr_refs ||
+ *radix_pointer != page) {
+ write_unlock_irq(&mapping->tree_lock);
+ return 1;
+ }
+
+ /*
+ * Now we know that no one else is looking at the page.
+ *
+ * Certain minimal information about a page must be available
+ * in order for other subsystems to properly handle the page if they
+ * find it through the radix tree update before we are finished
+ * copying the page.
+ */
+ get_page(newpage);
+ newpage->index = page->index;
+ newpage->mapping = page->mapping;
+ if (PageSwapCache(page)) {
+ SetPageSwapCache(newpage);
+ set_page_private(newpage, page_private(page));
+ }
+
+ *radix_pointer = newpage;
+ __put_page(page);
+ write_unlock_irq(&mapping->tree_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL(migrate_page_remove_references);
+
+/*
+ * Copy the page to its new location
+ */
+void migrate_page_copy(struct page *newpage, struct page *page)
+{
+ copy_highpage(newpage, page);
+
+ if (PageError(page))
+ SetPageError(newpage);
+ if (PageReferenced(page))
+ SetPageReferenced(newpage);
+ if (PageUptodate(page))
+ SetPageUptodate(newpage);
+ if (PageActive(page))
+ SetPageActive(newpage);
+ if (PageChecked(page))
+ SetPageChecked(newpage);
+ if (PageMappedToDisk(page))
+ SetPageMappedToDisk(newpage);
+
+ if (PageDirty(page)) {
+ clear_page_dirty_for_io(page);
+ set_page_dirty(newpage);
+ }
+
+ ClearPageSwapCache(page);
+ ClearPageActive(page);
+ ClearPagePrivate(page);
+ set_page_private(page, 0);
+ page->mapping = NULL;
+
+ /*
+ * If any waiters have accumulated on the new page then
+ * wake them up.
+ */
+ if (PageWriteback(newpage))
+ end_page_writeback(newpage);
+}
+EXPORT_SYMBOL(migrate_page_copy);
+
+/*
+ * Common logic to directly migrate a single page suitable for
+ * pages that do not use PagePrivate.
+ *
+ * Pages are locked upon entry and exit.
+ */
+int migrate_page(struct page *newpage, struct page *page)
+{
+ int rc;
+
+ BUG_ON(PageWriteback(page)); /* Writeback must be complete */
+
+ rc = migrate_page_remove_references(newpage, page, 2);
+
+ if (rc)
+ return rc;
+
+ migrate_page_copy(newpage, page);
+
+ /*
+ * Remove auxiliary swap entries and replace
+ * them with real ptes.
+ *
+ * Note that a real pte entry will allow processes that are not
+ * waiting on the page lock to use the new page via the page tables
+ * before the new page is unlocked.
+ */
+ remove_from_swap(newpage);
+ return 0;
+}
+EXPORT_SYMBOL(migrate_page);
+
+/*
+ * migrate_pages
+ *
+ * Two lists are passed to this function. The first list
+ * contains the pages isolated from the LRU to be migrated.
+ * The second list contains new pages that the pages isolated
+ * can be moved to. If the second list is NULL then all
+ * pages are swapped out.
+ *
+ * The function returns after 10 attempts or when no pages
+ * are movable any more because the 'to' list has become empty
+ * or because no retryable pages exist any more.
+ *
+ * Return: Number of pages not migrated when "to" ran empty.
+ */
+int migrate_pages(struct list_head *from, struct list_head *to,
+ struct list_head *moved, struct list_head *failed)
+{
+ int retry;
+ int nr_failed = 0;
+ int pass = 0;
+ struct page *page;
+ struct page *page2;
+ int swapwrite = current->flags & PF_SWAPWRITE;
+ int rc;
+
+ if (!swapwrite)
+ current->flags |= PF_SWAPWRITE;
+
+redo:
+ retry = 0;
+
+ list_for_each_entry_safe(page, page2, from, lru) {
+ struct page *newpage = NULL;
+ struct address_space *mapping;
+
+ cond_resched();
+
+ rc = 0;
+ if (page_count(page) == 1)
+ /* page was freed from under us. So we are done. */
+ goto next;
+
+ if (to && list_empty(to))
+ break;
+
+ /*
+ * Skip locked pages during the first two passes to give the
+ * functions holding the lock time to release the page. Later we
+ * use lock_page() to have a higher chance of acquiring the
+ * lock.
+ */
+ rc = -EAGAIN;
+ if (pass > 2)
+ lock_page(page);
+ else
+ if (TestSetPageLocked(page))
+ goto next;
+
+ /*
+ * Only wait on writeback if we have already done a pass where
+ * we may have triggered writeouts for lots of pages.
+ */
+ if (pass > 0) {
+ wait_on_page_writeback(page);
+ } else {
+ if (PageWriteback(page))
+ goto unlock_page;
+ }
+
+ /*
+ * Anonymous pages must have swap cache references otherwise
+ * the information contained in the page maps cannot be
+ * preserved.
+ */
+ if (PageAnon(page) && !PageSwapCache(page)) {
+ if (!add_to_swap(page, GFP_KERNEL)) {
+ rc = -ENOMEM;
+ goto unlock_page;
+ }
+ }
+
+ if (!to) {
+ rc = swap_page(page);
+ goto next;
+ }
+
+ newpage = lru_to_page(to);
+ lock_page(newpage);
+
+ /*
+ * Pages are properly locked and writeback is complete.
+ * Try to migrate the page.
+ */
+ mapping = page_mapping(page);
+ if (!mapping)
+ goto unlock_both;
+
+ if (mapping->a_ops->migratepage) {
+ /*
+ * Most pages have a mapping and most filesystems
+ * should provide a migration function. Anonymous
+ * pages are part of swap space which also has its
+ * own migration function. This is the most common
+ * path for page migration.
+ */
+ rc = mapping->a_ops->migratepage(newpage, page);
+ goto unlock_both;
+ }
+
+ /*
+ * Default handling if a filesystem does not provide
+ * a migration function. We can only migrate clean
+ * pages so try to write out any dirty pages first.
+ */
+ if (PageDirty(page)) {
+ switch (pageout(page, mapping)) {
+ case PAGE_KEEP:
+ case PAGE_ACTIVATE:
+ goto unlock_both;
+
+ case PAGE_SUCCESS:
+ unlock_page(newpage);
+ goto next;
+
+ case PAGE_CLEAN:
+ ; /* try to migrate the page below */
+ }
+ }
+
+ /*
+ * Buffers are managed in a filesystem specific way.
+ * We must have no buffers or drop them.
+ */
+ if (!page_has_buffers(page) ||
+ try_to_release_page(page, GFP_KERNEL)) {
+ rc = migrate_page(newpage, page);
+ goto unlock_both;
+ }
+
+ /*
+ * On early passes with mapped pages simply
+ * retry. There may be a lock held for some
+ * buffers that may go away. Later
+ * swap them out.
+ */
+ if (pass > 4) {
+ /*
+ * Persistently unable to drop buffers. As a
+ * measure of last resort we fall back to
+ * swap_page().
+ */
+ unlock_page(newpage);
+ newpage = NULL;
+ rc = swap_page(page);
+ goto next;
+ }
+
+unlock_both:
+ unlock_page(newpage);
+
+unlock_page:
+ unlock_page(page);
+
+next:
+ if (rc == -EAGAIN) {
+ retry++;
+ } else if (rc) {
+ /* Permanent failure */
+ list_move(&page->lru, failed);
+ nr_failed++;
+ } else {
+ if (newpage) {
+ /* Successful migration. Return page to LRU */
+ move_to_lru(newpage);
+ }
+ list_move(&page->lru, moved);
+ }
+ }
+ if (retry && pass++ < 10)
+ goto redo;
+
+ if (!swapwrite)
+ current->flags &= ~PF_SWAPWRITE;
+
+ return nr_failed + retry;
+}
+
+/*
+ * Migration function for pages with buffers. This function can only be used
+ * if the underlying filesystem guarantees that no other references to "page"
+ * exist.
+ */
+int buffer_migrate_page(struct page *newpage, struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct buffer_head *bh, *head;
+ int rc;
+
+ if (!mapping)
+ return -EAGAIN;
+
+ if (!page_has_buffers(page))
+ return migrate_page(newpage, page);
+
+ head = page_buffers(page);
+
+ rc = migrate_page_remove_references(newpage, page, 3);
+
+ if (rc)
+ return rc;
+
+ bh = head;
+ do {
+ get_bh(bh);
+ lock_buffer(bh);
+ bh = bh->b_this_page;
+
+ } while (bh != head);
+
+ ClearPagePrivate(page);
+ set_page_private(newpage, page_private(page));
+ set_page_private(page, 0);
+ put_page(page);
+ get_page(newpage);
+
+ bh = head;
+ do {
+ set_bh_page(bh, newpage, bh_offset(bh));
+ bh = bh->b_this_page;
+
+ } while (bh != head);
+
+ SetPagePrivate(newpage);
+
+ migrate_page_copy(newpage, page);
+
+ bh = head;
+ do {
+ unlock_buffer(bh);
+ put_bh(bh);
+ bh = bh->b_this_page;
+
+ } while (bh != head);
+
+ return 0;
+}
+EXPORT_SYMBOL(buffer_migrate_page);
+
+/*
+ * Migrate the list 'pagelist' of pages to a certain destination.
+ *
+ * Specify the destination with either a non-NULL vma or dest >= 0
+ * Return the number of pages not migrated or an error code
+ */
+int migrate_pages_to(struct list_head *pagelist,
+ struct vm_area_struct *vma, int dest)
+{
+ LIST_HEAD(newlist);
+ LIST_HEAD(moved);
+ LIST_HEAD(failed);
+ int err = 0;
+ unsigned long offset = 0;
+ int nr_pages;
+ struct page *page;
+ struct list_head *p;
+
+redo:
+ nr_pages = 0;
+ list_for_each(p, pagelist) {
+ if (vma) {
+ /*
+ * The address passed to alloc_page_vma is used to
+ * generate the proper interleave behavior. We fake
+ * the address here by an increasing offset in order
+ * to get the proper distribution of pages.
+ *
+ * No decision has been made as to which page
+ * a certain old page is moved to so we cannot
+ * specify the correct address.
+ */
+ page = alloc_page_vma(GFP_HIGHUSER, vma,
+ offset + vma->vm_start);
+ offset += PAGE_SIZE;
+ }
+ else
+ page = alloc_pages_node(dest, GFP_HIGHUSER, 0);
+
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+ list_add_tail(&page->lru, &newlist);
+ nr_pages++;
+ if (nr_pages > MIGRATE_CHUNK_SIZE)
+ break;
+ }
+ err = migrate_pages(pagelist, &newlist, &moved, &failed);
+
+ putback_lru_pages(&moved); /* Call release pages instead ?? */
+
+ if (err >= 0 && list_empty(&newlist) && !list_empty(pagelist))
+ goto redo;
+out:
+ /* Return leftover allocated pages */
+ while (!list_empty(&newlist)) {
+ page = list_entry(newlist.next, struct page, lru);
+ list_del(&page->lru);
+ __free_page(page);
+ }
+ list_splice(&failed, pagelist);
+ if (err < 0)
+ return err;
+
+ /* Calculate number of leftover pages */
+ nr_pages = 0;
+ list_for_each(p, pagelist)
+ nr_pages++;
+ return nr_pages;
+}
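Tying the new file's entry points together: callers prepare, isolate,
migrate, then put stragglers back. A condensed kernel-side sketch of that
sequence (move_one_page and target_node are illustrative, not part of the
patch):

#include <linux/migrate.h>
#include <linux/mm.h>

static int move_one_page(struct page *page, int target_node)
{
        LIST_HEAD(pagelist);
        int err;

        err = migrate_prep();            /* needs swap; drains LRU pagevecs */
        if (err)
                return err;

        if (isolate_lru_page(page, &pagelist))
                return -EBUSY;           /* page was not on an LRU list */

        /* allocates destination pages internally, migrates in chunks */
        err = migrate_pages_to(&pagelist, NULL, target_node);

        putback_lru_pages(&pagelist);    /* return any stragglers to the LRU */
        return err;
}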
diff --git a/mm/mmap.c b/mm/mmap.c
index 47556d2b3e9..0eb9894db6d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -612,7 +612,7 @@ again: remove_next = 1 + (end > next->vm_end);
* If the vma has a ->close operation then the driver probably needs to release
* per-vma resources, so we don't attempt to merge those.
*/
-#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags)
@@ -845,14 +845,6 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags,
const unsigned long stack_flags
= VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
-#ifdef CONFIG_HUGETLB
- if (flags & VM_HUGETLB) {
- if (!(flags & VM_DONTCOPY))
- mm->shared_vm += pages;
- return;
- }
-#endif /* CONFIG_HUGETLB */
-
if (file) {
mm->shared_vm += pages;
if ((flags & (VM_EXEC|VM_WRITE)) == VM_EXEC)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 653b8571c1e..4c14d4289b6 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -124,7 +124,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
* a MAP_NORESERVE private mapping to writable will now reserve.
*/
if (newflags & VM_WRITE) {
- if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
+ if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
charged = nrpages;
if (security_vm_enough_memory(charged))
return -ENOMEM;
@@ -166,7 +166,10 @@ success:
*/
vma->vm_flags = newflags;
vma->vm_page_prot = newprot;
- change_protection(vma, start, end, newprot);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_change_protection(vma, start, end, newprot);
+ else
+ change_protection(vma, start, end, newprot);
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
vm_stat_account(mm, newflags, vma->vm_file, nrpages);
return 0;
@@ -240,11 +243,6 @@ sys_mprotect(unsigned long start, size_t len, unsigned long prot)
/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
- if (is_vm_hugetlb_page(vma)) {
- error = -EACCES;
- goto out;
- }
-
newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));
/* newflags >> 4 shift VM_MAY% in place of VM_% */
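With the -EACCES special case gone, mprotect() on a hugetlb VMA is routed to
hugetlb_change_protection() above. A user-space illustration; the mount
point /mnt/huge and the 2 MB huge page size are assumptions, and error
checks are elided:

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

#define LEN (2UL * 1024 * 1024)          /* assumed huge page size */

int main(void)
{
        int fd = open("/mnt/huge/file", O_CREAT | O_RDWR, 0600);
        void *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

        /* used to fail with EACCES on hugetlb mappings; now succeeds */
        int rc = mprotect(p, LEN, PROT_READ);

        munmap(p, LEN);
        close(fd);
        return rc;
}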
diff --git a/mm/nommu.c b/mm/nommu.c
index 4951f4786f2..db45efac17c 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -159,7 +159,7 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
/*
* kmalloc doesn't like __GFP_HIGHMEM for some reason
*/
- return kmalloc(size, gfp_mask & ~__GFP_HIGHMEM);
+ return kmalloc(size, (gfp_mask | __GFP_COMP) & ~__GFP_HIGHMEM);
}
struct page * vmalloc_to_page(void *addr)
@@ -623,7 +623,7 @@ static int do_mmap_private(struct vm_area_struct *vma, unsigned long len)
* - note that this may not return a page-aligned address if the object
* we're allocating is smaller than a page
*/
- base = kmalloc(len, GFP_KERNEL);
+ base = kmalloc(len, GFP_KERNEL|__GFP_COMP);
if (!base)
goto enomem;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 234bd4895d1..b7f14a4799a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -55,7 +55,6 @@ unsigned long totalhigh_pages __read_mostly;
long nr_swap_pages;
int percpu_pagelist_fraction;
-static void fastcall free_hot_cold_page(struct page *page, int cold);
static void __free_pages_ok(struct page *page, unsigned int order);
/*
@@ -190,7 +189,7 @@ static void prep_compound_page(struct page *page, unsigned long order)
for (i = 0; i < nr_pages; i++) {
struct page *p = page + i;
- SetPageCompound(p);
+ __SetPageCompound(p);
set_page_private(p, (unsigned long)page);
}
}
@@ -209,10 +208,24 @@ static void destroy_compound_page(struct page *page, unsigned long order)
if (unlikely(!PageCompound(p) |
(page_private(p) != (unsigned long)page)))
bad_page(page);
- ClearPageCompound(p);
+ __ClearPageCompound(p);
}
}
+static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+{
+ int i;
+
+ BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
+ /*
+ * clear_highpage() will use KM_USER0, so it's a bug to use __GFP_ZERO
+ * and __GFP_HIGHMEM from hard or soft interrupt context.
+ */
+ BUG_ON((gfp_flags & __GFP_HIGHMEM) && in_interrupt());
+ for (i = 0; i < (1 << order); i++)
+ clear_highpage(page + i);
+}
+
/*
* function for dealing with page's order in buddy system.
* zone->lock is already acquired when we use these.
@@ -423,11 +436,6 @@ static void __free_pages_ok(struct page *page, unsigned int order)
mutex_debug_check_no_locks_freed(page_address(page),
PAGE_SIZE<<order);
-#ifndef CONFIG_MMU
- for (i = 1 ; i < (1 << order) ; ++i)
- __put_page(page + i);
-#endif
-
for (i = 0 ; i < (1 << order) ; ++i)
reserved += free_pages_check(page + i);
if (reserved)
@@ -448,28 +456,23 @@ void fastcall __init __free_pages_bootmem(struct page *page, unsigned int order)
if (order == 0) {
__ClearPageReserved(page);
set_page_count(page, 0);
-
- free_hot_cold_page(page, 0);
+ set_page_refcounted(page);
+ __free_page(page);
} else {
- LIST_HEAD(list);
int loop;
+ prefetchw(page);
for (loop = 0; loop < BITS_PER_LONG; loop++) {
struct page *p = &page[loop];
- if (loop + 16 < BITS_PER_LONG)
- prefetchw(p + 16);
+ if (loop + 1 < BITS_PER_LONG)
+ prefetchw(p + 1);
__ClearPageReserved(p);
set_page_count(p, 0);
}
- arch_free_page(page, order);
-
- mod_page_state(pgfree, 1 << order);
-
- list_add(&page->lru, &list);
- kernel_map_pages(page, 1 << order, 0);
- free_pages_bulk(page_zone(page), 1, &list, order);
+ set_page_refcounted(page);
+ __free_pages(page, order);
}
}
@@ -507,7 +510,7 @@ static inline void expand(struct zone *zone, struct page *page,
/*
* This page is about to be returned from the page allocator
*/
-static int prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
{
if (unlikely(page_mapcount(page) |
(page->mapping != NULL) |
@@ -536,8 +539,15 @@ static int prep_new_page(struct page *page, int order)
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked | 1 << PG_mappedtodisk);
set_page_private(page, 0);
- set_page_refs(page, order);
+ set_page_refcounted(page);
kernel_map_pages(page, 1 << order, 1);
+
+ if (gfp_flags & __GFP_ZERO)
+ prep_zero_page(page, order, gfp_flags);
+
+ if (order && (gfp_flags & __GFP_COMP))
+ prep_compound_page(page, order);
+
return 0;
}
@@ -593,13 +603,14 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
/*
* Called from the slab reaper to drain pagesets on a particular node that
* belong to the currently executing processor.
+ * Note that this function must be called with the thread pinned to
+ * a single processor.
*/
void drain_node_pages(int nodeid)
{
int i, z;
unsigned long flags;
- local_irq_save(flags);
for (z = 0; z < MAX_NR_ZONES; z++) {
struct zone *zone = NODE_DATA(nodeid)->node_zones + z;
struct per_cpu_pageset *pset;
@@ -609,11 +620,14 @@ void drain_node_pages(int nodeid)
struct per_cpu_pages *pcp;
pcp = &pset->pcp[i];
- free_pages_bulk(zone, pcp->count, &pcp->list, 0);
- pcp->count = 0;
+ if (pcp->count) {
+ local_irq_save(flags);
+ free_pages_bulk(zone, pcp->count, &pcp->list, 0);
+ pcp->count = 0;
+ local_irq_restore(flags);
+ }
}
}
- local_irq_restore(flags);
}
#endif
@@ -743,13 +757,22 @@ void fastcall free_cold_page(struct page *page)
free_hot_cold_page(page, 1);
}
-static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
+/*
+ * split_page takes a non-compound higher-order page, and splits it into
+ * n (1<<order) sub-pages: page[0..n]
+ * Each sub-page must be freed individually.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+void split_page(struct page *page, unsigned int order)
{
int i;
- BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM);
- for(i = 0; i < (1 << order); i++)
- clear_highpage(page + i);
+ BUG_ON(PageCompound(page));
+ BUG_ON(!page_count(page));
+ for (i = 1; i < (1 << order); i++)
+ set_page_refcounted(page + i);
}
/*
@@ -795,14 +818,8 @@ again:
put_cpu();
BUG_ON(bad_range(zone, page));
- if (prep_new_page(page, order))
+ if (prep_new_page(page, order, gfp_flags))
goto again;
-
- if (gfp_flags & __GFP_ZERO)
- prep_zero_page(page, order, gfp_flags);
-
- if (order && (gfp_flags & __GFP_COMP))
- prep_compound_page(page, order);
return page;
failed:
@@ -1214,24 +1231,22 @@ DEFINE_PER_CPU(long, nr_pagecache_local) = 0;
static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
{
- int cpu = 0;
+ unsigned cpu;
memset(ret, 0, nr * sizeof(unsigned long));
cpus_and(*cpumask, *cpumask, cpu_online_map);
- cpu = first_cpu(*cpumask);
- while (cpu < NR_CPUS) {
- unsigned long *in, *out, off;
-
- if (!cpu_isset(cpu, *cpumask))
- continue;
+ for_each_cpu_mask(cpu, *cpumask) {
+ unsigned long *in;
+ unsigned long *out;
+ unsigned off;
+ unsigned next_cpu;
in = (unsigned long *)&per_cpu(page_states, cpu);
- cpu = next_cpu(cpu, *cpumask);
-
- if (likely(cpu < NR_CPUS))
- prefetch(&per_cpu(page_states, cpu));
+ next_cpu = next_cpu(cpu, *cpumask);
+ if (likely(next_cpu < NR_CPUS))
+ prefetch(&per_cpu(page_states, next_cpu));
out = (unsigned long *)ret;
for (off = 0; off < nr; off++)
@@ -1764,7 +1779,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
continue;
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
- set_page_count(page, 1);
+ init_page_count(page);
reset_page_mapcount(page);
SetPageReserved(page);
INIT_LIST_HEAD(&page->lru);
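split_page() gives each sub-page of a higher-order, non-compound allocation
its own reference count so they can be freed one by one. A sketch of the
driver-side pattern the comment alludes to (alloc_split_pages is a
hypothetical caller):

#include <linux/gfp.h>
#include <linux/mm.h>

static struct page *alloc_split_pages(void)
{
        unsigned int order = 2;          /* four contiguous pages */
        struct page *page = alloc_pages(GFP_KERNEL, order);

        if (!page)
                return NULL;
        /* give each of the 1 << order sub-pages its own refcount */
        split_page(page, order);
        /* any sub-page may now be freed independently ... */
        __free_page(page + 3);
        /* ... while page, page + 1 and page + 2 stay in use */
        return page;
}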
diff --git a/mm/readahead.c b/mm/readahead.c
index 8d6eeaaa629..301b36c4a0c 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -52,13 +52,24 @@ static inline unsigned long get_min_readahead(struct file_ra_state *ra)
return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
}
+static inline void reset_ahead_window(struct file_ra_state *ra)
+{
+ /*
+ * ... but preserve ahead_start + ahead_size value,
+ * see 'recheck:' label in page_cache_readahead().
+ * Note: We never use ->ahead_size as rvalue without
+ * checking ->ahead_start != 0 first.
+ */
+ ra->ahead_size += ra->ahead_start;
+ ra->ahead_start = 0;
+}
+
static inline void ra_off(struct file_ra_state *ra)
{
ra->start = 0;
ra->flags = 0;
ra->size = 0;
- ra->ahead_start = 0;
- ra->ahead_size = 0;
+ reset_ahead_window(ra);
return;
}
@@ -72,10 +83,10 @@ static unsigned long get_init_ra_size(unsigned long size, unsigned long max)
{
unsigned long newsize = roundup_pow_of_two(size);
- if (newsize <= max / 64)
- newsize = newsize * newsize;
+ if (newsize <= max / 32)
+ newsize = newsize * 4;
else if (newsize <= max / 4)
- newsize = max / 4;
+ newsize = newsize * 2;
else
newsize = max;
return newsize;
@@ -426,8 +437,7 @@ static int make_ahead_window(struct address_space *mapping, struct file *filp,
 * congestion. The ahead window will anyway be closed
* in case we failed due to excessive page cache hits.
*/
- ra->ahead_start = 0;
- ra->ahead_size = 0;
+ reset_ahead_window(ra);
}
return ret;
@@ -520,11 +530,11 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
* If we get here we are doing sequential IO and this was not the first
 * occurrence (ie we have an existing window)
*/
-
if (ra->ahead_start == 0) { /* no ahead window yet */
if (!make_ahead_window(mapping, filp, ra, 0))
- goto out;
+ goto recheck;
}
+
/*
* Already have an ahead window, check if we crossed into it.
* If so, shift windows and issue a new ahead window.
@@ -536,6 +546,10 @@ page_cache_readahead(struct address_space *mapping, struct file_ra_state *ra,
ra->start = ra->ahead_start;
ra->size = ra->ahead_size;
make_ahead_window(mapping, filp, ra, 0);
+recheck:
+ /* prev_page shouldn't overrun the ahead window */
+ ra->prev_page = min(ra->prev_page,
+ ra->ahead_start + ra->ahead_size - 1);
}
out:
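The retuned get_init_ra_size() ramp is easiest to see with numbers. A tiny
user-space mirror of the new logic (roundup_pow_of_two() is folded away by
feeding power-of-two sizes; max = 32 pages corresponds to the 128 kB default
window):

#include <stdio.h>

static unsigned long init_ra(unsigned long size, unsigned long max)
{
        if (size <= max / 32)
                return size * 4;         /* small first read: quadruple */
        else if (size <= max / 4)
                return size * 2;         /* medium first read: double */
        return max;                      /* large first read: clamp */
}

int main(void)
{
        unsigned long max = 32;
        for (unsigned long s = 1; s <= max; s *= 2)
                printf("request %2lu pages -> initial window %2lu\n",
                       s, init_ra(s, max));
        return 0;
}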
diff --git a/mm/rmap.c b/mm/rmap.c
index 67f0e20b101..1963e269314 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -56,13 +56,11 @@
#include <asm/tlbflush.h>
-//#define RMAP_DEBUG /* can be enabled only for debugging */
-
-kmem_cache_t *anon_vma_cachep;
+struct kmem_cache *anon_vma_cachep;
static inline void validate_anon_vma(struct vm_area_struct *find_vma)
{
-#ifdef RMAP_DEBUG
+#ifdef CONFIG_DEBUG_VM
struct anon_vma *anon_vma = find_vma->anon_vma;
struct vm_area_struct *vma;
unsigned int mapcount = 0;
@@ -166,7 +164,8 @@ void anon_vma_unlink(struct vm_area_struct *vma)
anon_vma_free(anon_vma);
}
-static void anon_vma_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
+static void anon_vma_ctor(void *data, struct kmem_cache *cachep,
+ unsigned long flags)
{
if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
SLAB_CTOR_CONSTRUCTOR) {
@@ -550,13 +549,14 @@ void page_add_file_rmap(struct page *page)
void page_remove_rmap(struct page *page)
{
if (atomic_add_negative(-1, &page->_mapcount)) {
- if (page_mapcount(page) < 0) {
+#ifdef CONFIG_DEBUG_VM
+ if (unlikely(page_mapcount(page) < 0)) {
printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
printk (KERN_EMERG " page->flags = %lx\n", page->flags);
printk (KERN_EMERG " page->count = %x\n", page_count(page));
printk (KERN_EMERG " page->mapping = %p\n", page->mapping);
}
-
+#endif
BUG_ON(page_mapcount(page) < 0);
/*
* It would be tidy to reset the PageAnon mapping here,
diff --git a/mm/shmem.c b/mm/shmem.c
index 7c455fbaff7..37eaf42ed2c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -875,7 +875,7 @@ redirty:
}
#ifdef CONFIG_NUMA
-static int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
+static inline int shmem_parse_mpol(char *value, int *policy, nodemask_t *policy_nodes)
{
char *nodelist = strchr(value, ':');
int err = 1;
@@ -2119,7 +2119,7 @@ failed:
return err;
}
-static kmem_cache_t *shmem_inode_cachep;
+static struct kmem_cache *shmem_inode_cachep;
static struct inode *shmem_alloc_inode(struct super_block *sb)
{
@@ -2139,7 +2139,8 @@ static void shmem_destroy_inode(struct inode *inode)
kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode));
}
-static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
+static void init_once(void *foo, struct kmem_cache *cachep,
+ unsigned long flags)
{
struct shmem_inode_info *p = (struct shmem_inode_info *) foo;
diff --git a/mm/slab.c b/mm/slab.c
index d0bd7f07ab0..1c8f5ee230d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -50,7 +50,7 @@
* The head array is strictly LIFO and should improve the cache hit rates.
* On SMP, it additionally reduces the spinlock operations.
*
- * The c_cpuarray may not be read with enabled local interrupts -
+ * The c_cpuarray may not be read with enabled local interrupts -
* it's changed with a smp_call_function().
*
* SMP synchronization:
@@ -170,12 +170,12 @@
#if DEBUG
# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
SLAB_POISON | SLAB_HWCACHE_ALIGN | \
- SLAB_NO_REAP | SLAB_CACHE_DMA | \
+ SLAB_CACHE_DMA | \
SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
SLAB_DESTROY_BY_RCU)
#else
-# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \
+# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
SLAB_DESTROY_BY_RCU)
@@ -266,16 +266,17 @@ struct array_cache {
unsigned int batchcount;
unsigned int touched;
spinlock_t lock;
- void *entry[0]; /*
- * Must have this definition in here for the proper
- * alignment of array_cache. Also simplifies accessing
- * the entries.
- * [0] is for gcc 2.95. It should really be [].
- */
+ void *entry[0]; /*
+ * Must have this definition in here for the proper
+ * alignment of array_cache. Also simplifies accessing
+ * the entries.
+ * [0] is for gcc 2.95. It should really be [].
+ */
};
-/* bootstrap: The caches do not work without cpuarrays anymore,
- * but the cpuarrays are allocated from the generic caches...
+/*
+ * bootstrap: The caches do not work without cpuarrays anymore, but the
+ * cpuarrays are allocated from the generic caches...
*/
#define BOOT_CPUCACHE_ENTRIES 1
struct arraycache_init {
@@ -291,13 +292,13 @@ struct kmem_list3 {
struct list_head slabs_full;
struct list_head slabs_free;
unsigned long free_objects;
- unsigned long next_reap;
- int free_touched;
unsigned int free_limit;
unsigned int colour_next; /* Per-node cache coloring */
spinlock_t list_lock;
struct array_cache *shared; /* shared per node */
struct array_cache **alien; /* on other nodes */
+ unsigned long next_reap; /* updated without locking */
+ int free_touched; /* updated without locking */
};
/*
@@ -310,10 +311,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
#define SIZE_L3 (1 + MAX_NUMNODES)
/*
- * This function must be completely optimized away if
- * a constant is passed to it. Mostly the same as
- * what is in linux/slab.h except it returns an
- * index.
+ * This function must be completely optimized away if a constant is passed to
+ * it. Mostly the same as what is in linux/slab.h except it returns an index.
*/
static __always_inline int index_of(const size_t size)
{
@@ -351,14 +350,14 @@ static void kmem_list3_init(struct kmem_list3 *parent)
parent->free_touched = 0;
}
-#define MAKE_LIST(cachep, listp, slab, nodeid) \
- do { \
- INIT_LIST_HEAD(listp); \
- list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
+#define MAKE_LIST(cachep, listp, slab, nodeid) \
+ do { \
+ INIT_LIST_HEAD(listp); \
+ list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
} while (0)
-#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
- do { \
+#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
+ do { \
MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
@@ -373,28 +372,30 @@ static void kmem_list3_init(struct kmem_list3 *parent)
struct kmem_cache {
/* 1) per-cpu data, touched during every alloc/free */
struct array_cache *array[NR_CPUS];
+/* 2) Cache tunables. Protected by cache_chain_mutex */
unsigned int batchcount;
unsigned int limit;
unsigned int shared;
+
unsigned int buffer_size;
-/* 2) touched by every alloc & free from the backend */
+/* 3) touched by every alloc & free from the backend */
struct kmem_list3 *nodelists[MAX_NUMNODES];
- unsigned int flags; /* constant flags */
- unsigned int num; /* # of objs per slab */
- spinlock_t spinlock;
-/* 3) cache_grow/shrink */
+ unsigned int flags; /* constant flags */
+ unsigned int num; /* # of objs per slab */
+
+/* 4) cache_grow/shrink */
/* order of pgs per slab (2^n) */
unsigned int gfporder;
/* force GFP flags, e.g. GFP_DMA */
gfp_t gfpflags;
- size_t colour; /* cache colouring range */
+ size_t colour; /* cache colouring range */
unsigned int colour_off; /* colour offset */
struct kmem_cache *slabp_cache;
unsigned int slab_size;
- unsigned int dflags; /* dynamic flags */
+ unsigned int dflags; /* dynamic flags */
/* constructor func */
void (*ctor) (void *, struct kmem_cache *, unsigned long);
@@ -402,11 +403,11 @@ struct kmem_cache {
/* de-constructor func */
void (*dtor) (void *, struct kmem_cache *, unsigned long);
-/* 4) cache creation/removal */
+/* 5) cache creation/removal */
const char *name;
struct list_head next;
-/* 5) statistics */
+/* 6) statistics */
#if STATS
unsigned long num_active;
unsigned long num_allocations;
@@ -438,8 +439,9 @@ struct kmem_cache {
#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
#define BATCHREFILL_LIMIT 16
-/* Optimization question: fewer reaps means less
- * probability for unnessary cpucache drain/refill cycles.
+/*
+ * Optimization question: fewer reaps means less probability for unnecessary
+ * cpucache drain/refill cycles.
*
* OTOH the cpuarrays can contain lots of objects,
* which could lock up otherwise freeable slabs.
@@ -453,17 +455,19 @@ struct kmem_cache {
#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
#define STATS_INC_GROWN(x) ((x)->grown++)
#define STATS_INC_REAPED(x) ((x)->reaped++)
-#define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \
- (x)->high_mark = (x)->num_active; \
- } while (0)
+#define STATS_SET_HIGH(x) \
+ do { \
+ if ((x)->num_active > (x)->high_mark) \
+ (x)->high_mark = (x)->num_active; \
+ } while (0)
#define STATS_INC_ERR(x) ((x)->errors++)
#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
-#define STATS_SET_FREEABLE(x, i) \
- do { if ((x)->max_freeable < i) \
- (x)->max_freeable = i; \
- } while (0)
-
+#define STATS_SET_FREEABLE(x, i) \
+ do { \
+ if ((x)->max_freeable < i) \
+ (x)->max_freeable = i; \
+ } while (0)
#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
@@ -478,9 +482,7 @@ struct kmem_cache {
#define STATS_INC_ERR(x) do { } while (0)
#define STATS_INC_NODEALLOCS(x) do { } while (0)
#define STATS_INC_NODEFREES(x) do { } while (0)
-#define STATS_SET_FREEABLE(x, i) \
- do { } while (0)
-
+#define STATS_SET_FREEABLE(x, i) do { } while (0)
#define STATS_INC_ALLOCHIT(x) do { } while (0)
#define STATS_INC_ALLOCMISS(x) do { } while (0)
#define STATS_INC_FREEHIT(x) do { } while (0)
@@ -488,7 +490,8 @@ struct kmem_cache {
#endif
#if DEBUG
-/* Magic nums for obj red zoning.
+/*
+ * Magic nums for obj red zoning.
* Placed in the first word before and the first word after an obj.
*/
#define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */
@@ -499,7 +502,8 @@ struct kmem_cache {
#define POISON_FREE 0x6b /* for use-after-free poisoning */
#define POISON_END 0xa5 /* end-byte of poisoning */
-/* memory layout of objects:
+/*
+ * memory layout of objects:
* 0 : objp
* 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
* the end of an object is aligned with the end of the real
@@ -508,7 +512,8 @@ struct kmem_cache {
* redzone word.
* cachep->obj_offset: The real object.
* cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
- * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long]
+ * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
+ * [BYTES_PER_WORD long]
*/
static int obj_offset(struct kmem_cache *cachep)
{
@@ -552,8 +557,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#endif
/*
- * Maximum size of an obj (in 2^order pages)
- * and absolute limit for the gfp order.
+ * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp
+ * order.
*/
#if defined(CONFIG_LARGE_ALLOCS)
#define MAX_OBJ_ORDER 13 /* up to 32Mb */
@@ -573,9 +578,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
#define BREAK_GFP_ORDER_LO 0
static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
-/* Functions for storing/retrieving the cachep and or slab from the
- * global 'mem_map'. These are used to find the slab an obj belongs to.
- * With kfree(), these are used to find the cache which an obj belongs to.
+/*
+ * Functions for storing/retrieving the cachep and or slab from the page
+ * allocator. These are used to find the slab an obj belongs to. With kfree(),
+ * these are used to find the cache which an obj belongs to.
*/
static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
{
@@ -584,6 +590,8 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
static inline struct kmem_cache *page_get_cache(struct page *page)
{
+ if (unlikely(PageCompound(page)))
+ page = (struct page *)page_private(page);
return (struct kmem_cache *)page->lru.next;
}
@@ -594,6 +602,8 @@ static inline void page_set_slab(struct page *page, struct slab *slab)
static inline struct slab *page_get_slab(struct page *page)
{
+ if (unlikely(PageCompound(page)))
+ page = (struct page *)page_private(page);
return (struct slab *)page->lru.prev;
}
@@ -609,7 +619,21 @@ static inline struct slab *virt_to_slab(const void *obj)
return page_get_slab(page);
}
-/* These are the default caches for kmalloc. Custom caches can have other sizes. */
+static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
+ unsigned int idx)
+{
+ return slab->s_mem + cache->buffer_size * idx;
+}
+
+static inline unsigned int obj_to_index(struct kmem_cache *cache,
+ struct slab *slab, void *obj)
+{
+ return (unsigned)(obj - slab->s_mem) / cache->buffer_size;
+}
+
+/*
+ * These are the default caches for kmalloc. Custom caches can have other sizes.
+ */
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h>
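index_to_obj() and obj_to_index() above are inverse linear maps over a
slab's object array. A user-space model of the round trip, with the structs
cut down to the fields the arithmetic needs (fake_cache/fake_slab are
stand-ins):

#include <assert.h>
#include <stddef.h>

struct fake_cache { size_t buffer_size; };
struct fake_slab  { char *s_mem; };

static void *index_to_obj(struct fake_cache *c, struct fake_slab *s,
                          unsigned int idx)
{
        return s->s_mem + c->buffer_size * idx;
}

static unsigned int obj_to_index(struct fake_cache *c, struct fake_slab *s,
                                 void *obj)
{
        return (unsigned int)(((char *)obj - s->s_mem) / c->buffer_size);
}

int main(void)
{
        static char mem[4096];
        struct fake_cache cache = { .buffer_size = 256 };
        struct fake_slab slab = { .s_mem = mem };

        /* the two helpers invert each other for every valid index */
        for (unsigned int i = 0; i < sizeof(mem) / 256; i++)
                assert(obj_to_index(&cache, &slab,
                                    index_to_obj(&cache, &slab, i)) == i);
        return 0;
}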
@@ -642,8 +666,6 @@ static struct kmem_cache cache_cache = {
.limit = BOOT_CPUCACHE_ENTRIES,
.shared = 1,
.buffer_size = sizeof(struct kmem_cache),
- .flags = SLAB_NO_REAP,
- .spinlock = SPIN_LOCK_UNLOCKED,
.name = "kmem_cache",
#if DEBUG
.obj_size = sizeof(struct kmem_cache),
@@ -655,8 +677,8 @@ static DEFINE_MUTEX(cache_chain_mutex);
static struct list_head cache_chain;
/*
- * vm_enough_memory() looks at this to determine how many
- * slab-allocated pages are possibly freeable under pressure
+ * vm_enough_memory() looks at this to determine how many slab-allocated pages
+ * are possibly freeable under pressure
*
* SLAB_RECLAIM_ACCOUNT turns this on per-slab
*/
@@ -675,7 +697,8 @@ static enum {
static DEFINE_PER_CPU(struct work_struct, reap_work);
-static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node);
+static void free_block(struct kmem_cache *cachep, void **objpp, int len,
+ int node);
static void enable_cpucache(struct kmem_cache *cachep);
static void cache_reap(void *unused);
static int __node_shrink(struct kmem_cache *cachep, int node);
@@ -685,7 +708,8 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
return cachep->array[smp_processor_id()];
}
-static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags)
+static inline struct kmem_cache *__find_general_cachep(size_t size,
+ gfp_t gfpflags)
{
struct cache_sizes *csizep = malloc_sizes;
@@ -720,8 +744,9 @@ static size_t slab_mgmt_size(size_t nr_objs, size_t align)
return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
}
-/* Calculate the number of objects and left-over bytes for a given
- buffer size. */
+/*
+ * Calculate the number of objects and left-over bytes for a given buffer size.
+ */
static void cache_estimate(unsigned long gfporder, size_t buffer_size,
size_t align, int flags, size_t *left_over,
unsigned int *num)
@@ -782,7 +807,8 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
#define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg)
-static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg)
+static void __slab_error(const char *function, struct kmem_cache *cachep,
+ char *msg)
{
printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
function, cachep->name, msg);
@@ -804,7 +830,7 @@ static void init_reap_node(int cpu)
node = next_node(cpu_to_node(cpu), node_online_map);
if (node == MAX_NUMNODES)
- node = 0;
+ node = first_node(node_online_map);
__get_cpu_var(reap_node) = node;
}
@@ -906,10 +932,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
if (!ac_ptr)
return;
-
for_each_node(i)
kfree(ac_ptr[i]);
-
kfree(ac_ptr);
}
@@ -943,7 +967,8 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
}
}
-static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
+static void drain_alien_cache(struct kmem_cache *cachep,
+ struct array_cache **alien)
{
int i = 0;
struct array_cache *ac;
@@ -986,20 +1011,22 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
mutex_lock(&cache_chain_mutex);
- /* we need to do this right in the beginning since
+ /*
+ * We need to do this right in the beginning since
* alloc_arraycache's are going to use this list.
* kmalloc_node allows us to add the slab to the right
* kmem_list3 and not this cpu's kmem_list3
*/
list_for_each_entry(cachep, &cache_chain, next) {
- /* setup the size64 kmemlist for cpu before we can
+ /*
+ * Set up the size64 kmemlist for cpu before we can
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
if (!cachep->nodelists[node]) {
- if (!(l3 = kmalloc_node(memsize,
- GFP_KERNEL, node)))
+ l3 = kmalloc_node(memsize, GFP_KERNEL, node);
+ if (!l3)
goto bad;
kmem_list3_init(l3);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
@@ -1015,13 +1042,15 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
spin_lock_irq(&cachep->nodelists[node]->list_lock);
cachep->nodelists[node]->free_limit =
- (1 + nr_cpus_node(node)) *
- cachep->batchcount + cachep->num;
+ (1 + nr_cpus_node(node)) *
+ cachep->batchcount + cachep->num;
spin_unlock_irq(&cachep->nodelists[node]->list_lock);
}
- /* Now we can go ahead with allocating the shared array's
- & array cache's */
+ /*
+ * Now we can go ahead with allocating the shared arrays and
+ * array caches
+ */
list_for_each_entry(cachep, &cache_chain, next) {
struct array_cache *nc;
struct array_cache *shared;
@@ -1041,7 +1070,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
if (!alien)
goto bad;
cachep->array[cpu] = nc;
-
l3 = cachep->nodelists[node];
BUG_ON(!l3);
@@ -1061,7 +1089,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
}
#endif
spin_unlock_irq(&l3->list_lock);
-
kfree(shared);
free_alien_cache(alien);
}
@@ -1083,7 +1110,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
/* fall thru */
case CPU_UP_CANCELED:
mutex_lock(&cache_chain_mutex);
-
list_for_each_entry(cachep, &cache_chain, next) {
struct array_cache *nc;
struct array_cache *shared;
@@ -1150,7 +1176,7 @@ free_array_cache:
#endif
}
return NOTIFY_OK;
- bad:
+bad:
mutex_unlock(&cache_chain_mutex);
return NOTIFY_BAD;
}
@@ -1160,7 +1186,8 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
/*
* swap the static kmem_list3 with kmalloced memory
*/
-static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid)
+static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
+ int nodeid)
{
struct kmem_list3 *ptr;
@@ -1175,8 +1202,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int no
local_irq_enable();
}
-/* Initialisation.
- * Called after the gfp() functions have been enabled, and before smp_init().
+/*
+ * Initialisation. Called after the page allocator has been initialised and
+ * before smp_init().
*/
void __init kmem_cache_init(void)
{
@@ -1201,9 +1229,9 @@ void __init kmem_cache_init(void)
/* Bootstrap is tricky, because several objects are allocated
* from caches that do not exist yet:
- * 1) initialize the cache_cache cache: it contains the struct kmem_cache
- * structures of all caches, except cache_cache itself: cache_cache
- * is statically allocated.
+ * 1) initialize the cache_cache cache: it contains the struct
+ * kmem_cache structures of all caches, except cache_cache itself:
+ * cache_cache is statically allocated.
* Initially an __init data area is used for the head array and the
* kmem_list3 structures, it's replaced with a kmalloc allocated
* array at the end of the bootstrap.
@@ -1226,7 +1254,8 @@ void __init kmem_cache_init(void)
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE];
- cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size());
+ cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
+ cache_line_size());
for (order = 0; order < MAX_ORDER; order++) {
cache_estimate(order, cache_cache.buffer_size,
@@ -1245,24 +1274,26 @@ void __init kmem_cache_init(void)
sizes = malloc_sizes;
names = cache_names;
- /* Initialize the caches that provide memory for the array cache
- * and the kmem_list3 structures first.
- * Without this, further allocations will bug
+ /*
+ * Initialize the caches that provide memory for the array cache and the
+ * kmem_list3 structures first. Without this, further allocations will
+ * bug.
*/
sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
- sizes[INDEX_AC].cs_size,
- ARCH_KMALLOC_MINALIGN,
- (ARCH_KMALLOC_FLAGS |
- SLAB_PANIC), NULL, NULL);
+ sizes[INDEX_AC].cs_size,
+ ARCH_KMALLOC_MINALIGN,
+ ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+ NULL, NULL);
- if (INDEX_AC != INDEX_L3)
+ if (INDEX_AC != INDEX_L3) {
sizes[INDEX_L3].cs_cachep =
- kmem_cache_create(names[INDEX_L3].name,
- sizes[INDEX_L3].cs_size,
- ARCH_KMALLOC_MINALIGN,
- (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL,
- NULL);
+ kmem_cache_create(names[INDEX_L3].name,
+ sizes[INDEX_L3].cs_size,
+ ARCH_KMALLOC_MINALIGN,
+ ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+ NULL, NULL);
+ }
while (sizes->cs_size != ULONG_MAX) {
/*
@@ -1272,13 +1303,13 @@ void __init kmem_cache_init(void)
	 * Note: for systems short on memory, removing the alignment will
* allow tighter packing of the smaller caches.
*/
- if (!sizes->cs_cachep)
+ if (!sizes->cs_cachep) {
sizes->cs_cachep = kmem_cache_create(names->name,
- sizes->cs_size,
- ARCH_KMALLOC_MINALIGN,
- (ARCH_KMALLOC_FLAGS
- | SLAB_PANIC),
- NULL, NULL);
+ sizes->cs_size,
+ ARCH_KMALLOC_MINALIGN,
+ ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+ NULL, NULL);
+ }
/* Inc off-slab bufctl limit until the ceiling is hit. */
if (!(OFF_SLAB(sizes->cs_cachep))) {
@@ -1287,13 +1318,11 @@ void __init kmem_cache_init(void)
}
sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
- sizes->cs_size,
- ARCH_KMALLOC_MINALIGN,
- (ARCH_KMALLOC_FLAGS |
- SLAB_CACHE_DMA |
- SLAB_PANIC), NULL,
- NULL);
-
+ sizes->cs_size,
+ ARCH_KMALLOC_MINALIGN,
+ ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
+ SLAB_PANIC,
+ NULL, NULL);
sizes++;
names++;
}
@@ -1345,20 +1374,22 @@ void __init kmem_cache_init(void)
struct kmem_cache *cachep;
mutex_lock(&cache_chain_mutex);
list_for_each_entry(cachep, &cache_chain, next)
- enable_cpucache(cachep);
+ enable_cpucache(cachep);
mutex_unlock(&cache_chain_mutex);
}
/* Done! */
g_cpucache_up = FULL;
- /* Register a cpu startup notifier callback
- * that initializes cpu_cache_get for all new cpus
+ /*
+ * Register a cpu startup notifier callback that initializes
+ * cpu_cache_get for all new cpus
*/
register_cpu_notifier(&cpucache_notifier);
- /* The reap timers are started later, with a module init call:
- * That part of the kernel is not yet operational.
+ /*
+	 * The reap timers are started later, with a module init call: that part
+ * of the kernel is not yet operational.
*/
}
@@ -1366,16 +1397,13 @@ static int __init cpucache_init(void)
{
int cpu;
- /*
- * Register the timers that return unneeded
- * pages to gfp.
+ /*
+ * Register the timers that return unneeded pages to the page allocator
*/
for_each_online_cpu(cpu)
- start_cpu_timer(cpu);
-
+ start_cpu_timer(cpu);
return 0;
}
-
__initcall(cpucache_init);
/*
@@ -1402,7 +1430,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
atomic_add(i, &slab_reclaim_pages);
add_page_state(nr_slab, i);
while (i--) {
- SetPageSlab(page);
+ __SetPageSlab(page);
page++;
}
return addr;
@@ -1418,8 +1446,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
const unsigned long nr_freed = i;
while (i--) {
- if (!TestClearPageSlab(page))
- BUG();
+ BUG_ON(!PageSlab(page));
+ __ClearPageSlab(page);
page++;
}
sub_page_state(nr_slab, nr_freed);
@@ -1489,9 +1517,8 @@ static void dump_line(char *data, int offset, int limit)
{
int i;
printk(KERN_ERR "%03x:", offset);
- for (i = 0; i < limit; i++) {
+ for (i = 0; i < limit; i++)
printk(" %02x", (unsigned char)data[offset + i]);
- }
printk("\n");
}
#endif
@@ -1505,15 +1532,15 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
if (cachep->flags & SLAB_RED_ZONE) {
printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n",
- *dbg_redzone1(cachep, objp),
- *dbg_redzone2(cachep, objp));
+ *dbg_redzone1(cachep, objp),
+ *dbg_redzone2(cachep, objp));
}
if (cachep->flags & SLAB_STORE_USER) {
printk(KERN_ERR "Last user: [<%p>]",
- *dbg_userword(cachep, objp));
+ *dbg_userword(cachep, objp));
print_symbol("(%s)",
- (unsigned long)*dbg_userword(cachep, objp));
+ (unsigned long)*dbg_userword(cachep, objp));
printk("\n");
}
realobj = (char *)objp + obj_offset(cachep);
@@ -1546,8 +1573,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
/* Print header */
if (lines == 0) {
printk(KERN_ERR
- "Slab corruption: start=%p, len=%d\n",
- realobj, size);
+ "Slab corruption: start=%p, len=%d\n",
+ realobj, size);
print_objinfo(cachep, objp, 0);
}
/* Hexdump the affected line */
@@ -1568,18 +1595,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
* exist:
*/
struct slab *slabp = virt_to_slab(objp);
- int objnr;
+ unsigned int objnr;
- objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
+ objnr = obj_to_index(cachep, slabp, objp);
if (objnr) {
- objp = slabp->s_mem + (objnr - 1) * cachep->buffer_size;
+ objp = index_to_obj(cachep, slabp, objnr - 1);
realobj = (char *)objp + obj_offset(cachep);
printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
realobj, size);
print_objinfo(cachep, objp, 2);
}
if (objnr + 1 < cachep->num) {
- objp = slabp->s_mem + (objnr + 1) * cachep->buffer_size;
+ objp = index_to_obj(cachep, slabp, objnr + 1);
realobj = (char *)objp + obj_offset(cachep);
printk(KERN_ERR "Next obj: start=%p, len=%d\n",
realobj, size);
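
The obj_to_index()/index_to_obj() helpers adopted in this hunk reduce to simple stride arithmetic. A self-contained sketch of what they compute (the two structs are stripped-down stand-ins):

#include <stdio.h>
#include <stddef.h>

struct slab { char *s_mem; };			/* first object in the slab */
struct kmem_cache { size_t buffer_size; };	/* object stride */

/* Address of the i-th object in a slab. */
static void *index_to_obj(struct kmem_cache *c, struct slab *s, unsigned int i)
{
	return s->s_mem + i * c->buffer_size;
}

/* Inverse mapping: object address back to its index. */
static unsigned int obj_to_index(struct kmem_cache *c, struct slab *s, void *obj)
{
	return ((char *)obj - s->s_mem) / c->buffer_size;
}

int main(void)
{
	char mem[1024];
	struct slab s = { mem };
	struct kmem_cache c = { 128 };
	void *objp = index_to_obj(&c, &s, 3);

	printf("index %u\n", obj_to_index(&c, &s, objp));	/* index 3 */
	return 0;
}
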
@@ -1591,22 +1618,25 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
#if DEBUG
/**
- * slab_destroy_objs - call the registered destructor for each object in
- * a slab that is to be destroyed.
+ * slab_destroy_objs - destroy a slab and its objects
+ * @cachep: cache pointer being destroyed
+ * @slabp: slab pointer being destroyed
+ *
+ * Call the registered destructor for each object in a slab that is being
+ * destroyed.
*/
static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
{
int i;
for (i = 0; i < cachep->num; i++) {
- void *objp = slabp->s_mem + cachep->buffer_size * i;
+ void *objp = index_to_obj(cachep, slabp, i);
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((cachep->buffer_size % PAGE_SIZE) == 0
- && OFF_SLAB(cachep))
+ if (cachep->buffer_size % PAGE_SIZE == 0 &&
+ OFF_SLAB(cachep))
kernel_map_pages(virt_to_page(objp),
- cachep->buffer_size / PAGE_SIZE,
- 1);
+ cachep->buffer_size / PAGE_SIZE, 1);
else
check_poison_obj(cachep, objp);
#else
@@ -1631,7 +1661,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
if (cachep->dtor) {
int i;
for (i = 0; i < cachep->num; i++) {
- void *objp = slabp->s_mem + cachep->buffer_size * i;
+ void *objp = index_to_obj(cachep, slabp, i);
(cachep->dtor) (objp, cachep, 0);
}
}
@@ -1639,9 +1669,13 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
#endif
/**
+ * slab_destroy - destroy and release all objects in a slab
+ * @cachep: cache pointer being destroyed
+ * @slabp: slab pointer being destroyed
+ *
* Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked from the cache.
- * The cache-lock is not held/needed.
+ * Before calling the slab must have been unlinked from the cache. The
+ * cache-lock is not held/needed.
*/
static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
{
@@ -1662,8 +1696,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
}
}
-/* For setting up all the kmem_list3s for cache whose buffer_size is same
- as size of kmem_list3. */
+/*
+ * For setting up all the kmem_list3s for caches whose buffer_size is the
+ * same as the size of kmem_list3.
+ */
static void set_up_list3s(struct kmem_cache *cachep, int index)
{
int node;
@@ -1689,13 +1725,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index)
* high order pages for slabs. When the gfp() functions are more friendly
* towards high-order requests, this should be changed.
*/
-static inline size_t calculate_slab_order(struct kmem_cache *cachep,
+static size_t calculate_slab_order(struct kmem_cache *cachep,
size_t size, size_t align, unsigned long flags)
{
size_t left_over = 0;
int gfporder;
- for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) {
+ for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) {
unsigned int num;
size_t remainder;
@@ -1730,12 +1766,66 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
/*
* Acceptable internal fragmentation?
*/
- if ((left_over * 8) <= (PAGE_SIZE << gfporder))
+ if (left_over * 8 <= (PAGE_SIZE << gfporder))
break;
}
return left_over;
}
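
A worked run of the acceptance test above: an order is good once the leftover space is at most one eighth of the slab. (Sketch only; 4 KiB pages are assumed and the management overhead that cache_estimate() accounts for is ignored.)

#include <stdio.h>

#define PAGE_SIZE 4096UL	/* assumed for the sketch */

int main(void)
{
	unsigned long size = 2100;	/* hypothetical object size */
	int gfporder;

	for (gfporder = 0; gfporder <= 2; gfporder++) {
		unsigned long slab = PAGE_SIZE << gfporder;
		unsigned long num = slab / size;
		unsigned long left_over = slab - num * size;

		/* accept once waste is at most 1/8th of the slab */
		printf("order %d: %lu objs, %lu left over -> %s\n",
		       gfporder, num, left_over,
		       left_over * 8 <= slab ? "acceptable" : "too wasteful");
	}
	return 0;
}
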
+static void setup_cpu_cache(struct kmem_cache *cachep)
+{
+ if (g_cpucache_up == FULL) {
+ enable_cpucache(cachep);
+ return;
+ }
+ if (g_cpucache_up == NONE) {
+ /*
+ * Note: the first kmem_cache_create must create the cache
+ * that's used by kmalloc(24), otherwise the creation of
+ * further caches will BUG().
+ */
+ cachep->array[smp_processor_id()] = &initarray_generic.cache;
+
+ /*
+ * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
+ * the first cache, then we need to set up all its list3s,
+ * otherwise the creation of further caches will BUG().
+ */
+ set_up_list3s(cachep, SIZE_AC);
+ if (INDEX_AC == INDEX_L3)
+ g_cpucache_up = PARTIAL_L3;
+ else
+ g_cpucache_up = PARTIAL_AC;
+ } else {
+ cachep->array[smp_processor_id()] =
+ kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+
+ if (g_cpucache_up == PARTIAL_AC) {
+ set_up_list3s(cachep, SIZE_L3);
+ g_cpucache_up = PARTIAL_L3;
+ } else {
+ int node;
+ for_each_online_node(node) {
+ cachep->nodelists[node] =
+ kmalloc_node(sizeof(struct kmem_list3),
+ GFP_KERNEL, node);
+ BUG_ON(!cachep->nodelists[node]);
+ kmem_list3_init(cachep->nodelists[node]);
+ }
+ }
+ }
+ cachep->nodelists[numa_node_id()]->next_reap =
+ jiffies + REAPTIMEOUT_LIST3 +
+ ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+
+ cpu_cache_get(cachep)->avail = 0;
+ cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
+ cpu_cache_get(cachep)->batchcount = 1;
+ cpu_cache_get(cachep)->touched = 0;
+ cachep->batchcount = 1;
+ cachep->limit = BOOT_CPUCACHE_ENTRIES;
+}
+
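
The g_cpucache_up states that setup_cpu_cache() steps through form a short progression; the sketch below merely names them, with meanings inferred from the code above:

#include <stdio.h>

/* NONE:       no kmalloc caches exist; static bootstrap arrays are used.
 * PARTIAL_AC: the array-cache cache exists, so head arrays can be
 *             kmalloc'ed, but struct kmem_list3 still cannot.
 * PARTIAL_L3: the kmem_list3 cache exists as well.
 * FULL:       bootstrap finished; enable_cpucache() can run directly. */
enum cpucache_up { NONE, PARTIAL_AC, PARTIAL_L3, FULL };

int main(void)
{
	static const char *names[] = {
		"NONE", "PARTIAL_AC", "PARTIAL_L3", "FULL"
	};
	int s;

	for (s = NONE; s <= FULL; s++)
		printf("g_cpucache_up -> %s\n", names[s]);
	return 0;
}
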
/**
* kmem_cache_create - Create a cache.
* @name: A string which is used in /proc/slabinfo to identify this cache.
@@ -1751,9 +1841,8 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
* and the @dtor is run before the pages are handed back.
*
* @name must be valid until the cache is destroyed. This implies that
- * the module calling this has to destroy the cache before getting
- * unloaded.
- *
+ * the module calling this has to destroy the cache before getting unloaded.
+ *
* The flags are
*
* %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
@@ -1762,16 +1851,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
* %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
* for buffer overruns.
*
- * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
- * memory pressure.
- *
* %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
* cacheline. This can be beneficial if you're counting cycles as closely
* as davem.
*/
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
- unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long),
+ unsigned long flags,
+ void (*ctor)(void*, struct kmem_cache *, unsigned long),
void (*dtor)(void*, struct kmem_cache *, unsigned long))
{
size_t left_over, slab_size, ralign;
@@ -1781,12 +1868,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
/*
* Sanity checks... these are all serious usage bugs.
*/
- if ((!name) ||
- in_interrupt() ||
- (size < BYTES_PER_WORD) ||
+ if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
(size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) {
- printk(KERN_ERR "%s: Early error in slab %s\n",
- __FUNCTION__, name);
+ printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
+ name);
BUG();
}
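
Restated in isolation, the sanity checks amount to the predicate below (sketch only; the in_interrupt() test is omitted, and MAX_OBJ_ORDER and the page size are assumed values):

#include <stdio.h>
#include <stddef.h>

#define BYTES_PER_WORD	sizeof(void *)
#define MAX_OBJ_ORDER	5		/* assumed for the sketch */
#define PAGE_SIZE	4096UL		/* assumed for the sketch */

/* A cache needs a name, an object size between one word and
 * (1 << MAX_OBJ_ORDER) pages, and a destructor only together with a
 * constructor. */
static int create_args_ok(const char *name, size_t size,
			  int has_ctor, int has_dtor)
{
	return name && size >= BYTES_PER_WORD &&
	       size <= (1UL << MAX_OBJ_ORDER) * PAGE_SIZE &&
	       !(has_dtor && !has_ctor);
}

int main(void)
{
	printf("%d\n", create_args_ok("foo_cache", 64, 0, 0));	/* 1 */
	printf("%d\n", create_args_ok(NULL, 64, 0, 0));		/* 0 */
	printf("%d\n", create_args_ok("bar", 64, 0, 1));	/* 0 */
	return 0;
}
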
@@ -1840,8 +1925,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
* above the next power of two: caches with object sizes just above a
* power of two have a significant amount of internal fragmentation.
*/
- if ((size < 4096
- || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD)))
+	if (size < 4096 || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))
flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
if (!(flags & SLAB_DESTROY_BY_RCU))
flags |= SLAB_POISON;
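
The fls() comparison above asks whether adding three words of debug overhead pushes an object into the next power of two. A worked sketch with an open-coded fls (the kernel's fls() is architecture-optimized; the sizes here are arbitrary):

#include <stdio.h>

/* Position of the highest set bit, 1-based; 0 for 0. A portable
 * stand-in for the kernel's fls(). */
static int fls_sketch(unsigned long x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long w = sizeof(void *);

	/* 400 + 3 words stays under the same power of two: debug it */
	printf("%d\n", fls_sketch(400 - 1) == fls_sketch(400 - 1 + 3 * w));
	/* 1020 + 3 words crosses into the next power of two: don't */
	printf("%d\n", fls_sketch(1020 - 1) == fls_sketch(1020 - 1 + 3 * w));
	return 0;
}
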
@@ -1853,13 +1937,14 @@ kmem_cache_create (const char *name, size_t size, size_t align,
BUG_ON(dtor);
/*
- * Always checks flags, a caller might be expecting debug
- * support which isn't available.
+	 * Always checks flags; a caller might be expecting debug support that
+	 * isn't available.
*/
if (flags & ~CREATE_MASK)
BUG();
- /* Check that size is in terms of words. This is needed to avoid
+ /*
+ * Check that size is in terms of words. This is needed to avoid
* unaligned accesses for some archs when redzoning is used, and makes
	 * sure any on-slab bufctls are also correctly aligned.
*/
@@ -1868,12 +1953,14 @@ kmem_cache_create (const char *name, size_t size, size_t align,
size &= ~(BYTES_PER_WORD - 1);
}
- /* calculate out the final buffer alignment: */
+ /* calculate the final buffer alignment: */
+
/* 1) arch recommendation: can be overridden for debug */
if (flags & SLAB_HWCACHE_ALIGN) {
- /* Default alignment: as specified by the arch code.
- * Except if an object is really small, then squeeze multiple
- * objects into one cacheline.
+ /*
+ * Default alignment: as specified by the arch code. Except if
+ * an object is really small, then squeeze multiple objects into
+ * one cacheline.
*/
ralign = cache_line_size();
while (size <= ralign / 2)
@@ -1893,7 +1980,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
if (ralign > BYTES_PER_WORD)
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
}
- /* 4) Store it. Note that the debug code below can reduce
+ /*
+ * 4) Store it. Note that the debug code below can reduce
* the alignment to BYTES_PER_WORD.
*/
align = ralign;
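
A worked run of the alignment loop above, which halves the alignment while two or more objects would still fit into it (a 64-byte cache line is assumed):

#include <stdio.h>

int main(void)
{
	static const unsigned long sizes[] = { 8, 20, 40, 100 };
	int i;

	for (i = 0; i < 4; i++) {
		unsigned long size = sizes[i];
		unsigned long ralign = 64;	/* assumed cache_line_size() */

		/* squeeze several small objects into one cacheline */
		while (size <= ralign / 2)
			ralign /= 2;
		printf("size %3lu -> align %2lu\n", size, ralign);
	}
	return 0;
}
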
@@ -1978,7 +2066,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
cachep->gfpflags = 0;
if (flags & SLAB_CACHE_DMA)
cachep->gfpflags |= GFP_DMA;
- spin_lock_init(&cachep->spinlock);
cachep->buffer_size = size;
if (flags & CFLGS_OFF_SLAB)
@@ -1988,64 +2075,11 @@ kmem_cache_create (const char *name, size_t size, size_t align,
cachep->name = name;
- if (g_cpucache_up == FULL) {
- enable_cpucache(cachep);
- } else {
- if (g_cpucache_up == NONE) {
- /* Note: the first kmem_cache_create must create
- * the cache that's used by kmalloc(24), otherwise
- * the creation of further caches will BUG().
- */
- cachep->array[smp_processor_id()] =
- &initarray_generic.cache;
-
- /* If the cache that's used by
- * kmalloc(sizeof(kmem_list3)) is the first cache,
- * then we need to set up all its list3s, otherwise
- * the creation of further caches will BUG().
- */
- set_up_list3s(cachep, SIZE_AC);
- if (INDEX_AC == INDEX_L3)
- g_cpucache_up = PARTIAL_L3;
- else
- g_cpucache_up = PARTIAL_AC;
- } else {
- cachep->array[smp_processor_id()] =
- kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
-
- if (g_cpucache_up == PARTIAL_AC) {
- set_up_list3s(cachep, SIZE_L3);
- g_cpucache_up = PARTIAL_L3;
- } else {
- int node;
- for_each_online_node(node) {
-
- cachep->nodelists[node] =
- kmalloc_node(sizeof
- (struct kmem_list3),
- GFP_KERNEL, node);
- BUG_ON(!cachep->nodelists[node]);
- kmem_list3_init(cachep->
- nodelists[node]);
- }
- }
- }
- cachep->nodelists[numa_node_id()]->next_reap =
- jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
-
- BUG_ON(!cpu_cache_get(cachep));
- cpu_cache_get(cachep)->avail = 0;
- cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
- cpu_cache_get(cachep)->batchcount = 1;
- cpu_cache_get(cachep)->touched = 0;
- cachep->batchcount = 1;
- cachep->limit = BOOT_CPUCACHE_ENTRIES;
- }
+ setup_cpu_cache(cachep);
/* cache setup completed, link it into the list */
list_add(&cachep->next, &cache_chain);
- oops:
+oops:
if (!cachep && (flags & SLAB_PANIC))
panic("kmem_cache_create(): failed to create slab `%s'\n",
name);
@@ -2089,30 +2123,13 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
#define check_spinlock_acquired_node(x, y) do { } while(0)
#endif
-/*
- * Waits for all CPUs to execute func().
- */
-static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg)
-{
- check_irq_on();
- preempt_disable();
-
- local_irq_disable();
- func(arg);
- local_irq_enable();
-
- if (smp_call_function(func, arg, 1, 1))
- BUG();
-
- preempt_enable();
-}
-
-static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
- int force, int node);
+static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+ struct array_cache *ac,
+ int force, int node);
static void do_drain(void *arg)
{
- struct kmem_cache *cachep = (struct kmem_cache *) arg;
+ struct kmem_cache *cachep = arg;
struct array_cache *ac;
int node = numa_node_id();
@@ -2129,14 +2146,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
struct kmem_list3 *l3;
int node;
- smp_call_function_all_cpus(do_drain, cachep);
+ on_each_cpu(do_drain, cachep, 1, 1);
check_irq_on();
for_each_online_node(node) {
l3 = cachep->nodelists[node];
if (l3) {
- spin_lock_irq(&l3->list_lock);
- drain_array_locked(cachep, l3->shared, 1, node);
- spin_unlock_irq(&l3->list_lock);
+ drain_array(cachep, l3, l3->shared, 1, node);
if (l3->alien)
drain_alien_cache(cachep, l3->alien);
}
@@ -2260,16 +2275,15 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
/* NUMA: free the list3 structures */
for_each_online_node(i) {
- if ((l3 = cachep->nodelists[i])) {
+ l3 = cachep->nodelists[i];
+ if (l3) {
kfree(l3->shared);
free_alien_cache(l3->alien);
kfree(l3);
}
}
kmem_cache_free(&cache_cache, cachep);
-
unlock_cpu_hotplug();
-
return 0;
}
EXPORT_SYMBOL(kmem_cache_destroy);
@@ -2292,7 +2306,6 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
slabp->inuse = 0;
slabp->colouroff = colour_off;
slabp->s_mem = objp + colour_off;
-
return slabp;
}
@@ -2307,7 +2320,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
int i;
for (i = 0; i < cachep->num; i++) {
- void *objp = slabp->s_mem + cachep->buffer_size * i;
+ void *objp = index_to_obj(cachep, slabp, i);
#if DEBUG
/* need to poison the objs? */
if (cachep->flags & SLAB_POISON)
@@ -2320,9 +2333,9 @@ static void cache_init_objs(struct kmem_cache *cachep,
*dbg_redzone2(cachep, objp) = RED_INACTIVE;
}
/*
- * Constructors are not allowed to allocate memory from
- * the same cache which they are a constructor for.
- * Otherwise, deadlock. They must also be threaded.
+ * Constructors are not allowed to allocate memory from the same
+		 * cache for which they are a constructor. Otherwise, deadlock.
+ * They must also be threaded.
*/
if (cachep->ctor && !(cachep->flags & SLAB_POISON))
cachep->ctor(objp + obj_offset(cachep), cachep,
@@ -2336,8 +2349,8 @@ static void cache_init_objs(struct kmem_cache *cachep,
slab_error(cachep, "constructor overwrote the"
" start of an object");
}
- if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)
- && cachep->flags & SLAB_POISON)
+ if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
+ OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
kernel_map_pages(virt_to_page(objp),
cachep->buffer_size / PAGE_SIZE, 0);
#else
@@ -2352,18 +2365,16 @@ static void cache_init_objs(struct kmem_cache *cachep,
static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
{
- if (flags & SLAB_DMA) {
- if (!(cachep->gfpflags & GFP_DMA))
- BUG();
- } else {
- if (cachep->gfpflags & GFP_DMA)
- BUG();
- }
+ if (flags & SLAB_DMA)
+ BUG_ON(!(cachep->gfpflags & GFP_DMA));
+ else
+ BUG_ON(cachep->gfpflags & GFP_DMA);
}
-static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nodeid)
+static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
+ int nodeid)
{
- void *objp = slabp->s_mem + (slabp->free * cachep->buffer_size);
+ void *objp = index_to_obj(cachep, slabp, slabp->free);
kmem_bufctl_t next;
slabp->inuse++;
@@ -2377,10 +2388,10 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nod
return objp;
}
-static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *objp,
- int nodeid)
+static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
+ void *objp, int nodeid)
{
- unsigned int objnr = (unsigned)(objp-slabp->s_mem) / cachep->buffer_size;
+ unsigned int objnr = obj_to_index(cachep, slabp, objp);
#if DEBUG
/* Verify that the slab belongs to the intended node */
@@ -2388,7 +2399,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob
if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
printk(KERN_ERR "slab: double free detected in cache "
- "'%s', objp %p\n", cachep->name, objp);
+ "'%s', objp %p\n", cachep->name, objp);
BUG();
}
#endif
@@ -2397,14 +2408,18 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob
slabp->inuse--;
}
-static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, void *objp)
+static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp,
+ void *objp)
{
int i;
struct page *page;
/* Nasty!!!!!! I hope this is OK. */
- i = 1 << cachep->gfporder;
page = virt_to_page(objp);
+
+ i = 1;
+ if (likely(!PageCompound(page)))
+ i <<= cachep->gfporder;
do {
page_set_cache(page, cachep);
page_set_slab(page, slabp);
@@ -2425,8 +2440,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
unsigned long ctor_flags;
struct kmem_list3 *l3;
- /* Be lazy and only check for valid flags here,
- * keeping it out of the critical path in kmem_cache_alloc().
+ /*
+ * Be lazy and only check for valid flags here, keeping it out of the
+ * critical path in kmem_cache_alloc().
*/
if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
BUG();
@@ -2467,14 +2483,17 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
*/
kmem_flagcheck(cachep, flags);
- /* Get mem for the objs.
- * Attempt to allocate a physical page from 'nodeid',
+ /*
+ * Get mem for the objs. Attempt to allocate a physical page from
+ * 'nodeid'.
*/
- if (!(objp = kmem_getpages(cachep, flags, nodeid)))
+ objp = kmem_getpages(cachep, flags, nodeid);
+ if (!objp)
goto failed;
/* Get slab management. */
- if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags)))
+ slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+ if (!slabp)
goto opps1;
slabp->nodeid = nodeid;
@@ -2493,9 +2512,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
l3->free_objects += cachep->num;
spin_unlock(&l3->list_lock);
return 1;
- opps1:
+opps1:
kmem_freepages(cachep, objp);
- failed:
+failed:
if (local_flags & __GFP_WAIT)
local_irq_disable();
return 0;
@@ -2538,8 +2557,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
page = virt_to_page(objp);
if (page_get_cache(page) != cachep) {
- printk(KERN_ERR
- "mismatch in kmem_cache_free: expected cache %p, got %p\n",
+ printk(KERN_ERR "mismatch in kmem_cache_free: expected "
+ "cache %p, got %p\n",
page_get_cache(page), cachep);
printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
printk(KERN_ERR "%p is %s.\n", page_get_cache(page),
@@ -2549,13 +2568,12 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
slabp = page_get_slab(page);
if (cachep->flags & SLAB_RED_ZONE) {
- if (*dbg_redzone1(cachep, objp) != RED_ACTIVE
- || *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
- slab_error(cachep,
- "double free, or memory outside"
- " object was overwritten");
- printk(KERN_ERR
- "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
+ if (*dbg_redzone1(cachep, objp) != RED_ACTIVE ||
+ *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
+ slab_error(cachep, "double free, or memory outside"
+ " object was overwritten");
+ printk(KERN_ERR "%p: redzone 1:0x%lx, "
+ "redzone 2:0x%lx.\n",
objp, *dbg_redzone1(cachep, objp),
*dbg_redzone2(cachep, objp));
}
@@ -2565,15 +2583,16 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
if (cachep->flags & SLAB_STORE_USER)
*dbg_userword(cachep, objp) = caller;
- objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
+ objnr = obj_to_index(cachep, slabp, objp);
BUG_ON(objnr >= cachep->num);
- BUG_ON(objp != slabp->s_mem + objnr * cachep->buffer_size);
+ BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
if (cachep->flags & SLAB_DEBUG_INITIAL) {
- /* Need to call the slab's constructor so the
- * caller can perform a verify of its state (debugging).
- * Called without the cache-lock held.
+ /*
+ * Need to call the slab's constructor so the caller can
+ * perform a verify of its state (debugging). Called without
+ * the cache-lock held.
*/
cachep->ctor(objp + obj_offset(cachep),
cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY);
@@ -2586,7 +2605,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
}
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
+ if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
store_stackinfo(cachep, objp, (unsigned long)caller);
kernel_map_pages(virt_to_page(objp),
cachep->buffer_size / PAGE_SIZE, 0);
@@ -2612,14 +2631,14 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
goto bad;
}
if (entries != cachep->num - slabp->inuse) {
- bad:
- printk(KERN_ERR
- "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
- cachep->name, cachep->num, slabp, slabp->inuse);
+bad:
+ printk(KERN_ERR "slab: Internal list corruption detected in "
+ "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
+ cachep->name, cachep->num, slabp, slabp->inuse);
for (i = 0;
i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
i++) {
- if ((i % 16) == 0)
+ if (i % 16 == 0)
printk("\n%03x:", i);
printk(" %02x", ((unsigned char *)slabp)[i]);
}
@@ -2641,12 +2660,13 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
check_irq_off();
ac = cpu_cache_get(cachep);
- retry:
+retry:
batchcount = ac->batchcount;
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
- /* if there was little recent activity on this
- * cache, then perform only a partial refill.
- * Otherwise we could generate refill bouncing.
+ /*
+ * If there was little recent activity on this cache, then
+ * perform only a partial refill. Otherwise we could generate
+ * refill bouncing.
*/
batchcount = BATCHREFILL_LIMIT;
}
@@ -2702,29 +2722,29 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
list_add(&slabp->list, &l3->slabs_partial);
}
- must_grow:
+must_grow:
l3->free_objects -= ac->avail;
- alloc_done:
+alloc_done:
spin_unlock(&l3->list_lock);
if (unlikely(!ac->avail)) {
int x;
x = cache_grow(cachep, flags, numa_node_id());
- // cache_grow can reenable interrupts, then ac could change.
+ /* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
- if (!x && ac->avail == 0) // no objects in sight? abort
+ if (!x && ac->avail == 0) /* no objects in sight? abort */
return NULL;
- if (!ac->avail) // objects refilled by interrupt?
+ if (!ac->avail) /* objects refilled by interrupt? */
goto retry;
}
ac->touched = 1;
return ac->entry[--ac->avail];
}
-static inline void
-cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags)
+static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
+ gfp_t flags)
{
might_sleep_if(flags & __GFP_WAIT);
#if DEBUG
@@ -2733,8 +2753,8 @@ cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags)
}
#if DEBUG
-static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags,
- void *objp, void *caller)
+static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
+ gfp_t flags, void *objp, void *caller)
{
if (!objp)
return objp;
@@ -2754,15 +2774,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags
*dbg_userword(cachep, objp) = caller;
if (cachep->flags & SLAB_RED_ZONE) {
- if (*dbg_redzone1(cachep, objp) != RED_INACTIVE
- || *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
- slab_error(cachep,
- "double free, or memory outside"
- " object was overwritten");
+ if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
+ *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
+ slab_error(cachep, "double free, or memory outside"
+ " object was overwritten");
printk(KERN_ERR
- "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
- objp, *dbg_redzone1(cachep, objp),
- *dbg_redzone2(cachep, objp));
+ "%p: redzone 1:0x%lx, redzone 2:0x%lx\n",
+ objp, *dbg_redzone1(cachep, objp),
+ *dbg_redzone2(cachep, objp));
}
*dbg_redzone1(cachep, objp) = RED_ACTIVE;
*dbg_redzone2(cachep, objp) = RED_ACTIVE;
@@ -2809,8 +2828,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
return objp;
}
-static __always_inline void *
-__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
+static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags, void *caller)
{
unsigned long save_flags;
void *objp;
@@ -2830,7 +2849,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
/*
 * An interface to enable slab creation on nodeid
*/
-static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
+ int nodeid)
{
struct list_head *entry;
struct slab *slabp;
@@ -2841,7 +2861,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
l3 = cachep->nodelists[nodeid];
BUG_ON(!l3);
- retry:
+retry:
check_irq_off();
spin_lock(&l3->list_lock);
entry = l3->slabs_partial.next;
@@ -2868,16 +2888,15 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
/* move slabp to correct slabp list: */
list_del(&slabp->list);
- if (slabp->free == BUFCTL_END) {
+ if (slabp->free == BUFCTL_END)
list_add(&slabp->list, &l3->slabs_full);
- } else {
+ else
list_add(&slabp->list, &l3->slabs_partial);
- }
spin_unlock(&l3->list_lock);
goto done;
- must_grow:
+must_grow:
spin_unlock(&l3->list_lock);
x = cache_grow(cachep, flags, nodeid);
@@ -2885,7 +2904,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
return NULL;
goto retry;
- done:
+done:
return obj;
}
#endif
@@ -2958,7 +2977,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
}
free_block(cachep, ac->entry, batchcount, node);
- free_done:
+free_done:
#if STATS
{
int i = 0;
@@ -2979,16 +2998,12 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
#endif
spin_unlock(&l3->list_lock);
ac->avail -= batchcount;
- memmove(ac->entry, &(ac->entry[batchcount]),
- sizeof(void *) * ac->avail);
+ memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
/*
- * __cache_free
- * Release an obj back to its cache. If the obj has a constructed
- * state, it must be in this state _before_ it is released.
- *
- * Called with disabled ints.
+ * Release an obj back to its cache. If the obj has a constructed state, it
+ * must be in this state _before_ it is released. Called with interrupts
+ * disabled.
*/
static inline void __cache_free(struct kmem_cache *cachep, void *objp)
{
@@ -3007,9 +3022,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
if (unlikely(slabp->nodeid != numa_node_id())) {
struct array_cache *alien = NULL;
int nodeid = slabp->nodeid;
- struct kmem_list3 *l3 =
- cachep->nodelists[numa_node_id()];
+ struct kmem_list3 *l3;
+ l3 = cachep->nodelists[numa_node_id()];
STATS_INC_NODEFREES(cachep);
if (l3->alien && l3->alien[nodeid]) {
alien = l3->alien[nodeid];
@@ -3093,7 +3108,7 @@ int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr)
if (unlikely(page_get_cache(page) != cachep))
goto out;
return 1;
- out:
+out:
return 0;
}
@@ -3119,7 +3134,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
local_irq_save(save_flags);
if (nodeid == -1 || nodeid == numa_node_id() ||
- !cachep->nodelists[nodeid])
+ !cachep->nodelists[nodeid])
ptr = ____cache_alloc(cachep, flags);
else
ptr = __cache_alloc_node(cachep, flags, nodeid);
@@ -3148,6 +3163,7 @@ EXPORT_SYMBOL(kmalloc_node);
* kmalloc - allocate memory
* @size: how many bytes of memory are required.
* @flags: the type of memory to allocate.
+ * @caller: return address of the caller, used for debug tracking
*
* kmalloc is the normal method of allocating memory
* in the kernel.
@@ -3236,7 +3252,7 @@ void *__alloc_percpu(size_t size)
/* Catch derefs w/o wrappers */
return (void *)(~(unsigned long)pdata);
- unwind_oom:
+unwind_oom:
while (--i >= 0) {
if (!cpu_possible(i))
continue;
@@ -3339,18 +3355,20 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
struct array_cache *nc = NULL, *new;
struct array_cache **new_alien = NULL;
#ifdef CONFIG_NUMA
- if (!(new_alien = alloc_alien_cache(node, cachep->limit)))
+ new_alien = alloc_alien_cache(node, cachep->limit);
+ if (!new_alien)
goto fail;
#endif
- if (!(new = alloc_arraycache(node, (cachep->shared *
- cachep->batchcount),
- 0xbaadf00d)))
+ new = alloc_arraycache(node, cachep->shared*cachep->batchcount,
+ 0xbaadf00d);
+ if (!new)
goto fail;
- if ((l3 = cachep->nodelists[node])) {
-
+ l3 = cachep->nodelists[node];
+ if (l3) {
spin_lock_irq(&l3->list_lock);
- if ((nc = cachep->nodelists[node]->shared))
+ nc = cachep->nodelists[node]->shared;
+ if (nc)
free_block(cachep, nc->entry, nc->avail, node);
l3->shared = new;
@@ -3359,27 +3377,27 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
new_alien = NULL;
}
l3->free_limit = (1 + nr_cpus_node(node)) *
- cachep->batchcount + cachep->num;
+ cachep->batchcount + cachep->num;
spin_unlock_irq(&l3->list_lock);
kfree(nc);
free_alien_cache(new_alien);
continue;
}
- if (!(l3 = kmalloc_node(sizeof(struct kmem_list3),
- GFP_KERNEL, node)))
+ l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
+ if (!l3)
goto fail;
kmem_list3_init(l3);
l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+ ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
l3->shared = new;
l3->alien = new_alien;
l3->free_limit = (1 + nr_cpus_node(node)) *
- cachep->batchcount + cachep->num;
+ cachep->batchcount + cachep->num;
cachep->nodelists[node] = l3;
}
return err;
- fail:
+fail:
err = -ENOMEM;
return err;
}
@@ -3391,7 +3409,7 @@ struct ccupdate_struct {
static void do_ccupdate_local(void *info)
{
- struct ccupdate_struct *new = (struct ccupdate_struct *)info;
+ struct ccupdate_struct *new = info;
struct array_cache *old;
check_irq_off();
@@ -3401,16 +3419,17 @@ static void do_ccupdate_local(void *info)
new->new[smp_processor_id()] = old;
}
-static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount,
- int shared)
+/* Always called with the cache_chain_mutex held */
+static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+ int batchcount, int shared)
{
struct ccupdate_struct new;
int i, err;
memset(&new.new, 0, sizeof(new.new));
for_each_online_cpu(i) {
- new.new[i] =
- alloc_arraycache(cpu_to_node(i), limit, batchcount);
+ new.new[i] = alloc_arraycache(cpu_to_node(i), limit,
+ batchcount);
if (!new.new[i]) {
for (i--; i >= 0; i--)
kfree(new.new[i]);
@@ -3419,14 +3438,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
}
new.cachep = cachep;
- smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
+ on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1);
check_irq_on();
- spin_lock(&cachep->spinlock);
cachep->batchcount = batchcount;
cachep->limit = limit;
cachep->shared = shared;
- spin_unlock(&cachep->spinlock);
for_each_online_cpu(i) {
struct array_cache *ccold = new.new[i];
@@ -3447,15 +3464,17 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
return 0;
}
+/* Always called with the cache_chain_mutex held */
static void enable_cpucache(struct kmem_cache *cachep)
{
int err;
int limit, shared;
- /* The head array serves three purposes:
+ /*
+ * The head array serves three purposes:
* - create a LIFO ordering, i.e. return objects that are cache-warm
* - reduce the number of spinlock operations.
- * - reduce the number of linked list operations on the slab and
+ * - reduce the number of linked list operations on the slab and
* bufctl chains: array operations are cheaper.
	 * The numbers are guessed; we should auto-tune as described by
* Bonwick.
@@ -3471,7 +3490,8 @@ static void enable_cpucache(struct kmem_cache *cachep)
else
limit = 120;
- /* Cpu bound tasks (e.g. network routing) can exhibit cpu bound
+ /*
+ * CPU bound tasks (e.g. network routing) can exhibit cpu bound
* allocation behaviour: Most allocs on one cpu, most free operations
* on another cpu. For these cases, an efficient object passing between
* cpus is necessary. This is provided by a shared array. The array
@@ -3486,9 +3506,9 @@ static void enable_cpucache(struct kmem_cache *cachep)
#endif
#if DEBUG
- /* With debugging enabled, large batchcount lead to excessively
- * long periods with disabled local interrupts. Limit the
- * batchcount
+ /*
+	 * With debugging enabled, a large batchcount leads to excessively long
+	 * periods with local interrupts disabled. Limit the batchcount.
*/
if (limit > 32)
limit = 32;
@@ -3499,23 +3519,32 @@ static void enable_cpucache(struct kmem_cache *cachep)
cachep->name, -err);
}
-static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
- int force, int node)
+/*
+ * Drain an array if it contains any elements, taking the l3 lock only if
+ * necessary. Note that the l3 listlock also protects the array_cache
+ * if drain_array() is used on the shared array.
+ */
+static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+ struct array_cache *ac, int force, int node)
{
int tofree;
- check_spinlock_acquired_node(cachep, node);
+ if (!ac || !ac->avail)
+ return;
if (ac->touched && !force) {
ac->touched = 0;
- } else if (ac->avail) {
- tofree = force ? ac->avail : (ac->limit + 4) / 5;
- if (tofree > ac->avail) {
- tofree = (ac->avail + 1) / 2;
+ } else {
+ spin_lock_irq(&l3->list_lock);
+ if (ac->avail) {
+ tofree = force ? ac->avail : (ac->limit + 4) / 5;
+ if (tofree > ac->avail)
+ tofree = (ac->avail + 1) / 2;
+ free_block(cachep, ac->entry, tofree, node);
+ ac->avail -= tofree;
+ memmove(ac->entry, &(ac->entry[tofree]),
+ sizeof(void *) * ac->avail);
}
- free_block(cachep, ac->entry, tofree, node);
- ac->avail -= tofree;
- memmove(ac->entry, &(ac->entry[tofree]),
- sizeof(void *) * ac->avail);
+ spin_unlock_irq(&l3->list_lock);
}
}
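
The drain policy above, restated as arithmetic: free about a fifth of the limit per pass, never more than half of what is currently available, and everything when forced. (Sketch of the arithmetic only; no locking or real freeing.)

#include <stdio.h>

static int tofree(int avail, int limit, int force)
{
	int n = force ? avail : (limit + 4) / 5;

	if (n > avail)
		n = (avail + 1) / 2;
	return n;
}

int main(void)
{
	printf("%d\n", tofree(30, 120, 0));	/* 24: about 1/5 of limit */
	printf("%d\n", tofree(10, 120, 0));	/* 5: half of what's there */
	printf("%d\n", tofree(30, 120, 1));	/* 30: forced, drain all */
	return 0;
}
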
@@ -3528,13 +3557,14 @@ static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac
* - clear the per-cpu caches for this CPU.
* - return freeable pages to the main free memory pool.
*
- * If we cannot acquire the cache chain mutex then just give up - we'll
- * try again on the next iteration.
+ * If we cannot acquire the cache chain mutex then just give up - we'll try
+ * again on the next iteration.
*/
static void cache_reap(void *unused)
{
struct list_head *walk;
struct kmem_list3 *l3;
+ int node = numa_node_id();
if (!mutex_trylock(&cache_chain_mutex)) {
		/* Give up. Set up the next iteration. */
@@ -3550,65 +3580,72 @@ static void cache_reap(void *unused)
struct slab *slabp;
searchp = list_entry(walk, struct kmem_cache, next);
-
- if (searchp->flags & SLAB_NO_REAP)
- goto next;
-
check_irq_on();
- l3 = searchp->nodelists[numa_node_id()];
+ /*
+		 * We only take the l3 lock if absolutely necessary and we have
+		 * established with reasonable certainty that we can do some
+		 * work once the lock is obtained.
+ */
+ l3 = searchp->nodelists[node];
+
reap_alien(searchp, l3);
- spin_lock_irq(&l3->list_lock);
- drain_array_locked(searchp, cpu_cache_get(searchp), 0,
- numa_node_id());
+ drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
+ /*
+ * These are racy checks but it does not matter
+ * if we skip one check or scan twice.
+ */
if (time_after(l3->next_reap, jiffies))
- goto next_unlock;
+ goto next;
l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
- if (l3->shared)
- drain_array_locked(searchp, l3->shared, 0,
- numa_node_id());
+ drain_array(searchp, l3, l3->shared, 0, node);
if (l3->free_touched) {
l3->free_touched = 0;
- goto next_unlock;
+ goto next;
}
- tofree =
- (l3->free_limit + 5 * searchp->num -
- 1) / (5 * searchp->num);
+ tofree = (l3->free_limit + 5 * searchp->num - 1) /
+ (5 * searchp->num);
do {
+ /*
+ * Do not lock if there are no free blocks.
+ */
+ if (list_empty(&l3->slabs_free))
+ break;
+
+ spin_lock_irq(&l3->list_lock);
p = l3->slabs_free.next;
- if (p == &(l3->slabs_free))
+ if (p == &(l3->slabs_free)) {
+ spin_unlock_irq(&l3->list_lock);
break;
+ }
slabp = list_entry(p, struct slab, list);
BUG_ON(slabp->inuse);
list_del(&slabp->list);
STATS_INC_REAPED(searchp);
- /* Safe to drop the lock. The slab is no longer
- * linked to the cache.
- * searchp cannot disappear, we hold
+ /*
+ * Safe to drop the lock. The slab is no longer linked
+			 * to the cache. searchp cannot disappear; we hold the
			 * cache_chain_mutex.
*/
l3->free_objects -= searchp->num;
spin_unlock_irq(&l3->list_lock);
slab_destroy(searchp, slabp);
- spin_lock_irq(&l3->list_lock);
} while (--tofree > 0);
- next_unlock:
- spin_unlock_irq(&l3->list_lock);
- next:
+next:
cond_resched();
}
check_irq_on();
mutex_unlock(&cache_chain_mutex);
next_reap_node();
- /* Setup the next iteration */
+ /* Set up the next iteration */
schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
}
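
The per-pass reap budget above is a ceiling division: roughly a fifth of free_limit, expressed in whole slabs. Worked with invented numbers:

#include <stdio.h>

int main(void)
{
	int free_limit = 88;	/* hypothetical per-node free limit */
	int num = 8;		/* objects per slab */

	/* ceil(free_limit / (5 * num)): a fifth of the free limit,
	 * rounded up to whole slabs */
	int tofree = (free_limit + 5 * num - 1) / (5 * num);

	printf("reap up to %d slab(s) per pass\n", tofree);	/* 3 */
	return 0;
}
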
@@ -3658,8 +3695,8 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
struct kmem_cache *cachep = p;
++*pos;
- return cachep->next.next == &cache_chain ? NULL
- : list_entry(cachep->next.next, struct kmem_cache, next);
+ return cachep->next.next == &cache_chain ?
+ NULL : list_entry(cachep->next.next, struct kmem_cache, next);
}
static void s_stop(struct seq_file *m, void *p)
@@ -3681,7 +3718,6 @@ static int s_show(struct seq_file *m, void *p)
int node;
struct kmem_list3 *l3;
- spin_lock(&cachep->spinlock);
active_objs = 0;
num_slabs = 0;
for_each_online_node(node) {
@@ -3748,7 +3784,9 @@ static int s_show(struct seq_file *m, void *p)
unsigned long node_frees = cachep->node_frees;
seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
- %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees);
+ %4lu %4lu %4lu %4lu", allocs, high, grown,
+ reaped, errors, max_freeable, node_allocs,
+ node_frees);
}
/* cpu stats */
{
@@ -3762,7 +3800,6 @@ static int s_show(struct seq_file *m, void *p)
}
#endif
seq_putc(m, '\n');
- spin_unlock(&cachep->spinlock);
return 0;
}
@@ -3820,13 +3857,12 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
mutex_lock(&cache_chain_mutex);
res = -EINVAL;
list_for_each(p, &cache_chain) {
- struct kmem_cache *cachep = list_entry(p, struct kmem_cache,
- next);
+ struct kmem_cache *cachep;
+ cachep = list_entry(p, struct kmem_cache, next);
if (!strcmp(cachep->name, kbuf)) {
- if (limit < 1 ||
- batchcount < 1 ||
- batchcount > limit || shared < 0) {
+ if (limit < 1 || batchcount < 1 ||
+ batchcount > limit || shared < 0) {
res = 0;
} else {
res = do_tune_cpucache(cachep, limit,
diff --git a/mm/swap.c b/mm/swap.c
index b524ea90bdd..91b7e2026f6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -209,19 +209,18 @@ int lru_add_drain_all(void)
*/
void fastcall __page_cache_release(struct page *page)
{
- unsigned long flags;
- struct zone *zone = page_zone(page);
+ if (PageLRU(page)) {
+ unsigned long flags;
+ struct zone *zone = page_zone(page);
- spin_lock_irqsave(&zone->lru_lock, flags);
- if (TestClearPageLRU(page))
+ spin_lock_irqsave(&zone->lru_lock, flags);
+ BUG_ON(!PageLRU(page));
+ __ClearPageLRU(page);
del_page_from_lru(zone, page);
- if (page_count(page) != 0)
- page = NULL;
- spin_unlock_irqrestore(&zone->lru_lock, flags);
- if (page)
- free_hot_page(page);
+ spin_unlock_irqrestore(&zone->lru_lock, flags);
+ }
+ free_hot_page(page);
}
-
EXPORT_SYMBOL(__page_cache_release);
/*
@@ -245,7 +244,6 @@ void release_pages(struct page **pages, int nr, int cold)
pagevec_init(&pages_to_free, cold);
for (i = 0; i < nr; i++) {
struct page *page = pages[i];
- struct zone *pagezone;
if (unlikely(PageCompound(page))) {
if (zone) {
@@ -259,23 +257,27 @@ void release_pages(struct page **pages, int nr, int cold)
if (!put_page_testzero(page))
continue;
- pagezone = page_zone(page);
- if (pagezone != zone) {
- if (zone)
- spin_unlock_irq(&zone->lru_lock);
- zone = pagezone;
- spin_lock_irq(&zone->lru_lock);
- }
- if (TestClearPageLRU(page))
+ if (PageLRU(page)) {
+ struct zone *pagezone = page_zone(page);
+ if (pagezone != zone) {
+ if (zone)
+ spin_unlock_irq(&zone->lru_lock);
+ zone = pagezone;
+ spin_lock_irq(&zone->lru_lock);
+ }
+ BUG_ON(!PageLRU(page));
+ __ClearPageLRU(page);
del_page_from_lru(zone, page);
- if (page_count(page) == 0) {
- if (!pagevec_add(&pages_to_free, page)) {
+ }
+
+ if (!pagevec_add(&pages_to_free, page)) {
+ if (zone) {
spin_unlock_irq(&zone->lru_lock);
- __pagevec_free(&pages_to_free);
- pagevec_reinit(&pages_to_free);
- zone = NULL; /* No lock is held */
+ zone = NULL;
}
- }
+ __pagevec_free(&pages_to_free);
+ pagevec_reinit(&pages_to_free);
+ }
}
if (zone)
spin_unlock_irq(&zone->lru_lock);
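
The TestClearPageLRU to BUG_ON()/__ClearPageLRU conversions in this file swap an atomic read-modify-write for a plain store plus an assertion, which is safe only because zone->lru_lock serializes every LRU flag update. A rough userspace analogy (a pthread mutex stands in for the spinlock; illustrative only):

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static int page_lru;	/* stands in for the PG_lru page flag */

/* Under the lock the flag cannot change, so assert it and use a
 * plain store instead of an atomic test-and-clear. */
static void release_page(void)
{
	pthread_mutex_lock(&lru_lock);
	assert(page_lru);	/* BUG_ON(!PageLRU(page)) */
	page_lru = 0;		/* __ClearPageLRU(page): non-atomic */
	pthread_mutex_unlock(&lru_lock);
}

int main(void)
{
	page_lru = 1;
	release_page();
	printf("off the LRU\n");
	return 0;
}
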
@@ -343,8 +345,8 @@ void __pagevec_lru_add(struct pagevec *pvec)
zone = pagezone;
spin_lock_irq(&zone->lru_lock);
}
- if (TestSetPageLRU(page))
- BUG();
+ BUG_ON(PageLRU(page));
+ SetPageLRU(page);
add_page_to_inactive_list(zone, page);
}
if (zone)
@@ -370,10 +372,10 @@ void __pagevec_lru_add_active(struct pagevec *pvec)
zone = pagezone;
spin_lock_irq(&zone->lru_lock);
}
- if (TestSetPageLRU(page))
- BUG();
- if (TestSetPageActive(page))
- BUG();
+ BUG_ON(PageLRU(page));
+ SetPageLRU(page);
+ BUG_ON(PageActive(page));
+ SetPageActive(page);
add_page_to_active_list(zone, page);
}
if (zone)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index db8a3d3e163..d7af296833f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -15,6 +15,7 @@
#include <linux/buffer_head.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
+#include <linux/migrate.h>
#include <asm/pgtable.h>
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1f9cf0d073b..365ed6ff182 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -116,7 +116,7 @@ static inline unsigned long scan_swap_map(struct swap_info_struct *si)
last_in_cluster = offset + SWAPFILE_CLUSTER;
else if (offset == last_in_cluster) {
spin_lock(&swap_lock);
- si->cluster_next = offset-SWAPFILE_CLUSTER-1;
+ si->cluster_next = offset-SWAPFILE_CLUSTER+1;
goto cluster;
}
if (unlikely(--latency_ration < 0)) {
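
The one-liner above is a boundary fix: when a run of SWAPFILE_CLUSTER consecutive free slots is found ending at offset, the first slot of the run is offset - SWAPFILE_CLUSTER + 1, not offset - SWAPFILE_CLUSTER - 1. Worked with the cluster size assumed to be 256:

#include <stdio.h>

#define SWAPFILE_CLUSTER 256UL	/* assumed, matching mm/swapfile.c */

int main(void)
{
	/* a free run of 256 slots, [768, 1023], was detected ending here */
	unsigned long offset = 1023;

	unsigned long fixed = offset - SWAPFILE_CLUSTER + 1;	/* 768 */
	unsigned long buggy = offset - SWAPFILE_CLUSTER - 1;	/* 766 */

	printf("cluster_next: %lu (was %lu)\n", fixed, buggy);
	return 0;
}
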
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4fe7e3aa02e..fd572bbdc9f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -33,39 +33,21 @@
#include <linux/cpuset.h>
#include <linux/notifier.h>
#include <linux/rwsem.h>
+#include <linux/delay.h>
#include <asm/tlbflush.h>
#include <asm/div64.h>
#include <linux/swapops.h>
-/* possible outcome of pageout() */
-typedef enum {
- /* failed to write page out, page is locked */
- PAGE_KEEP,
- /* move page to the active list, page is locked */
- PAGE_ACTIVATE,
- /* page has been sent to the disk successfully, page is unlocked */
- PAGE_SUCCESS,
- /* page is clean and locked */
- PAGE_CLEAN,
-} pageout_t;
+#include "internal.h"
struct scan_control {
- /* Ask refill_inactive_zone, or shrink_cache to scan this many pages */
- unsigned long nr_to_scan;
-
/* Incremented by the number of inactive pages that were scanned */
unsigned long nr_scanned;
- /* Incremented by the number of pages reclaimed */
- unsigned long nr_reclaimed;
-
unsigned long nr_mapped; /* From page_state */
- /* Ask shrink_caches, or shrink_zone to scan at this priority */
- unsigned int priority;
-
/* This context's GFP mask */
gfp_t gfp_mask;
@@ -183,10 +165,11 @@ EXPORT_SYMBOL(remove_shrinker);
*
* Returns the number of slab objects which we shrunk.
*/
-int shrink_slab(unsigned long scanned, gfp_t gfp_mask, unsigned long lru_pages)
+unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+ unsigned long lru_pages)
{
struct shrinker *shrinker;
- int ret = 0;
+ unsigned long ret = 0;
if (scanned == 0)
scanned = SWAP_CLUSTER_MAX;
@@ -306,9 +289,10 @@ static void handle_write_error(struct address_space *mapping,
}
/*
- * pageout is called by shrink_list() for each dirty page. Calls ->writepage().
+ * pageout is called by shrink_page_list() for each dirty page.
+ * Calls ->writepage().
*/
-static pageout_t pageout(struct page *page, struct address_space *mapping)
+pageout_t pageout(struct page *page, struct address_space *mapping)
{
/*
* If the page is dirty, only perform writeback if that write
@@ -376,7 +360,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
return PAGE_CLEAN;
}
-static int remove_mapping(struct address_space *mapping, struct page *page)
+int remove_mapping(struct address_space *mapping, struct page *page)
{
if (!mapping)
return 0; /* truncate got there first */
@@ -414,14 +398,15 @@ cannot_free:
}
/*
- * shrink_list adds the number of reclaimed pages to sc->nr_reclaimed
+ * shrink_page_list() returns the number of reclaimed pages
*/
-static int shrink_list(struct list_head *page_list, struct scan_control *sc)
+static unsigned long shrink_page_list(struct list_head *page_list,
+ struct scan_control *sc)
{
LIST_HEAD(ret_pages);
struct pagevec freed_pvec;
int pgactivate = 0;
- int reclaimed = 0;
+ unsigned long nr_reclaimed = 0;
cond_resched();
@@ -464,12 +449,9 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
- if (PageAnon(page) && !PageSwapCache(page)) {
- if (!sc->may_swap)
- goto keep_locked;
+ if (PageAnon(page) && !PageSwapCache(page))
if (!add_to_swap(page, GFP_ATOMIC))
goto activate_locked;
- }
#endif /* CONFIG_SWAP */
mapping = page_mapping(page);
@@ -481,12 +463,6 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
* processes. Try to unmap it here.
*/
if (page_mapped(page) && mapping) {
- /*
- * No unmapping if we do not swap
- */
- if (!sc->may_swap)
- goto keep_locked;
-
switch (try_to_unmap(page, 0)) {
case SWAP_FAIL:
goto activate_locked;
@@ -561,7 +537,7 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
free_it:
unlock_page(page);
- reclaimed++;
+ nr_reclaimed++;
if (!pagevec_add(&freed_pvec, page))
__pagevec_release_nonlru(&freed_pvec);
continue;
@@ -579,483 +555,8 @@ keep:
if (pagevec_count(&freed_pvec))
__pagevec_release_nonlru(&freed_pvec);
mod_page_state(pgactivate, pgactivate);
- sc->nr_reclaimed += reclaimed;
- return reclaimed;
-}
-
-#ifdef CONFIG_MIGRATION
-static inline void move_to_lru(struct page *page)
-{
- list_del(&page->lru);
- if (PageActive(page)) {
- /*
- * lru_cache_add_active checks that
- * the PG_active bit is off.
- */
- ClearPageActive(page);
- lru_cache_add_active(page);
- } else {
- lru_cache_add(page);
- }
- put_page(page);
-}
-
-/*
- * Add isolated pages on the list back to the LRU.
- *
- * returns the number of pages put back.
- */
-int putback_lru_pages(struct list_head *l)
-{
- struct page *page;
- struct page *page2;
- int count = 0;
-
- list_for_each_entry_safe(page, page2, l, lru) {
- move_to_lru(page);
- count++;
- }
- return count;
-}
-
-/*
- * Non migratable page
- */
-int fail_migrate_page(struct page *newpage, struct page *page)
-{
- return -EIO;
-}
-EXPORT_SYMBOL(fail_migrate_page);
-
-/*
- * swapout a single page
- * page is locked upon entry, unlocked on exit
- */
-static int swap_page(struct page *page)
-{
- struct address_space *mapping = page_mapping(page);
-
- if (page_mapped(page) && mapping)
- if (try_to_unmap(page, 1) != SWAP_SUCCESS)
- goto unlock_retry;
-
- if (PageDirty(page)) {
- /* Page is dirty, try to write it out here */
- switch(pageout(page, mapping)) {
- case PAGE_KEEP:
- case PAGE_ACTIVATE:
- goto unlock_retry;
-
- case PAGE_SUCCESS:
- goto retry;
-
- case PAGE_CLEAN:
- ; /* try to free the page below */
- }
- }
-
- if (PagePrivate(page)) {
- if (!try_to_release_page(page, GFP_KERNEL) ||
- (!mapping && page_count(page) == 1))
- goto unlock_retry;
- }
-
- if (remove_mapping(mapping, page)) {
- /* Success */
- unlock_page(page);
- return 0;
- }
-
-unlock_retry:
- unlock_page(page);
-
-retry:
- return -EAGAIN;
-}
-EXPORT_SYMBOL(swap_page);
-
-/*
- * Page migration was first developed in the context of the memory hotplug
- * project. The main authors of the migration code are:
- *
- * IWAMOTO Toshihiro <iwamoto@valinux.co.jp>
- * Hirokazu Takahashi <taka@valinux.co.jp>
- * Dave Hansen <haveblue@us.ibm.com>
- * Christoph Lameter <clameter@sgi.com>
- */
-
-/*
- * Remove references for a page and establish the new page with the correct
- * basic settings to be able to stop accesses to the page.
- */
-int migrate_page_remove_references(struct page *newpage,
- struct page *page, int nr_refs)
-{
- struct address_space *mapping = page_mapping(page);
- struct page **radix_pointer;
-
- /*
- * Avoid doing any of the following work if the page count
- * indicates that the page is in use or truncate has removed
- * the page.
- */
- if (!mapping || page_mapcount(page) + nr_refs != page_count(page))
- return -EAGAIN;
-
- /*
- * Establish swap ptes for anonymous pages or destroy pte
- * maps for files.
- *
- * In order to reestablish file backed mappings the fault handlers
- * will take the radix tree_lock which may then be used to stop
- * processses from accessing this page until the new page is ready.
- *
- * A process accessing via a swap pte (an anonymous page) will take a
- * page_lock on the old page which will block the process until the
- * migration attempt is complete. At that time the PageSwapCache bit
- * will be examined. If the page was migrated then the PageSwapCache
- * bit will be clear and the operation to retrieve the page will be
- * retried which will find the new page in the radix tree. Then a new
- * direct mapping may be generated based on the radix tree contents.
- *
- * If the page was not migrated then the PageSwapCache bit
- * is still set and the operation may continue.
- */
- if (try_to_unmap(page, 1) == SWAP_FAIL)
- /* A vma has VM_LOCKED set -> Permanent failure */
- return -EPERM;
-
- /*
- * Give up if we were unable to remove all mappings.
- */
- if (page_mapcount(page))
- return -EAGAIN;
-
- write_lock_irq(&mapping->tree_lock);
-
- radix_pointer = (struct page **)radix_tree_lookup_slot(
- &mapping->page_tree,
- page_index(page));
-
- if (!page_mapping(page) || page_count(page) != nr_refs ||
- *radix_pointer != page) {
- write_unlock_irq(&mapping->tree_lock);
- return -EAGAIN;
- }
-
- /*
- * Now we know that no one else is looking at the page.
- *
- * Certain minimal information about a page must be available
- * in order for other subsystems to properly handle the page if they
- * find it through the radix tree update before we are finished
- * copying the page.
- */
- get_page(newpage);
- newpage->index = page->index;
- newpage->mapping = page->mapping;
- if (PageSwapCache(page)) {
- SetPageSwapCache(newpage);
- set_page_private(newpage, page_private(page));
- }
-
- *radix_pointer = newpage;
- __put_page(page);
- write_unlock_irq(&mapping->tree_lock);
-
- return 0;
-}
-EXPORT_SYMBOL(migrate_page_remove_references);
-
-/*
- * Copy the page to its new location
- */
-void migrate_page_copy(struct page *newpage, struct page *page)
-{
- copy_highpage(newpage, page);
-
- if (PageError(page))
- SetPageError(newpage);
- if (PageReferenced(page))
- SetPageReferenced(newpage);
- if (PageUptodate(page))
- SetPageUptodate(newpage);
- if (PageActive(page))
- SetPageActive(newpage);
- if (PageChecked(page))
- SetPageChecked(newpage);
- if (PageMappedToDisk(page))
- SetPageMappedToDisk(newpage);
-
- if (PageDirty(page)) {
- clear_page_dirty_for_io(page);
- set_page_dirty(newpage);
- }
-
- ClearPageSwapCache(page);
- ClearPageActive(page);
- ClearPagePrivate(page);
- set_page_private(page, 0);
- page->mapping = NULL;
-
- /*
- * If any waiters have accumulated on the new page then
- * wake them up.
- */
- if (PageWriteback(newpage))
- end_page_writeback(newpage);
-}
-EXPORT_SYMBOL(migrate_page_copy);
-
-/*
- * Common logic to directly migrate a single page suitable for
- * pages that do not use PagePrivate.
- *
- * Pages are locked upon entry and exit.
- */
-int migrate_page(struct page *newpage, struct page *page)
-{
- int rc;
-
- BUG_ON(PageWriteback(page)); /* Writeback must be complete */
-
- rc = migrate_page_remove_references(newpage, page, 2);
-
- if (rc)
- return rc;
-
- migrate_page_copy(newpage, page);
-
- /*
- * Remove auxiliary swap entries and replace
- * them with real ptes.
- *
- * Note that a real pte entry will allow processes that are not
- * waiting on the page lock to use the new page via the page tables
- * before the new page is unlocked.
- */
- remove_from_swap(newpage);
- return 0;
+ return nr_reclaimed;
}
-EXPORT_SYMBOL(migrate_page);
-
-/*
- * migrate_pages
- *
- * Two lists are passed to this function. The first list
- * contains the pages isolated from the LRU to be migrated.
- * The second list contains new pages that the pages isolated
- * can be moved to. If the second list is NULL then all
- * pages are swapped out.
- *
- * The function returns after 10 attempts or if no pages
- * are movable anymore because the "to" list has become empty
- * or because no retryable pages remain.
- *
- * Return: Number of pages not migrated when "to" ran empty.
- */
-int migrate_pages(struct list_head *from, struct list_head *to,
- struct list_head *moved, struct list_head *failed)
-{
- int retry;
- int nr_failed = 0;
- int pass = 0;
- struct page *page;
- struct page *page2;
- int swapwrite = current->flags & PF_SWAPWRITE;
- int rc;
-
- if (!swapwrite)
- current->flags |= PF_SWAPWRITE;
-
-redo:
- retry = 0;
-
- list_for_each_entry_safe(page, page2, from, lru) {
- struct page *newpage = NULL;
- struct address_space *mapping;
-
- cond_resched();
-
- rc = 0;
- if (page_count(page) == 1)
- /* page was freed from under us. So we are done. */
- goto next;
-
- if (to && list_empty(to))
- break;
-
- /*
- * Skip locked pages during the first two passes to give the
- * functions holding the lock time to release the page. Later we
- * use lock_page() to have a higher chance of acquiring the
- * lock.
- */
- rc = -EAGAIN;
- if (pass > 2)
- lock_page(page);
- else
- if (TestSetPageLocked(page))
- goto next;
-
- /*
- * Only wait on writeback if we have already done a pass where
- * we may have triggered writeouts for lots of pages.
- */
- if (pass > 0) {
- wait_on_page_writeback(page);
- } else {
- if (PageWriteback(page))
- goto unlock_page;
- }
-
- /*
- * Anonymous pages must have swap cache references otherwise
- * the information contained in the page maps cannot be
- * preserved.
- */
- if (PageAnon(page) && !PageSwapCache(page)) {
- if (!add_to_swap(page, GFP_KERNEL)) {
- rc = -ENOMEM;
- goto unlock_page;
- }
- }
-
- if (!to) {
- rc = swap_page(page);
- goto next;
- }
-
- newpage = lru_to_page(to);
- lock_page(newpage);
-
- /*
- * Pages are properly locked and writeback is complete.
- * Try to migrate the page.
- */
- mapping = page_mapping(page);
- if (!mapping)
- goto unlock_both;
-
- if (mapping->a_ops->migratepage) {
- /*
- * Most pages have a mapping and most filesystems
- * should provide a migration function. Anonymous
- * pages are part of swap space which also has its
- * own migration function. This is the most common
- * path for page migration.
- */
- rc = mapping->a_ops->migratepage(newpage, page);
- goto unlock_both;
- }
-
- /*
- * Default handling if a filesystem does not provide
- * a migration function. We can only migrate clean
- * pages so try to write out any dirty pages first.
- */
- if (PageDirty(page)) {
- switch (pageout(page, mapping)) {
- case PAGE_KEEP:
- case PAGE_ACTIVATE:
- goto unlock_both;
-
- case PAGE_SUCCESS:
- unlock_page(newpage);
- goto next;
-
- case PAGE_CLEAN:
- ; /* try to migrate the page below */
- }
- }
-
- /*
- * Buffers are managed in a filesystem specific way.
- * We must have no buffers or drop them.
- */
- if (!page_has_buffers(page) ||
- try_to_release_page(page, GFP_KERNEL)) {
- rc = migrate_page(newpage, page);
- goto unlock_both;
- }
-
- /*
- * On early passes with mapped pages simply
- * retry. There may be a lock held for some
- * buffers that may go away. Later
- * swap them out.
- */
- if (pass > 4) {
- /*
- * Persistently unable to drop buffers... As a
- * measure of last resort we fall back to
- * swap_page().
- */
- unlock_page(newpage);
- newpage = NULL;
- rc = swap_page(page);
- goto next;
- }
-
-unlock_both:
- unlock_page(newpage);
-
-unlock_page:
- unlock_page(page);
-
-next:
- if (rc == -EAGAIN) {
- retry++;
- } else if (rc) {
- /* Permanent failure */
- list_move(&page->lru, failed);
- nr_failed++;
- } else {
- if (newpage) {
- /* Successful migration. Return page to LRU */
- move_to_lru(newpage);
- }
- list_move(&page->lru, moved);
- }
- }
- if (retry && pass++ < 10)
- goto redo;
-
- if (!swapwrite)
- current->flags &= ~PF_SWAPWRITE;
-
- return nr_failed + retry;
-}
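As a usage sketch of this interface (before its later rework): the caller isolates pages onto a private list, optionally supplies destination pages, and collects results on the moved/failed lists. migrate_one() below is a hypothetical driver, not code from this patch:

#include <linux/errno.h>
#include <linux/mm.h>
#include <linux/swap.h>

/* Hypothetical caller: migrate a single page to a preallocated target. */
static int migrate_one(struct page *page, struct page *dest)
{
	LIST_HEAD(from);
	LIST_HEAD(to);
	LIST_HEAD(moved);
	LIST_HEAD(failed);

	if (!isolate_lru_page(page))	/* takes a ref, removes from LRU */
		return -EBUSY;
	list_add_tail(&page->lru, &from);
	list_add_tail(&dest->lru, &to);

	/* Passing to == NULL instead would swap the pages out. */
	return migrate_pages(&from, &to, &moved, &failed);
}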
-
-/*
- * Isolate one page from the LRU lists and put it on the
- * indicated list with elevated refcount.
- *
- * Result:
- * 0 = page not on LRU list
- * 1 = page removed from LRU list and added to the specified list.
- */
-int isolate_lru_page(struct page *page)
-{
- int ret = 0;
-
- if (PageLRU(page)) {
- struct zone *zone = page_zone(page);
- spin_lock_irq(&zone->lru_lock);
- if (TestClearPageLRU(page)) {
- ret = 1;
- get_page(page);
- if (PageActive(page))
- del_page_from_active_list(zone, page);
- else
- del_page_from_inactive_list(zone, page);
- }
- spin_unlock_irq(&zone->lru_lock);
- }
-
- return ret;
-}
-#endif
/*
* zone->lru_lock is heavily contended. Some of the functions that
@@ -1074,32 +575,35 @@ int isolate_lru_page(struct page *page)
*
* returns how many pages were moved onto *@dst.
*/
-static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
- struct list_head *dst, int *scanned)
+static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
+ struct list_head *src, struct list_head *dst,
+ unsigned long *scanned)
{
- int nr_taken = 0;
+ unsigned long nr_taken = 0;
struct page *page;
- int scan = 0;
+ unsigned long scan;
- while (scan++ < nr_to_scan && !list_empty(src)) {
+ for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
+ struct list_head *target;
page = lru_to_page(src);
prefetchw_prev_lru_page(page, src, flags);
- if (!TestClearPageLRU(page))
- BUG();
+ BUG_ON(!PageLRU(page));
+
list_del(&page->lru);
- if (get_page_testone(page)) {
+ target = src;
+ if (likely(get_page_unless_zero(page))) {
/*
- * It is being freed elsewhere
+ * Be careful not to clear PageLRU until after we're
+ * sure the page is not being freed elsewhere -- the
+ * page release code relies on it.
*/
- __put_page(page);
- SetPageLRU(page);
- list_add(&page->lru, src);
- continue;
- } else {
- list_add(&page->lru, dst);
+ ClearPageLRU(page);
+ target = dst;
nr_taken++;
- }
+ } /* else it is being freed elsewhere */
+
+ list_add(&page->lru, target);
}
*scanned = scan;
@@ -1107,23 +611,26 @@ static int isolate_lru_pages(int nr_to_scan, struct list_head *src,
}
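The switch from get_page_testone() to get_page_unless_zero() inverts the old take-a-reference-then-undo scheme: the reference is only taken if the refcount is still non-zero, so nothing has to be backed out for pages already on their way to the allocator. In this era of the tree the helper is, roughly:

/* include/linux/mm.h, roughly: */
static inline int get_page_unless_zero(struct page *page)
{
	return atomic_inc_not_zero(&page->_count);
}

That is why the hunk can assert BUG_ON(!PageLRU(page)) up front and clear the flag only after the reference is safely held.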
/*
- * shrink_cache() adds the number of pages reclaimed to sc->nr_reclaimed
+ * shrink_inactive_list() is a helper for shrink_zone(). It returns the number
+ * of reclaimed pages.
*/
-static void shrink_cache(struct zone *zone, struct scan_control *sc)
+static unsigned long shrink_inactive_list(unsigned long max_scan,
+ struct zone *zone, struct scan_control *sc)
{
LIST_HEAD(page_list);
struct pagevec pvec;
- int max_scan = sc->nr_to_scan;
+ unsigned long nr_scanned = 0;
+ unsigned long nr_reclaimed = 0;
pagevec_init(&pvec, 1);
lru_add_drain();
spin_lock_irq(&zone->lru_lock);
- while (max_scan > 0) {
+ do {
struct page *page;
- int nr_taken;
- int nr_scan;
- int nr_freed;
+ unsigned long nr_taken;
+ unsigned long nr_scan;
+ unsigned long nr_freed;
nr_taken = isolate_lru_pages(sc->swap_cluster_max,
&zone->inactive_list,
@@ -1132,12 +639,9 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
zone->pages_scanned += nr_scan;
spin_unlock_irq(&zone->lru_lock);
- if (nr_taken == 0)
- goto done;
-
- max_scan -= nr_scan;
- nr_freed = shrink_list(&page_list, sc);
-
+ nr_scanned += nr_scan;
+ nr_freed = shrink_page_list(&page_list, sc);
+ nr_reclaimed += nr_freed;
local_irq_disable();
if (current_is_kswapd()) {
__mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
@@ -1146,14 +650,17 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
__mod_page_state_zone(zone, pgscan_direct, nr_scan);
__mod_page_state_zone(zone, pgsteal, nr_freed);
+ if (nr_taken == 0)
+ goto done;
+
spin_lock(&zone->lru_lock);
/*
* Put back any unfreeable pages.
*/
while (!list_empty(&page_list)) {
page = lru_to_page(&page_list);
- if (TestSetPageLRU(page))
- BUG();
+ BUG_ON(PageLRU(page));
+ SetPageLRU(page);
list_del(&page->lru);
if (PageActive(page))
add_page_to_active_list(zone, page);
@@ -1165,10 +672,12 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
spin_lock_irq(&zone->lru_lock);
}
}
- }
- spin_unlock_irq(&zone->lru_lock);
+ } while (nr_scanned < max_scan);
+ spin_unlock(&zone->lru_lock);
done:
+ local_irq_enable();
pagevec_release(&pvec);
+ return nr_reclaimed;
}
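The pagevec used in the putback path batches the final page releases so zone->lru_lock can be dropped while references drain back to the allocator. A self-contained sketch of that batching pattern (not the exact loop above; release_in_batches is a hypothetical name):

#include <linux/mm.h>
#include <linux/pagevec.h>

/* Release the pages on @list in batches, cycling the LRU lock. */
static void release_in_batches(struct zone *zone, struct list_head *list)
{
	struct pagevec pvec;
	struct page *page;

	pagevec_init(&pvec, 1);		/* 1 == treat pages as cache-cold */
	spin_lock_irq(&zone->lru_lock);
	while (!list_empty(list)) {
		page = list_entry(list->prev, struct page, lru);
		list_del(&page->lru);
		if (!pagevec_add(&pvec, page)) {
			/* Vector full: drop the lock, release one batch. */
			spin_unlock_irq(&zone->lru_lock);
			__pagevec_release(&pvec);
			spin_lock_irq(&zone->lru_lock);
		}
	}
	spin_unlock_irq(&zone->lru_lock);
	pagevec_release(&pvec);		/* flush the remainder */
}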
/*
@@ -1188,13 +697,12 @@ done:
* The downside is that we have to touch page->_count against each page.
* But we had to alter page->flags anyway.
*/
-static void
-refill_inactive_zone(struct zone *zone, struct scan_control *sc)
+static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
+ struct scan_control *sc)
{
- int pgmoved;
+ unsigned long pgmoved;
int pgdeactivate = 0;
- int pgscanned;
- int nr_pages = sc->nr_to_scan;
+ unsigned long pgscanned;
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_inactive); /* Pages to go onto the inactive_list */
LIST_HEAD(l_active); /* Pages to go onto the active_list */
@@ -1202,7 +710,7 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
struct pagevec pvec;
int reclaim_mapped = 0;
- if (unlikely(sc->may_swap)) {
+ if (sc->may_swap) {
long mapped_ratio;
long distress;
long swap_tendency;
@@ -1272,10 +780,11 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
while (!list_empty(&l_inactive)) {
page = lru_to_page(&l_inactive);
prefetchw_prev_lru_page(page, &l_inactive, flags);
- if (TestSetPageLRU(page))
- BUG();
- if (!TestClearPageActive(page))
- BUG();
+ BUG_ON(PageLRU(page));
+ SetPageLRU(page);
+ BUG_ON(!PageActive(page));
+ ClearPageActive(page);
+
list_move(&page->lru, &zone->inactive_list);
pgmoved++;
if (!pagevec_add(&pvec, page)) {
@@ -1301,8 +810,8 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
while (!list_empty(&l_active)) {
page = lru_to_page(&l_active);
prefetchw_prev_lru_page(page, &l_active, flags);
- if (TestSetPageLRU(page))
- BUG();
+ BUG_ON(PageLRU(page));
+ SetPageLRU(page);
BUG_ON(!PageActive(page));
list_move(&page->lru, &zone->active_list);
pgmoved++;
@@ -1327,11 +836,13 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
/*
* This is a basic per-zone page freer. Used by both kswapd and direct reclaim.
*/
-static void
-shrink_zone(struct zone *zone, struct scan_control *sc)
+static unsigned long shrink_zone(int priority, struct zone *zone,
+ struct scan_control *sc)
{
unsigned long nr_active;
unsigned long nr_inactive;
+ unsigned long nr_to_scan;
+ unsigned long nr_reclaimed = 0;
atomic_inc(&zone->reclaim_in_progress);
@@ -1339,14 +850,14 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
* Add one to `nr_to_scan' just to make sure that the kernel will
* slowly sift through the active list.
*/
- zone->nr_scan_active += (zone->nr_active >> sc->priority) + 1;
+ zone->nr_scan_active += (zone->nr_active >> priority) + 1;
nr_active = zone->nr_scan_active;
if (nr_active >= sc->swap_cluster_max)
zone->nr_scan_active = 0;
else
nr_active = 0;
- zone->nr_scan_inactive += (zone->nr_inactive >> sc->priority) + 1;
+ zone->nr_scan_inactive += (zone->nr_inactive >> priority) + 1;
nr_inactive = zone->nr_scan_inactive;
if (nr_inactive >= sc->swap_cluster_max)
zone->nr_scan_inactive = 0;
@@ -1355,23 +866,25 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
while (nr_active || nr_inactive) {
if (nr_active) {
- sc->nr_to_scan = min(nr_active,
+ nr_to_scan = min(nr_active,
(unsigned long)sc->swap_cluster_max);
- nr_active -= sc->nr_to_scan;
- refill_inactive_zone(zone, sc);
+ nr_active -= nr_to_scan;
+ shrink_active_list(nr_to_scan, zone, sc);
}
if (nr_inactive) {
- sc->nr_to_scan = min(nr_inactive,
+ nr_to_scan = min(nr_inactive,
(unsigned long)sc->swap_cluster_max);
- nr_inactive -= sc->nr_to_scan;
- shrink_cache(zone, sc);
+ nr_inactive -= nr_to_scan;
+ nr_reclaimed += shrink_inactive_list(nr_to_scan, zone,
+ sc);
}
}
throttle_vm_writeout();
atomic_dec(&zone->reclaim_in_progress);
+ return nr_reclaimed;
}
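To make the batching concrete: at DEF_PRIORITY (12), a zone with 1,048,576 inactive pages adds (1048576 >> 12) + 1 = 257 pages to nr_scan_inactive on each call. Once that accumulator reaches sc->swap_cluster_max (normally SWAP_CLUSTER_MAX, i.e. 32), the while loop above consumes it in chunks of at most 32 pages per shrink_inactive_list() call -- eight full chunks plus a final chunk of one page in this example.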
/*
@@ -1390,9 +903,10 @@ shrink_zone(struct zone *zone, struct scan_control *sc)
* If a zone is deemed to be full of pinned pages then just give it a light
* scan then give up on it.
*/
-static void
-shrink_caches(struct zone **zones, struct scan_control *sc)
+static unsigned long shrink_zones(int priority, struct zone **zones,
+ struct scan_control *sc)
{
+ unsigned long nr_reclaimed = 0;
int i;
for (i = 0; zones[i] != NULL; i++) {
@@ -1404,15 +918,16 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
continue;
- zone->temp_priority = sc->priority;
- if (zone->prev_priority > sc->priority)
- zone->prev_priority = sc->priority;
+ zone->temp_priority = priority;
+ if (zone->prev_priority > priority)
+ zone->prev_priority = priority;
- if (zone->all_unreclaimable && sc->priority != DEF_PRIORITY)
+ if (zone->all_unreclaimable && priority != DEF_PRIORITY)
continue; /* Let kswapd poll it */
- shrink_zone(zone, sc);
+ nr_reclaimed += shrink_zone(priority, zone, sc);
}
+ return nr_reclaimed;
}
/*
@@ -1428,19 +943,21 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
* holds filesystem locks which prevent writeout this might not work, and the
* allocation attempt will fail.
*/
-int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
{
int priority;
int ret = 0;
- int total_scanned = 0, total_reclaimed = 0;
+ unsigned long total_scanned = 0;
+ unsigned long nr_reclaimed = 0;
struct reclaim_state *reclaim_state = current->reclaim_state;
- struct scan_control sc;
unsigned long lru_pages = 0;
int i;
-
- sc.gfp_mask = gfp_mask;
- sc.may_writepage = !laptop_mode;
- sc.may_swap = 1;
+ struct scan_control sc = {
+ .gfp_mask = gfp_mask,
+ .may_writepage = !laptop_mode,
+ .swap_cluster_max = SWAP_CLUSTER_MAX,
+ .may_swap = 1,
+ };
inc_page_state(allocstall);
@@ -1457,20 +974,16 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
for (priority = DEF_PRIORITY; priority >= 0; priority--) {
sc.nr_mapped = read_page_state(nr_mapped);
sc.nr_scanned = 0;
- sc.nr_reclaimed = 0;
- sc.priority = priority;
- sc.swap_cluster_max = SWAP_CLUSTER_MAX;
if (!priority)
disable_swap_token();
- shrink_caches(zones, &sc);
+ nr_reclaimed += shrink_zones(priority, zones, &sc);
shrink_slab(sc.nr_scanned, gfp_mask, lru_pages);
if (reclaim_state) {
- sc.nr_reclaimed += reclaim_state->reclaimed_slab;
+ nr_reclaimed += reclaim_state->reclaimed_slab;
reclaim_state->reclaimed_slab = 0;
}
total_scanned += sc.nr_scanned;
- total_reclaimed += sc.nr_reclaimed;
- if (total_reclaimed >= sc.swap_cluster_max) {
+ if (nr_reclaimed >= sc.swap_cluster_max) {
ret = 1;
goto out;
}
@@ -1482,7 +995,8 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
* that's undesirable in laptop mode, where we *want* lumpy
* writeout. So in laptop mode, write out the whole world.
*/
- if (total_scanned > sc.swap_cluster_max + sc.swap_cluster_max/2) {
+ if (total_scanned > sc.swap_cluster_max +
+ sc.swap_cluster_max / 2) {
wakeup_pdflush(laptop_mode ? 0 : total_scanned);
sc.may_writepage = 1;
}
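For scale: with the default SWAP_CLUSTER_MAX of 32, this fires once more than 48 pages (32 + 32/2) have been scanned in a single balancing run. The pdflush argument is total_scanned normally, but 0 in laptop mode, which asks pdflush to write out everything rather than a bounded amount.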
@@ -1528,22 +1042,26 @@ out:
* the page allocator fallback scheme to ensure that aging of pages is balanced
* across the zones.
*/
-static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)
+static unsigned long balance_pgdat(pg_data_t *pgdat, unsigned long nr_pages,
+ int order)
{
- int to_free = nr_pages;
+ unsigned long to_free = nr_pages;
int all_zones_ok;
int priority;
int i;
- int total_scanned, total_reclaimed;
+ unsigned long total_scanned;
+ unsigned long nr_reclaimed;
struct reclaim_state *reclaim_state = current->reclaim_state;
- struct scan_control sc;
+ struct scan_control sc = {
+ .gfp_mask = GFP_KERNEL,
+ .may_swap = 1,
+ .swap_cluster_max = nr_pages ? nr_pages : SWAP_CLUSTER_MAX,
+ };
loop_again:
total_scanned = 0;
- total_reclaimed = 0;
- sc.gfp_mask = GFP_KERNEL;
- sc.may_writepage = !laptop_mode;
- sc.may_swap = 1;
+ nr_reclaimed = 0;
+ sc.may_writepage = !laptop_mode;
sc.nr_mapped = read_page_state(nr_mapped);
inc_page_state(pageoutrun);
@@ -1624,15 +1142,11 @@ scan:
if (zone->prev_priority > priority)
zone->prev_priority = priority;
sc.nr_scanned = 0;
- sc.nr_reclaimed = 0;
- sc.priority = priority;
- sc.swap_cluster_max = nr_pages? nr_pages : SWAP_CLUSTER_MAX;
- shrink_zone(zone, &sc);
+ nr_reclaimed += shrink_zone(priority, zone, &sc);
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL,
lru_pages);
- sc.nr_reclaimed += reclaim_state->reclaimed_slab;
- total_reclaimed += sc.nr_reclaimed;
+ nr_reclaimed += reclaim_state->reclaimed_slab;
total_scanned += sc.nr_scanned;
if (zone->all_unreclaimable)
continue;
@@ -1645,10 +1159,10 @@ scan:
* even in laptop mode
*/
if (total_scanned > SWAP_CLUSTER_MAX * 2 &&
- total_scanned > total_reclaimed+total_reclaimed/2)
+ total_scanned > nr_reclaimed + nr_reclaimed / 2)
sc.may_writepage = 1;
}
- if (nr_pages && to_free > total_reclaimed)
+ if (nr_pages && to_free > nr_reclaimed)
continue; /* swsusp: need to do more work */
if (all_zones_ok)
break; /* kswapd: all done */
@@ -1665,7 +1179,7 @@ scan:
* matches the direct reclaim path behaviour in terms of impact
* on zone->*_priority.
*/
- if ((total_reclaimed >= SWAP_CLUSTER_MAX) && (!nr_pages))
+ if ((nr_reclaimed >= SWAP_CLUSTER_MAX) && !nr_pages)
break;
}
out:
@@ -1679,7 +1193,7 @@ out:
goto loop_again;
}
- return total_reclaimed;
+ return nr_reclaimed;
}
/*
@@ -1779,24 +1293,31 @@ void wakeup_kswapd(struct zone *zone, int order)
* Try to free `nr_pages' of memory, system-wide. Returns the number of freed
* pages.
*/
-int shrink_all_memory(int nr_pages)
+unsigned long shrink_all_memory(unsigned long nr_pages)
{
pg_data_t *pgdat;
- int nr_to_free = nr_pages;
- int ret = 0;
+ unsigned long nr_to_free = nr_pages;
+ unsigned long ret = 0;
+ unsigned retry = 2;
struct reclaim_state reclaim_state = {
.reclaimed_slab = 0,
};
current->reclaim_state = &reclaim_state;
+repeat:
for_each_pgdat(pgdat) {
- int freed;
+ unsigned long freed;
+
freed = balance_pgdat(pgdat, nr_to_free, 0);
ret += freed;
nr_to_free -= freed;
- if (nr_to_free <= 0)
+ if ((long)nr_to_free <= 0)
break;
}
+ if (retry-- && ret < nr_pages) {
+ blk_congestion_wait(WRITE, HZ/5);
+ goto repeat;
+ }
current->reclaim_state = NULL;
return ret;
}
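The (long) cast above is doing real work: nr_to_free is now unsigned long, and a node can free more pages than were requested, so nr_to_free -= freed may wrap past zero to a huge positive value. Comparing through (long) turns the wrap into a negative number and the loop still terminates -- e.g. on a 64-bit machine, nr_to_free = 10 with freed = 15 leaves 0xfffffffffffffffb, which is -5 as a long.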
@@ -1808,8 +1329,7 @@ int shrink_all_memory(int nr_pages)
away, we get changed to run anywhere: as the first one comes back,
restore their cpu bindings. */
static int __devinit cpu_callback(struct notifier_block *nfb,
- unsigned long action,
- void *hcpu)
+ unsigned long action, void *hcpu)
{
pg_data_t *pgdat;
cpumask_t mask;
@@ -1829,10 +1349,15 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
static int __init kswapd_init(void)
{
pg_data_t *pgdat;
+
swap_setup();
- for_each_pgdat(pgdat)
- pgdat->kswapd
- = find_task_by_pid(kernel_thread(kswapd, pgdat, CLONE_KERNEL));
+ for_each_pgdat(pgdat) {
+ pid_t pid;
+
+ pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL);
+ BUG_ON(pid < 0);
+ pgdat->kswapd = find_task_by_pid(pid);
+ }
total_memory = nr_free_pagecache_pages();
hotcpu_notifier(cpu_callback, 0);
return 0;
@@ -1874,46 +1399,24 @@ int zone_reclaim_interval __read_mostly = 30*HZ;
/*
* Try to free up some pages from this zone through reclaim.
*/
-int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
{
- int nr_pages;
+ /* Minimum pages needed in order to stay on node */
+ const unsigned long nr_pages = 1 << order;
struct task_struct *p = current;
struct reclaim_state reclaim_state;
- struct scan_control sc;
- cpumask_t mask;
- int node_id;
-
- if (time_before(jiffies,
- zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
- return 0;
-
- if (!(gfp_mask & __GFP_WAIT) ||
- zone->all_unreclaimable ||
- atomic_read(&zone->reclaim_in_progress) > 0 ||
- (p->flags & PF_MEMALLOC))
- return 0;
-
- node_id = zone->zone_pgdat->node_id;
- mask = node_to_cpumask(node_id);
- if (!cpus_empty(mask) && node_id != numa_node_id())
- return 0;
-
- sc.may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE);
- sc.may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP);
- sc.nr_scanned = 0;
- sc.nr_reclaimed = 0;
- sc.priority = ZONE_RECLAIM_PRIORITY + 1;
- sc.nr_mapped = read_page_state(nr_mapped);
- sc.gfp_mask = gfp_mask;
+ int priority;
+ unsigned long nr_reclaimed = 0;
+ struct scan_control sc = {
+ .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
+ .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
+ .nr_mapped = read_page_state(nr_mapped),
+ .swap_cluster_max = max_t(unsigned long, nr_pages,
+ SWAP_CLUSTER_MAX),
+ .gfp_mask = gfp_mask,
+ };
disable_swap_token();
-
- nr_pages = 1 << order;
- if (nr_pages > SWAP_CLUSTER_MAX)
- sc.swap_cluster_max = nr_pages;
- else
- sc.swap_cluster_max = SWAP_CLUSTER_MAX;
-
cond_resched();
/*
* We need to be able to allocate from the reserves for RECLAIM_SWAP
@@ -1928,17 +1431,20 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
* Free memory by calling shrink zone with increasing priorities
* until we have enough memory freed.
*/
+ priority = ZONE_RECLAIM_PRIORITY;
do {
- sc.priority--;
- shrink_zone(zone, &sc);
+ nr_reclaimed += shrink_zone(priority, zone, &sc);
+ priority--;
+ } while (priority >= 0 && nr_reclaimed < nr_pages);
- } while (sc.nr_reclaimed < nr_pages && sc.priority > 0);
-
- if (sc.nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
+ if (nr_reclaimed < nr_pages && (zone_reclaim_mode & RECLAIM_SLAB)) {
/*
- * shrink_slab does not currently allow us to determine
- * how many pages were freed in the zone. So we just
- * shake the slab and then go offnode for a single allocation.
+ * shrink_slab() does not currently allow us to determine how
+ * many pages were freed in this zone. So we just shake the slab
+ * a bit and then go off node for this particular allocation
+ * despite possibly having freed enough memory to allocate in
+ * this zone. If we freed local memory then the next
+ * allocations will be local again.
*
* shrink_slab will free memory on all zones and may take
* a long time.
@@ -1949,10 +1455,54 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
p->reclaim_state = NULL;
current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
- if (sc.nr_reclaimed == 0)
+ if (nr_reclaimed == 0) {
+ /*
+ * We were unable to reclaim enough pages to stay on node. We
+ * now allow off node accesses for a certain time period before
+ * trying again to reclaim pages from the local zone.
+ */
zone->last_unsuccessful_zone_reclaim = jiffies;
+ }
- return sc.nr_reclaimed >= nr_pages;
+ return nr_reclaimed >= nr_pages;
}
-#endif
+int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
+{
+ cpumask_t mask;
+ int node_id;
+
+ /*
+ * Do not reclaim if there was a recent unsuccessful attempt at zone
+ * reclaim. In that case we let allocations go off node for the
+ * zone_reclaim_interval. Otherwise we would scan for each off-node
+ * page allocation.
+ */
+ if (time_before(jiffies,
+ zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval))
+ return 0;
+
+ /*
+ * Avoid concurrent zone reclaims, do not reclaim in a zone that does
+ * not have reclaimable pages, and do not scan at all if the
+ * allocation should not be delayed.
+ */
+ if (!(gfp_mask & __GFP_WAIT) ||
+ zone->all_unreclaimable ||
+ atomic_read(&zone->reclaim_in_progress) > 0 ||
+ (current->flags & PF_MEMALLOC))
+ return 0;
+
+ /*
+ * Only run zone reclaim on the local zone or on zones that do not
+ * have associated processors. This will favor the local processor
+ * over remote processors and spread off node memory allocations
+ * as widely as possible.
+ */
+ node_id = zone->zone_pgdat->node_id;
+ mask = node_to_cpumask(node_id);
+ if (!cpus_empty(mask) && node_id != numa_node_id())
+ return 0;
+ return __zone_reclaim(zone, gfp_mask, order);
+}
+#endif
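With this split the cheap gating checks run on every off-node allocation attempt and __zone_reclaim() only runs when they all pass. The allocator-side gate, roughly as the zone fallback loop in mm/page_alloc.c's get_page_from_freelist() uses it (a sketch; zone_worth_trying is a hypothetical name, not part of this patch):

#include <linux/mmzone.h>
#include <linux/swap.h>

static int zone_worth_trying(struct zone *zone, gfp_t gfp_mask,
			     unsigned int order, int classzone_idx,
			     int alloc_flags)
{
	if (zone_watermark_ok(zone, order, zone->pages_low,
			      classzone_idx, alloc_flags))
		return 1;		/* enough free memory locally */
	/* Below the watermark: try local reclaim before going off node. */
	return zone_reclaim_mode && zone_reclaim(zone, gfp_mask, order);
}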
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 74cb79eb917..f6940618e34 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -16,11 +16,12 @@
#include <linux/keyctl.h>
#include <linux/fs.h>
#include <linux/err.h>
+#include <linux/mutex.h>
#include <asm/uaccess.h>
#include "internal.h"
/* session keyring create vs join semaphore */
-static DECLARE_MUTEX(key_session_sem);
+static DEFINE_MUTEX(key_session_mutex);
/* the root user's tracking struct */
struct key_user root_key_user = {
@@ -711,7 +712,7 @@ long join_session_keyring(const char *name)
}
/* allow the user to join or create a named keyring */
- down(&key_session_sem);
+ mutex_lock(&key_session_mutex);
/* look for an existing keyring of this name */
keyring = find_keyring_by_name(name, 0);
@@ -737,7 +738,7 @@ long join_session_keyring(const char *name)
key_put(keyring);
error2:
- up(&key_session_sem);
+ mutex_unlock(&key_session_mutex);
error:
return ret;
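The keyring change is one instance of the tree-wide conversion of mutual-exclusion semaphores to the then-new struct mutex, which is cheaper and gains debugging checks. The mechanical pattern, with foo_mutex/foo_critical as stand-in names:

#include <linux/mutex.h>

/* before: static DECLARE_MUTEX(foo_sem); ... down(&foo_sem); up(&foo_sem); */
static DEFINE_MUTEX(foo_mutex);

static void foo_critical(void)
{
	mutex_lock(&foo_mutex);
	/* e.g. the create-vs-join of a named session keyring */
	mutex_unlock(&foo_mutex);
}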
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 5b16196f282..ccaf988f372 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -117,6 +117,8 @@ static struct security_operations *secondary_ops = NULL;
static LIST_HEAD(superblock_security_head);
static DEFINE_SPINLOCK(sb_security_lock);
+static kmem_cache_t *sel_inode_cache;
+
/* Allocate and free functions for each kind of security blob. */
static int task_alloc_security(struct task_struct *task)
@@ -146,10 +148,11 @@ static int inode_alloc_security(struct inode *inode)
struct task_security_struct *tsec = current->security;
struct inode_security_struct *isec;
- isec = kzalloc(sizeof(struct inode_security_struct), GFP_KERNEL);
+ isec = kmem_cache_alloc(sel_inode_cache, SLAB_KERNEL);
if (!isec)
return -ENOMEM;
+ memset(isec, 0, sizeof(*isec));
init_MUTEX(&isec->sem);
INIT_LIST_HEAD(&isec->list);
isec->inode = inode;
@@ -172,7 +175,7 @@ static void inode_free_security(struct inode *inode)
spin_unlock(&sbsec->isec_lock);
inode->i_security = NULL;
- kfree(isec);
+ kmem_cache_free(sel_inode_cache, isec);
}
static int file_alloc_security(struct file *file)
@@ -1929,7 +1932,6 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
struct task_security_struct *tsec;
struct inode_security_struct *dsec;
struct superblock_security_struct *sbsec;
- struct inode_security_struct *isec;
u32 newsid, clen;
int rc;
char *namep = NULL, *context;
@@ -1937,7 +1939,6 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
tsec = current->security;
dsec = dir->i_security;
sbsec = dir->i_sb->s_security;
- isec = inode->i_security;
if (tsec->create_sid && sbsec->behavior != SECURITY_FS_USE_MNTPOINT) {
newsid = tsec->create_sid;
@@ -1957,7 +1958,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
inode_security_set_sid(inode, newsid);
- if (sbsec->behavior == SECURITY_FS_USE_MNTPOINT)
+ if (!ss_initialized || sbsec->behavior == SECURITY_FS_USE_MNTPOINT)
return -EOPNOTSUPP;
if (name) {
@@ -4408,6 +4409,9 @@ static __init int selinux_init(void)
tsec = current->security;
tsec->osid = tsec->sid = SECINITSID_KERNEL;
+ sel_inode_cache = kmem_cache_create("selinux_inode_security",
+ sizeof(struct inode_security_struct),
+ 0, SLAB_PANIC, NULL, NULL);
avc_init();
original_ops = secondary_ops = security_ops;
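The hooks.c change swaps a generic kzalloc()/kfree() pair for a dedicated slab cache, which is cheaper for the very frequent inode security blobs and gives per-cache statistics. The lifecycle, in outline (names and era-appropriate SLAB_* flags taken from the hunks above):

/* once, at init; SLAB_PANIC makes setup failure fatal */
sel_inode_cache = kmem_cache_create("selinux_inode_security",
				    sizeof(struct inode_security_struct),
				    0, SLAB_PANIC, NULL, NULL);

/* per inode: allocate (cache memory is not pre-zeroed, hence the memset) */
isec = kmem_cache_alloc(sel_inode_cache, SLAB_KERNEL);
memset(isec, 0, sizeof(*isec));

/* on inode teardown: return the object to the cache */
kmem_cache_free(sel_inode_cache, isec);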
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index b5fa02d17b1..f5d78365488 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
+#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/security.h>
@@ -44,7 +45,7 @@ static int __init checkreqprot_setup(char *str)
__setup("checkreqprot=", checkreqprot_setup);
-static DECLARE_MUTEX(sel_sem);
+static DEFINE_MUTEX(sel_mutex);
/* global data for booleans */
static struct dentry *bool_dir = NULL;
@@ -230,7 +231,7 @@ static ssize_t sel_write_load(struct file * file, const char __user * buf,
ssize_t length;
void *data = NULL;
- down(&sel_sem);
+ mutex_lock(&sel_mutex);
length = task_has_security(current, SECURITY__LOAD_POLICY);
if (length)
@@ -262,7 +263,7 @@ static ssize_t sel_write_load(struct file * file, const char __user * buf,
else
length = count;
out:
- up(&sel_sem);
+ mutex_unlock(&sel_mutex);
vfree(data);
return length;
}
@@ -709,12 +710,11 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf,
{
char *page = NULL;
ssize_t length;
- ssize_t end;
ssize_t ret;
int cur_enforcing;
struct inode *inode;
- down(&sel_sem);
+ mutex_lock(&sel_mutex);
ret = -EFAULT;
@@ -740,26 +740,9 @@ static ssize_t sel_read_bool(struct file *filep, char __user *buf,
length = scnprintf(page, PAGE_SIZE, "%d %d", cur_enforcing,
bool_pending_values[inode->i_ino - BOOL_INO_OFFSET]);
- if (length < 0) {
- ret = length;
- goto out;
- }
-
- if (*ppos >= length) {
- ret = 0;
- goto out;
- }
- if (count + *ppos > length)
- count = length - *ppos;
- end = count + *ppos;
- if (copy_to_user(buf, (char *) page + *ppos, count)) {
- ret = -EFAULT;
- goto out;
- }
- *ppos = end;
- ret = count;
+ ret = simple_read_from_buffer(buf, count, ppos, page, length);
out:
- up(&sel_sem);
+ mutex_unlock(&sel_mutex);
if (page)
free_page((unsigned long)page);
return ret;
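simple_read_from_buffer() packages up exactly the bounds checking and copy_to_user() sequence deleted above; its behaviour in this era's fs/libfs.c is roughly:

ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
				const void *from, size_t available)
{
	loff_t pos = *ppos;

	if (pos < 0)
		return -EINVAL;
	if (pos >= available)
		return 0;			/* EOF */
	if (count > available - pos)
		count = available - pos;	/* clamp to what is left */
	if (copy_to_user(to, from + pos, count))
		return -EFAULT;
	*ppos = pos + count;
	return count;
}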
@@ -773,7 +756,7 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf,
int new_value;
struct inode *inode;
- down(&sel_sem);
+ mutex_lock(&sel_mutex);
length = task_has_security(current, SECURITY__SETBOOL);
if (length)
@@ -812,7 +795,7 @@ static ssize_t sel_write_bool(struct file *filep, const char __user *buf,
length = count;
out:
- up(&sel_sem);
+ mutex_unlock(&sel_mutex);
if (page)
free_page((unsigned long) page);
return length;
@@ -831,7 +814,7 @@ static ssize_t sel_commit_bools_write(struct file *filep,
ssize_t length = -EFAULT;
int new_value;
- down(&sel_sem);
+ mutex_lock(&sel_mutex);
length = task_has_security(current, SECURITY__SETBOOL);
if (length)
@@ -869,7 +852,7 @@ static ssize_t sel_commit_bools_write(struct file *filep,
length = count;
out:
- up(&sel_sem);
+ mutex_unlock(&sel_mutex);
if (page)
free_page((unsigned long) page);
return length;
@@ -987,7 +970,7 @@ out:
return ret;
err:
kfree(values);
- d_genocide(dir);
+ sel_remove_bools(dir);
ret = -ENOMEM;
goto out;
}
@@ -1168,37 +1151,38 @@ static int sel_make_avc_files(struct dentry *dir)
dentry = d_alloc_name(dir, files[i].name);
if (!dentry) {
ret = -ENOMEM;
- goto err;
+ goto out;
}
inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode);
if (!inode) {
ret = -ENOMEM;
- goto err;
+ goto out;
}
inode->i_fop = files[i].ops;
d_add(dentry, inode);
}
out:
return ret;
-err:
- d_genocide(dir);
- goto out;
}
-static int sel_make_dir(struct super_block *sb, struct dentry *dentry)
+static int sel_make_dir(struct inode *dir, struct dentry *dentry)
{
int ret = 0;
struct inode *inode;
- inode = sel_make_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
+ inode = sel_make_inode(dir->i_sb, S_IFDIR | S_IRUGO | S_IXUGO);
if (!inode) {
ret = -ENOMEM;
goto out;
}
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
+ /* directory inodes start off with i_nlink == 2 (for "." entry) */
+ inode->i_nlink++;
d_add(dentry, inode);
+ /* bump link count on parent directory, too */
+ dir->i_nlink++;
out:
return ret;
}
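The link-count bookkeeping follows the standard VFS convention: a fresh directory starts at i_nlink == 2 (the parent's entry naming it plus its own "." entry), and each subdirectory adds one more to the parent through its ".." entry. A directory holding N subdirectories therefore has i_nlink == 2 + N, which is exactly what the two increments above maintain.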
@@ -1207,7 +1191,7 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent)
{
int ret;
struct dentry *dentry;
- struct inode *inode;
+ struct inode *inode, *root_inode;
struct inode_security_struct *isec;
static struct tree_descr selinux_files[] = {
@@ -1228,30 +1212,33 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent)
};
ret = simple_fill_super(sb, SELINUX_MAGIC, selinux_files);
if (ret)
- return ret;
+ goto err;
+
+ root_inode = sb->s_root->d_inode;
dentry = d_alloc_name(sb->s_root, BOOL_DIR_NAME);
- if (!dentry)
- return -ENOMEM;
+ if (!dentry) {
+ ret = -ENOMEM;
+ goto err;
+ }
- inode = sel_make_inode(sb, S_IFDIR | S_IRUGO | S_IXUGO);
- if (!inode)
- goto out;
- inode->i_op = &simple_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
- d_add(dentry, inode);
- bool_dir = dentry;
- ret = sel_make_bools();
+ ret = sel_make_dir(root_inode, dentry);
if (ret)
- goto out;
+ goto err;
+
+ bool_dir = dentry;
dentry = d_alloc_name(sb->s_root, NULL_FILE_NAME);
- if (!dentry)
- return -ENOMEM;
+ if (!dentry) {
+ ret = -ENOMEM;
+ goto err;
+ }
inode = sel_make_inode(sb, S_IFCHR | S_IRUGO | S_IWUGO);
- if (!inode)
- goto out;
+ if (!inode) {
+ ret = -ENOMEM;
+ goto err;
+ }
isec = (struct inode_security_struct*)inode->i_security;
isec->sid = SECINITSID_DEVNULL;
isec->sclass = SECCLASS_CHR_FILE;
@@ -1262,22 +1249,23 @@ static int sel_fill_super(struct super_block * sb, void * data, int silent)
selinux_null = dentry;
dentry = d_alloc_name(sb->s_root, "avc");
- if (!dentry)
- return -ENOMEM;
+ if (!dentry) {
+ ret = -ENOMEM;
+ goto err;
+ }
- ret = sel_make_dir(sb, dentry);
+ ret = sel_make_dir(root_inode, dentry);
if (ret)
- goto out;
+ goto err;
ret = sel_make_avc_files(dentry);
if (ret)
- goto out;
-
- return 0;
+ goto err;
out:
- dput(dentry);
+ return ret;
+err:
printk(KERN_ERR "%s: failed while creating inodes\n", __FUNCTION__);
- return -ENOMEM;
+ goto out;
}
static struct super_block *sel_get_sb(struct file_system_type *fs_type,
diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c
index 8a764928ff4..63e0b7f29cb 100644
--- a/security/selinux/ss/services.c
+++ b/security/selinux/ss/services.c
@@ -27,7 +27,8 @@
#include <linux/in.h>
#include <linux/sched.h>
#include <linux/audit.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
+
#include "flask.h"
#include "avc.h"
#include "avc_ss.h"
@@ -48,9 +49,9 @@ static DEFINE_RWLOCK(policy_rwlock);
#define POLICY_RDUNLOCK read_unlock(&policy_rwlock)
#define POLICY_WRUNLOCK write_unlock_irq(&policy_rwlock)
-static DECLARE_MUTEX(load_sem);
-#define LOAD_LOCK down(&load_sem)
-#define LOAD_UNLOCK up(&load_sem)
+static DEFINE_MUTEX(load_mutex);
+#define LOAD_LOCK mutex_lock(&load_mutex)
+#define LOAD_UNLOCK mutex_unlock(&load_mutex)
static struct sidtab sidtab;
struct policydb policydb;