diff options
210 files changed, 6226 insertions, 3421 deletions
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index a0cda062bc3..8e6b8d3c741 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -289,35 +289,73 @@ downdelay fail_over_mac Specifies whether active-backup mode should set all slaves to - the same MAC address (the traditional behavior), or, when - enabled, change the bond's MAC address when changing the - active interface (i.e., fail over the MAC address itself). - - Fail over MAC is useful for devices that cannot ever alter - their MAC address, or for devices that refuse incoming - broadcasts with their own source MAC (which interferes with - the ARP monitor). - - The down side of fail over MAC is that every device on the - network must be updated via gratuitous ARP, vs. just updating - a switch or set of switches (which often takes place for any - traffic, not just ARP traffic, if the switch snoops incoming - traffic to update its tables) for the traditional method. If - the gratuitous ARP is lost, communication may be disrupted. - - When fail over MAC is used in conjuction with the mii monitor, - devices which assert link up prior to being able to actually - transmit and receive are particularly susecptible to loss of - the gratuitous ARP, and an appropriate updelay setting may be - required. - - A value of 0 disables fail over MAC, and is the default. A - value of 1 enables fail over MAC. This option is enabled - automatically if the first slave added cannot change its MAC - address. This option may be modified via sysfs only when no - slaves are present in the bond. - - This option was added in bonding version 3.2.0. + the same MAC address at enslavement (the traditional + behavior), or, when enabled, perform special handling of the + bond's MAC address in accordance with the selected policy. + + Possible values are: + + none or 0 + + This setting disables fail_over_mac, and causes + bonding to set all slaves of an active-backup bond to + the same MAC address at enslavement time. This is the + default. + + active or 1 + + The "active" fail_over_mac policy indicates that the + MAC address of the bond should always be the MAC + address of the currently active slave. The MAC + address of the slaves is not changed; instead, the MAC + address of the bond changes during a failover. + + This policy is useful for devices that cannot ever + alter their MAC address, or for devices that refuse + incoming broadcasts with their own source MAC (which + interferes with the ARP monitor). + + The down side of this policy is that every device on + the network must be updated via gratuitous ARP, + vs. just updating a switch or set of switches (which + often takes place for any traffic, not just ARP + traffic, if the switch snoops incoming traffic to + update its tables) for the traditional method. If the + gratuitous ARP is lost, communication may be + disrupted. + + When this policy is used in conjuction with the mii + monitor, devices which assert link up prior to being + able to actually transmit and receive are particularly + susecptible to loss of the gratuitous ARP, and an + appropriate updelay setting may be required. + + follow or 2 + + The "follow" fail_over_mac policy causes the MAC + address of the bond to be selected normally (normally + the MAC address of the first slave added to the bond). + However, the second and subsequent slaves are not set + to this MAC address while they are in a backup role; a + slave is programmed with the bond's MAC address at + failover time (and the formerly active slave receives + the newly active slave's MAC address). + + This policy is useful for multiport devices that + either become confused or incur a performance penalty + when multiple ports are programmed with the same MAC + address. + + + The default policy is none, unless the first slave cannot + change its MAC address, in which case the active policy is + selected by default. + + This option may be modified via sysfs only when no slaves are + present in the bond. + + This option was added in bonding version 3.2.0. The "follow" + policy was added in bonding version 3.3.0. lacp_rate diff --git a/Documentation/powerpc/booting-without-of.txt b/Documentation/powerpc/booting-without-of.txt index 1d2a772506c..46a9dba11f2 100644 --- a/Documentation/powerpc/booting-without-of.txt +++ b/Documentation/powerpc/booting-without-of.txt @@ -58,6 +58,7 @@ Table of Contents o) Xilinx IP cores p) Freescale Synchronous Serial Interface q) USB EHCI controllers + r) MDIO on GPIOs VII - Marvell Discovery mv64[345]6x System Controller chips 1) The /system-controller node @@ -2870,6 +2871,26 @@ platforms are moved over to use the flattened-device-tree model. reg = <0xe8000000 32>; }; + r) MDIO on GPIOs + + Currently defined compatibles: + - virtual,gpio-mdio + + MDC and MDIO lines connected to GPIO controllers are listed in the + gpios property as described in section VIII.1 in the following order: + + MDC, MDIO. + + Example: + + mdio { + compatible = "virtual,mdio-gpio"; + #address-cells = <1>; + #size-cells = <0>; + gpios = <&qe_pio_a 11 + &qe_pio_c 6>; + }; + VII - Marvell Discovery mv64[345]6x System Controller chips =========================================================== diff --git a/drivers/atm/fore200e.h b/drivers/atm/fore200e.h index 183841cc8fd..8dd4aa76c3b 100644 --- a/drivers/atm/fore200e.h +++ b/drivers/atm/fore200e.h @@ -1,4 +1,3 @@ -/* $Id: fore200e.h,v 1.4 2000/04/14 10:10:34 davem Exp $ */ #ifndef _FORE200E_H #define _FORE200E_H diff --git a/drivers/atm/fore200e_mkfirm.c b/drivers/atm/fore200e_mkfirm.c index 2ebe1a1e6f8..520e14b488f 100644 --- a/drivers/atm/fore200e_mkfirm.c +++ b/drivers/atm/fore200e_mkfirm.c @@ -1,6 +1,4 @@ /* - $Id: fore200e_mkfirm.c,v 1.1 2000/02/21 16:04:32 davem Exp $ - mkfirm.c: generates a C readable file from a binary firmware image Christophe Lizzi (lizzi@{csti.fr, cnam.fr}), June 1999. diff --git a/drivers/atm/he.h b/drivers/atm/he.h index 1dc277547a7..fe6cd15a78a 100644 --- a/drivers/atm/he.h +++ b/drivers/atm/he.h @@ -1,5 +1,3 @@ -/* $Id: he.h,v 1.4 2003/05/06 22:48:00 chas Exp $ */ - /* he.h diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 28d77b5195d..3a504e94a4d 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1,8 +1,4 @@ /******************************************************************* - * ident "$Id: idt77252.c,v 1.2 2001/11/11 08:13:54 ecd Exp $" - * - * $Author: ecd $ - * $Date: 2001/11/11 08:13:54 $ * * Copyright (c) 2000 ATecoM GmbH * @@ -29,9 +25,6 @@ * 675 Mass Ave, Cambridge, MA 02139, USA. * *******************************************************************/ -static char const rcsid[] = -"$Id: idt77252.c,v 1.2 2001/11/11 08:13:54 ecd Exp $"; - #include <linux/module.h> #include <linux/pci.h> diff --git a/drivers/atm/idt77252.h b/drivers/atm/idt77252.h index 6f2b4a5875f..e83eaf120da 100644 --- a/drivers/atm/idt77252.h +++ b/drivers/atm/idt77252.h @@ -1,8 +1,4 @@ /******************************************************************* - * ident "$Id: idt77252.h,v 1.2 2001/11/11 08:13:54 ecd Exp $" - * - * $Author: ecd $ - * $Date: 2001/11/11 08:13:54 $ * * Copyright (c) 2000 ATecoM GmbH * diff --git a/drivers/atm/nicstarmac.copyright b/drivers/atm/nicstarmac.copyright index 2e15b39fac4..180531a83c6 100644 --- a/drivers/atm/nicstarmac.copyright +++ b/drivers/atm/nicstarmac.copyright @@ -13,7 +13,7 @@ * * Modified to work with the IDT7721 nicstar -- AAL5 (tested) only. * - * R. D. Rechenmacher <ron@fnal.gov>, Aug. 6, 1997 $Revision: 1.1 $ $Date: 1999/08/20 11:00:11 $ + * R. D. Rechenmacher <ron@fnal.gov>, Aug. 6, 1997 * * Linux driver for the IDT77201 NICStAR PCI ATM controller. * PHY component is expected to be 155 Mbps S/UNI-Lite or IDT 77155; diff --git a/drivers/net/3c509.c b/drivers/net/3c509.c index e6c545fe5f5..b9d097c9f6b 100644 --- a/drivers/net/3c509.c +++ b/drivers/net/3c509.c @@ -413,7 +413,7 @@ static int __devinit el3_pnp_probe(struct pnp_dev *pdev, { short i; int ioaddr, irq, if_port; - u16 phys_addr[3]; + __be16 phys_addr[3]; struct net_device *dev = NULL; int err; @@ -605,7 +605,7 @@ static int __init el3_mca_probe(struct device *device) short i; int ioaddr, irq, if_port; - u16 phys_addr[3]; + __be16 phys_addr[3]; struct net_device *dev = NULL; u_char pos4, pos5; struct mca_device *mdev = to_mca_device(device); @@ -635,14 +635,13 @@ static int __init el3_mca_probe(struct device *device) printk(KERN_DEBUG "3c529: irq %d ioaddr 0x%x ifport %d\n", irq, ioaddr, if_port); } EL3WINDOW(0); - for (i = 0; i < 3; i++) { - phys_addr[i] = htons(read_eeprom(ioaddr, i)); - } + for (i = 0; i < 3; i++) + phys_addr[i] = htons(read_eeprom(ioaddr, i)); dev = alloc_etherdev(sizeof (struct el3_private)); if (dev == NULL) { - release_region(ioaddr, EL3_IO_EXTENT); - return -ENOMEM; + release_region(ioaddr, EL3_IO_EXTENT); + return -ENOMEM; } netdev_boot_setup_check(dev); @@ -668,7 +667,7 @@ static int __init el3_eisa_probe (struct device *device) { short i; int ioaddr, irq, if_port; - u16 phys_addr[3]; + __be16 phys_addr[3]; struct net_device *dev = NULL; struct eisa_device *edev; int err; @@ -1063,7 +1062,6 @@ el3_rx(struct net_device *dev) struct sk_buff *skb; skb = dev_alloc_skb(pkt_len+5); - dev->stats.rx_bytes += pkt_len; if (el3_debug > 4) printk("Receiving packet size %d status %4.4x.\n", pkt_len, rx_status); @@ -1078,6 +1076,7 @@ el3_rx(struct net_device *dev) skb->protocol = eth_type_trans(skb,dev); netif_rx(skb); dev->last_rx = jiffies; + dev->stats.rx_bytes += pkt_len; dev->stats.rx_packets++; continue; } diff --git a/drivers/net/3c515.c b/drivers/net/3c515.c index 105a8c7ca7e..e4e3241628d 100644 --- a/drivers/net/3c515.c +++ b/drivers/net/3c515.c @@ -572,12 +572,16 @@ static int corkscrew_setup(struct net_device *dev, int ioaddr, int irq; DECLARE_MAC_BUF(mac); +#ifdef __ISAPNP__ if (idev) { irq = pnp_irq(idev, 0); vp->dev = &idev->dev; } else { irq = inw(ioaddr + 0x2002) & 15; } +#else + irq = inw(ioaddr + 0x2002) & 15; +#endif dev->base_addr = ioaddr; dev->irq = irq; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index e52533d75ae..654a78c3108 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -1670,7 +1670,7 @@ config SUNDANCE_MMIO config TLAN tristate "TI ThunderLAN support" - depends on NET_PCI && (PCI || EISA) && !64BIT + depends on NET_PCI && (PCI || EISA) ---help--- If you have a PCI Ethernet network card based on the ThunderLAN chip which is supported by this driver, say Y and read the @@ -2228,6 +2228,7 @@ config VIA_VELOCITY config TIGON3 tristate "Broadcom Tigon3 support" depends on PCI + select PHYLIB help This driver supports Broadcom Tigon3 based gigabit Ethernet cards. @@ -2420,8 +2421,9 @@ config CHELSIO_T1_NAPI config CHELSIO_T3 tristate "Chelsio Communications T3 10Gb Ethernet support" - depends on PCI + depends on PCI && INET select FW_LOADER + select INET_LRO help This driver supports Chelsio T3-based gigabit and 10Gb Ethernet adapters. diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 9c2394d4942..db04bfb3460 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -1876,7 +1876,8 @@ static u16 atl1_alloc_rx_buffers(struct atl1_adapter *adapter) rfd_desc = ATL1_RFD_DESC(rfd_ring, rfd_next_to_use); - skb = dev_alloc_skb(adapter->rx_buffer_len + NET_IP_ALIGN); + skb = netdev_alloc_skb(adapter->netdev, + adapter->rx_buffer_len + NET_IP_ALIGN); if (unlikely(!skb)) { /* Better luck next round */ adapter->net_stats.rx_dropped++; @@ -2135,7 +2136,7 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb, return -1; } - if (skb->protocol == ntohs(ETH_P_IP)) { + if (skb->protocol == htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); real_len = (((unsigned char *)iph - skb->data) + diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c index 3634b5fd791..7023d77bf38 100644 --- a/drivers/net/au1000_eth.c +++ b/drivers/net/au1000_eth.c @@ -1239,12 +1239,7 @@ static int au1000_rx(struct net_device *dev) */ static irqreturn_t au1000_interrupt(int irq, void *dev_id) { - struct net_device *dev = (struct net_device *) dev_id; - - if (dev == NULL) { - printk(KERN_ERR "%s: isr: null dev ptr\n", dev->name); - return IRQ_RETVAL(1); - } + struct net_device *dev = dev_id; /* Handle RX interrupts first to minimize chance of overrun */ diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 89c0018132e..41443435ab1 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -22,7 +22,6 @@ #include <linux/crc32.h> #include <linux/device.h> #include <linux/spinlock.h> -#include <linux/ethtool.h> #include <linux/mii.h> #include <linux/phy.h> #include <linux/netdevice.h> diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 50a40e43315..5b4af3cc2a4 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -88,6 +88,7 @@ #define BOND_LINK_ARP_INTERV 0 static int max_bonds = BOND_DEFAULT_MAX_BONDS; +static int num_grat_arp = 1; static int miimon = BOND_LINK_MON_INTERV; static int updelay = 0; static int downdelay = 0; @@ -99,11 +100,13 @@ static char *xmit_hash_policy = NULL; static int arp_interval = BOND_LINK_ARP_INTERV; static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; static char *arp_validate = NULL; -static int fail_over_mac = 0; +static char *fail_over_mac = NULL; struct bond_params bonding_defaults; module_param(max_bonds, int, 0); MODULE_PARM_DESC(max_bonds, "Max number of bonded devices"); +module_param(num_grat_arp, int, 0644); +MODULE_PARM_DESC(num_grat_arp, "Number of gratuitous ARP packets to send on failover event"); module_param(miimon, int, 0); MODULE_PARM_DESC(miimon, "Link check interval in milliseconds"); module_param(updelay, int, 0); @@ -133,8 +136,8 @@ module_param_array(arp_ip_target, charp, NULL, 0); MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); module_param(arp_validate, charp, 0); MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); -module_param(fail_over_mac, int, 0); -MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 0 of off (default), 1 for on."); +module_param(fail_over_mac, charp, 0); +MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. none (default), active or follow"); /*----------------------------- Global variables ----------------------------*/ @@ -187,6 +190,13 @@ struct bond_parm_tbl arp_validate_tbl[] = { { NULL, -1}, }; +struct bond_parm_tbl fail_over_mac_tbl[] = { +{ "none", BOND_FOM_NONE}, +{ "active", BOND_FOM_ACTIVE}, +{ "follow", BOND_FOM_FOLLOW}, +{ NULL, -1}, +}; + /*-------------------------- Forward declarations ---------------------------*/ static void bond_send_gratuitous_arp(struct bonding *bond); @@ -261,14 +271,14 @@ static int bond_add_vlan(struct bonding *bond, unsigned short vlan_id) */ static int bond_del_vlan(struct bonding *bond, unsigned short vlan_id) { - struct vlan_entry *vlan, *next; + struct vlan_entry *vlan; int res = -ENODEV; dprintk("bond: %s, vlan id %d\n", bond->dev->name, vlan_id); write_lock_bh(&bond->lock); - list_for_each_entry_safe(vlan, next, &bond->vlan_list, vlan_list) { + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { if (vlan->vlan_id == vlan_id) { list_del(&vlan->vlan_list); @@ -970,6 +980,82 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active, struct } } +/* + * bond_do_fail_over_mac + * + * Perform special MAC address swapping for fail_over_mac settings + * + * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh. + */ +static void bond_do_fail_over_mac(struct bonding *bond, + struct slave *new_active, + struct slave *old_active) +{ + u8 tmp_mac[ETH_ALEN]; + struct sockaddr saddr; + int rv; + + switch (bond->params.fail_over_mac) { + case BOND_FOM_ACTIVE: + if (new_active) + memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, + new_active->dev->addr_len); + break; + case BOND_FOM_FOLLOW: + /* + * if new_active && old_active, swap them + * if just old_active, do nothing (going to no active slave) + * if just new_active, set new_active to bond's MAC + */ + if (!new_active) + return; + + write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->lock); + + if (old_active) { + memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); + memcpy(saddr.sa_data, old_active->dev->dev_addr, + ETH_ALEN); + saddr.sa_family = new_active->dev->type; + } else { + memcpy(saddr.sa_data, bond->dev->dev_addr, ETH_ALEN); + saddr.sa_family = bond->dev->type; + } + + rv = dev_set_mac_address(new_active->dev, &saddr); + if (rv) { + printk(KERN_ERR DRV_NAME + ": %s: Error %d setting MAC of slave %s\n", + bond->dev->name, -rv, new_active->dev->name); + goto out; + } + + if (!old_active) + goto out; + + memcpy(saddr.sa_data, tmp_mac, ETH_ALEN); + saddr.sa_family = old_active->dev->type; + + rv = dev_set_mac_address(old_active->dev, &saddr); + if (rv) + printk(KERN_ERR DRV_NAME + ": %s: Error %d setting MAC of slave %s\n", + bond->dev->name, -rv, new_active->dev->name); +out: + read_lock(&bond->lock); + write_lock_bh(&bond->curr_slave_lock); + break; + default: + printk(KERN_ERR DRV_NAME + ": %s: bond_do_fail_over_mac impossible: bad policy %d\n", + bond->dev->name, bond->params.fail_over_mac); + break; + } + +} + + /** * find_best_interface - select the best available slave to be the active one * @bond: our bonding struct @@ -1037,7 +1123,8 @@ static struct slave *bond_find_best_slave(struct bonding *bond) * because it is apparently the best available slave we have, even though its * updelay hasn't timed out yet. * - * Warning: Caller must hold curr_slave_lock for writing. + * If new_active is not NULL, caller must hold bond->lock for read and + * curr_slave_lock for write_bh. */ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) { @@ -1048,6 +1135,8 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } if (new_active) { + new_active->jiffies = jiffies; + if (new_active->link == BOND_LINK_BACK) { if (USES_PRIMARY(bond->params.mode)) { printk(KERN_INFO DRV_NAME @@ -1059,7 +1148,6 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) new_active->delay = 0; new_active->link = BOND_LINK_UP; - new_active->jiffies = jiffies; if (bond->params.mode == BOND_MODE_8023AD) { bond_3ad_handle_link_change(new_active, BOND_LINK_UP); @@ -1103,20 +1191,21 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) bond_set_slave_active_flags(new_active); } - /* when bonding does not set the slave MAC address, the bond MAC - * address is the one of the active slave. - */ if (new_active && bond->params.fail_over_mac) - memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, - new_active->dev->addr_len); + bond_do_fail_over_mac(bond, new_active, old_active); + + bond->send_grat_arp = bond->params.num_grat_arp; if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING, &bond->curr_active_slave->dev->state)) { dprintk("delaying gratuitous arp on %s\n", bond->curr_active_slave->dev->name); - bond->send_grat_arp = 1; - } else - bond_send_gratuitous_arp(bond); + } else { + if (bond->send_grat_arp > 0) { + bond_send_gratuitous_arp(bond); + bond->send_grat_arp--; + } + } } } @@ -1129,7 +1218,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) * - The primary_slave has got its link back. * - A slave has got its link back and there's no old curr_active_slave. * - * Warning: Caller must hold curr_slave_lock for writing. + * Caller must hold bond->lock for read and curr_slave_lock for write_bh. */ void bond_select_active_slave(struct bonding *bond) { @@ -1376,14 +1465,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) printk(KERN_WARNING DRV_NAME ": %s: Warning: The first slave device " "specified does not support setting the MAC " - "address. Enabling the fail_over_mac option.", + "address. Setting fail_over_mac to active.", bond_dev->name); - bond->params.fail_over_mac = 1; - } else if (!bond->params.fail_over_mac) { + bond->params.fail_over_mac = BOND_FOM_ACTIVE; + } else if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { printk(KERN_ERR DRV_NAME ": %s: Error: The slave device specified " "does not support setting the MAC address, " - "but fail_over_mac is not enabled.\n" + "but fail_over_mac is not set to active.\n" , bond_dev->name); res = -EOPNOTSUPP; goto err_undo_flags; @@ -1490,6 +1579,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) bond_compute_features(bond); + write_unlock_bh(&bond->lock); + + read_lock(&bond->lock); + new_slave->last_arp_rx = jiffies; if (bond->params.miimon && !bond->params.use_carrier) { @@ -1566,6 +1659,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) } } + write_lock_bh(&bond->curr_slave_lock); + switch (bond->params.mode) { case BOND_MODE_ACTIVEBACKUP: bond_set_slave_inactive_flags(new_slave); @@ -1613,9 +1708,11 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) break; } /* switch(bond_mode) */ + write_unlock_bh(&bond->curr_slave_lock); + bond_set_carrier(bond); - write_unlock_bh(&bond->lock); + read_unlock(&bond->lock); res = bond_create_slave_symlinks(bond_dev, slave_dev); if (res) @@ -1639,6 +1736,10 @@ err_unset_master: err_restore_mac: if (!bond->params.fail_over_mac) { + /* XXX TODO - fom follow mode needs to change master's + * MAC if this slave's MAC is in use by the bond, or at + * least print a warning. + */ memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); addr.sa_family = slave_dev->type; dev_set_mac_address(slave_dev, &addr); @@ -1693,20 +1794,18 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) return -EINVAL; } - mac_addr_differ = memcmp(bond_dev->dev_addr, - slave->perm_hwaddr, - ETH_ALEN); - if (!mac_addr_differ && (bond->slave_cnt > 1)) { - printk(KERN_WARNING DRV_NAME - ": %s: Warning: the permanent HWaddr of %s - " - "%s - is still in use by %s. " - "Set the HWaddr of %s to a different address " - "to avoid conflicts.\n", - bond_dev->name, - slave_dev->name, - print_mac(mac, slave->perm_hwaddr), - bond_dev->name, - slave_dev->name); + if (!bond->params.fail_over_mac) { + mac_addr_differ = memcmp(bond_dev->dev_addr, slave->perm_hwaddr, + ETH_ALEN); + if (!mac_addr_differ && (bond->slave_cnt > 1)) + printk(KERN_WARNING DRV_NAME + ": %s: Warning: the permanent HWaddr of %s - " + "%s - is still in use by %s. " + "Set the HWaddr of %s to a different address " + "to avoid conflicts.\n", + bond_dev->name, slave_dev->name, + print_mac(mac, slave->perm_hwaddr), + bond_dev->name, slave_dev->name); } /* Inform AD package of unbinding of slave. */ @@ -1833,7 +1932,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) /* close slave before restoring its mac address */ dev_close(slave_dev); - if (!bond->params.fail_over_mac) { + if (bond->params.fail_over_mac != BOND_FOM_ACTIVE) { /* restore original ("permanent") mac address */ memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); addr.sa_family = slave_dev->type; @@ -2144,7 +2243,7 @@ static int __bond_mii_monitor(struct bonding *bond, int have_locks) dprintk("sending delayed gratuitous arp on on %s\n", bond->curr_active_slave->dev->name); bond_send_gratuitous_arp(bond); - bond->send_grat_arp = 0; + bond->send_grat_arp--; } } read_lock(&bond->curr_slave_lock); @@ -2397,7 +2496,7 @@ void bond_mii_monitor(struct work_struct *work) read_lock(&bond->lock); } - delay = ((bond->params.miimon * HZ) / 1000) ? : 1; + delay = msecs_to_jiffies(bond->params.miimon); read_unlock(&bond->lock); queue_delayed_work(bond->wq, &bond->mii_work, delay); } @@ -2426,37 +2525,14 @@ out: return addr; } -static int bond_has_ip(struct bonding *bond) -{ - struct vlan_entry *vlan, *vlan_next; - - if (bond->master_ip) - return 1; - - if (list_empty(&bond->vlan_list)) - return 0; - - list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, - vlan_list) { - if (vlan->vlan_ip) - return 1; - } - - return 0; -} - static int bond_has_this_ip(struct bonding *bond, __be32 ip) { - struct vlan_entry *vlan, *vlan_next; + struct vlan_entry *vlan; if (ip == bond->master_ip) return 1; - if (list_empty(&bond->vlan_list)) - return 0; - - list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, - vlan_list) { + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { if (ip == vlan->vlan_ip) return 1; } @@ -2498,7 +2574,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) { int i, vlan_id, rv; __be32 *targets = bond->params.arp_targets; - struct vlan_entry *vlan, *vlan_next; + struct vlan_entry *vlan; struct net_device *vlan_dev; struct flowi fl; struct rtable *rt; @@ -2545,8 +2621,7 @@ static void bond_arp_send_all(struct bonding *bond, struct slave *slave) } vlan_id = 0; - list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, - vlan_list) { + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); if (vlan_dev == rt->u.dst.dev) { vlan_id = vlan->vlan_id; @@ -2707,7 +2782,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) read_lock(&bond->lock); - delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; + delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); if (bond->kill_timers) { goto out; @@ -2764,8 +2839,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * if we don't know our ip yet */ if (time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || - (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks) && - bond_has_ip(bond))) { + (time_after_eq(jiffies, slave->dev->last_rx + 2*delta_in_ticks))) { slave->link = BOND_LINK_DOWN; slave->state = BOND_STATE_BACKUP; @@ -2813,246 +2887,299 @@ out: } /* - * When using arp monitoring in active-backup mode, this function is - * called to determine if any backup slaves have went down or a new - * current slave needs to be found. - * The backup slaves never generate traffic, they are considered up by merely - * receiving traffic. If the current slave goes down, each backup slave will - * be given the opportunity to tx/rx an arp before being taken down - this - * prevents all slaves from being taken down due to the current slave not - * sending any traffic for the backups to receive. The arps are not necessarily - * necessary, any tx and rx traffic will keep the current slave up. While any - * rx traffic will keep the backup slaves up, the current slave is responsible - * for generating traffic to keep them up regardless of any other traffic they - * may have received. - * see loadbalance_arp_monitor for arp monitoring in load balancing mode + * Called to inspect slaves for active-backup mode ARP monitor link state + * changes. Sets new_link in slaves to specify what action should take + * place for the slave. Returns 0 if no changes are found, >0 if changes + * to link states must be committed. + * + * Called with bond->lock held for read. */ -void bond_activebackup_arp_mon(struct work_struct *work) +static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks) { - struct bonding *bond = container_of(work, struct bonding, - arp_work.work); struct slave *slave; - int delta_in_ticks; - int i; + int i, commit = 0; - read_lock(&bond->lock); + bond_for_each_slave(bond, slave, i) { + slave->new_link = BOND_LINK_NOCHANGE; - delta_in_ticks = (bond->params.arp_interval * HZ) / 1000; + if (slave->link != BOND_LINK_UP) { + if (time_before_eq(jiffies, slave_last_rx(bond, slave) + + delta_in_ticks)) { + slave->new_link = BOND_LINK_UP; + commit++; + } - if (bond->kill_timers) { - goto out; - } + continue; + } - if (bond->slave_cnt == 0) { - goto re_arm; + /* + * Give slaves 2*delta after being enslaved or made + * active. This avoids bouncing, as the last receive + * times need a full ARP monitor cycle to be updated. + */ + if (!time_after_eq(jiffies, slave->jiffies + + 2 * delta_in_ticks)) + continue; + + /* + * Backup slave is down if: + * - No current_arp_slave AND + * - more than 3*delta since last receive AND + * - the bond has an IP address + * + * Note: a non-null current_arp_slave indicates + * the curr_active_slave went down and we are + * searching for a new one; under this condition + * we only take the curr_active_slave down - this + * gives each slave a chance to tx/rx traffic + * before being taken out + */ + if (slave->state == BOND_STATE_BACKUP && + !bond->current_arp_slave && + time_after(jiffies, slave_last_rx(bond, slave) + + 3 * delta_in_ticks)) { + slave->new_link = BOND_LINK_DOWN; + commit++; + } + + /* + * Active slave is down if: + * - more than 2*delta since transmitting OR + * - (more than 2*delta since receive AND + * the bond has an IP address) + */ + if ((slave->state == BOND_STATE_ACTIVE) && + (time_after_eq(jiffies, slave->dev->trans_start + + 2 * delta_in_ticks) || + (time_after_eq(jiffies, slave_last_rx(bond, slave) + + 2 * delta_in_ticks)))) { + slave->new_link = BOND_LINK_DOWN; + commit++; + } } - /* determine if any slave has come up or any backup slave has - * gone down - * TODO: what about up/down delay in arp mode? it wasn't here before - * so it can wait + read_lock(&bond->curr_slave_lock); + + /* + * Trigger a commit if the primary option setting has changed. */ - bond_for_each_slave(bond, slave, i) { - if (slave->link != BOND_LINK_UP) { - if (time_before_eq(jiffies, - slave_last_rx(bond, slave) + delta_in_ticks)) { + if (bond->primary_slave && + (bond->primary_slave != bond->curr_active_slave) && + (bond->primary_slave->link == BOND_LINK_UP)) + commit++; - slave->link = BOND_LINK_UP; + read_unlock(&bond->curr_slave_lock); - write_lock_bh(&bond->curr_slave_lock); + return commit; +} - if ((!bond->curr_active_slave) && - time_before_eq(jiffies, slave->dev->trans_start + delta_in_ticks)) { - bond_change_active_slave(bond, slave); - bond->current_arp_slave = NULL; - } else if (bond->curr_active_slave != slave) { - /* this slave has just come up but we - * already have a current slave; this - * can also happen if bond_enslave adds - * a new slave that is up while we are - * searching for a new slave - */ - bond_set_slave_inactive_flags(slave); - bond->current_arp_slave = NULL; - } +/* + * Called to commit link state changes noted by inspection step of + * active-backup mode ARP monitor. + * + * Called with RTNL and bond->lock for read. + */ +static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks) +{ + struct slave *slave; + int i; - bond_set_carrier(bond); + bond_for_each_slave(bond, slave, i) { + switch (slave->new_link) { + case BOND_LINK_NOCHANGE: + continue; - if (slave == bond->curr_active_slave) { - printk(KERN_INFO DRV_NAME - ": %s: %s is up and now the " - "active interface\n", - bond->dev->name, - slave->dev->name); - netif_carrier_on(bond->dev); - } else { - printk(KERN_INFO DRV_NAME - ": %s: backup interface %s is " - "now up\n", - bond->dev->name, - slave->dev->name); - } + case BOND_LINK_UP: + write_lock_bh(&bond->curr_slave_lock); - write_unlock_bh(&bond->curr_slave_lock); - } - } else { - read_lock(&bond->curr_slave_lock); + if (!bond->curr_active_slave && + time_before_eq(jiffies, slave->dev->trans_start + + delta_in_ticks)) { + slave->link = BOND_LINK_UP; + bond_change_active_slave(bond, slave); + bond->current_arp_slave = NULL; - if ((slave != bond->curr_active_slave) && - (!bond->current_arp_slave) && - (time_after_eq(jiffies, slave_last_rx(bond, slave) + 3*delta_in_ticks) && - bond_has_ip(bond))) { - /* a backup slave has gone down; three times - * the delta allows the current slave to be - * taken out before the backup slave. - * note: a non-null current_arp_slave indicates - * the curr_active_slave went down and we are - * searching for a new one; under this - * condition we only take the curr_active_slave - * down - this gives each slave a chance to - * tx/rx traffic before being taken out + printk(KERN_INFO DRV_NAME + ": %s: %s is up and now the " + "active interface\n", + bond->dev->name, slave->dev->name); + + } else if (bond->curr_active_slave != slave) { + /* this slave has just come up but we + * already have a current slave; this can + * also happen if bond_enslave adds a new + * slave that is up while we are searching + * for a new slave */ + slave->link = BOND_LINK_UP; + bond_set_slave_inactive_flags(slave); + bond->current_arp_slave = NULL; - read_unlock(&bond->curr_slave_lock); + printk(KERN_INFO DRV_NAME + ": %s: backup interface %s is now up\n", + bond->dev->name, slave->dev->name); + } - slave->link = BOND_LINK_DOWN; + write_unlock_bh(&bond->curr_slave_lock); - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } + break; + + case BOND_LINK_DOWN: + if (slave->link_failure_count < UINT_MAX) + slave->link_failure_count++; + + slave->link = BOND_LINK_DOWN; + + if (slave == bond->curr_active_slave) { + printk(KERN_INFO DRV_NAME + ": %s: link status down for active " + "interface %s, disabling it\n", + bond->dev->name, slave->dev->name); bond_set_slave_inactive_flags(slave); + write_lock_bh(&bond->curr_slave_lock); + + bond_select_active_slave(bond); + if (bond->curr_active_slave) + bond->curr_active_slave->jiffies = + jiffies; + + write_unlock_bh(&bond->curr_slave_lock); + + bond->current_arp_slave = NULL; + + } else if (slave->state == BOND_STATE_BACKUP) { printk(KERN_INFO DRV_NAME ": %s: backup interface %s is now down\n", - bond->dev->name, - slave->dev->name); - } else { - read_unlock(&bond->curr_slave_lock); + bond->dev->name, slave->dev->name); + + bond_set_slave_inactive_flags(slave); } + break; + + default: + printk(KERN_ERR DRV_NAME + ": %s: impossible: new_link %d on slave %s\n", + bond->dev->name, slave->new_link, + slave->dev->name); } } - read_lock(&bond->curr_slave_lock); - slave = bond->curr_active_slave; - read_unlock(&bond->curr_slave_lock); - - if (slave) { - /* if we have sent traffic in the past 2*arp_intervals but - * haven't xmit and rx traffic in that time interval, select - * a different slave. slave->jiffies is only updated when - * a slave first becomes the curr_active_slave - not necessarily - * after every arp; this ensures the slave has a full 2*delta - * before being taken out. if a primary is being used, check - * if it is up and needs to take over as the curr_active_slave - */ - if ((time_after_eq(jiffies, slave->dev->trans_start + 2*delta_in_ticks) || - (time_after_eq(jiffies, slave_last_rx(bond, slave) + 2*delta_in_ticks) && - bond_has_ip(bond))) && - time_after_eq(jiffies, slave->jiffies + 2*delta_in_ticks)) { + /* + * No race with changes to primary via sysfs, as we hold rtnl. + */ + if (bond->primary_slave && + (bond->primary_slave != bond->curr_active_slave) && + (bond->primary_slave->link == BOND_LINK_UP)) { + write_lock_bh(&bond->curr_slave_lock); + bond_change_active_slave(bond, bond->primary_slave); + write_unlock_bh(&bond->curr_slave_lock); + } - slave->link = BOND_LINK_DOWN; + bond_set_carrier(bond); +} - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } +/* + * Send ARP probes for active-backup mode ARP monitor. + * + * Called with bond->lock held for read. + */ +static void bond_ab_arp_probe(struct bonding *bond) +{ + struct slave *slave; + int i; - printk(KERN_INFO DRV_NAME - ": %s: link status down for active interface " - "%s, disabling it\n", - bond->dev->name, - slave->dev->name); + read_lock(&bond->curr_slave_lock); - write_lock_bh(&bond->curr_slave_lock); + if (bond->current_arp_slave && bond->curr_active_slave) + printk("PROBE: c_arp %s && cas %s BAD\n", + bond->current_arp_slave->dev->name, + bond->curr_active_slave->dev->name); - bond_select_active_slave(bond); - slave = bond->curr_active_slave; + if (bond->curr_active_slave) { + bond_arp_send_all(bond, bond->curr_active_slave); + read_unlock(&bond->curr_slave_lock); + return; + } - write_unlock_bh(&bond->curr_slave_lock); + read_unlock(&bond->curr_slave_lock); - bond->current_arp_slave = slave; + /* if we don't have a curr_active_slave, search for the next available + * backup slave from the current_arp_slave and make it the candidate + * for becoming the curr_active_slave + */ - if (slave) { - slave->jiffies = jiffies; - } - } else if ((bond->primary_slave) && - (bond->primary_slave != slave) && - (bond->primary_slave->link == BOND_LINK_UP)) { - /* at this point, slave is the curr_active_slave */ - printk(KERN_INFO DRV_NAME - ": %s: changing from interface %s to primary " - "interface %s\n", - bond->dev->name, - slave->dev->name, - bond->primary_slave->dev->name); + if (!bond->current_arp_slave) { + bond->current_arp_slave = bond->first_slave; + if (!bond->current_arp_slave) + return; + } - /* primary is up so switch to it */ - write_lock_bh(&bond->curr_slave_lock); - bond_change_active_slave(bond, bond->primary_slave); - write_unlock_bh(&bond->curr_slave_lock); + bond_set_slave_inactive_flags(bond->current_arp_slave); - slave = bond->primary_slave; + /* search for next candidate */ + bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { + if (IS_UP(slave->dev)) { + slave->link = BOND_LINK_BACK; + bond_set_slave_active_flags(slave); + bond_arp_send_all(bond, slave); slave->jiffies = jiffies; - } else { - bond->current_arp_slave = NULL; + bond->current_arp_slave = slave; + break; } - /* the current slave must tx an arp to ensure backup slaves - * rx traffic + /* if the link state is up at this point, we + * mark it down - this can happen if we have + * simultaneous link failures and + * reselect_active_interface doesn't make this + * one the current slave so it is still marked + * up when it is actually down */ - if (slave && bond_has_ip(bond)) { - bond_arp_send_all(bond, slave); + if (slave->link == BOND_LINK_UP) { + slave->link = BOND_LINK_DOWN; + if (slave->link_failure_count < UINT_MAX) + slave->link_failure_count++; + + bond_set_slave_inactive_flags(slave); + + printk(KERN_INFO DRV_NAME + ": %s: backup interface %s is now down.\n", + bond->dev->name, slave->dev->name); } } +} - /* if we don't have a curr_active_slave, search for the next available - * backup slave from the current_arp_slave and make it the candidate - * for becoming the curr_active_slave - */ - if (!slave) { - if (!bond->current_arp_slave) { - bond->current_arp_slave = bond->first_slave; - } +void bond_activebackup_arp_mon(struct work_struct *work) +{ + struct bonding *bond = container_of(work, struct bonding, + arp_work.work); + int delta_in_ticks; - if (bond->current_arp_slave) { - bond_set_slave_inactive_flags(bond->current_arp_slave); + read_lock(&bond->lock); - /* search for next candidate */ - bond_for_each_slave_from(bond, slave, i, bond->current_arp_slave->next) { - if (IS_UP(slave->dev)) { - slave->link = BOND_LINK_BACK; - bond_set_slave_active_flags(slave); - bond_arp_send_all(bond, slave); - slave->jiffies = jiffies; - bond->current_arp_slave = slave; - break; - } + if (bond->kill_timers) + goto out; - /* if the link state is up at this point, we - * mark it down - this can happen if we have - * simultaneous link failures and - * reselect_active_interface doesn't make this - * one the current slave so it is still marked - * up when it is actually down - */ - if (slave->link == BOND_LINK_UP) { - slave->link = BOND_LINK_DOWN; - if (slave->link_failure_count < UINT_MAX) { - slave->link_failure_count++; - } + delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); - bond_set_slave_inactive_flags(slave); + if (bond->slave_cnt == 0) + goto re_arm; - printk(KERN_INFO DRV_NAME - ": %s: backup interface %s is " - "now down.\n", - bond->dev->name, - slave->dev->name); - } - } - } + if (bond_ab_arp_inspect(bond, delta_in_ticks)) { + read_unlock(&bond->lock); + rtnl_lock(); + read_lock(&bond->lock); + + bond_ab_arp_commit(bond, delta_in_ticks); + + read_unlock(&bond->lock); + rtnl_unlock(); + read_lock(&bond->lock); } + bond_ab_arp_probe(bond); + re_arm: if (bond->params.arp_interval) { queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); @@ -3128,7 +3255,8 @@ static void bond_info_show_master(struct seq_file *seq) if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && bond->params.fail_over_mac) - seq_printf(seq, " (fail_over_mac)"); + seq_printf(seq, " (fail_over_mac %s)", + fail_over_mac_tbl[bond->params.fail_over_mac].modename); seq_printf(seq, "\n"); @@ -3500,13 +3628,13 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, { struct in_ifaddr *ifa = ptr; struct net_device *vlan_dev, *event_dev = ifa->ifa_dev->dev; - struct bonding *bond, *bond_next; - struct vlan_entry *vlan, *vlan_next; + struct bonding *bond; + struct vlan_entry *vlan; if (dev_net(ifa->ifa_dev->dev) != &init_net) return NOTIFY_DONE; - list_for_each_entry_safe(bond, bond_next, &bond_dev_list, bond_list) { + list_for_each_entry(bond, &bond_dev_list, bond_list) { if (bond->dev == event_dev) { switch (event) { case NETDEV_UP: @@ -3520,11 +3648,7 @@ static int bond_inetaddr_event(struct notifier_block *this, unsigned long event, } } - if (list_empty(&bond->vlan_list)) - continue; - - list_for_each_entry_safe(vlan, vlan_next, &bond->vlan_list, - vlan_list) { + list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { vlan_dev = vlan_group_get_device(bond->vlgrp, vlan->vlan_id); if (vlan_dev == event_dev) { switch (event) { @@ -4060,10 +4184,10 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); /* - * If fail_over_mac is enabled, do nothing and return success. - * Returning an error causes ifenslave to fail. + * If fail_over_mac is set to active, do nothing and return + * success. Returning an error causes ifenslave to fail. */ - if (bond->params.fail_over_mac) + if (bond->params.fail_over_mac == BOND_FOM_ACTIVE) return 0; if (!is_valid_ether_addr(sa->sa_data)) { @@ -4568,7 +4692,7 @@ int bond_parse_parm(const char *buf, struct bond_parm_tbl *tbl) static int bond_check_params(struct bond_params *params) { - int arp_validate_value; + int arp_validate_value, fail_over_mac_value; /* * Convert string parameters. @@ -4658,6 +4782,13 @@ static int bond_check_params(struct bond_params *params) use_carrier = 1; } + if (num_grat_arp < 0 || num_grat_arp > 255) { + printk(KERN_WARNING DRV_NAME + ": Warning: num_grat_arp (%d) not in range 0-255 so it " + "was reset to 1 \n", num_grat_arp); + num_grat_arp = 1; + } + /* reset values for 802.3ad */ if (bond_mode == BOND_MODE_8023AD) { if (!miimon) { @@ -4836,15 +4967,29 @@ static int bond_check_params(struct bond_params *params) primary = NULL; } - if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP)) - printk(KERN_WARNING DRV_NAME - ": Warning: fail_over_mac only affects " - "active-backup mode.\n"); + if (fail_over_mac) { + fail_over_mac_value = bond_parse_parm(fail_over_mac, + fail_over_mac_tbl); + if (fail_over_mac_value == -1) { + printk(KERN_ERR DRV_NAME + ": Error: invalid fail_over_mac \"%s\"\n", + arp_validate == NULL ? "NULL" : arp_validate); + return -EINVAL; + } + + if (bond_mode != BOND_MODE_ACTIVEBACKUP) + printk(KERN_WARNING DRV_NAME + ": Warning: fail_over_mac only affects " + "active-backup mode.\n"); + } else { + fail_over_mac_value = BOND_FOM_NONE; + } /* fill params struct with the proper values */ params->mode = bond_mode; params->xmit_policy = xmit_hashtype; params->miimon = miimon; + params->num_grat_arp = num_grat_arp; params->arp_interval = arp_interval; params->arp_validate = arp_validate_value; params->updelay = updelay; @@ -4852,7 +4997,7 @@ static int bond_check_params(struct bond_params *params) params->use_carrier = use_carrier; params->lacp_fast = lacp_fast; params->primary[0] = 0; - params->fail_over_mac = fail_over_mac; + params->fail_over_mac = fail_over_mac_value; if (primary) { strncpy(params->primary, primary, IFNAMSIZ); @@ -4871,10 +5016,10 @@ static struct lock_class_key bonding_netdev_xmit_lock_key; * Caller must NOT hold rtnl_lock; we need to release it here before we * set up our sysfs entries. */ -int bond_create(char *name, struct bond_params *params, struct bonding **newbond) +int bond_create(char *name, struct bond_params *params) { struct net_device *bond_dev; - struct bonding *bond, *nxt; + struct bonding *bond; int res; rtnl_lock(); @@ -4882,7 +5027,7 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond /* Check to see if the bond already exists. */ if (name) { - list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) + list_for_each_entry(bond, &bond_dev_list, bond_list) if (strnicmp(bond->dev->name, name, IFNAMSIZ) == 0) { printk(KERN_ERR DRV_NAME ": cannot add bond %s; it already exists\n", @@ -4925,9 +5070,6 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond lockdep_set_class(&bond_dev->_xmit_lock, &bonding_netdev_xmit_lock_key); - if (newbond) - *newbond = bond_dev->priv; - netif_carrier_off(bond_dev); up_write(&bonding_rwsem); @@ -4957,7 +5099,7 @@ static int __init bonding_init(void) { int i; int res; - struct bonding *bond, *nxt; + struct bonding *bond; printk(KERN_INFO "%s", version); @@ -4973,7 +5115,7 @@ static int __init bonding_init(void) init_rwsem(&bonding_rwsem); for (i = 0; i < max_bonds; i++) { - res = bond_create(NULL, &bonding_defaults, NULL); + res = bond_create(NULL, &bonding_defaults); if (res) goto err; } @@ -4987,7 +5129,7 @@ static int __init bonding_init(void) goto out; err: - list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) { + list_for_each_entry(bond, &bond_dev_list, bond_list) { bond_work_cancel_all(bond); destroy_workqueue(bond->wq); } diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index 08f3d396bcd..dd265c69b0d 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -50,6 +50,7 @@ extern struct bond_parm_tbl bond_mode_tbl[]; extern struct bond_parm_tbl bond_lacp_tbl[]; extern struct bond_parm_tbl xmit_hashtype_tbl[]; extern struct bond_parm_tbl arp_validate_tbl[]; +extern struct bond_parm_tbl fail_over_mac_tbl[]; static int expected_refcount = -1; static struct class *netdev_class; @@ -111,7 +112,6 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t char *ifname; int rv, res = count; struct bonding *bond; - struct bonding *nxt; sscanf(buffer, "%16s", command); /* IFNAMSIZ*/ ifname = command + 1; @@ -122,7 +122,7 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t if (command[0] == '+') { printk(KERN_INFO DRV_NAME ": %s is being created...\n", ifname); - rv = bond_create(ifname, &bonding_defaults, &bond); + rv = bond_create(ifname, &bonding_defaults); if (rv) { printk(KERN_INFO DRV_NAME ": Bond creation failed.\n"); res = rv; @@ -134,7 +134,7 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t rtnl_lock(); down_write(&bonding_rwsem); - list_for_each_entry_safe(bond, nxt, &bond_dev_list, bond_list) + list_for_each_entry(bond, &bond_dev_list, bond_list) if (strnicmp(bond->dev->name, ifname, IFNAMSIZ) == 0) { /* check the ref count on the bond's kobject. * If it's > expected, then there's a file open, @@ -548,42 +548,37 @@ static ssize_t bonding_show_fail_over_mac(struct device *d, struct device_attrib { struct bonding *bond = to_bond(d); - return sprintf(buf, "%d\n", bond->params.fail_over_mac) + 1; + return sprintf(buf, "%s %d\n", + fail_over_mac_tbl[bond->params.fail_over_mac].modename, + bond->params.fail_over_mac); } static ssize_t bonding_store_fail_over_mac(struct device *d, struct device_attribute *attr, const char *buf, size_t count) { int new_value; - int ret = count; struct bonding *bond = to_bond(d); if (bond->slave_cnt != 0) { printk(KERN_ERR DRV_NAME ": %s: Can't alter fail_over_mac with slaves in bond.\n", bond->dev->name); - ret = -EPERM; - goto out; + return -EPERM; } - if (sscanf(buf, "%d", &new_value) != 1) { + new_value = bond_parse_parm(buf, fail_over_mac_tbl); + if (new_value < 0) { printk(KERN_ERR DRV_NAME - ": %s: no fail_over_mac value specified.\n", - bond->dev->name); - ret = -EINVAL; - goto out; + ": %s: Ignoring invalid fail_over_mac value %s.\n", + bond->dev->name, buf); + return -EINVAL; } - if ((new_value == 0) || (new_value == 1)) { - bond->params.fail_over_mac = new_value; - printk(KERN_INFO DRV_NAME ": %s: Setting fail_over_mac to %d.\n", - bond->dev->name, new_value); - } else { - printk(KERN_INFO DRV_NAME - ": %s: Ignoring invalid fail_over_mac value %d.\n", - bond->dev->name, new_value); - } -out: - return ret; + bond->params.fail_over_mac = new_value; + printk(KERN_INFO DRV_NAME ": %s: Setting fail_over_mac to %s (%d).\n", + bond->dev->name, fail_over_mac_tbl[new_value].modename, + new_value); + + return count; } static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, bonding_show_fail_over_mac, bonding_store_fail_over_mac); @@ -952,6 +947,45 @@ out: static DEVICE_ATTR(lacp_rate, S_IRUGO | S_IWUSR, bonding_show_lacp, bonding_store_lacp); /* + * Show and set the number of grat ARP to send after a failover event. + */ +static ssize_t bonding_show_n_grat_arp(struct device *d, + struct device_attribute *attr, + char *buf) +{ + struct bonding *bond = to_bond(d); + + return sprintf(buf, "%d\n", bond->params.num_grat_arp); +} + +static ssize_t bonding_store_n_grat_arp(struct device *d, + struct device_attribute *attr, + const char *buf, size_t count) +{ + int new_value, ret = count; + struct bonding *bond = to_bond(d); + + if (sscanf(buf, "%d", &new_value) != 1) { + printk(KERN_ERR DRV_NAME + ": %s: no num_grat_arp value specified.\n", + bond->dev->name); + ret = -EINVAL; + goto out; + } + if (new_value < 0 || new_value > 255) { + printk(KERN_ERR DRV_NAME + ": %s: Invalid num_grat_arp value %d not in range 0-255; rejected.\n", + bond->dev->name, new_value); + ret = -EINVAL; + goto out; + } else { + bond->params.num_grat_arp = new_value; + } +out: + return ret; +} +static DEVICE_ATTR(num_grat_arp, S_IRUGO | S_IWUSR, bonding_show_n_grat_arp, bonding_store_n_grat_arp); +/* * Show and set the MII monitor interval. There are two tricky bits * here. First, if MII monitoring is activated, then we must disable * ARP monitoring. Second, if the timer isn't running, we must @@ -1388,6 +1422,7 @@ static struct attribute *per_bond_attrs[] = { &dev_attr_updelay.attr, &dev_attr_lacp_rate.attr, &dev_attr_xmit_hash_policy.attr, + &dev_attr_num_grat_arp.attr, &dev_attr_miimon.attr, &dev_attr_primary.attr, &dev_attr_use_carrier.attr, diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index a3c74e20aa5..89fd9963db7 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -125,6 +125,7 @@ struct bond_params { int mode; int xmit_policy; int miimon; + int num_grat_arp; int arp_interval; int arp_validate; int use_carrier; @@ -157,6 +158,7 @@ struct slave { unsigned long jiffies; unsigned long last_arp_rx; s8 link; /* one of BOND_LINK_XXXX */ + s8 new_link; s8 state; /* one of BOND_STATE_XXXX */ u32 original_flags; u32 original_mtu; @@ -169,6 +171,11 @@ struct slave { }; /* + * Link pseudo-state only used internally by monitors + */ +#define BOND_LINK_NOCHANGE -1 + +/* * Here are the locking policies for the two bonding locks: * * 1) Get bond->lock when reading/writing slave list. @@ -241,6 +248,10 @@ static inline struct bonding *bond_get_bond_by_slave(struct slave *slave) return (struct bonding *)slave->dev->master->priv; } +#define BOND_FOM_NONE 0 +#define BOND_FOM_ACTIVE 1 +#define BOND_FOM_FOLLOW 2 + #define BOND_ARP_VALIDATE_NONE 0 #define BOND_ARP_VALIDATE_ACTIVE (1 << BOND_STATE_ACTIVE) #define BOND_ARP_VALIDATE_BACKUP (1 << BOND_STATE_BACKUP) @@ -301,7 +312,7 @@ static inline void bond_unset_master_alb_flags(struct bonding *bond) struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); -int bond_create(char *name, struct bond_params *params, struct bonding **newbond); +int bond_create(char *name, struct bond_params *params); void bond_destroy(struct bonding *bond); int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev); int bond_create_sysfs(void); diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 93e13636f8d..83768df2780 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -142,8 +142,8 @@ #define DRV_MODULE_NAME "cassini" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "1.5" -#define DRV_MODULE_RELDATE "4 Jan 2008" +#define DRV_MODULE_VERSION "1.6" +#define DRV_MODULE_RELDATE "21 May 2008" #define CAS_DEF_MSG_ENABLE \ (NETIF_MSG_DRV | \ @@ -2136,9 +2136,12 @@ end_copy_pkt: if (addr) cas_page_unmap(addr); } - skb->csum = csum_unfold(~csum); - skb->ip_summed = CHECKSUM_COMPLETE; skb->protocol = eth_type_trans(skb, cp->dev); + if (skb->protocol == htons(ETH_P_IP)) { + skb->csum = csum_unfold(~csum); + skb->ip_summed = CHECKSUM_COMPLETE; + } else + skb->ip_summed = CHECKSUM_NONE; return len; } diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c index ae07100bb93..7f3f62e1b11 100644 --- a/drivers/net/cpmac.c +++ b/drivers/net/cpmac.c @@ -38,6 +38,7 @@ #include <linux/platform_device.h> #include <linux/dma-mapping.h> #include <asm/gpio.h> +#include <asm/atomic.h> MODULE_AUTHOR("Eugene Konev <ejka@imfi.kspu.ru>"); MODULE_DESCRIPTION("TI AR7 ethernet driver (CPMAC)"); @@ -187,6 +188,7 @@ struct cpmac_desc { #define CPMAC_EOQ 0x1000 struct sk_buff *skb; struct cpmac_desc *next; + struct cpmac_desc *prev; dma_addr_t mapping; dma_addr_t data_mapping; }; @@ -208,6 +210,7 @@ struct cpmac_priv { struct work_struct reset_work; struct platform_device *pdev; struct napi_struct napi; + atomic_t reset_pending; }; static irqreturn_t cpmac_irq(int, void *); @@ -241,6 +244,16 @@ static void cpmac_dump_desc(struct net_device *dev, struct cpmac_desc *desc) printk("\n"); } +static void cpmac_dump_all_desc(struct net_device *dev) +{ + struct cpmac_priv *priv = netdev_priv(dev); + struct cpmac_desc *dump = priv->rx_head; + do { + cpmac_dump_desc(dev, dump); + dump = dump->next; + } while (dump != priv->rx_head); +} + static void cpmac_dump_skb(struct net_device *dev, struct sk_buff *skb) { int i; @@ -412,21 +425,42 @@ static struct sk_buff *cpmac_rx_one(struct cpmac_priv *priv, static int cpmac_poll(struct napi_struct *napi, int budget) { struct sk_buff *skb; - struct cpmac_desc *desc; - int received = 0; + struct cpmac_desc *desc, *restart; struct cpmac_priv *priv = container_of(napi, struct cpmac_priv, napi); + int received = 0, processed = 0; spin_lock(&priv->rx_lock); if (unlikely(!priv->rx_head)) { if (netif_msg_rx_err(priv) && net_ratelimit()) printk(KERN_WARNING "%s: rx: polling, but no queue\n", priv->dev->name); + spin_unlock(&priv->rx_lock); netif_rx_complete(priv->dev, napi); return 0; } desc = priv->rx_head; + restart = NULL; while (((desc->dataflags & CPMAC_OWN) == 0) && (received < budget)) { + processed++; + + if ((desc->dataflags & CPMAC_EOQ) != 0) { + /* The last update to eoq->hw_next didn't happen + * soon enough, and the receiver stopped here. + *Remember this descriptor so we can restart + * the receiver after freeing some space. + */ + if (unlikely(restart)) { + if (netif_msg_rx_err(priv)) + printk(KERN_ERR "%s: poll found a" + " duplicate EOQ: %p and %p\n", + priv->dev->name, restart, desc); + goto fatal_error; + } + + restart = desc->next; + } + skb = cpmac_rx_one(priv, desc); if (likely(skb)) { netif_receive_skb(skb); @@ -435,19 +469,90 @@ static int cpmac_poll(struct napi_struct *napi, int budget) desc = desc->next; } + if (desc != priv->rx_head) { + /* We freed some buffers, but not the whole ring, + * add what we did free to the rx list */ + desc->prev->hw_next = (u32)0; + priv->rx_head->prev->hw_next = priv->rx_head->mapping; + } + + /* Optimization: If we did not actually process an EOQ (perhaps because + * of quota limits), check to see if the tail of the queue has EOQ set. + * We should immediately restart in that case so that the receiver can + * restart and run in parallel with more packet processing. + * This lets us handle slightly larger bursts before running + * out of ring space (assuming dev->weight < ring_size) */ + + if (!restart && + (priv->rx_head->prev->dataflags & (CPMAC_OWN|CPMAC_EOQ)) + == CPMAC_EOQ && + (priv->rx_head->dataflags & CPMAC_OWN) != 0) { + /* reset EOQ so the poll loop (above) doesn't try to + * restart this when it eventually gets to this descriptor. + */ + priv->rx_head->prev->dataflags &= ~CPMAC_EOQ; + restart = priv->rx_head; + } + + if (restart) { + priv->dev->stats.rx_errors++; + priv->dev->stats.rx_fifo_errors++; + if (netif_msg_rx_err(priv) && net_ratelimit()) + printk(KERN_WARNING "%s: rx dma ring overrun\n", + priv->dev->name); + + if (unlikely((restart->dataflags & CPMAC_OWN) == 0)) { + if (netif_msg_drv(priv)) + printk(KERN_ERR "%s: cpmac_poll is trying to " + "restart rx from a descriptor that's " + "not free: %p\n", + priv->dev->name, restart); + goto fatal_error; + } + + cpmac_write(priv->regs, CPMAC_RX_PTR(0), restart->mapping); + } + priv->rx_head = desc; spin_unlock(&priv->rx_lock); if (unlikely(netif_msg_rx_status(priv))) printk(KERN_DEBUG "%s: poll processed %d packets\n", priv->dev->name, received); - if (desc->dataflags & CPMAC_OWN) { + if (processed == 0) { + /* we ran out of packets to read, + * revert to interrupt-driven mode */ netif_rx_complete(priv->dev, napi); - cpmac_write(priv->regs, CPMAC_RX_PTR(0), (u32)desc->mapping); cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1); return 0; } return 1; + +fatal_error: + /* Something went horribly wrong. + * Reset hardware to try to recover rather than wedging. */ + + if (netif_msg_drv(priv)) { + printk(KERN_ERR "%s: cpmac_poll is confused. " + "Resetting hardware\n", priv->dev->name); + cpmac_dump_all_desc(priv->dev); + printk(KERN_DEBUG "%s: RX_PTR(0)=0x%08x RX_ACK(0)=0x%08x\n", + priv->dev->name, + cpmac_read(priv->regs, CPMAC_RX_PTR(0)), + cpmac_read(priv->regs, CPMAC_RX_ACK(0))); + } + + spin_unlock(&priv->rx_lock); + netif_rx_complete(priv->dev, napi); + netif_stop_queue(priv->dev); + napi_disable(&priv->napi); + + atomic_inc(&priv->reset_pending); + cpmac_hw_stop(priv->dev); + if (!schedule_work(&priv->reset_work)) + atomic_dec(&priv->reset_pending); + return 0; + } static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) @@ -456,6 +561,9 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) struct cpmac_desc *desc; struct cpmac_priv *priv = netdev_priv(dev); + if (unlikely(atomic_read(&priv->reset_pending))) + return NETDEV_TX_BUSY; + if (unlikely(skb_padto(skb, ETH_ZLEN))) return NETDEV_TX_OK; @@ -621,8 +729,10 @@ static void cpmac_clear_rx(struct net_device *dev) desc->dataflags = CPMAC_OWN; dev->stats.rx_dropped++; } + desc->hw_next = desc->next->mapping; desc = desc->next; } + priv->rx_head->prev->hw_next = 0; } static void cpmac_clear_tx(struct net_device *dev) @@ -635,14 +745,14 @@ static void cpmac_clear_tx(struct net_device *dev) priv->desc_ring[i].dataflags = 0; if (priv->desc_ring[i].skb) { dev_kfree_skb_any(priv->desc_ring[i].skb); - if (netif_subqueue_stopped(dev, i)) - netif_wake_subqueue(dev, i); + priv->desc_ring[i].skb = NULL; } } } static void cpmac_hw_error(struct work_struct *work) { + int i; struct cpmac_priv *priv = container_of(work, struct cpmac_priv, reset_work); @@ -651,8 +761,48 @@ static void cpmac_hw_error(struct work_struct *work) spin_unlock(&priv->rx_lock); cpmac_clear_tx(priv->dev); cpmac_hw_start(priv->dev); - napi_enable(&priv->napi); - netif_start_queue(priv->dev); + barrier(); + atomic_dec(&priv->reset_pending); + + for (i = 0; i < CPMAC_QUEUES; i++) + netif_wake_subqueue(priv->dev, i); + netif_wake_queue(priv->dev); + cpmac_write(priv->regs, CPMAC_MAC_INT_ENABLE, 3); +} + +static void cpmac_check_status(struct net_device *dev) +{ + struct cpmac_priv *priv = netdev_priv(dev); + + u32 macstatus = cpmac_read(priv->regs, CPMAC_MAC_STATUS); + int rx_channel = (macstatus >> 8) & 7; + int rx_code = (macstatus >> 12) & 15; + int tx_channel = (macstatus >> 16) & 7; + int tx_code = (macstatus >> 20) & 15; + + if (rx_code || tx_code) { + if (netif_msg_drv(priv) && net_ratelimit()) { + /* Can't find any documentation on what these + *error codes actually are. So just log them and hope.. + */ + if (rx_code) + printk(KERN_WARNING "%s: host error %d on rx " + "channel %d (macstatus %08x), resetting\n", + dev->name, rx_code, rx_channel, macstatus); + if (tx_code) + printk(KERN_WARNING "%s: host error %d on tx " + "channel %d (macstatus %08x), resetting\n", + dev->name, tx_code, tx_channel, macstatus); + } + + netif_stop_queue(dev); + cpmac_hw_stop(dev); + if (schedule_work(&priv->reset_work)) + atomic_inc(&priv->reset_pending); + if (unlikely(netif_msg_hw(priv))) + cpmac_dump_regs(dev); + } + cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff); } static irqreturn_t cpmac_irq(int irq, void *dev_id) @@ -683,49 +833,32 @@ static irqreturn_t cpmac_irq(int irq, void *dev_id) cpmac_write(priv->regs, CPMAC_MAC_EOI_VECTOR, 0); - if (unlikely(status & (MAC_INT_HOST | MAC_INT_STATUS))) { - if (netif_msg_drv(priv) && net_ratelimit()) - printk(KERN_ERR "%s: hw error, resetting...\n", - dev->name); - netif_stop_queue(dev); - napi_disable(&priv->napi); - cpmac_hw_stop(dev); - schedule_work(&priv->reset_work); - if (unlikely(netif_msg_hw(priv))) - cpmac_dump_regs(dev); - } + if (unlikely(status & (MAC_INT_HOST | MAC_INT_STATUS))) + cpmac_check_status(dev); return IRQ_HANDLED; } static void cpmac_tx_timeout(struct net_device *dev) { - struct cpmac_priv *priv = netdev_priv(dev); int i; + struct cpmac_priv *priv = netdev_priv(dev); spin_lock(&priv->lock); dev->stats.tx_errors++; spin_unlock(&priv->lock); if (netif_msg_tx_err(priv) && net_ratelimit()) printk(KERN_WARNING "%s: transmit timeout\n", dev->name); - /* - * FIXME: waking up random queue is not the best thing to - * do... on the other hand why we got here at all? - */ -#ifdef CONFIG_NETDEVICES_MULTIQUEUE + + atomic_inc(&priv->reset_pending); + barrier(); + cpmac_clear_tx(dev); + barrier(); + atomic_dec(&priv->reset_pending); + + netif_wake_queue(priv->dev); for (i = 0; i < CPMAC_QUEUES; i++) - if (priv->desc_ring[i].skb) { - priv->desc_ring[i].dataflags = 0; - dev_kfree_skb_any(priv->desc_ring[i].skb); - netif_wake_subqueue(dev, i); - break; - } -#else - priv->desc_ring[0].dataflags = 0; - if (priv->desc_ring[0].skb) - dev_kfree_skb_any(priv->desc_ring[0].skb); - netif_wake_queue(dev); -#endif + netif_wake_subqueue(dev, i); } static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -901,9 +1034,12 @@ static int cpmac_open(struct net_device *dev) desc->buflen = CPMAC_SKB_SIZE; desc->dataflags = CPMAC_OWN; desc->next = &priv->rx_head[(i + 1) % priv->ring_size]; + desc->next->prev = desc; desc->hw_next = (u32)desc->next->mapping; } + priv->rx_head->prev->hw_next = (u32)0; + if ((res = request_irq(dev->irq, cpmac_irq, IRQF_SHARED, dev->name, dev))) { if (netif_msg_drv(priv)) @@ -912,6 +1048,7 @@ static int cpmac_open(struct net_device *dev) goto fail_irq; } + atomic_set(&priv->reset_pending, 0); INIT_WORK(&priv->reset_work, cpmac_hw_error); cpmac_hw_start(dev); @@ -1007,21 +1144,10 @@ static int __devinit cpmac_probe(struct platform_device *pdev) if (phy_id == PHY_MAX_ADDR) { if (external_switch || dumb_switch) { - struct fixed_phy_status status = {}; - - /* - * FIXME: this should be in the platform code! - * Since there is not platform code at all (that is, - * no mainline users of that driver), place it here - * for now. - */ - phy_id = 0; - status.link = 1; - status.duplex = 1; - status.speed = 100; - fixed_phy_add(PHY_POLL, phy_id, &status); + mdio_bus_id = 0; /* fixed phys bus */ + phy_id = pdev->id; } else { - printk(KERN_ERR "cpmac: no PHY present\n"); + dev_err(&pdev->dev, "no PHY present\n"); return -ENODEV; } } @@ -1064,10 +1190,8 @@ static int __devinit cpmac_probe(struct platform_device *pdev) priv->msg_enable = netif_msg_init(debug_level, 0xff); memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr)); - snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT, mdio_bus_id, phy_id); - - priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link, 0, - PHY_INTERFACE_MODE_MII); + priv->phy = phy_connect(dev, cpmac_mii.phy_map[phy_id]->dev.bus_id, + &cpmac_adjust_link, 0, PHY_INTERFACE_MODE_MII); if (IS_ERR(priv->phy)) { if (netif_msg_drv(priv)) printk(KERN_ERR "%s: Could not attach to PHY\n", diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h index acebe431d06..271140433b0 100644 --- a/drivers/net/cxgb3/adapter.h +++ b/drivers/net/cxgb3/adapter.h @@ -42,6 +42,7 @@ #include <linux/cache.h> #include <linux/mutex.h> #include <linux/bitops.h> +#include <linux/inet_lro.h> #include "t3cdev.h" #include <asm/io.h> @@ -92,6 +93,7 @@ struct sge_fl { /* SGE per free-buffer list state */ unsigned int gen; /* free list generation */ struct fl_pg_chunk pg_chunk;/* page chunk cache */ unsigned int use_pages; /* whether FL uses pages or sk_buffs */ + unsigned int order; /* order of page allocations */ struct rx_desc *desc; /* address of HW Rx descriptor ring */ struct rx_sw_desc *sdesc; /* address of SW Rx descriptor ring */ dma_addr_t phys_addr; /* physical address of HW ring start */ @@ -116,12 +118,15 @@ struct sge_rspq { /* state for an SGE response queue */ unsigned int polling; /* is the queue serviced through NAPI? */ unsigned int holdoff_tmr; /* interrupt holdoff timer in 100ns */ unsigned int next_holdoff; /* holdoff time for next interrupt */ + unsigned int rx_recycle_buf; /* whether recycling occurred + within current sop-eop */ struct rsp_desc *desc; /* address of HW response ring */ dma_addr_t phys_addr; /* physical address of the ring */ unsigned int cntxt_id; /* SGE context id for the response q */ spinlock_t lock; /* guards response processing */ struct sk_buff *rx_head; /* offload packet receive queue head */ struct sk_buff *rx_tail; /* offload packet receive queue tail */ + struct sk_buff *pg_skb; /* used to build frag list in napi handler */ unsigned long offload_pkts; unsigned long offload_bundles; @@ -169,16 +174,29 @@ enum { /* per port SGE statistics */ SGE_PSTAT_TX_CSUM, /* # of TX checksum offloads */ SGE_PSTAT_VLANEX, /* # of VLAN tag extractions */ SGE_PSTAT_VLANINS, /* # of VLAN tag insertions */ + SGE_PSTAT_LRO_AGGR, /* # of page chunks added to LRO sessions */ + SGE_PSTAT_LRO_FLUSHED, /* # of flushed LRO sessions */ + SGE_PSTAT_LRO_NO_DESC, /* # of overflown LRO sessions */ SGE_PSTAT_MAX /* must be last */ }; +#define T3_MAX_LRO_SES 8 +#define T3_MAX_LRO_MAX_PKTS 64 + struct sge_qset { /* an SGE queue set */ struct adapter *adap; struct napi_struct napi; struct sge_rspq rspq; struct sge_fl fl[SGE_RXQ_PER_SET]; struct sge_txq txq[SGE_TXQ_PER_SET]; + struct net_lro_mgr lro_mgr; + struct net_lro_desc lro_desc[T3_MAX_LRO_SES]; + struct skb_frag_struct *lro_frag_tbl; + int lro_nfrags; + int lro_enabled; + int lro_frag_len; + void *lro_va; struct net_device *netdev; unsigned long txq_stopped; /* which Tx queues are stopped */ struct timer_list tx_reclaim_timer; /* reclaims TX buffers */ diff --git a/drivers/net/cxgb3/common.h b/drivers/net/cxgb3/common.h index 579bee42a5c..d444f5881f5 100644 --- a/drivers/net/cxgb3/common.h +++ b/drivers/net/cxgb3/common.h @@ -351,6 +351,7 @@ struct tp_params { struct qset_params { /* SGE queue set parameters */ unsigned int polling; /* polling/interrupt service for rspq */ + unsigned int lro; /* large receive offload */ unsigned int coalesce_usecs; /* irq coalescing timer */ unsigned int rspq_size; /* # of entries in response queue */ unsigned int fl_size; /* # of entries in regular free list */ diff --git a/drivers/net/cxgb3/cxgb3_ioctl.h b/drivers/net/cxgb3/cxgb3_ioctl.h index 0a82fcddf2d..68200a14065 100644 --- a/drivers/net/cxgb3/cxgb3_ioctl.h +++ b/drivers/net/cxgb3/cxgb3_ioctl.h @@ -90,6 +90,7 @@ struct ch_qset_params { int32_t fl_size[2]; int32_t intr_lat; int32_t polling; + int32_t lro; int32_t cong_thres; }; diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c index 3a312721679..5447f3e60f0 100644 --- a/drivers/net/cxgb3/cxgb3_main.c +++ b/drivers/net/cxgb3/cxgb3_main.c @@ -1212,6 +1212,9 @@ static char stats_strings[][ETH_GSTRING_LEN] = { "VLANinsertions ", "TxCsumOffload ", "RxCsumGood ", + "LroAggregated ", + "LroFlushed ", + "LroNoDesc ", "RxDrops ", "CheckTXEnToggled ", @@ -1340,6 +1343,9 @@ static void get_stats(struct net_device *dev, struct ethtool_stats *stats, *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_VLANINS); *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_TX_CSUM); *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_RX_CSUM_GOOD); + *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_AGGR); + *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_FLUSHED); + *data++ = collect_sge_port_stats(adapter, pi, SGE_PSTAT_LRO_NO_DESC); *data++ = s->rx_cong_drops; *data++ = s->num_toggled; @@ -1558,6 +1564,13 @@ static int set_rx_csum(struct net_device *dev, u32 data) struct port_info *p = netdev_priv(dev); p->rx_csum_offload = data; + if (!data) { + struct adapter *adap = p->adapter; + int i; + + for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) + adap->sge.qs[i].lro_enabled = 0; + } return 0; } @@ -1830,6 +1843,11 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) } } } + if (t.lro >= 0) { + struct sge_qset *qs = &adapter->sge.qs[t.qset_idx]; + q->lro = t.lro; + qs->lro_enabled = t.lro; + } break; } case CHELSIO_GET_QSET_PARAMS:{ @@ -1849,6 +1867,7 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr) t.fl_size[0] = q->fl_size; t.fl_size[1] = q->jumbo_size; t.polling = q->polling; + t.lro = q->lro; t.intr_lat = q->coalesce_usecs; t.cong_thres = q->cong_thres; diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c index 796eb305cdc..a96331c875e 100644 --- a/drivers/net/cxgb3/sge.c +++ b/drivers/net/cxgb3/sge.c @@ -55,6 +55,9 @@ * directly. */ #define FL0_PG_CHUNK_SIZE 2048 +#define FL0_PG_ORDER 0 +#define FL1_PG_CHUNK_SIZE (PAGE_SIZE > 8192 ? 16384 : 8192) +#define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1) #define SGE_RX_DROP_THRES 16 @@ -359,7 +362,7 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q) } if (q->pg_chunk.page) { - __free_page(q->pg_chunk.page); + __free_pages(q->pg_chunk.page, q->order); q->pg_chunk.page = NULL; } } @@ -376,13 +379,16 @@ static void free_rx_bufs(struct pci_dev *pdev, struct sge_fl *q) * Add a buffer of the given length to the supplied HW and SW Rx * descriptors. */ -static inline void add_one_rx_buf(void *va, unsigned int len, - struct rx_desc *d, struct rx_sw_desc *sd, - unsigned int gen, struct pci_dev *pdev) +static inline int add_one_rx_buf(void *va, unsigned int len, + struct rx_desc *d, struct rx_sw_desc *sd, + unsigned int gen, struct pci_dev *pdev) { dma_addr_t mapping; mapping = pci_map_single(pdev, va, len, PCI_DMA_FROMDEVICE); + if (unlikely(pci_dma_mapping_error(mapping))) + return -ENOMEM; + pci_unmap_addr_set(sd, dma_addr, mapping); d->addr_lo = cpu_to_be32(mapping); @@ -390,12 +396,14 @@ static inline void add_one_rx_buf(void *va, unsigned int len, wmb(); d->len_gen = cpu_to_be32(V_FLD_GEN1(gen)); d->gen2 = cpu_to_be32(V_FLD_GEN2(gen)); + return 0; } -static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp) +static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp, + unsigned int order) { if (!q->pg_chunk.page) { - q->pg_chunk.page = alloc_page(gfp); + q->pg_chunk.page = alloc_pages(gfp, order); if (unlikely(!q->pg_chunk.page)) return -ENOMEM; q->pg_chunk.va = page_address(q->pg_chunk.page); @@ -404,7 +412,7 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp) sd->pg_chunk = q->pg_chunk; q->pg_chunk.offset += q->buf_size; - if (q->pg_chunk.offset == PAGE_SIZE) + if (q->pg_chunk.offset == (PAGE_SIZE << order)) q->pg_chunk.page = NULL; else { q->pg_chunk.va += q->buf_size; @@ -424,15 +432,18 @@ static int alloc_pg_chunk(struct sge_fl *q, struct rx_sw_desc *sd, gfp_t gfp) * allocated with the supplied gfp flags. The caller must assure that * @n does not exceed the queue's capacity. */ -static void refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp) +static int refill_fl(struct adapter *adap, struct sge_fl *q, int n, gfp_t gfp) { void *buf_start; struct rx_sw_desc *sd = &q->sdesc[q->pidx]; struct rx_desc *d = &q->desc[q->pidx]; + unsigned int count = 0; while (n--) { + int err; + if (q->use_pages) { - if (unlikely(alloc_pg_chunk(q, sd, gfp))) { + if (unlikely(alloc_pg_chunk(q, sd, gfp, q->order))) { nomem: q->alloc_failed++; break; } @@ -447,8 +458,16 @@ nomem: q->alloc_failed++; buf_start = skb->data; } - add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen, - adap->pdev); + err = add_one_rx_buf(buf_start, q->buf_size, d, sd, q->gen, + adap->pdev); + if (unlikely(err)) { + if (!q->use_pages) { + kfree_skb(sd->skb); + sd->skb = NULL; + } + break; + } + d++; sd++; if (++q->pidx == q->size) { @@ -458,14 +477,19 @@ nomem: q->alloc_failed++; d = q->desc; } q->credits++; + count++; } wmb(); - t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); + if (likely(count)) + t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id)); + + return count; } static inline void __refill_fl(struct adapter *adap, struct sge_fl *fl) { - refill_fl(adap, fl, min(16U, fl->size - fl->credits), GFP_ATOMIC); + refill_fl(adap, fl, min(16U, fl->size - fl->credits), + GFP_ATOMIC | __GFP_COMP); } /** @@ -560,6 +584,8 @@ static void t3_reset_qset(struct sge_qset *q) memset(q->txq, 0, sizeof(struct sge_txq) * SGE_TXQ_PER_SET); q->txq_stopped = 0; memset(&q->tx_reclaim_timer, 0, sizeof(q->tx_reclaim_timer)); + kfree(q->lro_frag_tbl); + q->lro_nfrags = q->lro_frag_len = 0; } @@ -740,19 +766,22 @@ use_orig_buf: * that are page chunks rather than sk_buffs. */ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl, - unsigned int len, unsigned int drop_thres) + struct sge_rspq *q, unsigned int len, + unsigned int drop_thres) { - struct sk_buff *skb = NULL; + struct sk_buff *newskb, *skb; struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; - if (len <= SGE_RX_COPY_THRES) { - skb = alloc_skb(len, GFP_ATOMIC); - if (likely(skb != NULL)) { - __skb_put(skb, len); + newskb = skb = q->pg_skb; + + if (!skb && (len <= SGE_RX_COPY_THRES)) { + newskb = alloc_skb(len, GFP_ATOMIC); + if (likely(newskb != NULL)) { + __skb_put(newskb, len); pci_dma_sync_single_for_cpu(adap->pdev, pci_unmap_addr(sd, dma_addr), len, PCI_DMA_FROMDEVICE); - memcpy(skb->data, sd->pg_chunk.va, len); + memcpy(newskb->data, sd->pg_chunk.va, len); pci_dma_sync_single_for_device(adap->pdev, pci_unmap_addr(sd, dma_addr), len, PCI_DMA_FROMDEVICE); @@ -761,14 +790,16 @@ static struct sk_buff *get_packet_pg(struct adapter *adap, struct sge_fl *fl, recycle: fl->credits--; recycle_rx_buf(adap, fl, fl->cidx); - return skb; + q->rx_recycle_buf++; + return newskb; } - if (unlikely(fl->credits <= drop_thres)) + if (unlikely(q->rx_recycle_buf || (!skb && fl->credits <= drop_thres))) goto recycle; - skb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC); - if (unlikely(!skb)) { + if (!skb) + newskb = alloc_skb(SGE_RX_PULL_LEN, GFP_ATOMIC); + if (unlikely(!newskb)) { if (!drop_thres) return NULL; goto recycle; @@ -776,21 +807,29 @@ recycle: pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr), fl->buf_size, PCI_DMA_FROMDEVICE); - __skb_put(skb, SGE_RX_PULL_LEN); - memcpy(skb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN); - skb_fill_page_desc(skb, 0, sd->pg_chunk.page, - sd->pg_chunk.offset + SGE_RX_PULL_LEN, - len - SGE_RX_PULL_LEN); - skb->len = len; - skb->data_len = len - SGE_RX_PULL_LEN; - skb->truesize += skb->data_len; + if (!skb) { + __skb_put(newskb, SGE_RX_PULL_LEN); + memcpy(newskb->data, sd->pg_chunk.va, SGE_RX_PULL_LEN); + skb_fill_page_desc(newskb, 0, sd->pg_chunk.page, + sd->pg_chunk.offset + SGE_RX_PULL_LEN, + len - SGE_RX_PULL_LEN); + newskb->len = len; + newskb->data_len = len - SGE_RX_PULL_LEN; + } else { + skb_fill_page_desc(newskb, skb_shinfo(newskb)->nr_frags, + sd->pg_chunk.page, + sd->pg_chunk.offset, len); + newskb->len += len; + newskb->data_len += len; + } + newskb->truesize += newskb->data_len; fl->credits--; /* * We do not refill FLs here, we let the caller do it to overlap a * prefetch. */ - return skb; + return newskb; } /** @@ -1831,9 +1870,10 @@ static void restart_tx(struct sge_qset *qs) * if it was immediate data in a response. */ static void rx_eth(struct adapter *adap, struct sge_rspq *rq, - struct sk_buff *skb, int pad) + struct sk_buff *skb, int pad, int lro) { struct cpl_rx_pkt *p = (struct cpl_rx_pkt *)(skb->data + pad); + struct sge_qset *qs = rspq_to_qset(rq); struct port_info *pi; skb_pull(skb, sizeof(*p) + pad); @@ -1850,18 +1890,202 @@ static void rx_eth(struct adapter *adap, struct sge_rspq *rq, if (unlikely(p->vlan_valid)) { struct vlan_group *grp = pi->vlan_grp; - rspq_to_qset(rq)->port_stats[SGE_PSTAT_VLANEX]++; + qs->port_stats[SGE_PSTAT_VLANEX]++; if (likely(grp)) - __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan), - rq->polling); + if (lro) + lro_vlan_hwaccel_receive_skb(&qs->lro_mgr, skb, + grp, + ntohs(p->vlan), + p); + else + __vlan_hwaccel_rx(skb, grp, ntohs(p->vlan), + rq->polling); else dev_kfree_skb_any(skb); - } else if (rq->polling) - netif_receive_skb(skb); - else + } else if (rq->polling) { + if (lro) + lro_receive_skb(&qs->lro_mgr, skb, p); + else + netif_receive_skb(skb); + } else netif_rx(skb); } +static inline int is_eth_tcp(u32 rss) +{ + return G_HASHTYPE(ntohl(rss)) == RSS_HASH_4_TUPLE; +} + +/** + * lro_frame_ok - check if an ingress packet is eligible for LRO + * @p: the CPL header of the packet + * + * Returns true if a received packet is eligible for LRO. + * The following conditions must be true: + * - packet is TCP/IP Ethernet II (checked elsewhere) + * - not an IP fragment + * - no IP options + * - TCP/IP checksums are correct + * - the packet is for this host + */ +static inline int lro_frame_ok(const struct cpl_rx_pkt *p) +{ + const struct ethhdr *eh = (struct ethhdr *)(p + 1); + const struct iphdr *ih = (struct iphdr *)(eh + 1); + + return (*((u8 *)p + 1) & 0x90) == 0x10 && p->csum == htons(0xffff) && + eh->h_proto == htons(ETH_P_IP) && ih->ihl == (sizeof(*ih) >> 2); +} + +#define TCP_FLAG_MASK (TCP_FLAG_CWR | TCP_FLAG_ECE | TCP_FLAG_URG |\ + TCP_FLAG_ACK | TCP_FLAG_PSH | TCP_FLAG_RST |\ + TCP_FLAG_SYN | TCP_FLAG_FIN) +#define TSTAMP_WORD ((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |\ + (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP) + +/** + * lro_segment_ok - check if a TCP segment is eligible for LRO + * @tcph: the TCP header of the packet + * + * Returns true if a TCP packet is eligible for LRO. This requires that + * the packet have only the ACK flag set and no TCP options besides + * time stamps. + */ +static inline int lro_segment_ok(const struct tcphdr *tcph) +{ + int optlen; + + if (unlikely((tcp_flag_word(tcph) & TCP_FLAG_MASK) != TCP_FLAG_ACK)) + return 0; + + optlen = (tcph->doff << 2) - sizeof(*tcph); + if (optlen) { + const u32 *opt = (const u32 *)(tcph + 1); + + if (optlen != TCPOLEN_TSTAMP_ALIGNED || + *opt != htonl(TSTAMP_WORD) || !opt[2]) + return 0; + } + return 1; +} + +static int t3_get_lro_header(void **eh, void **iph, void **tcph, + u64 *hdr_flags, void *priv) +{ + const struct cpl_rx_pkt *cpl = priv; + + if (!lro_frame_ok(cpl)) + return -1; + + *eh = (struct ethhdr *)(cpl + 1); + *iph = (struct iphdr *)((struct ethhdr *)*eh + 1); + *tcph = (struct tcphdr *)((struct iphdr *)*iph + 1); + + if (!lro_segment_ok(*tcph)) + return -1; + + *hdr_flags = LRO_IPV4 | LRO_TCP; + return 0; +} + +static int t3_get_skb_header(struct sk_buff *skb, + void **iph, void **tcph, u64 *hdr_flags, + void *priv) +{ + void *eh; + + return t3_get_lro_header(&eh, iph, tcph, hdr_flags, priv); +} + +static int t3_get_frag_header(struct skb_frag_struct *frag, void **eh, + void **iph, void **tcph, u64 *hdr_flags, + void *priv) +{ + return t3_get_lro_header(eh, iph, tcph, hdr_flags, priv); +} + +/** + * lro_add_page - add a page chunk to an LRO session + * @adap: the adapter + * @qs: the associated queue set + * @fl: the free list containing the page chunk to add + * @len: packet length + * @complete: Indicates the last fragment of a frame + * + * Add a received packet contained in a page chunk to an existing LRO + * session. + */ +static void lro_add_page(struct adapter *adap, struct sge_qset *qs, + struct sge_fl *fl, int len, int complete) +{ + struct rx_sw_desc *sd = &fl->sdesc[fl->cidx]; + struct cpl_rx_pkt *cpl; + struct skb_frag_struct *rx_frag = qs->lro_frag_tbl; + int nr_frags = qs->lro_nfrags, frag_len = qs->lro_frag_len; + int offset = 0; + + if (!nr_frags) { + offset = 2 + sizeof(struct cpl_rx_pkt); + qs->lro_va = cpl = sd->pg_chunk.va + 2; + } + + fl->credits--; + + len -= offset; + pci_unmap_single(adap->pdev, pci_unmap_addr(sd, dma_addr), + fl->buf_size, PCI_DMA_FROMDEVICE); + + rx_frag += nr_frags; + rx_frag->page = sd->pg_chunk.page; + rx_frag->page_offset = sd->pg_chunk.offset + offset; + rx_frag->size = len; + frag_len += len; + qs->lro_nfrags++; + qs->lro_frag_len = frag_len; + + if (!complete) + return; + + qs->lro_nfrags = qs->lro_frag_len = 0; + cpl = qs->lro_va; + + if (unlikely(cpl->vlan_valid)) { + struct net_device *dev = qs->netdev; + struct port_info *pi = netdev_priv(dev); + struct vlan_group *grp = pi->vlan_grp; + + if (likely(grp != NULL)) { + lro_vlan_hwaccel_receive_frags(&qs->lro_mgr, + qs->lro_frag_tbl, + frag_len, frag_len, + grp, ntohs(cpl->vlan), + cpl, 0); + return; + } + } + lro_receive_frags(&qs->lro_mgr, qs->lro_frag_tbl, + frag_len, frag_len, cpl, 0); +} + +/** + * init_lro_mgr - initialize a LRO manager object + * @lro_mgr: the LRO manager object + */ +static void init_lro_mgr(struct sge_qset *qs, struct net_lro_mgr *lro_mgr) +{ + lro_mgr->dev = qs->netdev; + lro_mgr->features = LRO_F_NAPI; + lro_mgr->ip_summed = CHECKSUM_UNNECESSARY; + lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; + lro_mgr->max_desc = T3_MAX_LRO_SES; + lro_mgr->lro_arr = qs->lro_desc; + lro_mgr->get_frag_header = t3_get_frag_header; + lro_mgr->get_skb_header = t3_get_skb_header; + lro_mgr->max_aggr = T3_MAX_LRO_MAX_PKTS; + if (lro_mgr->max_aggr > MAX_SKB_FRAGS) + lro_mgr->max_aggr = MAX_SKB_FRAGS; +} + /** * handle_rsp_cntrl_info - handles control information in a response * @qs: the queue set corresponding to the response @@ -1947,6 +2171,12 @@ static inline int is_new_response(const struct rsp_desc *r, return (r->intr_gen & F_RSPD_GEN2) == q->gen; } +static inline void clear_rspq_bufstate(struct sge_rspq * const q) +{ + q->pg_skb = NULL; + q->rx_recycle_buf = 0; +} + #define RSPD_GTS_MASK (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS) #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \ V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \ @@ -1984,10 +2214,11 @@ static int process_responses(struct adapter *adap, struct sge_qset *qs, q->next_holdoff = q->holdoff_tmr; while (likely(budget_left && is_new_response(r, q))) { - int eth, ethpad = 2; + int packet_complete, eth, ethpad = 2, lro = qs->lro_enabled; struct sk_buff *skb = NULL; u32 len, flags = ntohl(r->flags); - __be32 rss_hi = *(const __be32 *)r, rss_lo = r->rss_hdr.rss_hash_val; + __be32 rss_hi = *(const __be32 *)r, + rss_lo = r->rss_hdr.rss_hash_val; eth = r->rss_hdr.opcode == CPL_RX_PKT; @@ -2015,6 +2246,9 @@ no_mem: } else if ((len = ntohl(r->len_cq)) != 0) { struct sge_fl *fl; + if (eth) + lro = qs->lro_enabled && is_eth_tcp(rss_hi); + fl = (len & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0]; if (fl->use_pages) { void *addr = fl->sdesc[fl->cidx].pg_chunk.va; @@ -2024,9 +2258,18 @@ no_mem: prefetch(addr + L1_CACHE_BYTES); #endif __refill_fl(adap, fl); + if (lro > 0) { + lro_add_page(adap, qs, fl, + G_RSPD_LEN(len), + flags & F_RSPD_EOP); + goto next_fl; + } - skb = get_packet_pg(adap, fl, G_RSPD_LEN(len), - eth ? SGE_RX_DROP_THRES : 0); + skb = get_packet_pg(adap, fl, q, + G_RSPD_LEN(len), + eth ? + SGE_RX_DROP_THRES : 0); + q->pg_skb = skb; } else skb = get_packet(adap, fl, G_RSPD_LEN(len), eth ? SGE_RX_DROP_THRES : 0); @@ -2036,7 +2279,7 @@ no_mem: q->rx_drops++; } else if (unlikely(r->rss_hdr.opcode == CPL_TRACE_PKT)) __skb_pull(skb, 2); - +next_fl: if (++fl->cidx == fl->size) fl->cidx = 0; } else @@ -2060,9 +2303,13 @@ no_mem: q->credits = 0; } - if (likely(skb != NULL)) { + packet_complete = flags & + (F_RSPD_EOP | F_RSPD_IMM_DATA_VALID | + F_RSPD_ASYNC_NOTIF); + + if (skb != NULL && packet_complete) { if (eth) - rx_eth(adap, q, skb, ethpad); + rx_eth(adap, q, skb, ethpad, lro); else { q->offload_pkts++; /* Preserve the RSS info in csum & priority */ @@ -2072,11 +2319,19 @@ no_mem: offload_skbs, ngathered); } + + if (flags & F_RSPD_EOP) + clear_rspq_bufstate(q); } --budget_left; } deliver_partial_bundle(&adap->tdev, q, offload_skbs, ngathered); + lro_flush_all(&qs->lro_mgr); + qs->port_stats[SGE_PSTAT_LRO_AGGR] = qs->lro_mgr.stats.aggregated; + qs->port_stats[SGE_PSTAT_LRO_FLUSHED] = qs->lro_mgr.stats.flushed; + qs->port_stats[SGE_PSTAT_LRO_NO_DESC] = qs->lro_mgr.stats.no_desc; + if (sleeping) check_ring_db(adap, qs, sleeping); @@ -2618,8 +2873,9 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, int irq_vec_idx, const struct qset_params *p, int ntxq, struct net_device *dev) { - int i, ret = -ENOMEM; + int i, avail, ret = -ENOMEM; struct sge_qset *q = &adapter->sge.qs[id]; + struct net_lro_mgr *lro_mgr = &q->lro_mgr; init_qset_cntxt(q, id); init_timer(&q->tx_reclaim_timer); @@ -2687,11 +2943,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, #else q->fl[0].buf_size = SGE_RX_SM_BUF_SIZE + sizeof(struct cpl_rx_data); #endif - q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0; +#if FL1_PG_CHUNK_SIZE > 0 + q->fl[1].buf_size = FL1_PG_CHUNK_SIZE; +#else q->fl[1].buf_size = is_offload(adapter) ? (16 * 1024) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : MAX_FRAME_SIZE + 2 + sizeof(struct cpl_rx_pkt); +#endif + q->fl[0].use_pages = FL0_PG_CHUNK_SIZE > 0; + q->fl[1].use_pages = FL1_PG_CHUNK_SIZE > 0; + q->fl[0].order = FL0_PG_ORDER; + q->fl[1].order = FL1_PG_ORDER; + + q->lro_frag_tbl = kcalloc(MAX_FRAME_SIZE / FL1_PG_CHUNK_SIZE + 1, + sizeof(struct skb_frag_struct), + GFP_KERNEL); + q->lro_nfrags = q->lro_frag_len = 0; spin_lock_irq(&adapter->sge.reg_lock); /* FL threshold comparison uses < */ @@ -2742,8 +3010,23 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, q->netdev = dev; t3_update_qset_coalesce(q, p); - refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL); - refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL); + init_lro_mgr(q, lro_mgr); + + avail = refill_fl(adapter, &q->fl[0], q->fl[0].size, + GFP_KERNEL | __GFP_COMP); + if (!avail) { + CH_ALERT(adapter, "free list queue 0 initialization failed\n"); + goto err; + } + if (avail < q->fl[0].size) + CH_WARN(adapter, "free list queue 0 enabled with %d credits\n", + avail); + + avail = refill_fl(adapter, &q->fl[1], q->fl[1].size, + GFP_KERNEL | __GFP_COMP); + if (avail < q->fl[1].size) + CH_WARN(adapter, "free list queue 1 enabled with %d credits\n", + avail); refill_rspq(adapter, &q->rspq, q->rspq.size - 1); t3_write_reg(adapter, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) | @@ -2752,9 +3035,9 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports, mod_timer(&q->tx_reclaim_timer, jiffies + TX_RECLAIM_PERIOD); return 0; - err_unlock: +err_unlock: spin_unlock_irq(&adapter->sge.reg_lock); - err: +err: t3_free_qset(adapter, q); return ret; } @@ -2876,7 +3159,7 @@ void t3_sge_prep(struct adapter *adap, struct sge_params *p) q->coalesce_usecs = 5; q->rspq_size = 1024; q->fl_size = 1024; - q->jumbo_size = 512; + q->jumbo_size = 512; q->txq_size[TXQ_ETH] = 1024; q->txq_size[TXQ_OFLD] = 1024; q->txq_size[TXQ_CTRL] = 256; diff --git a/drivers/net/cxgb3/t3_cpl.h b/drivers/net/cxgb3/t3_cpl.h index b7a1a310dfd..a666c5d51cc 100644 --- a/drivers/net/cxgb3/t3_cpl.h +++ b/drivers/net/cxgb3/t3_cpl.h @@ -174,6 +174,13 @@ enum { /* TCP congestion control algorithms */ CONG_ALG_HIGHSPEED }; +enum { /* RSS hash type */ + RSS_HASH_NONE = 0, + RSS_HASH_2_TUPLE = 1, + RSS_HASH_4_TUPLE = 2, + RSS_HASH_TCPV6 = 3 +}; + union opcode_tid { __be32 opcode_tid; __u8 opcode; @@ -184,6 +191,10 @@ union opcode_tid { #define G_OPCODE(x) (((x) >> S_OPCODE) & 0xFF) #define G_TID(x) ((x) & 0xFFFFFF) +#define S_HASHTYPE 22 +#define M_HASHTYPE 0x3 +#define G_HASHTYPE(x) (((x) >> S_HASHTYPE) & M_HASHTYPE) + /* tid is assumed to be 24-bits */ #define MK_OPCODE_TID(opcode, tid) (V_OPCODE(opcode) | (tid)) diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c index e233d04a213..8277e89e552 100644 --- a/drivers/net/dl2k.c +++ b/drivers/net/dl2k.c @@ -499,7 +499,7 @@ rio_timer (unsigned long data) entry = np->old_rx % RX_RING_SIZE; /* Dropped packets don't need to re-allocate */ if (np->rx_skbuff[entry] == NULL) { - skb = dev_alloc_skb (np->rx_buf_sz); + skb = netdev_alloc_skb (dev, np->rx_buf_sz); if (skb == NULL) { np->rx_ring[entry].fraginfo = 0; printk (KERN_INFO @@ -570,7 +570,7 @@ alloc_list (struct net_device *dev) /* Allocate the rx buffers */ for (i = 0; i < RX_RING_SIZE; i++) { /* Allocated fixed size of skbuff */ - struct sk_buff *skb = dev_alloc_skb (np->rx_buf_sz); + struct sk_buff *skb = netdev_alloc_skb (dev, np->rx_buf_sz); np->rx_skbuff[i] = skb; if (skb == NULL) { printk (KERN_ERR @@ -867,7 +867,7 @@ receive_packet (struct net_device *dev) PCI_DMA_FROMDEVICE); skb_put (skb = np->rx_skbuff[entry], pkt_len); np->rx_skbuff[entry] = NULL; - } else if ((skb = dev_alloc_skb (pkt_len + 2)) != NULL) { + } else if ((skb = netdev_alloc_skb(dev, pkt_len + 2))) { pci_dma_sync_single_for_cpu(np->pdev, desc_to_dma(desc), np->rx_buf_sz, @@ -904,7 +904,7 @@ receive_packet (struct net_device *dev) struct sk_buff *skb; /* Dropped packets don't need to re-allocate */ if (np->rx_skbuff[entry] == NULL) { - skb = dev_alloc_skb (np->rx_buf_sz); + skb = netdev_alloc_skb(dev, np->rx_buf_sz); if (skb == NULL) { np->rx_ring[entry].fraginfo = 0; printk (KERN_INFO diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c index 32a9a922f15..08a7365a7d1 100644 --- a/drivers/net/dm9000.c +++ b/drivers/net/dm9000.c @@ -903,7 +903,7 @@ dm9000_stop(struct net_device *ndev) if (netif_msg_ifdown(db)) dev_dbg(db->dev, "shutting down %s\n", ndev->name); - cancel_delayed_work(&db->phy_poll); + cancel_delayed_work_sync(&db->phy_poll); netif_stop_queue(ndev); netif_carrier_off(ndev); diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 8cbb40f3a50..cab1835173c 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -4201,8 +4201,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev, struct e1000_adapter *adapter; struct e1000_hw *hw; const struct e1000_info *ei = e1000_info_tbl[ent->driver_data]; - unsigned long mmio_start, mmio_len; - unsigned long flash_start, flash_len; + resource_size_t mmio_start, mmio_len; + resource_size_t flash_start, flash_len; static int cards_found; int i, err, pci_using_dac; diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c index d1b6d4e7495..287a6191873 100644 --- a/drivers/net/ehea/ehea_main.c +++ b/drivers/net/ehea/ehea_main.c @@ -2213,8 +2213,6 @@ static void ehea_vlan_rx_register(struct net_device *dev, goto out; } - memset(cb1->vlan_filter, 0, sizeof(cb1->vlan_filter)); - hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id, H_PORT_CB1, H_PORT_CB1_ALL, cb1); if (hret != H_SUCCESS) @@ -3178,11 +3176,12 @@ out_err: static void ehea_shutdown_single_port(struct ehea_port *port) { + struct ehea_adapter *adapter = port->adapter; unregister_netdev(port->netdev); ehea_unregister_port(port); kfree(port->mc_list); free_netdev(port->netdev); - port->adapter->active_ports--; + adapter->active_ports--; } static int ehea_setup_ports(struct ehea_adapter *adapter) diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 35f66d4a459..e4d69789436 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -426,6 +426,7 @@ union ring_type { #define NV_PCI_REGSZ_VER1 0x270 #define NV_PCI_REGSZ_VER2 0x2d4 #define NV_PCI_REGSZ_VER3 0x604 +#define NV_PCI_REGSZ_MAX 0x604 /* various timeout delays: all in usec */ #define NV_TXRX_RESET_DELAY 4 @@ -784,6 +785,9 @@ struct fe_priv { /* flow control */ u32 pause_flags; + + /* power saved state */ + u32 saved_config_space[NV_PCI_REGSZ_MAX/4]; }; /* @@ -5785,49 +5789,66 @@ static int nv_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *dev = pci_get_drvdata(pdev); struct fe_priv *np = netdev_priv(dev); + u8 __iomem *base = get_hwbase(dev); + int i; - if (!netif_running(dev)) - goto out; - + if (netif_running(dev)) { + // Gross. + nv_close(dev); + } netif_device_detach(dev); - // Gross. - nv_close(dev); + /* save non-pci configuration space */ + for (i = 0;i <= np->register_size/sizeof(u32); i++) + np->saved_config_space[i] = readl(base + i*sizeof(u32)); pci_save_state(pdev); pci_enable_wake(pdev, pci_choose_state(pdev, state), np->wolenabled); + pci_disable_device(pdev); pci_set_power_state(pdev, pci_choose_state(pdev, state)); -out: return 0; } static int nv_resume(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); + struct fe_priv *np = netdev_priv(dev); u8 __iomem *base = get_hwbase(dev); - int rc = 0; - u32 txreg; - - if (!netif_running(dev)) - goto out; - - netif_device_attach(dev); + int i, rc = 0; pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); + /* ack any pending wake events, disable PME */ pci_enable_wake(pdev, PCI_D0, 0); - /* restore mac address reverse flag */ - txreg = readl(base + NvRegTransmitPoll); - txreg |= NVREG_TRANSMITPOLL_MAC_ADDR_REV; - writel(txreg, base + NvRegTransmitPoll); + /* restore non-pci configuration space */ + for (i = 0;i <= np->register_size/sizeof(u32); i++) + writel(np->saved_config_space[i], base+i*sizeof(u32)); - rc = nv_open(dev); -out: + netif_device_attach(dev); + if (netif_running(dev)) { + rc = nv_open(dev); + nv_set_multicast(dev); + } return rc; } + +static void nv_shutdown(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + struct fe_priv *np = netdev_priv(dev); + + if (netif_running(dev)) + nv_close(dev); + + pci_enable_wake(pdev, PCI_D3hot, np->wolenabled); + pci_enable_wake(pdev, PCI_D3cold, np->wolenabled); + pci_disable_device(pdev); + pci_set_power_state(pdev, PCI_D3hot); +} #else #define nv_suspend NULL +#define nv_shutdown NULL #define nv_resume NULL #endif /* CONFIG_PM */ @@ -5998,6 +6019,7 @@ static struct pci_driver driver = { .remove = __devexit_p(nv_remove), .suspend = nv_suspend, .resume = nv_resume, + .shutdown = nv_shutdown, }; static int __init init_nic(void) diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c index 67b4b0728fc..fb7c47790bd 100644 --- a/drivers/net/fs_enet/fs_enet-main.c +++ b/drivers/net/fs_enet/fs_enet-main.c @@ -43,6 +43,7 @@ #include <asm/uaccess.h> #ifdef CONFIG_PPC_CPM_NEW_BINDING +#include <linux/of_gpio.h> #include <asm/of_platform.h> #endif @@ -1093,7 +1094,7 @@ err: if (registered) unregister_netdev(ndev); - if (fep != NULL) { + if (fep && fep->ops) { (*fep->ops->free_bd)(ndev); (*fep->ops->cleanup_data)(ndev); } @@ -1172,8 +1173,7 @@ static int __devinit find_phy(struct device_node *np, struct fs_platform_info *fpi) { struct device_node *phynode, *mdionode; - struct resource res; - int ret = 0, len; + int ret = 0, len, bus_id; const u32 *data; data = of_get_property(np, "fixed-link", NULL); @@ -1190,19 +1190,28 @@ static int __devinit find_phy(struct device_node *np, if (!phynode) return -EINVAL; - mdionode = of_get_parent(phynode); - if (!mdionode) + data = of_get_property(phynode, "reg", &len); + if (!data || len != 4) { + ret = -EINVAL; goto out_put_phy; + } - ret = of_address_to_resource(mdionode, 0, &res); - if (ret) - goto out_put_mdio; + mdionode = of_get_parent(phynode); + if (!mdionode) { + ret = -EINVAL; + goto out_put_phy; + } - data = of_get_property(phynode, "reg", &len); - if (!data || len != 4) - goto out_put_mdio; + bus_id = of_get_gpio(mdionode, 0); + if (bus_id < 0) { + struct resource res; + ret = of_address_to_resource(mdionode, 0, &res); + if (ret) + goto out_put_mdio; + bus_id = res.start; + } - snprintf(fpi->bus_id, 16, "%x:%02x", res.start, *data); + snprintf(fpi->bus_id, 16, "%x:%02x", bus_id, *data); out_put_mdio: of_node_put(mdionode); diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c index e5c2380f50c..3199526bcec 100644 --- a/drivers/net/hamachi.c +++ b/drivers/net/hamachi.c @@ -1140,11 +1140,11 @@ static void hamachi_tx_timeout(struct net_device *dev) } /* Fill in the Rx buffers. Handle allocation failure gracefully. */ for (i = 0; i < RX_RING_SIZE; i++) { - struct sk_buff *skb = dev_alloc_skb(hmp->rx_buf_sz); + struct sk_buff *skb = netdev_alloc_skb(dev, hmp->rx_buf_sz); hmp->rx_skbuff[i] = skb; if (skb == NULL) break; - skb->dev = dev; /* Mark as being used by this device. */ + skb_reserve(skb, 2); /* 16 byte align the IP header. */ hmp->rx_ring[i].addr = cpu_to_leXX(pci_map_single(hmp->pci_dev, skb->data, hmp->rx_buf_sz, PCI_DMA_FROMDEVICE)); @@ -1178,14 +1178,6 @@ static void hamachi_init_ring(struct net_device *dev) hmp->cur_rx = hmp->cur_tx = 0; hmp->dirty_rx = hmp->dirty_tx = 0; -#if 0 - /* This is wrong. I'm not sure what the original plan was, but this - * is wrong. An MTU of 1 gets you a buffer of 1536, while an MTU - * of 1501 gets a buffer of 1533? -KDU - */ - hmp->rx_buf_sz = (dev->mtu <= 1500 ? PKT_BUF_SZ : dev->mtu + 32); -#endif - /* My attempt at a reasonable correction */ /* +26 gets the maximum ethernet encapsulation, +7 & ~7 because the * card needs room to do 8 byte alignment, +2 so we can reserve * the first 2 bytes, and +16 gets room for the status word from the diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c index f9051593583..45ae9d1191d 100644 --- a/drivers/net/hamradio/scc.c +++ b/drivers/net/hamradio/scc.c @@ -1340,9 +1340,10 @@ static unsigned int scc_set_param(struct scc_channel *scc, unsigned int cmd, uns case PARAM_RTS: if ( !(scc->wreg[R5] & RTS) ) { - if (arg != TX_OFF) + if (arg != TX_OFF) { scc_key_trx(scc, TX_ON); scc_start_tx_timer(scc, t_txdelay, scc->kiss.txdelay); + } } else { if (arg == TX_OFF) { diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c index 484cb2ba717..7111c65f0b3 100644 --- a/drivers/net/ixp2000/ixpdev.c +++ b/drivers/net/ixp2000/ixpdev.c @@ -108,14 +108,14 @@ static int ixpdev_rx(struct net_device *dev, int processed, int budget) if (unlikely(!netif_running(nds[desc->channel]))) goto err; - skb = dev_alloc_skb(desc->pkt_length + 2); + skb = netdev_alloc_skb(dev, desc->pkt_length + 2); if (likely(skb != NULL)) { skb_reserve(skb, 2); skb_copy_to_linear_data(skb, buf, desc->pkt_length); skb_put(skb, desc->pkt_length); skb->protocol = eth_type_trans(skb, nds[desc->channel]); - skb->dev->last_rx = jiffies; + dev->last_rx = jiffies; netif_receive_skb(skb); } diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index c91b12ea26a..93007d38df5 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -49,6 +49,7 @@ #include <linux/if_ether.h> #include <linux/if_vlan.h> #include <linux/inet_lro.h> +#include <linux/dca.h> #include <linux/ip.h> #include <linux/inet.h> #include <linux/in.h> @@ -185,11 +186,18 @@ struct myri10ge_slice_state { dma_addr_t fw_stats_bus; int watchdog_tx_done; int watchdog_tx_req; +#ifdef CONFIG_DCA + int cached_dca_tag; + int cpu; + __be32 __iomem *dca_tag; +#endif + char irq_desc[32]; }; struct myri10ge_priv { - struct myri10ge_slice_state ss; + struct myri10ge_slice_state *ss; int tx_boundary; /* boundary transmits cannot cross */ + int num_slices; int running; /* running? */ int csum_flag; /* rx_csums? */ int small_bytes; @@ -208,6 +216,11 @@ struct myri10ge_priv { dma_addr_t cmd_bus; struct pci_dev *pdev; int msi_enabled; + int msix_enabled; + struct msix_entry *msix_vectors; +#ifdef CONFIG_DCA + int dca_enabled; +#endif u32 link_state; unsigned int rdma_tags_available; int intr_coal_delay; @@ -244,6 +257,8 @@ struct myri10ge_priv { static char *myri10ge_fw_unaligned = "myri10ge_ethp_z8e.dat"; static char *myri10ge_fw_aligned = "myri10ge_eth_z8e.dat"; +static char *myri10ge_fw_rss_unaligned = "myri10ge_rss_ethp_z8e.dat"; +static char *myri10ge_fw_rss_aligned = "myri10ge_rss_eth_z8e.dat"; static char *myri10ge_fw_name = NULL; module_param(myri10ge_fw_name, charp, S_IRUGO | S_IWUSR); @@ -321,6 +336,18 @@ static int myri10ge_wcfifo = 0; module_param(myri10ge_wcfifo, int, S_IRUGO); MODULE_PARM_DESC(myri10ge_wcfifo, "Enable WC Fifo when WC is enabled"); +static int myri10ge_max_slices = 1; +module_param(myri10ge_max_slices, int, S_IRUGO); +MODULE_PARM_DESC(myri10ge_max_slices, "Max tx/rx queues"); + +static int myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_PORT; +module_param(myri10ge_rss_hash, int, S_IRUGO); +MODULE_PARM_DESC(myri10ge_rss_hash, "Type of RSS hashing to do"); + +static int myri10ge_dca = 1; +module_param(myri10ge_dca, int, S_IRUGO); +MODULE_PARM_DESC(myri10ge_dca, "Enable DCA if possible"); + #define MYRI10GE_FW_OFFSET 1024*1024 #define MYRI10GE_HIGHPART_TO_U32(X) \ (sizeof (X) == 8) ? ((u32)((u64)(X) >> 32)) : (0) @@ -631,7 +658,7 @@ static int myri10ge_adopt_running_firmware(struct myri10ge_priv *mgp) return status; } -int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp) +static int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp) { struct myri10ge_cmd cmd; int status; @@ -657,7 +684,7 @@ int myri10ge_get_firmware_capabilities(struct myri10ge_priv *mgp) return 0; } -static int myri10ge_load_firmware(struct myri10ge_priv *mgp) +static int myri10ge_load_firmware(struct myri10ge_priv *mgp, int adopt) { char __iomem *submit; __be32 buf[16] __attribute__ ((__aligned__(8))); @@ -667,6 +694,8 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp) size = 0; status = myri10ge_load_hotplug_firmware(mgp, &size); if (status) { + if (!adopt) + return status; dev_warn(&mgp->pdev->dev, "hotplug firmware loading failed\n"); /* Do not attempt to adopt firmware if there @@ -859,8 +888,12 @@ abort: static int myri10ge_reset(struct myri10ge_priv *mgp) { struct myri10ge_cmd cmd; - int status; + struct myri10ge_slice_state *ss; + int i, status; size_t bytes; +#ifdef CONFIG_DCA + unsigned long dca_tag_off; +#endif /* try to send a reset command to the card to see if it * is alive */ @@ -872,20 +905,74 @@ static int myri10ge_reset(struct myri10ge_priv *mgp) } (void)myri10ge_dma_test(mgp, MXGEFW_DMA_TEST); + /* + * Use non-ndis mcp_slot (eg, 4 bytes total, + * no toeplitz hash value returned. Older firmware will + * not understand this command, but will use the correct + * sized mcp_slot, so we ignore error returns + */ + cmd.data0 = MXGEFW_RSS_MCP_SLOT_TYPE_MIN; + (void)myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_MCP_SLOT_TYPE, &cmd, 0); /* Now exchange information about interrupts */ - bytes = mgp->max_intr_slots * sizeof(*mgp->ss.rx_done.entry); - memset(mgp->ss.rx_done.entry, 0, bytes); + bytes = mgp->max_intr_slots * sizeof(*mgp->ss[0].rx_done.entry); cmd.data0 = (u32) bytes; status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); - cmd.data0 = MYRI10GE_LOWPART_TO_U32(mgp->ss.rx_done.bus); - cmd.data1 = MYRI10GE_HIGHPART_TO_U32(mgp->ss.rx_done.bus); - status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA, &cmd, 0); + + /* + * Even though we already know how many slices are supported + * via myri10ge_probe_slices() MXGEFW_CMD_GET_MAX_RSS_QUEUES + * has magic side effects, and must be called after a reset. + * It must be called prior to calling any RSS related cmds, + * including assigning an interrupt queue for anything but + * slice 0. It must also be called *after* + * MXGEFW_CMD_SET_INTRQ_SIZE, since the intrq size is used by + * the firmware to compute offsets. + */ + + if (mgp->num_slices > 1) { + + /* ask the maximum number of slices it supports */ + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, + &cmd, 0); + if (status != 0) { + dev_err(&mgp->pdev->dev, + "failed to get number of slices\n"); + } + + /* + * MXGEFW_CMD_ENABLE_RSS_QUEUES must be called prior + * to setting up the interrupt queue DMA + */ + + cmd.data0 = mgp->num_slices; + cmd.data1 = 1; /* use MSI-X */ + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, + &cmd, 0); + if (status != 0) { + dev_err(&mgp->pdev->dev, + "failed to set number of slices\n"); + + return status; + } + } + for (i = 0; i < mgp->num_slices; i++) { + ss = &mgp->ss[i]; + cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->rx_done.bus); + cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->rx_done.bus); + cmd.data2 = i; + status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_DMA, + &cmd, 0); + }; status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_ACK_OFFSET, &cmd, 0); - mgp->ss.irq_claim = (__iomem __be32 *) (mgp->sram + cmd.data0); + for (i = 0; i < mgp->num_slices; i++) { + ss = &mgp->ss[i]; + ss->irq_claim = + (__iomem __be32 *) (mgp->sram + cmd.data0 + 8 * i); + } status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_IRQ_DEASSERT_OFFSET, &cmd, 0); mgp->irq_deassert = (__iomem __be32 *) (mgp->sram + cmd.data0); @@ -899,24 +986,116 @@ static int myri10ge_reset(struct myri10ge_priv *mgp) } put_be32(htonl(mgp->intr_coal_delay), mgp->intr_coal_delay_ptr); - memset(mgp->ss.rx_done.entry, 0, bytes); +#ifdef CONFIG_DCA + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_DCA_OFFSET, &cmd, 0); + dca_tag_off = cmd.data0; + for (i = 0; i < mgp->num_slices; i++) { + ss = &mgp->ss[i]; + if (status == 0) { + ss->dca_tag = (__iomem __be32 *) + (mgp->sram + dca_tag_off + 4 * i); + } else { + ss->dca_tag = NULL; + } + } +#endif /* CONFIG_DCA */ /* reset mcp/driver shared state back to 0 */ - mgp->ss.tx.req = 0; - mgp->ss.tx.done = 0; - mgp->ss.tx.pkt_start = 0; - mgp->ss.tx.pkt_done = 0; - mgp->ss.rx_big.cnt = 0; - mgp->ss.rx_small.cnt = 0; - mgp->ss.rx_done.idx = 0; - mgp->ss.rx_done.cnt = 0; + mgp->link_changes = 0; + for (i = 0; i < mgp->num_slices; i++) { + ss = &mgp->ss[i]; + + memset(ss->rx_done.entry, 0, bytes); + ss->tx.req = 0; + ss->tx.done = 0; + ss->tx.pkt_start = 0; + ss->tx.pkt_done = 0; + ss->rx_big.cnt = 0; + ss->rx_small.cnt = 0; + ss->rx_done.idx = 0; + ss->rx_done.cnt = 0; + ss->tx.wake_queue = 0; + ss->tx.stop_queue = 0; + } + status = myri10ge_update_mac_address(mgp, mgp->dev->dev_addr); myri10ge_change_pause(mgp, mgp->pause); myri10ge_set_multicast_list(mgp->dev); return status; } +#ifdef CONFIG_DCA +static void +myri10ge_write_dca(struct myri10ge_slice_state *ss, int cpu, int tag) +{ + ss->cpu = cpu; + ss->cached_dca_tag = tag; + put_be32(htonl(tag), ss->dca_tag); +} + +static inline void myri10ge_update_dca(struct myri10ge_slice_state *ss) +{ + int cpu = get_cpu(); + int tag; + + if (cpu != ss->cpu) { + tag = dca_get_tag(cpu); + if (ss->cached_dca_tag != tag) + myri10ge_write_dca(ss, cpu, tag); + } + put_cpu(); +} + +static void myri10ge_setup_dca(struct myri10ge_priv *mgp) +{ + int err, i; + struct pci_dev *pdev = mgp->pdev; + + if (mgp->ss[0].dca_tag == NULL || mgp->dca_enabled) + return; + if (!myri10ge_dca) { + dev_err(&pdev->dev, "dca disabled by administrator\n"); + return; + } + err = dca_add_requester(&pdev->dev); + if (err) { + dev_err(&pdev->dev, + "dca_add_requester() failed, err=%d\n", err); + return; + } + mgp->dca_enabled = 1; + for (i = 0; i < mgp->num_slices; i++) + myri10ge_write_dca(&mgp->ss[i], -1, 0); +} + +static void myri10ge_teardown_dca(struct myri10ge_priv *mgp) +{ + struct pci_dev *pdev = mgp->pdev; + int err; + + if (!mgp->dca_enabled) + return; + mgp->dca_enabled = 0; + err = dca_remove_requester(&pdev->dev); +} + +static int myri10ge_notify_dca_device(struct device *dev, void *data) +{ + struct myri10ge_priv *mgp; + unsigned long event; + + mgp = dev_get_drvdata(dev); + event = *(unsigned long *)data; + + if (event == DCA_PROVIDER_ADD) + myri10ge_setup_dca(mgp); + else if (event == DCA_PROVIDER_REMOVE) + myri10ge_teardown_dca(mgp); + return 0; +} +#endif /* CONFIG_DCA */ + static inline void myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst, struct mcp_kreq_ether_recv *src) @@ -1095,9 +1274,10 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, struct myri10ge_rx_buf *rx, rx_frags[0].size -= MXGEFW_PAD; len -= MXGEFW_PAD; lro_receive_frags(&ss->rx_done.lro_mgr, rx_frags, - len, len, /* opaque, will come back in get_frag_header */ + len, len, (void *)(__force unsigned long)csum, csum); + return 1; } @@ -1236,7 +1416,7 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget) static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp) { - struct mcp_irq_data *stats = mgp->ss.fw_stats; + struct mcp_irq_data *stats = mgp->ss[0].fw_stats; if (unlikely(stats->stats_updated)) { unsigned link_up = ntohl(stats->link_up); @@ -1283,6 +1463,11 @@ static int myri10ge_poll(struct napi_struct *napi, int budget) struct net_device *netdev = ss->mgp->dev; int work_done; +#ifdef CONFIG_DCA + if (ss->mgp->dca_enabled) + myri10ge_update_dca(ss); +#endif + /* process as many rx events as NAPI will allow */ work_done = myri10ge_clean_rx_done(ss, budget); @@ -1302,6 +1487,13 @@ static irqreturn_t myri10ge_intr(int irq, void *arg) u32 send_done_count; int i; + /* an interrupt on a non-zero slice is implicitly valid + * since MSI-X irqs are not shared */ + if (ss != mgp->ss) { + netif_rx_schedule(ss->dev, &ss->napi); + return (IRQ_HANDLED); + } + /* make sure it is our IRQ, and that the DMA has finished */ if (unlikely(!stats->valid)) return (IRQ_NONE); @@ -1311,7 +1503,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg) if (stats->valid & 1) netif_rx_schedule(ss->dev, &ss->napi); - if (!mgp->msi_enabled) { + if (!mgp->msi_enabled && !mgp->msix_enabled) { put_be32(0, mgp->irq_deassert); if (!myri10ge_deassert_wait) stats->valid = 0; @@ -1446,10 +1638,10 @@ myri10ge_get_ringparam(struct net_device *netdev, { struct myri10ge_priv *mgp = netdev_priv(netdev); - ring->rx_mini_max_pending = mgp->ss.rx_small.mask + 1; - ring->rx_max_pending = mgp->ss.rx_big.mask + 1; + ring->rx_mini_max_pending = mgp->ss[0].rx_small.mask + 1; + ring->rx_max_pending = mgp->ss[0].rx_big.mask + 1; ring->rx_jumbo_max_pending = 0; - ring->tx_max_pending = mgp->ss.rx_small.mask + 1; + ring->tx_max_pending = mgp->ss[0].rx_small.mask + 1; ring->rx_mini_pending = ring->rx_mini_max_pending; ring->rx_pending = ring->rx_max_pending; ring->rx_jumbo_pending = ring->rx_jumbo_max_pending; @@ -1497,9 +1689,12 @@ static const char myri10ge_gstrings_main_stats[][ETH_GSTRING_LEN] = { "tx_aborted_errors", "tx_carrier_errors", "tx_fifo_errors", "tx_heartbeat_errors", "tx_window_errors", /* device-specific stats */ - "tx_boundary", "WC", "irq", "MSI", + "tx_boundary", "WC", "irq", "MSI", "MSIX", "read_dma_bw_MBs", "write_dma_bw_MBs", "read_write_dma_bw_MBs", "serial_number", "watchdog_resets", +#ifdef CONFIG_DCA + "dca_capable", "dca_enabled", +#endif "link_changes", "link_up", "dropped_link_overflow", "dropped_link_error_or_filtered", "dropped_pause", "dropped_bad_phy", "dropped_bad_crc32", @@ -1524,23 +1719,31 @@ static const char myri10ge_gstrings_slice_stats[][ETH_GSTRING_LEN] = { static void myri10ge_get_strings(struct net_device *netdev, u32 stringset, u8 * data) { + struct myri10ge_priv *mgp = netdev_priv(netdev); + int i; + switch (stringset) { case ETH_SS_STATS: memcpy(data, *myri10ge_gstrings_main_stats, sizeof(myri10ge_gstrings_main_stats)); data += sizeof(myri10ge_gstrings_main_stats); - memcpy(data, *myri10ge_gstrings_slice_stats, - sizeof(myri10ge_gstrings_slice_stats)); - data += sizeof(myri10ge_gstrings_slice_stats); + for (i = 0; i < mgp->num_slices; i++) { + memcpy(data, *myri10ge_gstrings_slice_stats, + sizeof(myri10ge_gstrings_slice_stats)); + data += sizeof(myri10ge_gstrings_slice_stats); + } break; } } static int myri10ge_get_sset_count(struct net_device *netdev, int sset) { + struct myri10ge_priv *mgp = netdev_priv(netdev); + switch (sset) { case ETH_SS_STATS: - return MYRI10GE_MAIN_STATS_LEN + MYRI10GE_SLICE_STATS_LEN; + return MYRI10GE_MAIN_STATS_LEN + + mgp->num_slices * MYRI10GE_SLICE_STATS_LEN; default: return -EOPNOTSUPP; } @@ -1552,6 +1755,7 @@ myri10ge_get_ethtool_stats(struct net_device *netdev, { struct myri10ge_priv *mgp = netdev_priv(netdev); struct myri10ge_slice_state *ss; + int slice; int i; for (i = 0; i < MYRI10GE_NET_STATS_LEN; i++) @@ -1561,15 +1765,20 @@ myri10ge_get_ethtool_stats(struct net_device *netdev, data[i++] = (unsigned int)mgp->wc_enabled; data[i++] = (unsigned int)mgp->pdev->irq; data[i++] = (unsigned int)mgp->msi_enabled; + data[i++] = (unsigned int)mgp->msix_enabled; data[i++] = (unsigned int)mgp->read_dma; data[i++] = (unsigned int)mgp->write_dma; data[i++] = (unsigned int)mgp->read_write_dma; data[i++] = (unsigned int)mgp->serial_number; data[i++] = (unsigned int)mgp->watchdog_resets; +#ifdef CONFIG_DCA + data[i++] = (unsigned int)(mgp->ss[0].dca_tag != NULL); + data[i++] = (unsigned int)(mgp->dca_enabled); +#endif data[i++] = (unsigned int)mgp->link_changes; /* firmware stats are useful only in the first slice */ - ss = &mgp->ss; + ss = &mgp->ss[0]; data[i++] = (unsigned int)ntohl(ss->fw_stats->link_up); data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_link_overflow); data[i++] = @@ -1585,24 +1794,27 @@ myri10ge_get_ethtool_stats(struct net_device *netdev, data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_small_buffer); data[i++] = (unsigned int)ntohl(ss->fw_stats->dropped_no_big_buffer); - data[i++] = 0; - data[i++] = (unsigned int)ss->tx.pkt_start; - data[i++] = (unsigned int)ss->tx.pkt_done; - data[i++] = (unsigned int)ss->tx.req; - data[i++] = (unsigned int)ss->tx.done; - data[i++] = (unsigned int)ss->rx_small.cnt; - data[i++] = (unsigned int)ss->rx_big.cnt; - data[i++] = (unsigned int)ss->tx.wake_queue; - data[i++] = (unsigned int)ss->tx.stop_queue; - data[i++] = (unsigned int)ss->tx.linearized; - data[i++] = ss->rx_done.lro_mgr.stats.aggregated; - data[i++] = ss->rx_done.lro_mgr.stats.flushed; - if (ss->rx_done.lro_mgr.stats.flushed) - data[i++] = ss->rx_done.lro_mgr.stats.aggregated / - ss->rx_done.lro_mgr.stats.flushed; - else - data[i++] = 0; - data[i++] = ss->rx_done.lro_mgr.stats.no_desc; + for (slice = 0; slice < mgp->num_slices; slice++) { + ss = &mgp->ss[slice]; + data[i++] = slice; + data[i++] = (unsigned int)ss->tx.pkt_start; + data[i++] = (unsigned int)ss->tx.pkt_done; + data[i++] = (unsigned int)ss->tx.req; + data[i++] = (unsigned int)ss->tx.done; + data[i++] = (unsigned int)ss->rx_small.cnt; + data[i++] = (unsigned int)ss->rx_big.cnt; + data[i++] = (unsigned int)ss->tx.wake_queue; + data[i++] = (unsigned int)ss->tx.stop_queue; + data[i++] = (unsigned int)ss->tx.linearized; + data[i++] = ss->rx_done.lro_mgr.stats.aggregated; + data[i++] = ss->rx_done.lro_mgr.stats.flushed; + if (ss->rx_done.lro_mgr.stats.flushed) + data[i++] = ss->rx_done.lro_mgr.stats.aggregated / + ss->rx_done.lro_mgr.stats.flushed; + else + data[i++] = 0; + data[i++] = ss->rx_done.lro_mgr.stats.no_desc; + } } static void myri10ge_set_msglevel(struct net_device *netdev, u32 value) @@ -1645,12 +1857,15 @@ static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss) struct net_device *dev = mgp->dev; int tx_ring_size, rx_ring_size; int tx_ring_entries, rx_ring_entries; - int i, status; + int i, slice, status; size_t bytes; /* get ring sizes */ + slice = ss - mgp->ss; + cmd.data0 = slice; status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_RING_SIZE, &cmd, 0); tx_ring_size = cmd.data0; + cmd.data0 = slice; status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_RX_RING_SIZE, &cmd, 0); if (status != 0) return status; @@ -1715,15 +1930,17 @@ static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss) mgp->small_bytes + MXGEFW_PAD, 0); if (ss->rx_small.fill_cnt < ss->rx_small.mask + 1) { - printk(KERN_ERR "myri10ge: %s: alloced only %d small bufs\n", - dev->name, ss->rx_small.fill_cnt); + printk(KERN_ERR + "myri10ge: %s:slice-%d: alloced only %d small bufs\n", + dev->name, slice, ss->rx_small.fill_cnt); goto abort_with_rx_small_ring; } myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 0); if (ss->rx_big.fill_cnt < ss->rx_big.mask + 1) { - printk(KERN_ERR "myri10ge: %s: alloced only %d big bufs\n", - dev->name, ss->rx_big.fill_cnt); + printk(KERN_ERR + "myri10ge: %s:slice-%d: alloced only %d big bufs\n", + dev->name, slice, ss->rx_big.fill_cnt); goto abort_with_rx_big_ring; } @@ -1775,6 +1992,10 @@ static void myri10ge_free_rings(struct myri10ge_slice_state *ss) struct myri10ge_tx_buf *tx; int i, len, idx; + /* If not allocated, skip it */ + if (ss->tx.req_list == NULL) + return; + for (i = ss->rx_big.cnt; i < ss->rx_big.fill_cnt; i++) { idx = i & ss->rx_big.mask; if (i == ss->rx_big.fill_cnt - 1) @@ -1837,25 +2058,67 @@ static void myri10ge_free_rings(struct myri10ge_slice_state *ss) static int myri10ge_request_irq(struct myri10ge_priv *mgp) { struct pci_dev *pdev = mgp->pdev; + struct myri10ge_slice_state *ss; + struct net_device *netdev = mgp->dev; + int i; int status; + mgp->msi_enabled = 0; + mgp->msix_enabled = 0; + status = 0; if (myri10ge_msi) { - status = pci_enable_msi(pdev); - if (status != 0) - dev_err(&pdev->dev, - "Error %d setting up MSI; falling back to xPIC\n", - status); - else - mgp->msi_enabled = 1; - } else { - mgp->msi_enabled = 0; + if (mgp->num_slices > 1) { + status = + pci_enable_msix(pdev, mgp->msix_vectors, + mgp->num_slices); + if (status == 0) { + mgp->msix_enabled = 1; + } else { + dev_err(&pdev->dev, + "Error %d setting up MSI-X\n", status); + return status; + } + } + if (mgp->msix_enabled == 0) { + status = pci_enable_msi(pdev); + if (status != 0) { + dev_err(&pdev->dev, + "Error %d setting up MSI; falling back to xPIC\n", + status); + } else { + mgp->msi_enabled = 1; + } + } } - status = request_irq(pdev->irq, myri10ge_intr, IRQF_SHARED, - mgp->dev->name, mgp); - if (status != 0) { - dev_err(&pdev->dev, "failed to allocate IRQ\n"); - if (mgp->msi_enabled) - pci_disable_msi(pdev); + if (mgp->msix_enabled) { + for (i = 0; i < mgp->num_slices; i++) { + ss = &mgp->ss[i]; + snprintf(ss->irq_desc, sizeof(ss->irq_desc), + "%s:slice-%d", netdev->name, i); + status = request_irq(mgp->msix_vectors[i].vector, + myri10ge_intr, 0, ss->irq_desc, + ss); + if (status != 0) { + dev_err(&pdev->dev, + "slice %d failed to allocate IRQ\n", i); + i--; + while (i >= 0) { + free_irq(mgp->msix_vectors[i].vector, + &mgp->ss[i]); + i--; + } + pci_disable_msix(pdev); + return status; + } + } + } else { + status = request_irq(pdev->irq, myri10ge_intr, IRQF_SHARED, + mgp->dev->name, &mgp->ss[0]); + if (status != 0) { + dev_err(&pdev->dev, "failed to allocate IRQ\n"); + if (mgp->msi_enabled) + pci_disable_msi(pdev); + } } return status; } @@ -1863,10 +2126,18 @@ static int myri10ge_request_irq(struct myri10ge_priv *mgp) static void myri10ge_free_irq(struct myri10ge_priv *mgp) { struct pci_dev *pdev = mgp->pdev; + int i; - free_irq(pdev->irq, mgp); + if (mgp->msix_enabled) { + for (i = 0; i < mgp->num_slices; i++) + free_irq(mgp->msix_vectors[i].vector, &mgp->ss[i]); + } else { + free_irq(pdev->irq, &mgp->ss[0]); + } if (mgp->msi_enabled) pci_disable_msi(pdev); + if (mgp->msix_enabled) + pci_disable_msix(pdev); } static int @@ -1928,12 +2199,82 @@ myri10ge_get_frag_header(struct skb_frag_struct *frag, void **mac_hdr, return 0; } +static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice) +{ + struct myri10ge_cmd cmd; + struct myri10ge_slice_state *ss; + int status; + + ss = &mgp->ss[slice]; + cmd.data0 = 0; /* single slice for now */ + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0); + ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *) + (mgp->sram + cmd.data0); + + cmd.data0 = slice; + status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, + &cmd, 0); + ss->rx_small.lanai = (struct mcp_kreq_ether_recv __iomem *) + (mgp->sram + cmd.data0); + + cmd.data0 = slice; + status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0); + ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *) + (mgp->sram + cmd.data0); + + if (myri10ge_wcfifo && mgp->wc_enabled) { + ss->tx.wc_fifo = (u8 __iomem *) + mgp->sram + MXGEFW_ETH_SEND_4 + 64 * slice; + ss->rx_small.wc_fifo = (u8 __iomem *) + mgp->sram + MXGEFW_ETH_RECV_SMALL + 64 * slice; + ss->rx_big.wc_fifo = (u8 __iomem *) + mgp->sram + MXGEFW_ETH_RECV_BIG + 64 * slice; + } else { + ss->tx.wc_fifo = NULL; + ss->rx_small.wc_fifo = NULL; + ss->rx_big.wc_fifo = NULL; + } + return status; + +} + +static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice) +{ + struct myri10ge_cmd cmd; + struct myri10ge_slice_state *ss; + int status; + + ss = &mgp->ss[slice]; + cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus); + cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus); + cmd.data2 = sizeof(struct mcp_irq_data); + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0); + if (status == -ENOSYS) { + dma_addr_t bus = ss->fw_stats_bus; + if (slice != 0) + return -EINVAL; + bus += offsetof(struct mcp_irq_data, send_done_count); + cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus); + cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus); + status = myri10ge_send_cmd(mgp, + MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, + &cmd, 0); + /* Firmware cannot support multicast without STATS_DMA_V2 */ + mgp->fw_multicast_support = 0; + } else { + mgp->fw_multicast_support = 1; + } + return 0; +} + static int myri10ge_open(struct net_device *dev) { + struct myri10ge_slice_state *ss; struct myri10ge_priv *mgp = netdev_priv(dev); struct myri10ge_cmd cmd; + int i, status, big_pow2, slice; + u8 *itable; struct net_lro_mgr *lro_mgr; - int status, big_pow2; if (mgp->running != MYRI10GE_ETH_STOPPED) return -EBUSY; @@ -1945,6 +2286,48 @@ static int myri10ge_open(struct net_device *dev) goto abort_with_nothing; } + if (mgp->num_slices > 1) { + cmd.data0 = mgp->num_slices; + cmd.data1 = 1; /* use MSI-X */ + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, + &cmd, 0); + if (status != 0) { + printk(KERN_ERR + "myri10ge: %s: failed to set number of slices\n", + dev->name); + goto abort_with_nothing; + } + /* setup the indirection table */ + cmd.data0 = mgp->num_slices; + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_TABLE_SIZE, + &cmd, 0); + + status |= myri10ge_send_cmd(mgp, + MXGEFW_CMD_GET_RSS_TABLE_OFFSET, + &cmd, 0); + if (status != 0) { + printk(KERN_ERR + "myri10ge: %s: failed to setup rss tables\n", + dev->name); + } + + /* just enable an identity mapping */ + itable = mgp->sram + cmd.data0; + for (i = 0; i < mgp->num_slices; i++) + __raw_writeb(i, &itable[i]); + + cmd.data0 = 1; + cmd.data1 = myri10ge_rss_hash; + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_RSS_ENABLE, + &cmd, 0); + if (status != 0) { + printk(KERN_ERR + "myri10ge: %s: failed to enable slices\n", + dev->name); + goto abort_with_nothing; + } + } + status = myri10ge_request_irq(mgp); if (status != 0) goto abort_with_nothing; @@ -1968,41 +2351,6 @@ static int myri10ge_open(struct net_device *dev) if (myri10ge_small_bytes > 0) mgp->small_bytes = myri10ge_small_bytes; - /* get the lanai pointers to the send and receive rings */ - - status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0); - mgp->ss.tx.lanai = - (struct mcp_kreq_ether_send __iomem *)(mgp->sram + cmd.data0); - - status |= - myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, &cmd, 0); - mgp->ss.rx_small.lanai = - (struct mcp_kreq_ether_recv __iomem *)(mgp->sram + cmd.data0); - - status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_BIG_RX_OFFSET, &cmd, 0); - mgp->ss.rx_big.lanai = - (struct mcp_kreq_ether_recv __iomem *)(mgp->sram + cmd.data0); - - if (status != 0) { - printk(KERN_ERR - "myri10ge: %s: failed to get ring sizes or locations\n", - dev->name); - mgp->running = MYRI10GE_ETH_STOPPED; - goto abort_with_irq; - } - - if (myri10ge_wcfifo && mgp->wc_enabled) { - mgp->ss.tx.wc_fifo = (u8 __iomem *) mgp->sram + MXGEFW_ETH_SEND_4; - mgp->ss.rx_small.wc_fifo = - (u8 __iomem *) mgp->sram + MXGEFW_ETH_RECV_SMALL; - mgp->ss.rx_big.wc_fifo = - (u8 __iomem *) mgp->sram + MXGEFW_ETH_RECV_BIG; - } else { - mgp->ss.tx.wc_fifo = NULL; - mgp->ss.rx_small.wc_fifo = NULL; - mgp->ss.rx_big.wc_fifo = NULL; - } - /* Firmware needs the big buff size as a power of 2. Lie and * tell him the buffer is larger, because we only use 1 * buffer/pkt, and the mtu will prevent overruns. @@ -2017,9 +2365,44 @@ static int myri10ge_open(struct net_device *dev) mgp->big_bytes = big_pow2; } - status = myri10ge_allocate_rings(&mgp->ss); - if (status != 0) - goto abort_with_irq; + /* setup the per-slice data structures */ + for (slice = 0; slice < mgp->num_slices; slice++) { + ss = &mgp->ss[slice]; + + status = myri10ge_get_txrx(mgp, slice); + if (status != 0) { + printk(KERN_ERR + "myri10ge: %s: failed to get ring sizes or locations\n", + dev->name); + goto abort_with_rings; + } + status = myri10ge_allocate_rings(ss); + if (status != 0) + goto abort_with_rings; + if (slice == 0) + status = myri10ge_set_stats(mgp, slice); + if (status) { + printk(KERN_ERR + "myri10ge: %s: Couldn't set stats DMA\n", + dev->name); + goto abort_with_rings; + } + + lro_mgr = &ss->rx_done.lro_mgr; + lro_mgr->dev = dev; + lro_mgr->features = LRO_F_NAPI; + lro_mgr->ip_summed = CHECKSUM_COMPLETE; + lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; + lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS; + lro_mgr->lro_arr = ss->rx_done.lro_desc; + lro_mgr->get_frag_header = myri10ge_get_frag_header; + lro_mgr->max_aggr = myri10ge_lro_max_pkts; + if (lro_mgr->max_aggr > MAX_SKB_FRAGS) + lro_mgr->max_aggr = MAX_SKB_FRAGS; + + /* must happen prior to any irq */ + napi_enable(&(ss)->napi); + } /* now give firmware buffers sizes, and MTU */ cmd.data0 = dev->mtu + ETH_HLEN + VLAN_HLEN; @@ -2036,25 +2419,15 @@ static int myri10ge_open(struct net_device *dev) goto abort_with_rings; } - cmd.data0 = MYRI10GE_LOWPART_TO_U32(mgp->ss.fw_stats_bus); - cmd.data1 = MYRI10GE_HIGHPART_TO_U32(mgp->ss.fw_stats_bus); - cmd.data2 = sizeof(struct mcp_irq_data); - status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0); - if (status == -ENOSYS) { - dma_addr_t bus = mgp->ss.fw_stats_bus; - bus += offsetof(struct mcp_irq_data, send_done_count); - cmd.data0 = MYRI10GE_LOWPART_TO_U32(bus); - cmd.data1 = MYRI10GE_HIGHPART_TO_U32(bus); - status = myri10ge_send_cmd(mgp, - MXGEFW_CMD_SET_STATS_DMA_OBSOLETE, - &cmd, 0); - /* Firmware cannot support multicast without STATS_DMA_V2 */ - mgp->fw_multicast_support = 0; - } else { - mgp->fw_multicast_support = 1; - } - if (status) { - printk(KERN_ERR "myri10ge: %s: Couldn't set stats DMA\n", + /* + * Set Linux style TSO mode; this is needed only on newer + * firmware versions. Older versions default to Linux + * style TSO + */ + cmd.data0 = 0; + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_TSO_MODE, &cmd, 0); + if (status && status != -ENOSYS) { + printk(KERN_ERR "myri10ge: %s: Couldn't set TSO mode\n", dev->name); goto abort_with_rings; } @@ -2062,21 +2435,6 @@ static int myri10ge_open(struct net_device *dev) mgp->link_state = ~0U; mgp->rdma_tags_available = 15; - lro_mgr = &mgp->ss.rx_done.lro_mgr; - lro_mgr->dev = dev; - lro_mgr->features = LRO_F_NAPI; - lro_mgr->ip_summed = CHECKSUM_COMPLETE; - lro_mgr->ip_summed_aggr = CHECKSUM_UNNECESSARY; - lro_mgr->max_desc = MYRI10GE_MAX_LRO_DESCRIPTORS; - lro_mgr->lro_arr = mgp->ss.rx_done.lro_desc; - lro_mgr->get_frag_header = myri10ge_get_frag_header; - lro_mgr->max_aggr = myri10ge_lro_max_pkts; - lro_mgr->frag_align_pad = 2; - if (lro_mgr->max_aggr > MAX_SKB_FRAGS) - lro_mgr->max_aggr = MAX_SKB_FRAGS; - - napi_enable(&mgp->ss.napi); /* must happen prior to any irq */ - status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0); if (status) { printk(KERN_ERR "myri10ge: %s: Couldn't bring up link\n", @@ -2084,8 +2442,6 @@ static int myri10ge_open(struct net_device *dev) goto abort_with_rings; } - mgp->ss.tx.wake_queue = 0; - mgp->ss.tx.stop_queue = 0; mgp->running = MYRI10GE_ETH_RUNNING; mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ; add_timer(&mgp->watchdog_timer); @@ -2093,9 +2449,9 @@ static int myri10ge_open(struct net_device *dev) return 0; abort_with_rings: - myri10ge_free_rings(&mgp->ss); + for (i = 0; i < mgp->num_slices; i++) + myri10ge_free_rings(&mgp->ss[i]); -abort_with_irq: myri10ge_free_irq(mgp); abort_with_nothing: @@ -2108,16 +2464,19 @@ static int myri10ge_close(struct net_device *dev) struct myri10ge_priv *mgp = netdev_priv(dev); struct myri10ge_cmd cmd; int status, old_down_cnt; + int i; if (mgp->running != MYRI10GE_ETH_RUNNING) return 0; - if (mgp->ss.tx.req_bytes == NULL) + if (mgp->ss[0].tx.req_bytes == NULL) return 0; del_timer_sync(&mgp->watchdog_timer); mgp->running = MYRI10GE_ETH_STOPPING; - napi_disable(&mgp->ss.napi); + for (i = 0; i < mgp->num_slices; i++) { + napi_disable(&mgp->ss[i].napi); + } netif_carrier_off(dev); netif_stop_queue(dev); old_down_cnt = mgp->down_cnt; @@ -2133,7 +2492,8 @@ static int myri10ge_close(struct net_device *dev) netif_tx_disable(dev); myri10ge_free_irq(mgp); - myri10ge_free_rings(&mgp->ss); + for (i = 0; i < mgp->num_slices; i++) + myri10ge_free_rings(&mgp->ss[i]); mgp->running = MYRI10GE_ETH_STOPPED; return 0; @@ -2254,7 +2614,7 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev) u8 flags, odd_flag; /* always transmit through slot 0 */ - ss = &mgp->ss; + ss = mgp->ss; tx = &ss->tx; again: req = tx->req_list; @@ -2559,7 +2919,21 @@ drop: static struct net_device_stats *myri10ge_get_stats(struct net_device *dev) { struct myri10ge_priv *mgp = netdev_priv(dev); - return &mgp->stats; + struct myri10ge_slice_netstats *slice_stats; + struct net_device_stats *stats = &mgp->stats; + int i; + + memset(stats, 0, sizeof(*stats)); + for (i = 0; i < mgp->num_slices; i++) { + slice_stats = &mgp->ss[i].stats; + stats->rx_packets += slice_stats->rx_packets; + stats->tx_packets += slice_stats->tx_packets; + stats->rx_bytes += slice_stats->rx_bytes; + stats->tx_bytes += slice_stats->tx_bytes; + stats->rx_dropped += slice_stats->rx_dropped; + stats->tx_dropped += slice_stats->tx_dropped; + } + return stats; } static void myri10ge_set_multicast_list(struct net_device *dev) @@ -2770,10 +3144,10 @@ static void myri10ge_enable_ecrc(struct myri10ge_priv *mgp) * * If the driver can neither enable ECRC nor verify that it has * already been enabled, then it must use a firmware image which works - * around unaligned completion packets (myri10ge_ethp_z8e.dat), and it + * around unaligned completion packets (myri10ge_rss_ethp_z8e.dat), and it * should also ensure that it never gives the device a Read-DMA which is * larger than 2KB by setting the tx_boundary to 2KB. If ECRC is - * enabled, then the driver should use the aligned (myri10ge_eth_z8e.dat) + * enabled, then the driver should use the aligned (myri10ge_rss_eth_z8e.dat) * firmware image, and set tx_boundary to 4KB. */ @@ -2802,7 +3176,7 @@ static void myri10ge_firmware_probe(struct myri10ge_priv *mgp) * completions) in order to see if it works on this host. */ mgp->fw_name = myri10ge_fw_aligned; - status = myri10ge_load_firmware(mgp); + status = myri10ge_load_firmware(mgp, 1); if (status != 0) { goto abort; } @@ -2983,6 +3357,7 @@ static void myri10ge_watchdog(struct work_struct *work) struct myri10ge_tx_buf *tx; u32 reboot; int status; + int i; u16 cmd, vendor; mgp->watchdog_resets++; @@ -3030,20 +3405,26 @@ static void myri10ge_watchdog(struct work_struct *work) printk(KERN_ERR "myri10ge: %s: device timeout, resetting\n", mgp->dev->name); - tx = &mgp->ss.tx; - printk(KERN_INFO "myri10ge: %s: %d %d %d %d %d\n", - mgp->dev->name, tx->req, tx->done, - tx->pkt_start, tx->pkt_done, - (int)ntohl(mgp->ss.fw_stats->send_done_count)); - msleep(2000); - printk(KERN_INFO "myri10ge: %s: %d %d %d %d %d\n", - mgp->dev->name, tx->req, tx->done, - tx->pkt_start, tx->pkt_done, - (int)ntohl(mgp->ss.fw_stats->send_done_count)); + for (i = 0; i < mgp->num_slices; i++) { + tx = &mgp->ss[i].tx; + printk(KERN_INFO + "myri10ge: %s: (%d): %d %d %d %d %d\n", + mgp->dev->name, i, tx->req, tx->done, + tx->pkt_start, tx->pkt_done, + (int)ntohl(mgp->ss[i].fw_stats-> + send_done_count)); + msleep(2000); + printk(KERN_INFO + "myri10ge: %s: (%d): %d %d %d %d %d\n", + mgp->dev->name, i, tx->req, tx->done, + tx->pkt_start, tx->pkt_done, + (int)ntohl(mgp->ss[i].fw_stats-> + send_done_count)); + } } rtnl_lock(); myri10ge_close(mgp->dev); - status = myri10ge_load_firmware(mgp); + status = myri10ge_load_firmware(mgp, 1); if (status != 0) printk(KERN_ERR "myri10ge: %s: failed to load firmware\n", mgp->dev->name); @@ -3063,47 +3444,241 @@ static void myri10ge_watchdog_timer(unsigned long arg) { struct myri10ge_priv *mgp; struct myri10ge_slice_state *ss; + int i, reset_needed; u32 rx_pause_cnt; mgp = (struct myri10ge_priv *)arg; - rx_pause_cnt = ntohl(mgp->ss.fw_stats->dropped_pause); + rx_pause_cnt = ntohl(mgp->ss[0].fw_stats->dropped_pause); + for (i = 0, reset_needed = 0; + i < mgp->num_slices && reset_needed == 0; ++i) { + + ss = &mgp->ss[i]; + if (ss->rx_small.watchdog_needed) { + myri10ge_alloc_rx_pages(mgp, &ss->rx_small, + mgp->small_bytes + MXGEFW_PAD, + 1); + if (ss->rx_small.fill_cnt - ss->rx_small.cnt >= + myri10ge_fill_thresh) + ss->rx_small.watchdog_needed = 0; + } + if (ss->rx_big.watchdog_needed) { + myri10ge_alloc_rx_pages(mgp, &ss->rx_big, + mgp->big_bytes, 1); + if (ss->rx_big.fill_cnt - ss->rx_big.cnt >= + myri10ge_fill_thresh) + ss->rx_big.watchdog_needed = 0; + } - ss = &mgp->ss; - if (ss->rx_small.watchdog_needed) { - myri10ge_alloc_rx_pages(mgp, &ss->rx_small, - mgp->small_bytes + MXGEFW_PAD, 1); - if (ss->rx_small.fill_cnt - ss->rx_small.cnt >= - myri10ge_fill_thresh) - ss->rx_small.watchdog_needed = 0; - } - if (ss->rx_big.watchdog_needed) { - myri10ge_alloc_rx_pages(mgp, &ss->rx_big, mgp->big_bytes, 1); - if (ss->rx_big.fill_cnt - ss->rx_big.cnt >= - myri10ge_fill_thresh) - ss->rx_big.watchdog_needed = 0; - } - - if (ss->tx.req != ss->tx.done && - ss->tx.done == ss->watchdog_tx_done && - ss->watchdog_tx_req != ss->watchdog_tx_done) { - /* nic seems like it might be stuck.. */ - if (rx_pause_cnt != mgp->watchdog_pause) { - if (net_ratelimit()) - printk(KERN_WARNING "myri10ge %s:" - "TX paused, check link partner\n", - mgp->dev->name); - } else { - schedule_work(&mgp->watchdog_work); - return; + if (ss->tx.req != ss->tx.done && + ss->tx.done == ss->watchdog_tx_done && + ss->watchdog_tx_req != ss->watchdog_tx_done) { + /* nic seems like it might be stuck.. */ + if (rx_pause_cnt != mgp->watchdog_pause) { + if (net_ratelimit()) + printk(KERN_WARNING "myri10ge %s:" + "TX paused, check link partner\n", + mgp->dev->name); + } else { + reset_needed = 1; + } } + ss->watchdog_tx_done = ss->tx.done; + ss->watchdog_tx_req = ss->tx.req; } - /* rearm timer */ - mod_timer(&mgp->watchdog_timer, - jiffies + myri10ge_watchdog_timeout * HZ); - ss->watchdog_tx_done = ss->tx.done; - ss->watchdog_tx_req = ss->tx.req; mgp->watchdog_pause = rx_pause_cnt; + + if (reset_needed) { + schedule_work(&mgp->watchdog_work); + } else { + /* rearm timer */ + mod_timer(&mgp->watchdog_timer, + jiffies + myri10ge_watchdog_timeout * HZ); + } +} + +static void myri10ge_free_slices(struct myri10ge_priv *mgp) +{ + struct myri10ge_slice_state *ss; + struct pci_dev *pdev = mgp->pdev; + size_t bytes; + int i; + + if (mgp->ss == NULL) + return; + + for (i = 0; i < mgp->num_slices; i++) { + ss = &mgp->ss[i]; + if (ss->rx_done.entry != NULL) { + bytes = mgp->max_intr_slots * + sizeof(*ss->rx_done.entry); + dma_free_coherent(&pdev->dev, bytes, + ss->rx_done.entry, ss->rx_done.bus); + ss->rx_done.entry = NULL; + } + if (ss->fw_stats != NULL) { + bytes = sizeof(*ss->fw_stats); + dma_free_coherent(&pdev->dev, bytes, + ss->fw_stats, ss->fw_stats_bus); + ss->fw_stats = NULL; + } + } + kfree(mgp->ss); + mgp->ss = NULL; +} + +static int myri10ge_alloc_slices(struct myri10ge_priv *mgp) +{ + struct myri10ge_slice_state *ss; + struct pci_dev *pdev = mgp->pdev; + size_t bytes; + int i; + + bytes = sizeof(*mgp->ss) * mgp->num_slices; + mgp->ss = kzalloc(bytes, GFP_KERNEL); + if (mgp->ss == NULL) { + return -ENOMEM; + } + + for (i = 0; i < mgp->num_slices; i++) { + ss = &mgp->ss[i]; + bytes = mgp->max_intr_slots * sizeof(*ss->rx_done.entry); + ss->rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes, + &ss->rx_done.bus, + GFP_KERNEL); + if (ss->rx_done.entry == NULL) + goto abort; + memset(ss->rx_done.entry, 0, bytes); + bytes = sizeof(*ss->fw_stats); + ss->fw_stats = dma_alloc_coherent(&pdev->dev, bytes, + &ss->fw_stats_bus, + GFP_KERNEL); + if (ss->fw_stats == NULL) + goto abort; + ss->mgp = mgp; + ss->dev = mgp->dev; + netif_napi_add(ss->dev, &ss->napi, myri10ge_poll, + myri10ge_napi_weight); + } + return 0; +abort: + myri10ge_free_slices(mgp); + return -ENOMEM; +} + +/* + * This function determines the number of slices supported. + * The number slices is the minumum of the number of CPUS, + * the number of MSI-X irqs supported, the number of slices + * supported by the firmware + */ +static void myri10ge_probe_slices(struct myri10ge_priv *mgp) +{ + struct myri10ge_cmd cmd; + struct pci_dev *pdev = mgp->pdev; + char *old_fw; + int i, status, ncpus, msix_cap; + + mgp->num_slices = 1; + msix_cap = pci_find_capability(pdev, PCI_CAP_ID_MSIX); + ncpus = num_online_cpus(); + + if (myri10ge_max_slices == 1 || msix_cap == 0 || + (myri10ge_max_slices == -1 && ncpus < 2)) + return; + + /* try to load the slice aware rss firmware */ + old_fw = mgp->fw_name; + if (old_fw == myri10ge_fw_aligned) + mgp->fw_name = myri10ge_fw_rss_aligned; + else + mgp->fw_name = myri10ge_fw_rss_unaligned; + status = myri10ge_load_firmware(mgp, 0); + if (status != 0) { + dev_info(&pdev->dev, "Rss firmware not found\n"); + return; + } + + /* hit the board with a reset to ensure it is alive */ + memset(&cmd, 0, sizeof(cmd)); + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_RESET, &cmd, 0); + if (status != 0) { + dev_err(&mgp->pdev->dev, "failed reset\n"); + goto abort_with_fw; + return; + } + + mgp->max_intr_slots = cmd.data0 / sizeof(struct mcp_slot); + + /* tell it the size of the interrupt queues */ + cmd.data0 = mgp->max_intr_slots * sizeof(struct mcp_slot); + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_INTRQ_SIZE, &cmd, 0); + if (status != 0) { + dev_err(&mgp->pdev->dev, "failed MXGEFW_CMD_SET_INTRQ_SIZE\n"); + goto abort_with_fw; + } + + /* ask the maximum number of slices it supports */ + status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_RSS_QUEUES, &cmd, 0); + if (status != 0) + goto abort_with_fw; + else + mgp->num_slices = cmd.data0; + + /* Only allow multiple slices if MSI-X is usable */ + if (!myri10ge_msi) { + goto abort_with_fw; + } + + /* if the admin did not specify a limit to how many + * slices we should use, cap it automatically to the + * number of CPUs currently online */ + if (myri10ge_max_slices == -1) + myri10ge_max_slices = ncpus; + + if (mgp->num_slices > myri10ge_max_slices) + mgp->num_slices = myri10ge_max_slices; + + /* Now try to allocate as many MSI-X vectors as we have + * slices. We give up on MSI-X if we can only get a single + * vector. */ + + mgp->msix_vectors = kzalloc(mgp->num_slices * + sizeof(*mgp->msix_vectors), GFP_KERNEL); + if (mgp->msix_vectors == NULL) + goto disable_msix; + for (i = 0; i < mgp->num_slices; i++) { + mgp->msix_vectors[i].entry = i; + } + + while (mgp->num_slices > 1) { + /* make sure it is a power of two */ + while (!is_power_of_2(mgp->num_slices)) + mgp->num_slices--; + if (mgp->num_slices == 1) + goto disable_msix; + status = pci_enable_msix(pdev, mgp->msix_vectors, + mgp->num_slices); + if (status == 0) { + pci_disable_msix(pdev); + return; + } + if (status > 0) + mgp->num_slices = status; + else + goto disable_msix; + } + +disable_msix: + if (mgp->msix_vectors != NULL) { + kfree(mgp->msix_vectors); + mgp->msix_vectors = NULL; + } + +abort_with_fw: + mgp->num_slices = 1; + mgp->fw_name = old_fw; + myri10ge_load_firmware(mgp, 0); } static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) @@ -3111,7 +3686,6 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct net_device *netdev; struct myri10ge_priv *mgp; struct device *dev = &pdev->dev; - size_t bytes; int i; int status = -ENXIO; int dac_enabled; @@ -3126,7 +3700,6 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mgp = netdev_priv(netdev); mgp->dev = netdev; - netif_napi_add(netdev, &mgp->ss.napi, myri10ge_poll, myri10ge_napi_weight); mgp->pdev = pdev; mgp->csum_flag = MXGEFW_FLAGS_CKSUM; mgp->pause = myri10ge_flow_control; @@ -3172,11 +3745,6 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (mgp->cmd == NULL) goto abort_with_netdev; - mgp->ss.fw_stats = dma_alloc_coherent(&pdev->dev, sizeof(*mgp->ss.fw_stats), - &mgp->ss.fw_stats_bus, GFP_KERNEL); - if (mgp->ss.fw_stats == NULL) - goto abort_with_cmd; - mgp->board_span = pci_resource_len(pdev, 0); mgp->iomem_base = pci_resource_start(pdev, 0); mgp->mtrr = -1; @@ -3213,28 +3781,28 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) for (i = 0; i < ETH_ALEN; i++) netdev->dev_addr[i] = mgp->mac_addr[i]; - /* allocate rx done ring */ - bytes = mgp->max_intr_slots * sizeof(*mgp->ss.rx_done.entry); - mgp->ss.rx_done.entry = dma_alloc_coherent(&pdev->dev, bytes, - &mgp->ss.rx_done.bus, GFP_KERNEL); - if (mgp->ss.rx_done.entry == NULL) - goto abort_with_ioremap; - memset(mgp->ss.rx_done.entry, 0, bytes); - myri10ge_select_firmware(mgp); - status = myri10ge_load_firmware(mgp); + status = myri10ge_load_firmware(mgp, 1); if (status != 0) { dev_err(&pdev->dev, "failed to load firmware\n"); - goto abort_with_rx_done; + goto abort_with_ioremap; + } + myri10ge_probe_slices(mgp); + status = myri10ge_alloc_slices(mgp); + if (status != 0) { + dev_err(&pdev->dev, "failed to alloc slice state\n"); + goto abort_with_firmware; } status = myri10ge_reset(mgp); if (status != 0) { dev_err(&pdev->dev, "failed reset\n"); - goto abort_with_firmware; + goto abort_with_slices; } - +#ifdef CONFIG_DCA + myri10ge_setup_dca(mgp); +#endif pci_set_drvdata(pdev, mgp); if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU) myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN; @@ -3277,24 +3845,27 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "register_netdev failed: %d\n", status); goto abort_with_state; } - dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, WC %s\n", - (mgp->msi_enabled ? "MSI" : "xPIC"), - netdev->irq, mgp->tx_boundary, mgp->fw_name, - (mgp->wc_enabled ? "Enabled" : "Disabled")); + if (mgp->msix_enabled) + dev_info(dev, "%d MSI-X IRQs, tx bndry %d, fw %s, WC %s\n", + mgp->num_slices, mgp->tx_boundary, mgp->fw_name, + (mgp->wc_enabled ? "Enabled" : "Disabled")); + else + dev_info(dev, "%s IRQ %d, tx bndry %d, fw %s, WC %s\n", + mgp->msi_enabled ? "MSI" : "xPIC", + netdev->irq, mgp->tx_boundary, mgp->fw_name, + (mgp->wc_enabled ? "Enabled" : "Disabled")); return 0; abort_with_state: pci_restore_state(pdev); +abort_with_slices: + myri10ge_free_slices(mgp); + abort_with_firmware: myri10ge_dummy_rdma(mgp, 0); -abort_with_rx_done: - bytes = mgp->max_intr_slots * sizeof(*mgp->ss.rx_done.entry); - dma_free_coherent(&pdev->dev, bytes, - mgp->ss.rx_done.entry, mgp->ss.rx_done.bus); - abort_with_ioremap: iounmap(mgp->sram); @@ -3303,10 +3874,6 @@ abort_with_wc: if (mgp->mtrr >= 0) mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span); #endif - dma_free_coherent(&pdev->dev, sizeof(*mgp->ss.fw_stats), - mgp->ss.fw_stats, mgp->ss.fw_stats_bus); - -abort_with_cmd: dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), mgp->cmd, mgp->cmd_bus); @@ -3327,7 +3894,6 @@ static void myri10ge_remove(struct pci_dev *pdev) { struct myri10ge_priv *mgp; struct net_device *netdev; - size_t bytes; mgp = pci_get_drvdata(pdev); if (mgp == NULL) @@ -3337,24 +3903,23 @@ static void myri10ge_remove(struct pci_dev *pdev) netdev = mgp->dev; unregister_netdev(netdev); +#ifdef CONFIG_DCA + myri10ge_teardown_dca(mgp); +#endif myri10ge_dummy_rdma(mgp, 0); /* avoid a memory leak */ pci_restore_state(pdev); - bytes = mgp->max_intr_slots * sizeof(*mgp->ss.rx_done.entry); - dma_free_coherent(&pdev->dev, bytes, - mgp->ss.rx_done.entry, mgp->ss.rx_done.bus); - iounmap(mgp->sram); #ifdef CONFIG_MTRR if (mgp->mtrr >= 0) mtrr_del(mgp->mtrr, mgp->iomem_base, mgp->board_span); #endif - dma_free_coherent(&pdev->dev, sizeof(*mgp->ss.fw_stats), - mgp->ss.fw_stats, mgp->ss.fw_stats_bus); - + myri10ge_free_slices(mgp); + if (mgp->msix_vectors != NULL) + kfree(mgp->msix_vectors); dma_free_coherent(&pdev->dev, sizeof(*mgp->cmd), mgp->cmd, mgp->cmd_bus); @@ -3383,10 +3948,42 @@ static struct pci_driver myri10ge_driver = { #endif }; +#ifdef CONFIG_DCA +static int +myri10ge_notify_dca(struct notifier_block *nb, unsigned long event, void *p) +{ + int err = driver_for_each_device(&myri10ge_driver.driver, + NULL, &event, + myri10ge_notify_dca_device); + + if (err) + return NOTIFY_BAD; + return NOTIFY_DONE; +} + +static struct notifier_block myri10ge_dca_notifier = { + .notifier_call = myri10ge_notify_dca, + .next = NULL, + .priority = 0, +}; +#endif /* CONFIG_DCA */ + static __init int myri10ge_init_module(void) { printk(KERN_INFO "%s: Version %s\n", myri10ge_driver.name, MYRI10GE_VERSION_STR); + + if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_SRC_PORT || + myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) { + printk(KERN_ERR + "%s: Illegal rssh hash type %d, defaulting to source port\n", + myri10ge_driver.name, myri10ge_rss_hash); + myri10ge_rss_hash = MXGEFW_RSS_HASH_TYPE_SRC_PORT; + } +#ifdef CONFIG_DCA + dca_register_notify(&myri10ge_dca_notifier); +#endif + return pci_register_driver(&myri10ge_driver); } @@ -3394,6 +3991,9 @@ module_init(myri10ge_init_module); static __exit void myri10ge_cleanup_module(void) { +#ifdef CONFIG_DCA + dca_unregister_notify(&myri10ge_dca_notifier); +#endif pci_unregister_driver(&myri10ge_driver); } diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c index b42c05f84be..ff449619f04 100644 --- a/drivers/net/ns83820.c +++ b/drivers/net/ns83820.c @@ -585,16 +585,13 @@ static inline int rx_refill(struct net_device *ndev, gfp_t gfp) for (i=0; i<NR_RX_DESC; i++) { struct sk_buff *skb; long res; + /* extra 16 bytes for alignment */ - skb = __dev_alloc_skb(REAL_RX_BUF_SIZE+16, gfp); + skb = __netdev_alloc_skb(ndev, REAL_RX_BUF_SIZE+16, gfp); if (unlikely(!skb)) break; - res = (long)skb->data & 0xf; - res = 0x10 - res; - res &= 0xf; - skb_reserve(skb, res); - + skb_reserve(skb, skb->data - PTR_ALIGN(skb->data, 16)); if (gfp != GFP_ATOMIC) spin_lock_irqsave(&dev->rx_info.lock, flags); res = ns83820_add_rx_skb(dev, skb); diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 8f328a03847..a550c9bd126 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -391,7 +391,9 @@ static int fmvj18x_config(struct pcmcia_device *link) cardtype = CONTEC; break; case MANFID_FUJITSU: - if (link->card_id == PRODID_FUJITSU_MBH10302) + if (link->conf.ConfigBase == 0x0fe0) + cardtype = MBH10302; + else if (link->card_id == PRODID_FUJITSU_MBH10302) /* RATOC REX-5588/9822/4886's PRODID are 0004(=MBH10302), but these are MBH10304 based card. */ cardtype = MBH10304; diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index d041f831a18..f6c4698ce73 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -1461,22 +1461,25 @@ static void set_multicast_list(struct net_device *dev) { unsigned int ioaddr = dev->base_addr; + unsigned value; SelectPage(0x42); + value = GetByte(XIRCREG42_SWC1) & 0xC0; + if (dev->flags & IFF_PROMISC) { /* snoop */ - PutByte(XIRCREG42_SWC1, 0x06); /* set MPE and PME */ + PutByte(XIRCREG42_SWC1, value | 0x06); /* set MPE and PME */ } else if (dev->mc_count > 9 || (dev->flags & IFF_ALLMULTI)) { - PutByte(XIRCREG42_SWC1, 0x02); /* set MPE */ + PutByte(XIRCREG42_SWC1, value | 0x02); /* set MPE */ } else if (dev->mc_count) { /* the chip can filter 9 addresses perfectly */ - PutByte(XIRCREG42_SWC1, 0x01); + PutByte(XIRCREG42_SWC1, value | 0x01); SelectPage(0x40); PutByte(XIRCREG40_CMD0, Offline); set_addresses(dev); SelectPage(0x40); PutByte(XIRCREG40_CMD0, EnableRecv | Online); } else { /* standard usage */ - PutByte(XIRCREG42_SWC1, 0x00); + PutByte(XIRCREG42_SWC1, value | 0x00); } SelectPage(0); } @@ -1722,6 +1725,7 @@ do_reset(struct net_device *dev, int full) /* enable receiver and put the mac online */ if (full) { + set_multicast_list(dev); SelectPage(0x40); PutByte(XIRCREG40_CMD0, EnableRecv | Online); } diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c index 81fd85214b9..ca8c0e03740 100644 --- a/drivers/net/pcnet32.c +++ b/drivers/net/pcnet32.c @@ -325,7 +325,7 @@ static int pcnet32_get_regs_len(struct net_device *dev); static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *ptr); static void pcnet32_purge_tx_ring(struct net_device *dev); -static int pcnet32_alloc_ring(struct net_device *dev, char *name); +static int pcnet32_alloc_ring(struct net_device *dev, const char *name); static void pcnet32_free_ring(struct net_device *dev); static void pcnet32_check_media(struct net_device *dev, int verbose); @@ -1983,7 +1983,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev) } /* if any allocation fails, caller must also call pcnet32_free_ring */ -static int pcnet32_alloc_ring(struct net_device *dev, char *name) +static int pcnet32_alloc_ring(struct net_device *dev, const char *name) { struct pcnet32_private *lp = netdev_priv(dev); diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 6bf9e76b0a0..d55932acd88 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -5,7 +5,7 @@ menuconfig PHYLIB tristate "PHY Device support and infrastructure" depends on !S390 - depends on NET_ETHERNET && (BROKEN || !S390) + depends on NET_ETHERNET help Ethernet controllers are usually attached to PHY devices. This option provides infrastructure for @@ -53,7 +53,8 @@ config SMSC_PHY config BROADCOM_PHY tristate "Drivers for Broadcom PHYs" ---help--- - Currently supports the BCM5411, BCM5421 and BCM5461 PHYs. + Currently supports the BCM5411, BCM5421, BCM5461, BCM5464, BCM5481 + and BCM5482 PHYs. config ICPLUS_PHY tristate "Drivers for ICPlus PHYs" @@ -83,4 +84,10 @@ config MDIO_BITBANG If in doubt, say N. +config MDIO_OF_GPIO + tristate "Support for GPIO lib-based bitbanged MDIO buses" + depends on MDIO_BITBANG && OF_GPIO + ---help--- + Supports GPIO lib-based MDIO busses. + endif # PHYLIB diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 5997d6ef702..eee329fa6f5 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -15,3 +15,4 @@ obj-$(CONFIG_ICPLUS_PHY) += icplus.o obj-$(CONFIG_REALTEK_PHY) += realtek.o obj-$(CONFIG_FIXED_PHY) += fixed.o obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o +obj-$(CONFIG_MDIO_OF_GPIO) += mdio-ofgpio.o diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 60c5cfe9691..4b4dc98ad16 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -24,6 +24,12 @@ #define MII_BCM54XX_ESR 0x11 /* BCM54xx extended status register */ #define MII_BCM54XX_ESR_IS 0x1000 /* Interrupt status */ +#define MII_BCM54XX_EXP_DATA 0x15 /* Expansion register data */ +#define MII_BCM54XX_EXP_SEL 0x17 /* Expansion register select */ +#define MII_BCM54XX_EXP_SEL_SSD 0x0e00 /* Secondary SerDes select */ +#define MII_BCM54XX_EXP_SEL_ER 0x0f00 /* Expansion register select */ + +#define MII_BCM54XX_AUX_CTL 0x18 /* Auxiliary control register */ #define MII_BCM54XX_ISR 0x1a /* BCM54xx interrupt status register */ #define MII_BCM54XX_IMR 0x1b /* BCM54xx interrupt mask register */ #define MII_BCM54XX_INT_CRCERR 0x0001 /* CRC error */ @@ -42,10 +48,120 @@ #define MII_BCM54XX_INT_MDIX 0x2000 /* MDIX status change */ #define MII_BCM54XX_INT_PSERR 0x4000 /* Pair swap error */ +#define MII_BCM54XX_SHD 0x1c /* 0x1c shadow registers */ +#define MII_BCM54XX_SHD_WRITE 0x8000 +#define MII_BCM54XX_SHD_VAL(x) ((x & 0x1f) << 10) +#define MII_BCM54XX_SHD_DATA(x) ((x & 0x3ff) << 0) + +/* + * Broadcom LED source encodings. These are used in BCM5461, BCM5481, + * BCM5482, and possibly some others. + */ +#define BCM_LED_SRC_LINKSPD1 0x0 +#define BCM_LED_SRC_LINKSPD2 0x1 +#define BCM_LED_SRC_XMITLED 0x2 +#define BCM_LED_SRC_ACTIVITYLED 0x3 +#define BCM_LED_SRC_FDXLED 0x4 +#define BCM_LED_SRC_SLAVE 0x5 +#define BCM_LED_SRC_INTR 0x6 +#define BCM_LED_SRC_QUALITY 0x7 +#define BCM_LED_SRC_RCVLED 0x8 +#define BCM_LED_SRC_MULTICOLOR1 0xa +#define BCM_LED_SRC_OPENSHORT 0xb +#define BCM_LED_SRC_OFF 0xe /* Tied high */ +#define BCM_LED_SRC_ON 0xf /* Tied low */ + +/* + * BCM5482: Shadow registers + * Shadow values go into bits [14:10] of register 0x1c to select a shadow + * register to access. + */ +#define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ + /* LED3 / ~LINKSPD[2] selector */ +#define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) + /* LED1 / ~LINKSPD[1] selector */ +#define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0) +#define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ +#define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ +#define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ +#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ +#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ + +/* + * BCM5482: Secondary SerDes registers + */ +#define BCM5482_SSD_1000BX_CTL 0x00 /* 1000BASE-X Control */ +#define BCM5482_SSD_1000BX_CTL_PWRDOWN 0x0800 /* Power-down SSD */ +#define BCM5482_SSD_SGMII_SLAVE 0x15 /* SGMII Slave Register */ +#define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ +#define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ + +/* + * Device flags for PHYs that can be configured for different operating + * modes. + */ +#define PHY_BCM_FLAGS_VALID 0x80000000 +#define PHY_BCM_FLAGS_INTF_XAUI 0x00000020 +#define PHY_BCM_FLAGS_INTF_SGMII 0x00000010 +#define PHY_BCM_FLAGS_MODE_1000BX 0x00000002 +#define PHY_BCM_FLAGS_MODE_COPPER 0x00000001 + MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); +/* + * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T + * 0x1c shadow registers. + */ +static int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) +{ + phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); + return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); +} + +static int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, u16 val) +{ + return phy_write(phydev, MII_BCM54XX_SHD, + MII_BCM54XX_SHD_WRITE | + MII_BCM54XX_SHD_VAL(shadow) | + MII_BCM54XX_SHD_DATA(val)); +} + +/* + * Indirect register access functions for the Expansion Registers + * and Secondary SerDes registers (when sec_serdes=1). + */ +static int bcm54xx_exp_read(struct phy_device *phydev, + int sec_serdes, u8 regnum) +{ + int val; + + phy_write(phydev, MII_BCM54XX_EXP_SEL, + (sec_serdes ? MII_BCM54XX_EXP_SEL_SSD : + MII_BCM54XX_EXP_SEL_ER) | + regnum); + val = phy_read(phydev, MII_BCM54XX_EXP_DATA); + phy_write(phydev, MII_BCM54XX_EXP_SEL, regnum); + + return val; +} + +static int bcm54xx_exp_write(struct phy_device *phydev, + int sec_serdes, u8 regnum, u16 val) +{ + int ret; + + phy_write(phydev, MII_BCM54XX_EXP_SEL, + (sec_serdes ? MII_BCM54XX_EXP_SEL_SSD : + MII_BCM54XX_EXP_SEL_ER) | + regnum); + ret = phy_write(phydev, MII_BCM54XX_EXP_DATA, val); + phy_write(phydev, MII_BCM54XX_EXP_SEL, regnum); + + return ret; +} + static int bcm54xx_config_init(struct phy_device *phydev) { int reg, err; @@ -70,6 +186,87 @@ static int bcm54xx_config_init(struct phy_device *phydev) return 0; } +static int bcm5482_config_init(struct phy_device *phydev) +{ + int err, reg; + + err = bcm54xx_config_init(phydev); + + if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) { + /* + * Enable secondary SerDes and its use as an LED source + */ + reg = bcm54xx_shadow_read(phydev, BCM5482_SHD_SSD); + bcm54xx_shadow_write(phydev, BCM5482_SHD_SSD, + reg | + BCM5482_SHD_SSD_LEDM | + BCM5482_SHD_SSD_EN); + + /* + * Enable SGMII slave mode and auto-detection + */ + reg = bcm54xx_exp_read(phydev, 1, BCM5482_SSD_SGMII_SLAVE); + bcm54xx_exp_write(phydev, 1, BCM5482_SSD_SGMII_SLAVE, + reg | + BCM5482_SSD_SGMII_SLAVE_EN | + BCM5482_SSD_SGMII_SLAVE_AD); + + /* + * Disable secondary SerDes powerdown + */ + reg = bcm54xx_exp_read(phydev, 1, BCM5482_SSD_1000BX_CTL); + bcm54xx_exp_write(phydev, 1, BCM5482_SSD_1000BX_CTL, + reg & ~BCM5482_SSD_1000BX_CTL_PWRDOWN); + + /* + * Select 1000BASE-X register set (primary SerDes) + */ + reg = bcm54xx_shadow_read(phydev, BCM5482_SHD_MODE); + bcm54xx_shadow_write(phydev, BCM5482_SHD_MODE, + reg | BCM5482_SHD_MODE_1000BX); + + /* + * LED1=ACTIVITYLED, LED3=LINKSPD[2] + * (Use LED1 as secondary SerDes ACTIVITY LED) + */ + bcm54xx_shadow_write(phydev, BCM5482_SHD_LEDS1, + BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_ACTIVITYLED) | + BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_LINKSPD2)); + + /* + * Auto-negotiation doesn't seem to work quite right + * in this mode, so we disable it and force it to the + * right speed/duplex setting. Only 'link status' + * is important. + */ + phydev->autoneg = AUTONEG_DISABLE; + phydev->speed = SPEED_1000; + phydev->duplex = DUPLEX_FULL; + } + + return err; +} + +static int bcm5482_read_status(struct phy_device *phydev) +{ + int err; + + err = genphy_read_status(phydev); + + if (phydev->dev_flags & PHY_BCM_FLAGS_MODE_1000BX) { + /* + * Only link status matters for 1000Base-X mode, so force + * 1000 Mbit/s full-duplex status + */ + if (phydev->link) { + phydev->speed = SPEED_1000; + phydev->duplex = DUPLEX_FULL; + } + } + + return err; +} + static int bcm54xx_ack_interrupt(struct phy_device *phydev) { int reg; @@ -210,9 +407,9 @@ static struct phy_driver bcm5482_driver = { .name = "Broadcom BCM5482", .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT, - .config_init = bcm54xx_config_init, + .config_init = bcm5482_config_init, .config_aneg = genphy_config_aneg, - .read_status = genphy_read_status, + .read_status = bcm5482_read_status, .ack_interrupt = bcm54xx_ack_interrupt, .config_intr = bcm54xx_config_intr, .driver = { .owner = THIS_MODULE }, diff --git a/drivers/net/phy/mdio-ofgpio.c b/drivers/net/phy/mdio-ofgpio.c new file mode 100644 index 00000000000..7edfc0c3483 --- /dev/null +++ b/drivers/net/phy/mdio-ofgpio.c @@ -0,0 +1,205 @@ +/* + * OpenFirmware GPIO based MDIO bitbang driver. + * + * Copyright (c) 2008 CSE Semaphore Belgium. + * by Laurent Pinchart <laurentp@cse-semaphore.com> + * + * Based on earlier work by + * + * Copyright (c) 2003 Intracom S.A. + * by Pantelis Antoniou <panto@intracom.gr> + * + * 2005 (c) MontaVista Software, Inc. + * Vitaly Bordug <vbordug@ru.mvista.com> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/mdio-bitbang.h> +#include <linux/of_gpio.h> +#include <linux/of_platform.h> + +struct mdio_gpio_info { + struct mdiobb_ctrl ctrl; + int mdc, mdio; +}; + +static void mdio_dir(struct mdiobb_ctrl *ctrl, int dir) +{ + struct mdio_gpio_info *bitbang = + container_of(ctrl, struct mdio_gpio_info, ctrl); + + if (dir) + gpio_direction_output(bitbang->mdio, 1); + else + gpio_direction_input(bitbang->mdio); +} + +static int mdio_read(struct mdiobb_ctrl *ctrl) +{ + struct mdio_gpio_info *bitbang = + container_of(ctrl, struct mdio_gpio_info, ctrl); + + return gpio_get_value(bitbang->mdio); +} + +static void mdio(struct mdiobb_ctrl *ctrl, int what) +{ + struct mdio_gpio_info *bitbang = + container_of(ctrl, struct mdio_gpio_info, ctrl); + + gpio_set_value(bitbang->mdio, what); +} + +static void mdc(struct mdiobb_ctrl *ctrl, int what) +{ + struct mdio_gpio_info *bitbang = + container_of(ctrl, struct mdio_gpio_info, ctrl); + + gpio_set_value(bitbang->mdc, what); +} + +static struct mdiobb_ops mdio_gpio_ops = { + .owner = THIS_MODULE, + .set_mdc = mdc, + .set_mdio_dir = mdio_dir, + .set_mdio_data = mdio, + .get_mdio_data = mdio_read, +}; + +static int __devinit mdio_ofgpio_bitbang_init(struct mii_bus *bus, + struct device_node *np) +{ + struct mdio_gpio_info *bitbang = bus->priv; + + bitbang->mdc = of_get_gpio(np, 0); + bitbang->mdio = of_get_gpio(np, 1); + + if (bitbang->mdc < 0 || bitbang->mdio < 0) + return -ENODEV; + + snprintf(bus->id, MII_BUS_ID_SIZE, "%x", bitbang->mdc); + return 0; +} + +static void __devinit add_phy(struct mii_bus *bus, struct device_node *np) +{ + const u32 *data; + int len, id, irq; + + data = of_get_property(np, "reg", &len); + if (!data || len != 4) + return; + + id = *data; + bus->phy_mask &= ~(1 << id); + + irq = of_irq_to_resource(np, 0, NULL); + if (irq != NO_IRQ) + bus->irq[id] = irq; +} + +static int __devinit mdio_ofgpio_probe(struct of_device *ofdev, + const struct of_device_id *match) +{ + struct device_node *np = NULL; + struct mii_bus *new_bus; + struct mdio_gpio_info *bitbang; + int ret = -ENOMEM; + int i; + + bitbang = kzalloc(sizeof(struct mdio_gpio_info), GFP_KERNEL); + if (!bitbang) + goto out; + + bitbang->ctrl.ops = &mdio_gpio_ops; + + new_bus = alloc_mdio_bitbang(&bitbang->ctrl); + if (!new_bus) + goto out_free_priv; + + new_bus->name = "GPIO Bitbanged MII", + + ret = mdio_ofgpio_bitbang_init(new_bus, ofdev->node); + if (ret) + goto out_free_bus; + + new_bus->phy_mask = ~0; + new_bus->irq = kmalloc(sizeof(int) * PHY_MAX_ADDR, GFP_KERNEL); + if (!new_bus->irq) + goto out_free_bus; + + for (i = 0; i < PHY_MAX_ADDR; i++) + new_bus->irq[i] = -1; + + while ((np = of_get_next_child(ofdev->node, np))) + if (!strcmp(np->type, "ethernet-phy")) + add_phy(new_bus, np); + + new_bus->dev = &ofdev->dev; + dev_set_drvdata(&ofdev->dev, new_bus); + + ret = mdiobus_register(new_bus); + if (ret) + goto out_free_irqs; + + return 0; + +out_free_irqs: + dev_set_drvdata(&ofdev->dev, NULL); + kfree(new_bus->irq); +out_free_bus: + kfree(new_bus); +out_free_priv: + free_mdio_bitbang(new_bus); +out: + return ret; +} + +static int mdio_ofgpio_remove(struct of_device *ofdev) +{ + struct mii_bus *bus = dev_get_drvdata(&ofdev->dev); + struct mdio_gpio_info *bitbang = bus->priv; + + mdiobus_unregister(bus); + free_mdio_bitbang(bus); + dev_set_drvdata(&ofdev->dev, NULL); + kfree(bus->irq); + kfree(bitbang); + kfree(bus); + + return 0; +} + +static struct of_device_id mdio_ofgpio_match[] = { + { + .compatible = "virtual,mdio-gpio", + }, + {}, +}; + +static struct of_platform_driver mdio_ofgpio_driver = { + .name = "mdio-gpio", + .match_table = mdio_ofgpio_match, + .probe = mdio_ofgpio_probe, + .remove = mdio_ofgpio_remove, +}; + +static int mdio_ofgpio_init(void) +{ + return of_register_platform_driver(&mdio_ofgpio_driver); +} + +static void mdio_ofgpio_exit(void) +{ + of_unregister_platform_driver(&mdio_ofgpio_driver); +} + +module_init(mdio_ofgpio_init); +module_exit(mdio_ofgpio_exit); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ac3c01d28fd..16a0e7de588 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -207,6 +207,7 @@ int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id) return 0; } +EXPORT_SYMBOL(get_phy_id); /** * get_phy_device - reads the specified PHY device and returns its @phy_device struct diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 1f4ca2b54a7..c926bf0b190 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -361,7 +361,7 @@ static int ppp_open(struct inode *inode, struct file *file) return 0; } -static int ppp_release(struct inode *inode, struct file *file) +static int ppp_release(struct inode *unused, struct file *file) { struct ppp_file *pf = file->private_data; struct ppp *ppp; @@ -545,8 +545,7 @@ static int get_filter(void __user *arg, struct sock_filter **p) } #endif /* CONFIG_PPP_FILTER */ -static int ppp_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +static long ppp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct ppp_file *pf = file->private_data; struct ppp *ppp; @@ -574,24 +573,29 @@ static int ppp_ioctl(struct inode *inode, struct file *file, * this fd and reopening /dev/ppp. */ err = -EINVAL; + lock_kernel(); if (pf->kind == INTERFACE) { ppp = PF_TO_PPP(pf); if (file == ppp->owner) ppp_shutdown_interface(ppp); } if (atomic_read(&file->f_count) <= 2) { - ppp_release(inode, file); + ppp_release(NULL, file); err = 0; } else printk(KERN_DEBUG "PPPIOCDETACH file->f_count=%d\n", atomic_read(&file->f_count)); + unlock_kernel(); return err; } if (pf->kind == CHANNEL) { - struct channel *pch = PF_TO_CHANNEL(pf); + struct channel *pch; struct ppp_channel *chan; + lock_kernel(); + pch = PF_TO_CHANNEL(pf); + switch (cmd) { case PPPIOCCONNECT: if (get_user(unit, p)) @@ -611,6 +615,7 @@ static int ppp_ioctl(struct inode *inode, struct file *file, err = chan->ops->ioctl(chan, cmd, arg); up_read(&pch->chan_sem); } + unlock_kernel(); return err; } @@ -620,6 +625,7 @@ static int ppp_ioctl(struct inode *inode, struct file *file, return -EINVAL; } + lock_kernel(); ppp = PF_TO_PPP(pf); switch (cmd) { case PPPIOCSMRU: @@ -767,7 +773,7 @@ static int ppp_ioctl(struct inode *inode, struct file *file, default: err = -ENOTTY; } - + unlock_kernel(); return err; } @@ -779,6 +785,7 @@ static int ppp_unattached_ioctl(struct ppp_file *pf, struct file *file, struct channel *chan; int __user *p = (int __user *)arg; + lock_kernel(); switch (cmd) { case PPPIOCNEWUNIT: /* Create a new ppp unit */ @@ -827,6 +834,7 @@ static int ppp_unattached_ioctl(struct ppp_file *pf, struct file *file, default: err = -ENOTTY; } + unlock_kernel(); return err; } @@ -835,7 +843,7 @@ static const struct file_operations ppp_device_fops = { .read = ppp_read, .write = ppp_write, .poll = ppp_poll, - .ioctl = ppp_ioctl, + .unlocked_ioctl = ppp_ioctl, .open = ppp_open, .release = ppp_release }; diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 79359919335..8db342f2fdc 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -980,6 +980,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) __wsum csum = 0; struct udphdr *uh; unsigned int len; + int old_headroom; + int new_headroom; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; @@ -1001,16 +1003,18 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Check that there's enough headroom in the skb to insert IP, * UDP and L2TP and PPP headers. If not enough, expand it to - * make room. Note that a new skb (or a clone) is - * allocated. If we return an error from this point on, make - * sure we free the new skb but do not free the original skb - * since that is done by the caller for the error case. + * make room. Adjust truesize. */ headroom = NET_SKB_PAD + sizeof(struct iphdr) + sizeof(struct udphdr) + hdr_len + sizeof(ppph); + old_headroom = skb_headroom(skb); if (skb_cow_head(skb, headroom)) goto abort; + new_headroom = skb_headroom(skb); + skb_orphan(skb); + skb->truesize += new_headroom - old_headroom; + /* Setup PPP header */ __skb_push(skb, sizeof(ppph)); skb->data[0] = ppph[0]; @@ -1065,7 +1069,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Get routing info from the tunnel socket */ dst_release(skb->dst); skb->dst = dst_clone(__sk_dst_get(sk_tun)); - skb_orphan(skb); skb->sk = sk_tun; /* Queue the packet to IP for output */ diff --git a/drivers/net/s2io-regs.h b/drivers/net/s2io-regs.h index 2109508c047..f8274f8941e 100644 --- a/drivers/net/s2io-regs.h +++ b/drivers/net/s2io-regs.h @@ -250,7 +250,7 @@ struct XENA_dev_config { u64 tx_mat0_n[0x8]; #define TX_MAT_SET(fifo, msi) vBIT(msi, (8 * fifo), 8) - u8 unused_1[0x8]; + u64 xmsi_mask_reg; u64 stat_byte_cnt; #define STAT_BC(n) vBIT(n,4,12) diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 75bde647583..dcc953e57ab 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -86,7 +86,7 @@ #include "s2io.h" #include "s2io-regs.h" -#define DRV_VERSION "2.0.26.23" +#define DRV_VERSION "2.0.26.24" /* S2io Driver name & version. */ static char s2io_driver_name[] = "Neterion"; @@ -1113,9 +1113,10 @@ static int s2io_on_nec_bridge(struct pci_dev *s2io_pdev) struct pci_dev *tdev = NULL; while ((tdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, tdev)) != NULL) { if (tdev->vendor == NEC_VENID && tdev->device == NEC_DEVID) { - if (tdev->bus == s2io_pdev->bus->parent) + if (tdev->bus == s2io_pdev->bus->parent) { pci_dev_put(tdev); return 1; + } } } return 0; @@ -1219,15 +1220,33 @@ static int init_tti(struct s2io_nic *nic, int link) TTI_DATA1_MEM_TX_URNG_B(0x10) | TTI_DATA1_MEM_TX_URNG_C(0x30) | TTI_DATA1_MEM_TX_TIMER_AC_EN; - - if (use_continuous_tx_intrs && (link == LINK_UP)) - val64 |= TTI_DATA1_MEM_TX_TIMER_CI_EN; + if (i == 0) + if (use_continuous_tx_intrs && (link == LINK_UP)) + val64 |= TTI_DATA1_MEM_TX_TIMER_CI_EN; writeq(val64, &bar0->tti_data1_mem); - val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) | - TTI_DATA2_MEM_TX_UFC_B(0x20) | - TTI_DATA2_MEM_TX_UFC_C(0x40) | - TTI_DATA2_MEM_TX_UFC_D(0x80); + if (nic->config.intr_type == MSI_X) { + val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) | + TTI_DATA2_MEM_TX_UFC_B(0x100) | + TTI_DATA2_MEM_TX_UFC_C(0x200) | + TTI_DATA2_MEM_TX_UFC_D(0x300); + } else { + if ((nic->config.tx_steering_type == + TX_DEFAULT_STEERING) && + (config->tx_fifo_num > 1) && + (i >= nic->udp_fifo_idx) && + (i < (nic->udp_fifo_idx + + nic->total_udp_fifos))) + val64 = TTI_DATA2_MEM_TX_UFC_A(0x50) | + TTI_DATA2_MEM_TX_UFC_B(0x80) | + TTI_DATA2_MEM_TX_UFC_C(0x100) | + TTI_DATA2_MEM_TX_UFC_D(0x120); + else + val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) | + TTI_DATA2_MEM_TX_UFC_B(0x20) | + TTI_DATA2_MEM_TX_UFC_C(0x40) | + TTI_DATA2_MEM_TX_UFC_D(0x80); + } writeq(val64, &bar0->tti_data2_mem); @@ -2813,6 +2832,15 @@ static void free_rx_buffers(struct s2io_nic *sp) } } +static int s2io_chk_rx_buffers(struct ring_info *ring) +{ + if (fill_rx_buffers(ring) == -ENOMEM) { + DBG_PRINT(INFO_DBG, "%s:Out of memory", ring->dev->name); + DBG_PRINT(INFO_DBG, " in Rx Intr!!\n"); + } + return 0; +} + /** * s2io_poll - Rx interrupt handler for NAPI support * @napi : pointer to the napi structure. @@ -2826,57 +2854,72 @@ static void free_rx_buffers(struct s2io_nic *sp) * 0 on success and 1 if there are No Rx packets to be processed. */ -static int s2io_poll(struct napi_struct *napi, int budget) +static int s2io_poll_msix(struct napi_struct *napi, int budget) { - struct s2io_nic *nic = container_of(napi, struct s2io_nic, napi); - struct net_device *dev = nic->dev; - int pkt_cnt = 0, org_pkts_to_process; - struct mac_info *mac_control; + struct ring_info *ring = container_of(napi, struct ring_info, napi); + struct net_device *dev = ring->dev; struct config_param *config; + struct mac_info *mac_control; + int pkts_processed = 0; + u8 *addr = NULL, val8 = 0; + struct s2io_nic *nic = dev->priv; struct XENA_dev_config __iomem *bar0 = nic->bar0; - int i; + int budget_org = budget; - mac_control = &nic->mac_control; config = &nic->config; + mac_control = &nic->mac_control; - nic->pkts_to_process = budget; - org_pkts_to_process = nic->pkts_to_process; + if (unlikely(!is_s2io_card_up(nic))) + return 0; - writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_int); - readl(&bar0->rx_traffic_int); + pkts_processed = rx_intr_handler(ring, budget); + s2io_chk_rx_buffers(ring); - for (i = 0; i < config->rx_ring_num; i++) { - rx_intr_handler(&mac_control->rings[i]); - pkt_cnt = org_pkts_to_process - nic->pkts_to_process; - if (!nic->pkts_to_process) { - /* Quota for the current iteration has been met */ - goto no_rx; - } + if (pkts_processed < budget_org) { + netif_rx_complete(dev, napi); + /*Re Enable MSI-Rx Vector*/ + addr = (u8 *)&bar0->xmsi_mask_reg; + addr += 7 - ring->ring_no; + val8 = (ring->ring_no == 0) ? 0x3f : 0xbf; + writeb(val8, addr); + val8 = readb(addr); } + return pkts_processed; +} +static int s2io_poll_inta(struct napi_struct *napi, int budget) +{ + struct s2io_nic *nic = container_of(napi, struct s2io_nic, napi); + struct ring_info *ring; + struct net_device *dev = nic->dev; + struct config_param *config; + struct mac_info *mac_control; + int pkts_processed = 0; + int ring_pkts_processed, i; + struct XENA_dev_config __iomem *bar0 = nic->bar0; + int budget_org = budget; - netif_rx_complete(dev, napi); + config = &nic->config; + mac_control = &nic->mac_control; - for (i = 0; i < config->rx_ring_num; i++) { - if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) { - DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); - DBG_PRINT(INFO_DBG, " in Rx Poll!!\n"); - break; - } - } - /* Re enable the Rx interrupts. */ - writeq(0x0, &bar0->rx_traffic_mask); - readl(&bar0->rx_traffic_mask); - return pkt_cnt; + if (unlikely(!is_s2io_card_up(nic))) + return 0; -no_rx: for (i = 0; i < config->rx_ring_num; i++) { - if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) { - DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name); - DBG_PRINT(INFO_DBG, " in Rx Poll!!\n"); + ring = &mac_control->rings[i]; + ring_pkts_processed = rx_intr_handler(ring, budget); + s2io_chk_rx_buffers(ring); + pkts_processed += ring_pkts_processed; + budget -= ring_pkts_processed; + if (budget <= 0) break; - } } - return pkt_cnt; + if (pkts_processed < budget_org) { + netif_rx_complete(dev, napi); + /* Re enable the Rx interrupts for the ring */ + writeq(0, &bar0->rx_traffic_mask); + readl(&bar0->rx_traffic_mask); + } + return pkts_processed; } #ifdef CONFIG_NET_POLL_CONTROLLER @@ -2918,7 +2961,7 @@ static void s2io_netpoll(struct net_device *dev) /* check for received packet and indicate up to network */ for (i = 0; i < config->rx_ring_num; i++) - rx_intr_handler(&mac_control->rings[i]); + rx_intr_handler(&mac_control->rings[i], 0); for (i = 0; i < config->rx_ring_num; i++) { if (fill_rx_buffers(&mac_control->rings[i]) == -ENOMEM) { @@ -2934,7 +2977,8 @@ static void s2io_netpoll(struct net_device *dev) /** * rx_intr_handler - Rx interrupt handler - * @nic: device private variable. + * @ring_info: per ring structure. + * @budget: budget for napi processing. * Description: * If the interrupt is because of a received frame or if the * receive ring contains fresh as yet un-processed frames,this function is @@ -2942,15 +2986,15 @@ static void s2io_netpoll(struct net_device *dev) * stopped and sends the skb to the OSM's Rx handler and then increments * the offset. * Return Value: - * NONE. + * No. of napi packets processed. */ -static void rx_intr_handler(struct ring_info *ring_data) +static int rx_intr_handler(struct ring_info *ring_data, int budget) { int get_block, put_block; struct rx_curr_get_info get_info, put_info; struct RxD_t *rxdp; struct sk_buff *skb; - int pkt_cnt = 0; + int pkt_cnt = 0, napi_pkts = 0; int i; struct RxD1* rxdp1; struct RxD3* rxdp3; @@ -2977,7 +3021,7 @@ static void rx_intr_handler(struct ring_info *ring_data) DBG_PRINT(ERR_DBG, "%s: The skb is ", ring_data->dev->name); DBG_PRINT(ERR_DBG, "Null in Rx Intr\n"); - return; + return 0; } if (ring_data->rxd_mode == RXD_MODE_1) { rxdp1 = (struct RxD1*)rxdp; @@ -3014,9 +3058,10 @@ static void rx_intr_handler(struct ring_info *ring_data) rxdp = ring_data->rx_blocks[get_block].block_virt_addr; } - if(ring_data->nic->config.napi){ - ring_data->nic->pkts_to_process -= 1; - if (!ring_data->nic->pkts_to_process) + if (ring_data->nic->config.napi) { + budget--; + napi_pkts++; + if (!budget) break; } pkt_cnt++; @@ -3034,6 +3079,7 @@ static void rx_intr_handler(struct ring_info *ring_data) } } } + return(napi_pkts); } /** @@ -3730,14 +3776,19 @@ static void restore_xmsi_data(struct s2io_nic *nic) { struct XENA_dev_config __iomem *bar0 = nic->bar0; u64 val64; - int i; + int i, msix_index; + + + if (nic->device_type == XFRAME_I_DEVICE) + return; for (i=0; i < MAX_REQUESTED_MSI_X; i++) { + msix_index = (i) ? ((i-1) * 8 + 1): 0; writeq(nic->msix_info[i].addr, &bar0->xmsi_address); writeq(nic->msix_info[i].data, &bar0->xmsi_data); - val64 = (s2BIT(7) | s2BIT(15) | vBIT(i, 26, 6)); + val64 = (s2BIT(7) | s2BIT(15) | vBIT(msix_index, 26, 6)); writeq(val64, &bar0->xmsi_access); - if (wait_for_msix_trans(nic, i)) { + if (wait_for_msix_trans(nic, msix_index)) { DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__); continue; } @@ -3748,13 +3799,17 @@ static void store_xmsi_data(struct s2io_nic *nic) { struct XENA_dev_config __iomem *bar0 = nic->bar0; u64 val64, addr, data; - int i; + int i, msix_index; + + if (nic->device_type == XFRAME_I_DEVICE) + return; /* Store and display */ for (i=0; i < MAX_REQUESTED_MSI_X; i++) { - val64 = (s2BIT(15) | vBIT(i, 26, 6)); + msix_index = (i) ? ((i-1) * 8 + 1): 0; + val64 = (s2BIT(15) | vBIT(msix_index, 26, 6)); writeq(val64, &bar0->xmsi_access); - if (wait_for_msix_trans(nic, i)) { + if (wait_for_msix_trans(nic, msix_index)) { DBG_PRINT(ERR_DBG, "failed in %s\n", __FUNCTION__); continue; } @@ -3770,11 +3825,11 @@ static void store_xmsi_data(struct s2io_nic *nic) static int s2io_enable_msi_x(struct s2io_nic *nic) { struct XENA_dev_config __iomem *bar0 = nic->bar0; - u64 tx_mat, rx_mat; + u64 rx_mat; u16 msi_control; /* Temp variable */ int ret, i, j, msix_indx = 1; - nic->entries = kcalloc(MAX_REQUESTED_MSI_X, sizeof(struct msix_entry), + nic->entries = kmalloc(nic->num_entries * sizeof(struct msix_entry), GFP_KERNEL); if (!nic->entries) { DBG_PRINT(INFO_DBG, "%s: Memory allocation failed\n", \ @@ -3783,10 +3838,12 @@ static int s2io_enable_msi_x(struct s2io_nic *nic) return -ENOMEM; } nic->mac_control.stats_info->sw_stat.mem_allocated - += (MAX_REQUESTED_MSI_X * sizeof(struct msix_entry)); + += (nic->num_entries * sizeof(struct msix_entry)); + + memset(nic->entries, 0, nic->num_entries * sizeof(struct msix_entry)); nic->s2io_entries = - kcalloc(MAX_REQUESTED_MSI_X, sizeof(struct s2io_msix_entry), + kmalloc(nic->num_entries * sizeof(struct s2io_msix_entry), GFP_KERNEL); if (!nic->s2io_entries) { DBG_PRINT(INFO_DBG, "%s: Memory allocation failed\n", @@ -3794,60 +3851,52 @@ static int s2io_enable_msi_x(struct s2io_nic *nic) nic->mac_control.stats_info->sw_stat.mem_alloc_fail_cnt++; kfree(nic->entries); nic->mac_control.stats_info->sw_stat.mem_freed - += (MAX_REQUESTED_MSI_X * sizeof(struct msix_entry)); + += (nic->num_entries * sizeof(struct msix_entry)); return -ENOMEM; } nic->mac_control.stats_info->sw_stat.mem_allocated - += (MAX_REQUESTED_MSI_X * sizeof(struct s2io_msix_entry)); - - for (i=0; i< MAX_REQUESTED_MSI_X; i++) { - nic->entries[i].entry = i; - nic->s2io_entries[i].entry = i; + += (nic->num_entries * sizeof(struct s2io_msix_entry)); + memset(nic->s2io_entries, 0, + nic->num_entries * sizeof(struct s2io_msix_entry)); + + nic->entries[0].entry = 0; + nic->s2io_entries[0].entry = 0; + nic->s2io_entries[0].in_use = MSIX_FLG; + nic->s2io_entries[0].type = MSIX_ALARM_TYPE; + nic->s2io_entries[0].arg = &nic->mac_control.fifos; + + for (i = 1; i < nic->num_entries; i++) { + nic->entries[i].entry = ((i - 1) * 8) + 1; + nic->s2io_entries[i].entry = ((i - 1) * 8) + 1; nic->s2io_entries[i].arg = NULL; nic->s2io_entries[i].in_use = 0; } - tx_mat = readq(&bar0->tx_mat0_n[0]); - for (i=0; i<nic->config.tx_fifo_num; i++, msix_indx++) { - tx_mat |= TX_MAT_SET(i, msix_indx); - nic->s2io_entries[msix_indx].arg = &nic->mac_control.fifos[i]; - nic->s2io_entries[msix_indx].type = MSIX_FIFO_TYPE; - nic->s2io_entries[msix_indx].in_use = MSIX_FLG; - } - writeq(tx_mat, &bar0->tx_mat0_n[0]); - rx_mat = readq(&bar0->rx_mat); - for (j = 0; j < nic->config.rx_ring_num; j++, msix_indx++) { + for (j = 0; j < nic->config.rx_ring_num; j++) { rx_mat |= RX_MAT_SET(j, msix_indx); - nic->s2io_entries[msix_indx].arg - = &nic->mac_control.rings[j]; - nic->s2io_entries[msix_indx].type = MSIX_RING_TYPE; - nic->s2io_entries[msix_indx].in_use = MSIX_FLG; + nic->s2io_entries[j+1].arg = &nic->mac_control.rings[j]; + nic->s2io_entries[j+1].type = MSIX_RING_TYPE; + nic->s2io_entries[j+1].in_use = MSIX_FLG; + msix_indx += 8; } writeq(rx_mat, &bar0->rx_mat); + readq(&bar0->rx_mat); - nic->avail_msix_vectors = 0; - ret = pci_enable_msix(nic->pdev, nic->entries, MAX_REQUESTED_MSI_X); + ret = pci_enable_msix(nic->pdev, nic->entries, nic->num_entries); /* We fail init if error or we get less vectors than min required */ - if (ret >= (nic->config.tx_fifo_num + nic->config.rx_ring_num + 1)) { - nic->avail_msix_vectors = ret; - ret = pci_enable_msix(nic->pdev, nic->entries, ret); - } if (ret) { DBG_PRINT(ERR_DBG, "%s: Enabling MSIX failed\n", nic->dev->name); kfree(nic->entries); nic->mac_control.stats_info->sw_stat.mem_freed - += (MAX_REQUESTED_MSI_X * sizeof(struct msix_entry)); + += (nic->num_entries * sizeof(struct msix_entry)); kfree(nic->s2io_entries); nic->mac_control.stats_info->sw_stat.mem_freed - += (MAX_REQUESTED_MSI_X * sizeof(struct s2io_msix_entry)); + += (nic->num_entries * sizeof(struct s2io_msix_entry)); nic->entries = NULL; nic->s2io_entries = NULL; - nic->avail_msix_vectors = 0; return -ENOMEM; } - if (!nic->avail_msix_vectors) - nic->avail_msix_vectors = MAX_REQUESTED_MSI_X; /* * To enable MSI-X, MSI also needs to be enabled, due to a bug @@ -3919,7 +3968,7 @@ static void remove_msix_isr(struct s2io_nic *sp) int i; u16 msi_control; - for (i = 0; i < MAX_REQUESTED_MSI_X; i++) { + for (i = 0; i < sp->num_entries; i++) { if (sp->s2io_entries[i].in_use == MSIX_REGISTERED_SUCCESS) { int vector = sp->entries[i].vector; @@ -3975,29 +4024,6 @@ static int s2io_open(struct net_device *dev) netif_carrier_off(dev); sp->last_link_state = 0; - if (sp->config.intr_type == MSI_X) { - int ret = s2io_enable_msi_x(sp); - - if (!ret) { - ret = s2io_test_msi(sp); - /* rollback MSI-X, will re-enable during add_isr() */ - remove_msix_isr(sp); - } - if (ret) { - - DBG_PRINT(ERR_DBG, - "%s: MSI-X requested but failed to enable\n", - dev->name); - sp->config.intr_type = INTA; - } - } - - /* NAPI doesn't work well with MSI(X) */ - if (sp->config.intr_type != INTA) { - if(sp->config.napi) - sp->config.napi = 0; - } - /* Initialize H/W and enable interrupts */ err = s2io_card_up(sp); if (err) { @@ -4020,12 +4046,12 @@ hw_init_failed: if (sp->entries) { kfree(sp->entries); sp->mac_control.stats_info->sw_stat.mem_freed - += (MAX_REQUESTED_MSI_X * sizeof(struct msix_entry)); + += (sp->num_entries * sizeof(struct msix_entry)); } if (sp->s2io_entries) { kfree(sp->s2io_entries); sp->mac_control.stats_info->sw_stat.mem_freed - += (MAX_REQUESTED_MSI_X * sizeof(struct s2io_msix_entry)); + += (sp->num_entries * sizeof(struct s2io_msix_entry)); } } return err; @@ -4327,40 +4353,64 @@ s2io_alarm_handle(unsigned long data) mod_timer(&sp->alarm_timer, jiffies + HZ / 2); } -static int s2io_chk_rx_buffers(struct ring_info *ring) -{ - if (fill_rx_buffers(ring) == -ENOMEM) { - DBG_PRINT(INFO_DBG, "%s:Out of memory", ring->dev->name); - DBG_PRINT(INFO_DBG, " in Rx Intr!!\n"); - } - return 0; -} - static irqreturn_t s2io_msix_ring_handle(int irq, void *dev_id) { struct ring_info *ring = (struct ring_info *)dev_id; struct s2io_nic *sp = ring->nic; + struct XENA_dev_config __iomem *bar0 = sp->bar0; + struct net_device *dev = sp->dev; - if (!is_s2io_card_up(sp)) + if (unlikely(!is_s2io_card_up(sp))) return IRQ_HANDLED; - rx_intr_handler(ring); - s2io_chk_rx_buffers(ring); + if (sp->config.napi) { + u8 *addr = NULL, val8 = 0; + + addr = (u8 *)&bar0->xmsi_mask_reg; + addr += (7 - ring->ring_no); + val8 = (ring->ring_no == 0) ? 0x7f : 0xff; + writeb(val8, addr); + val8 = readb(addr); + netif_rx_schedule(dev, &ring->napi); + } else { + rx_intr_handler(ring, 0); + s2io_chk_rx_buffers(ring); + } return IRQ_HANDLED; } static irqreturn_t s2io_msix_fifo_handle(int irq, void *dev_id) { - struct fifo_info *fifo = (struct fifo_info *)dev_id; - struct s2io_nic *sp = fifo->nic; + int i; + struct fifo_info *fifos = (struct fifo_info *)dev_id; + struct s2io_nic *sp = fifos->nic; + struct XENA_dev_config __iomem *bar0 = sp->bar0; + struct config_param *config = &sp->config; + u64 reason; - if (!is_s2io_card_up(sp)) + if (unlikely(!is_s2io_card_up(sp))) + return IRQ_NONE; + + reason = readq(&bar0->general_int_status); + if (unlikely(reason == S2IO_MINUS_ONE)) + /* Nothing much can be done. Get out */ return IRQ_HANDLED; - tx_intr_handler(fifo); + writeq(S2IO_MINUS_ONE, &bar0->general_int_mask); + + if (reason & GEN_INTR_TXTRAFFIC) + writeq(S2IO_MINUS_ONE, &bar0->tx_traffic_int); + + for (i = 0; i < config->tx_fifo_num; i++) + tx_intr_handler(&fifos[i]); + + writeq(sp->general_int_mask, &bar0->general_int_mask); + readl(&bar0->general_int_status); + return IRQ_HANDLED; } + static void s2io_txpic_intr_handle(struct s2io_nic *sp) { struct XENA_dev_config __iomem *bar0 = sp->bar0; @@ -4762,14 +4812,10 @@ static irqreturn_t s2io_isr(int irq, void *dev_id) if (config->napi) { if (reason & GEN_INTR_RXTRAFFIC) { - if (likely(netif_rx_schedule_prep(dev, - &sp->napi))) { - __netif_rx_schedule(dev, &sp->napi); - writeq(S2IO_MINUS_ONE, - &bar0->rx_traffic_mask); - } else - writeq(S2IO_MINUS_ONE, - &bar0->rx_traffic_int); + netif_rx_schedule(dev, &sp->napi); + writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_mask); + writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_int); + readl(&bar0->rx_traffic_int); } } else { /* @@ -4781,7 +4827,7 @@ static irqreturn_t s2io_isr(int irq, void *dev_id) writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_int); for (i = 0; i < config->rx_ring_num; i++) - rx_intr_handler(&mac_control->rings[i]); + rx_intr_handler(&mac_control->rings[i], 0); } /* @@ -6951,7 +6997,7 @@ static int rxd_owner_bit_reset(struct s2io_nic *sp) &skb,(u64 *)&temp0_64, (u64 *)&temp1_64, (u64 *)&temp2_64, - size) == ENOMEM) { + size) == -ENOMEM) { return 0; } @@ -6984,62 +7030,62 @@ static int s2io_add_isr(struct s2io_nic * sp) /* After proper initialization of H/W, register ISR */ if (sp->config.intr_type == MSI_X) { - int i, msix_tx_cnt=0,msix_rx_cnt=0; - - for (i=1; (sp->s2io_entries[i].in_use == MSIX_FLG); i++) { - if (sp->s2io_entries[i].type == MSIX_FIFO_TYPE) { - sprintf(sp->desc[i], "%s:MSI-X-%d-TX", + int i, msix_rx_cnt = 0; + + for (i = 0; i < sp->num_entries; i++) { + if (sp->s2io_entries[i].in_use == MSIX_FLG) { + if (sp->s2io_entries[i].type == + MSIX_RING_TYPE) { + sprintf(sp->desc[i], "%s:MSI-X-%d-RX", + dev->name, i); + err = request_irq(sp->entries[i].vector, + s2io_msix_ring_handle, 0, + sp->desc[i], + sp->s2io_entries[i].arg); + } else if (sp->s2io_entries[i].type == + MSIX_ALARM_TYPE) { + sprintf(sp->desc[i], "%s:MSI-X-%d-TX", dev->name, i); - err = request_irq(sp->entries[i].vector, - s2io_msix_fifo_handle, 0, sp->desc[i], - sp->s2io_entries[i].arg); - /* If either data or addr is zero print it */ - if(!(sp->msix_info[i].addr && - sp->msix_info[i].data)) { - DBG_PRINT(ERR_DBG, "%s @ Addr:0x%llx " - "Data:0x%llx\n",sp->desc[i], - (unsigned long long) - sp->msix_info[i].addr, - (unsigned long long) - sp->msix_info[i].data); - } else { - msix_tx_cnt++; + err = request_irq(sp->entries[i].vector, + s2io_msix_fifo_handle, 0, + sp->desc[i], + sp->s2io_entries[i].arg); + } - } else { - sprintf(sp->desc[i], "%s:MSI-X-%d-RX", - dev->name, i); - err = request_irq(sp->entries[i].vector, - s2io_msix_ring_handle, 0, sp->desc[i], - sp->s2io_entries[i].arg); - /* If either data or addr is zero print it */ - if(!(sp->msix_info[i].addr && + /* if either data or addr is zero print it. */ + if (!(sp->msix_info[i].addr && sp->msix_info[i].data)) { - DBG_PRINT(ERR_DBG, "%s @ Addr:0x%llx " - "Data:0x%llx\n",sp->desc[i], + DBG_PRINT(ERR_DBG, + "%s @Addr:0x%llx Data:0x%llx\n", + sp->desc[i], (unsigned long long) sp->msix_info[i].addr, (unsigned long long) - sp->msix_info[i].data); - } else { + ntohl(sp->msix_info[i].data)); + } else msix_rx_cnt++; + if (err) { + remove_msix_isr(sp); + + DBG_PRINT(ERR_DBG, + "%s:MSI-X-%d registration " + "failed\n", dev->name, i); + + DBG_PRINT(ERR_DBG, + "%s: Defaulting to INTA\n", + dev->name); + sp->config.intr_type = INTA; + break; } + sp->s2io_entries[i].in_use = + MSIX_REGISTERED_SUCCESS; } - if (err) { - remove_msix_isr(sp); - DBG_PRINT(ERR_DBG,"%s:MSI-X-%d registration " - "failed\n", dev->name, i); - DBG_PRINT(ERR_DBG, "%s: defaulting to INTA\n", - dev->name); - sp->config.intr_type = INTA; - break; - } - sp->s2io_entries[i].in_use = MSIX_REGISTERED_SUCCESS; } if (!err) { - printk(KERN_INFO "MSI-X-TX %d entries enabled\n", - msix_tx_cnt); printk(KERN_INFO "MSI-X-RX %d entries enabled\n", - msix_rx_cnt); + --msix_rx_cnt); + DBG_PRINT(INFO_DBG, "MSI-X-TX entries enabled" + " through alarm vector\n"); } } if (sp->config.intr_type == INTA) { @@ -7080,8 +7126,15 @@ static void do_s2io_card_down(struct s2io_nic * sp, int do_io) clear_bit(__S2IO_STATE_CARD_UP, &sp->state); /* Disable napi */ - if (config->napi) - napi_disable(&sp->napi); + if (sp->config.napi) { + int off = 0; + if (config->intr_type == MSI_X) { + for (; off < sp->config.rx_ring_num; off++) + napi_disable(&sp->mac_control.rings[off].napi); + } + else + napi_disable(&sp->napi); + } /* disable Tx and Rx traffic on the NIC */ if (do_io) @@ -7173,8 +7226,15 @@ static int s2io_card_up(struct s2io_nic * sp) } /* Initialise napi */ - if (config->napi) - napi_enable(&sp->napi); + if (config->napi) { + int i; + if (config->intr_type == MSI_X) { + for (i = 0; i < sp->config.rx_ring_num; i++) + napi_enable(&sp->mac_control.rings[i].napi); + } else { + napi_enable(&sp->napi); + } + } /* Maintain the state prior to the open */ if (sp->promisc_flg) @@ -7217,7 +7277,7 @@ static int s2io_card_up(struct s2io_nic * sp) /* Enable select interrupts */ en_dis_err_alarms(sp, ENA_ALL_INTRS, ENABLE_INTRS); if (sp->config.intr_type != INTA) - en_dis_able_nic_intrs(sp, ENA_ALL_INTRS, DISABLE_INTRS); + en_dis_able_nic_intrs(sp, TX_TRAFFIC_INTR, ENABLE_INTRS); else { interruptible = TX_TRAFFIC_INTR | RX_TRAFFIC_INTR; interruptible |= TX_PIC_INTR; @@ -7615,9 +7675,6 @@ static int s2io_verify_parm(struct pci_dev *pdev, u8 *dev_intr_type, rx_ring_num = MAX_RX_RINGS; } - if (*dev_intr_type != INTA) - napi = 0; - if ((*dev_intr_type != INTA) && (*dev_intr_type != MSI_X)) { DBG_PRINT(ERR_DBG, "s2io: Wrong intr_type requested. " "Defaulting to INTA\n"); @@ -7918,8 +7975,6 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) * will use eth_mac_addr() for dev->set_mac_address * mac address will be set every time dev->open() is called */ - netif_napi_add(dev, &sp->napi, s2io_poll, 32); - #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = s2io_netpoll; #endif @@ -7963,6 +8018,32 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) } } + if (sp->config.intr_type == MSI_X) { + sp->num_entries = config->rx_ring_num + 1; + ret = s2io_enable_msi_x(sp); + + if (!ret) { + ret = s2io_test_msi(sp); + /* rollback MSI-X, will re-enable during add_isr() */ + remove_msix_isr(sp); + } + if (ret) { + + DBG_PRINT(ERR_DBG, + "%s: MSI-X requested but failed to enable\n", + dev->name); + sp->config.intr_type = INTA; + } + } + + if (config->intr_type == MSI_X) { + for (i = 0; i < config->rx_ring_num ; i++) + netif_napi_add(dev, &mac_control->rings[i].napi, + s2io_poll_msix, 64); + } else { + netif_napi_add(dev, &sp->napi, s2io_poll_inta, 64); + } + /* Not needed for Herc */ if (sp->device_type & XFRAME_I_DEVICE) { /* @@ -8013,6 +8094,11 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) /* store mac addresses from CAM to s2io_nic structure */ do_s2io_store_unicast_mc(sp); + /* Configure MSIX vector for number of rings configured plus one */ + if ((sp->device_type == XFRAME_II_DEVICE) && + (config->intr_type == MSI_X)) + sp->num_entries = config->rx_ring_num + 1; + /* Store the values of the MSIX table in the s2io_nic structure */ store_xmsi_data(sp); /* reset Nic and bring it to known state */ @@ -8078,8 +8164,14 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre) break; } - if (napi) + switch (sp->config.napi) { + case 0: + DBG_PRINT(ERR_DBG, "%s: NAPI disabled\n", dev->name); + break; + case 1: DBG_PRINT(ERR_DBG, "%s: NAPI enabled\n", dev->name); + break; + } DBG_PRINT(ERR_DBG, "%s: Using %d Tx fifo(s)\n", dev->name, sp->config.tx_fifo_num); diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h index 58229dcbf5e..d0a84ba887a 100644 --- a/drivers/net/s2io.h +++ b/drivers/net/s2io.h @@ -706,7 +706,7 @@ struct ring_info { /* per-ring buffer counter */ u32 rx_bufs_left; - #define MAX_LRO_SESSIONS 32 +#define MAX_LRO_SESSIONS 32 struct lro lro0_n[MAX_LRO_SESSIONS]; u8 lro; @@ -725,6 +725,11 @@ struct ring_info { /* copy of sp->pdev pointer */ struct pci_dev *pdev; + /* Per ring napi struct */ + struct napi_struct napi; + + unsigned long interrupt_count; + /* * Place holders for the virtual and physical addresses of * all the Rx Blocks @@ -841,7 +846,7 @@ struct usr_addr { * Structure to keep track of the MSI-X vectors and the corresponding * argument registered against each vector */ -#define MAX_REQUESTED_MSI_X 17 +#define MAX_REQUESTED_MSI_X 9 struct s2io_msix_entry { u16 vector; @@ -849,8 +854,8 @@ struct s2io_msix_entry void *arg; u8 type; -#define MSIX_FIFO_TYPE 1 -#define MSIX_RING_TYPE 2 +#define MSIX_ALARM_TYPE 1 +#define MSIX_RING_TYPE 2 u8 in_use; #define MSIX_REGISTERED_SUCCESS 0xAA @@ -877,7 +882,6 @@ struct s2io_nic { */ int pkts_to_process; struct net_device *dev; - struct napi_struct napi; struct mac_info mac_control; struct config_param config; struct pci_dev *pdev; @@ -948,6 +952,7 @@ struct s2io_nic { */ u8 other_fifo_idx; + struct napi_struct napi; /* after blink, the adapter must be restored with original * values. */ @@ -962,6 +967,7 @@ struct s2io_nic { unsigned long long start_time; struct vlan_group *vlgrp; #define MSIX_FLG 0xA5 + int num_entries; struct msix_entry *entries; int msi_detected; wait_queue_head_t msi_wait; @@ -982,6 +988,7 @@ struct s2io_nic { u16 lro_max_aggr_per_sess; volatile unsigned long state; u64 general_int_mask; + #define VPD_STRING_LEN 80 u8 product_name[VPD_STRING_LEN]; u8 serial_num[VPD_STRING_LEN]; @@ -1103,7 +1110,7 @@ static void __devexit s2io_rem_nic(struct pci_dev *pdev); static int init_shared_mem(struct s2io_nic *sp); static void free_shared_mem(struct s2io_nic *sp); static int init_nic(struct s2io_nic *nic); -static void rx_intr_handler(struct ring_info *ring_data); +static int rx_intr_handler(struct ring_info *ring_data, int budget); static void tx_intr_handler(struct fifo_info *fifo_data); static void s2io_handle_errors(void * dev_id); @@ -1114,7 +1121,8 @@ static void s2io_set_multicast(struct net_device *dev); static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp); static void s2io_link(struct s2io_nic * sp, int link); static void s2io_reset(struct s2io_nic * sp); -static int s2io_poll(struct napi_struct *napi, int budget); +static int s2io_poll_msix(struct napi_struct *napi, int budget); +static int s2io_poll_inta(struct napi_struct *napi, int budget); static void s2io_init_pci(struct s2io_nic * sp); static int do_s2io_prog_unicast(struct net_device *dev, u8 *addr); static void s2io_alarm_handle(unsigned long data); diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c index 29519902517..fe41e4ec21e 100644 --- a/drivers/net/sb1250-mac.c +++ b/drivers/net/sb1250-mac.c @@ -179,8 +179,7 @@ enum sbmac_state { #define SBMAC_MAX_TXDESCR 256 #define SBMAC_MAX_RXDESCR 256 -#define ETHER_ALIGN 2 -#define ETHER_ADDR_LEN 6 +#define ETHER_ADDR_LEN 6 #define ENET_PACKET_SIZE 1518 /*#define ENET_PACKET_SIZE 9216 */ @@ -262,8 +261,6 @@ struct sbmac_softc { spinlock_t sbm_lock; /* spin lock */ int sbm_devflags; /* current device flags */ - int sbm_buffersize; - /* * Controller-specific things */ @@ -305,10 +302,11 @@ struct sbmac_softc { static void sbdma_initctx(struct sbmacdma *d, struct sbmac_softc *s, int chan, int txrx, int maxdescr); static void sbdma_channel_start(struct sbmacdma *d, int rxtx); -static int sbdma_add_rcvbuffer(struct sbmacdma *d, struct sk_buff *m); +static int sbdma_add_rcvbuffer(struct sbmac_softc *sc, struct sbmacdma *d, + struct sk_buff *m); static int sbdma_add_txbuffer(struct sbmacdma *d, struct sk_buff *m); static void sbdma_emptyring(struct sbmacdma *d); -static void sbdma_fillring(struct sbmacdma *d); +static void sbdma_fillring(struct sbmac_softc *sc, struct sbmacdma *d); static int sbdma_rx_process(struct sbmac_softc *sc, struct sbmacdma *d, int work_to_do, int poll); static void sbdma_tx_process(struct sbmac_softc *sc, struct sbmacdma *d, @@ -777,16 +775,13 @@ static void sbdma_channel_stop(struct sbmacdma *d) d->sbdma_remptr = NULL; } -static void sbdma_align_skb(struct sk_buff *skb,int power2,int offset) +static inline void sbdma_align_skb(struct sk_buff *skb, + unsigned int power2, unsigned int offset) { - unsigned long addr; - unsigned long newaddr; - - addr = (unsigned long) skb->data; - - newaddr = (addr + power2 - 1) & ~(power2 - 1); + unsigned char *addr = skb->data; + unsigned char *newaddr = PTR_ALIGN(addr, power2); - skb_reserve(skb,newaddr-addr+offset); + skb_reserve(skb, newaddr - addr + offset); } @@ -797,7 +792,8 @@ static void sbdma_align_skb(struct sk_buff *skb,int power2,int offset) * this queues a buffer for inbound packets. * * Input parameters: - * d - DMA channel descriptor + * sc - softc structure + * d - DMA channel descriptor * sb - sk_buff to add, or NULL if we should allocate one * * Return value: @@ -806,8 +802,10 @@ static void sbdma_align_skb(struct sk_buff *skb,int power2,int offset) ********************************************************************* */ -static int sbdma_add_rcvbuffer(struct sbmacdma *d, struct sk_buff *sb) +static int sbdma_add_rcvbuffer(struct sbmac_softc *sc, struct sbmacdma *d, + struct sk_buff *sb) { + struct net_device *dev = sc->sbm_dev; struct sbdmadscr *dsc; struct sbdmadscr *nextdsc; struct sk_buff *sb_new = NULL; @@ -848,14 +846,16 @@ static int sbdma_add_rcvbuffer(struct sbmacdma *d, struct sk_buff *sb) */ if (sb == NULL) { - sb_new = dev_alloc_skb(ENET_PACKET_SIZE + SMP_CACHE_BYTES * 2 + ETHER_ALIGN); + sb_new = netdev_alloc_skb(dev, ENET_PACKET_SIZE + + SMP_CACHE_BYTES * 2 + + NET_IP_ALIGN); if (sb_new == NULL) { pr_info("%s: sk_buff allocation failed\n", d->sbdma_eth->sbm_dev->name); return -ENOBUFS; } - sbdma_align_skb(sb_new, SMP_CACHE_BYTES, ETHER_ALIGN); + sbdma_align_skb(sb_new, SMP_CACHE_BYTES, NET_IP_ALIGN); } else { sb_new = sb; @@ -874,10 +874,10 @@ static int sbdma_add_rcvbuffer(struct sbmacdma *d, struct sk_buff *sb) * Do not interrupt per DMA transfer. */ dsc->dscr_a = virt_to_phys(sb_new->data) | - V_DMA_DSCRA_A_SIZE(NUMCACHEBLKS(pktsize+ETHER_ALIGN)) | 0; + V_DMA_DSCRA_A_SIZE(NUMCACHEBLKS(pktsize + NET_IP_ALIGN)) | 0; #else dsc->dscr_a = virt_to_phys(sb_new->data) | - V_DMA_DSCRA_A_SIZE(NUMCACHEBLKS(pktsize+ETHER_ALIGN)) | + V_DMA_DSCRA_A_SIZE(NUMCACHEBLKS(pktsize + NET_IP_ALIGN)) | M_DMA_DSCRA_INTERRUPT; #endif @@ -1032,18 +1032,19 @@ static void sbdma_emptyring(struct sbmacdma *d) * with sk_buffs * * Input parameters: - * d - DMA channel + * sc - softc structure + * d - DMA channel * * Return value: * nothing ********************************************************************* */ -static void sbdma_fillring(struct sbmacdma *d) +static void sbdma_fillring(struct sbmac_softc *sc, struct sbmacdma *d) { int idx; - for (idx = 0; idx < SBMAC_MAX_RXDESCR-1; idx++) { - if (sbdma_add_rcvbuffer(d,NULL) != 0) + for (idx = 0; idx < SBMAC_MAX_RXDESCR - 1; idx++) { + if (sbdma_add_rcvbuffer(sc, d, NULL) != 0) break; } } @@ -1159,10 +1160,11 @@ again: * packet and put it right back on the receive ring. */ - if (unlikely (sbdma_add_rcvbuffer(d,NULL) == - -ENOBUFS)) { + if (unlikely(sbdma_add_rcvbuffer(sc, d, NULL) == + -ENOBUFS)) { dev->stats.rx_dropped++; - sbdma_add_rcvbuffer(d,sb); /* re-add old buffer */ + /* Re-add old buffer */ + sbdma_add_rcvbuffer(sc, d, sb); /* No point in continuing at the moment */ printk(KERN_ERR "dropped packet (1)\n"); d->sbdma_remptr = SBDMA_NEXTBUF(d,sbdma_remptr); @@ -1212,7 +1214,7 @@ again: * put it back on the receive ring. */ dev->stats.rx_errors++; - sbdma_add_rcvbuffer(d,sb); + sbdma_add_rcvbuffer(sc, d, sb); } @@ -1570,7 +1572,7 @@ static void sbmac_channel_start(struct sbmac_softc *s) * Fill the receive ring */ - sbdma_fillring(&(s->sbm_rxdma)); + sbdma_fillring(s, &(s->sbm_rxdma)); /* * Turn on the rest of the bits in the enable register @@ -2312,13 +2314,6 @@ static int sbmac_init(struct platform_device *pldev, long long base) dev->dev_addr[i] = eaddr[i]; } - - /* - * Init packet size - */ - - sc->sbm_buffersize = ENET_PACKET_SIZE + SMP_CACHE_BYTES * 2 + ETHER_ALIGN; - /* * Initialize context (get pointers to registers and stuff), then * allocate the memory for the descriptor tables. diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c index f64a860029b..b4b63805ee8 100644 --- a/drivers/net/sc92031.c +++ b/drivers/net/sc92031.c @@ -953,9 +953,6 @@ static int sc92031_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned entry; u32 tx_status; - if (skb_padto(skb, ETH_ZLEN)) - return NETDEV_TX_OK; - if (unlikely(skb->len > TX_BUF_SIZE)) { dev->stats.tx_dropped++; goto out; @@ -975,6 +972,11 @@ static int sc92031_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_copy_and_csum_dev(skb, priv->tx_bufs + entry * TX_BUF_SIZE); len = skb->len; + if (unlikely(len < ETH_ZLEN)) { + memset(priv->tx_bufs + entry * TX_BUF_SIZE + len, + 0, ETH_ZLEN - len); + len = ETH_ZLEN; + } wmb(); diff --git a/drivers/net/sfc/Kconfig b/drivers/net/sfc/Kconfig index dbad95c295b..3be13b592b4 100644 --- a/drivers/net/sfc/Kconfig +++ b/drivers/net/sfc/Kconfig @@ -4,6 +4,8 @@ config SFC select MII select INET_LRO select CRC32 + select I2C + select I2C_ALGOBIT help This driver supports 10-gigabit Ethernet cards based on the Solarflare Communications Solarstorm SFC4000 controller. diff --git a/drivers/net/sfc/Makefile b/drivers/net/sfc/Makefile index 1d2daeec7ac..c8f5704c8fb 100644 --- a/drivers/net/sfc/Makefile +++ b/drivers/net/sfc/Makefile @@ -1,5 +1,5 @@ sfc-y += efx.o falcon.o tx.o rx.o falcon_xmac.o \ - i2c-direct.o selftest.o ethtool.o xfp_phy.o \ + selftest.o ethtool.o xfp_phy.o \ mdio_10g.o tenxpress.o boards.o sfe4001.o obj-$(CONFIG_SFC) += sfc.o diff --git a/drivers/net/sfc/bitfield.h b/drivers/net/sfc/bitfield.h index 2806201644c..2c79d27404e 100644 --- a/drivers/net/sfc/bitfield.h +++ b/drivers/net/sfc/bitfield.h @@ -483,7 +483,7 @@ typedef union efx_oword { #endif #define EFX_SET_OWORD_FIELD_VER(efx, oword, field, value) do { \ - if (FALCON_REV(efx) >= FALCON_REV_B0) { \ + if (falcon_rev(efx) >= FALCON_REV_B0) { \ EFX_SET_OWORD_FIELD((oword), field##_B0, (value)); \ } else { \ EFX_SET_OWORD_FIELD((oword), field##_A1, (value)); \ @@ -491,7 +491,7 @@ typedef union efx_oword { } while (0) #define EFX_QWORD_FIELD_VER(efx, qword, field) \ - (FALCON_REV(efx) >= FALCON_REV_B0 ? \ + (falcon_rev(efx) >= FALCON_REV_B0 ? \ EFX_QWORD_FIELD((qword), field##_B0) : \ EFX_QWORD_FIELD((qword), field##_A1)) @@ -501,8 +501,5 @@ typedef union efx_oword { #define DMA_ADDR_T_WIDTH (8 * sizeof(dma_addr_t)) #define EFX_DMA_TYPE_WIDTH(width) \ (((width) < DMA_ADDR_T_WIDTH) ? (width) : DMA_ADDR_T_WIDTH) -#define EFX_DMA_MAX_MASK ((DMA_ADDR_T_WIDTH == 64) ? \ - ~((u64) 0) : ~((u32) 0)) -#define EFX_DMA_MASK(mask) ((mask) & EFX_DMA_MAX_MASK) #endif /* EFX_BITFIELD_H */ diff --git a/drivers/net/sfc/boards.c b/drivers/net/sfc/boards.c index eecaa6d5858..d3d3dd0a117 100644 --- a/drivers/net/sfc/boards.c +++ b/drivers/net/sfc/boards.c @@ -27,10 +27,8 @@ static void blink_led_timer(unsigned long context) struct efx_blinker *bl = &efx->board_info.blinker; efx->board_info.set_fault_led(efx, bl->state); bl->state = !bl->state; - if (bl->resubmit) { - bl->timer.expires = jiffies + BLINK_INTERVAL; - add_timer(&bl->timer); - } + if (bl->resubmit) + mod_timer(&bl->timer, jiffies + BLINK_INTERVAL); } static void board_blink(struct efx_nic *efx, int blink) @@ -44,8 +42,7 @@ static void board_blink(struct efx_nic *efx, int blink) blinker->state = 0; setup_timer(&blinker->timer, blink_led_timer, (unsigned long)efx); - blinker->timer.expires = jiffies + BLINK_INTERVAL; - add_timer(&blinker->timer); + mod_timer(&blinker->timer, jiffies + BLINK_INTERVAL); } else { blinker->resubmit = 0; if (blinker->timer.function) @@ -112,7 +109,7 @@ static struct efx_board_data board_data[] = { [EFX_BOARD_INVALID] = {NULL, NULL, dummy_init}, [EFX_BOARD_SFE4001] = - {"SFE4001", "10GBASE-T adapter", sfe4001_poweron}, + {"SFE4001", "10GBASE-T adapter", sfe4001_init}, [EFX_BOARD_SFE4002] = {"SFE4002", "XFP adapter", sfe4002_init}, }; diff --git a/drivers/net/sfc/boards.h b/drivers/net/sfc/boards.h index 695764dc2e6..e5e844359ce 100644 --- a/drivers/net/sfc/boards.h +++ b/drivers/net/sfc/boards.h @@ -20,8 +20,7 @@ enum efx_board_type { }; extern int efx_set_board_info(struct efx_nic *efx, u16 revision_info); -extern int sfe4001_poweron(struct efx_nic *efx); -extern void sfe4001_poweroff(struct efx_nic *efx); +extern int sfe4001_init(struct efx_nic *efx); /* Are we putting the PHY into flash config mode */ extern unsigned int sfe4001_phy_flash_cfg; diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c index 418f2e53a95..74265d8553b 100644 --- a/drivers/net/sfc/efx.c +++ b/drivers/net/sfc/efx.c @@ -199,11 +199,12 @@ static inline int efx_process_channel(struct efx_channel *channel, int rx_quota) */ static inline void efx_channel_processed(struct efx_channel *channel) { - /* Write to EVQ_RPTR_REG. If a new event arrived in a race - * with finishing processing, a new interrupt will be raised. - */ + /* The interrupt handler for this channel may set work_pending + * as soon as we acknowledge the events we've seen. Make sure + * it's cleared before then. */ channel->work_pending = 0; - smp_wmb(); /* Ensure channel updated before any new interrupt. */ + smp_wmb(); + falcon_eventq_read_ack(channel); } @@ -265,7 +266,7 @@ void efx_process_channel_now(struct efx_channel *channel) napi_disable(&channel->napi_str); /* Poll the channel */ - (void) efx_process_channel(channel, efx->type->evq_size); + efx_process_channel(channel, efx->type->evq_size); /* Ack the eventq. This may cause an interrupt to be generated * when they are reenabled */ @@ -317,26 +318,6 @@ static void efx_remove_eventq(struct efx_channel *channel) * *************************************************************************/ -/* Setup per-NIC RX buffer parameters. - * Calculate the rx buffer allocation parameters required to support - * the current MTU, including padding for header alignment and overruns. - */ -static void efx_calc_rx_buffer_params(struct efx_nic *efx) -{ - unsigned int order, len; - - len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) + - EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + - efx->type->rx_buffer_padding); - - /* Calculate page-order */ - for (order = 0; ((1u << order) * PAGE_SIZE) < len; ++order) - ; - - efx->rx_buffer_len = len; - efx->rx_buffer_order = order; -} - static int efx_probe_channel(struct efx_channel *channel) { struct efx_tx_queue *tx_queue; @@ -387,7 +368,14 @@ static int efx_init_channels(struct efx_nic *efx) struct efx_channel *channel; int rc = 0; - efx_calc_rx_buffer_params(efx); + /* Calculate the rx buffer allocation parameters required to + * support the current MTU, including padding for header + * alignment and overruns. + */ + efx->rx_buffer_len = (max(EFX_PAGE_IP_ALIGN, NET_IP_ALIGN) + + EFX_MAX_FRAME_LEN(efx->net_dev->mtu) + + efx->type->rx_buffer_padding); + efx->rx_buffer_order = get_order(efx->rx_buffer_len); /* Initialise the channels */ efx_for_each_channel(channel, efx) { @@ -440,9 +428,12 @@ static void efx_start_channel(struct efx_channel *channel) netif_napi_add(channel->napi_dev, &channel->napi_str, efx_poll, napi_weight); + /* The interrupt handler for this channel may set work_pending + * as soon as we enable it. Make sure it's cleared before + * then. Similarly, make sure it sees the enabled flag set. */ channel->work_pending = 0; channel->enabled = 1; - smp_wmb(); /* ensure channel updated before first interrupt */ + smp_wmb(); napi_enable(&channel->napi_str); @@ -704,7 +695,7 @@ static void efx_stop_port(struct efx_nic *efx) mutex_unlock(&efx->mac_lock); /* Serialise against efx_set_multicast_list() */ - if (NET_DEV_REGISTERED(efx)) { + if (efx_dev_registered(efx)) { netif_tx_lock_bh(efx->net_dev); netif_tx_unlock_bh(efx->net_dev); } @@ -791,22 +782,23 @@ static int efx_init_io(struct efx_nic *efx) efx->membase = ioremap_nocache(efx->membase_phys, efx->type->mem_map_size); if (!efx->membase) { - EFX_ERR(efx, "could not map memory BAR %d at %lx+%x\n", - efx->type->mem_bar, efx->membase_phys, + EFX_ERR(efx, "could not map memory BAR %d at %llx+%x\n", + efx->type->mem_bar, + (unsigned long long)efx->membase_phys, efx->type->mem_map_size); rc = -ENOMEM; goto fail4; } - EFX_LOG(efx, "memory BAR %u at %lx+%x (virtual %p)\n", - efx->type->mem_bar, efx->membase_phys, efx->type->mem_map_size, - efx->membase); + EFX_LOG(efx, "memory BAR %u at %llx+%x (virtual %p)\n", + efx->type->mem_bar, (unsigned long long)efx->membase_phys, + efx->type->mem_map_size, efx->membase); return 0; fail4: release_mem_region(efx->membase_phys, efx->type->mem_map_size); fail3: - efx->membase_phys = 0UL; + efx->membase_phys = 0; fail2: pci_disable_device(efx->pci_dev); fail1: @@ -824,7 +816,7 @@ static void efx_fini_io(struct efx_nic *efx) if (efx->membase_phys) { pci_release_region(efx->pci_dev, efx->type->mem_bar); - efx->membase_phys = 0UL; + efx->membase_phys = 0; } pci_disable_device(efx->pci_dev); @@ -1043,7 +1035,7 @@ static void efx_start_all(struct efx_nic *efx) return; if ((efx->state != STATE_RUNNING) && (efx->state != STATE_INIT)) return; - if (NET_DEV_REGISTERED(efx) && !netif_running(efx->net_dev)) + if (efx_dev_registered(efx) && !netif_running(efx->net_dev)) return; /* Mark the port as enabled so port reconfigurations can start, then @@ -1073,9 +1065,8 @@ static void efx_flush_all(struct efx_nic *efx) cancel_delayed_work_sync(&efx->monitor_work); /* Ensure that all RX slow refills are complete. */ - efx_for_each_rx_queue(rx_queue, efx) { + efx_for_each_rx_queue(rx_queue, efx) cancel_delayed_work_sync(&rx_queue->work); - } /* Stop scheduled port reconfigurations */ cancel_work_sync(&efx->reconfigure_work); @@ -1101,9 +1092,10 @@ static void efx_stop_all(struct efx_nic *efx) falcon_disable_interrupts(efx); if (efx->legacy_irq) synchronize_irq(efx->legacy_irq); - efx_for_each_channel_with_interrupt(channel, efx) + efx_for_each_channel_with_interrupt(channel, efx) { if (channel->irq) synchronize_irq(channel->irq); + } /* Stop all NAPI processing and synchronous rx refills */ efx_for_each_channel(channel, efx) @@ -1125,7 +1117,7 @@ static void efx_stop_all(struct efx_nic *efx) /* Stop the kernel transmit interface late, so the watchdog * timer isn't ticking over the flush */ efx_stop_queue(efx); - if (NET_DEV_REGISTERED(efx)) { + if (efx_dev_registered(efx)) { netif_tx_lock_bh(efx->net_dev); netif_tx_unlock_bh(efx->net_dev); } @@ -1344,13 +1336,17 @@ static int efx_net_stop(struct net_device *net_dev) return 0; } -/* Context: process, dev_base_lock held, non-blocking. */ +/* Context: process, dev_base_lock or RTNL held, non-blocking. */ static struct net_device_stats *efx_net_stats(struct net_device *net_dev) { struct efx_nic *efx = net_dev->priv; struct efx_mac_stats *mac_stats = &efx->mac_stats; struct net_device_stats *stats = &net_dev->stats; + /* Update stats if possible, but do not wait if another thread + * is updating them (or resetting the NIC); slightly stale + * stats are acceptable. + */ if (!spin_trylock(&efx->stats_lock)) return stats; if (efx->state == STATE_RUNNING) { @@ -1494,7 +1490,7 @@ static void efx_set_multicast_list(struct net_device *net_dev) static int efx_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *net_dev = (struct net_device *)ptr; + struct net_device *net_dev = ptr; if (net_dev->open == efx_net_open && event == NETDEV_CHANGENAME) { struct efx_nic *efx = net_dev->priv; @@ -1563,7 +1559,7 @@ static void efx_unregister_netdev(struct efx_nic *efx) efx_for_each_tx_queue(tx_queue, efx) efx_release_tx_buffers(tx_queue); - if (NET_DEV_REGISTERED(efx)) { + if (efx_dev_registered(efx)) { strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name)); unregister_netdev(efx->net_dev); } @@ -1688,7 +1684,7 @@ static int efx_reset(struct efx_nic *efx) if (method == RESET_TYPE_DISABLE) { /* Reinitialise the device anyway so the driver unload sequence * can talk to the external SRAM */ - (void) falcon_init_nic(efx); + falcon_init_nic(efx); rc = -EIO; goto fail4; } @@ -1819,6 +1815,7 @@ static struct efx_board efx_dummy_board_info = { .init = efx_nic_dummy_op_int, .init_leds = efx_port_dummy_op_int, .set_fault_led = efx_port_dummy_op_blink, + .fini = efx_port_dummy_op_void, }; /************************************************************************** @@ -1945,6 +1942,7 @@ static void efx_pci_remove_main(struct efx_nic *efx) efx_fini_port(efx); /* Shutdown the board, then the NIC and board state */ + efx->board_info.fini(efx); falcon_fini_interrupt(efx); efx_fini_napi(efx); diff --git a/drivers/net/sfc/falcon.c b/drivers/net/sfc/falcon.c index b57cc68058c..8cb57987905 100644 --- a/drivers/net/sfc/falcon.c +++ b/drivers/net/sfc/falcon.c @@ -13,6 +13,8 @@ #include <linux/pci.h> #include <linux/module.h> #include <linux/seq_file.h> +#include <linux/i2c.h> +#include <linux/i2c-algo-bit.h> #include "net_driver.h" #include "bitfield.h" #include "efx.h" @@ -36,10 +38,12 @@ * struct falcon_nic_data - Falcon NIC state * @next_buffer_table: First available buffer table id * @pci_dev2: The secondary PCI device if present + * @i2c_data: Operations and state for I2C bit-bashing algorithm */ struct falcon_nic_data { unsigned next_buffer_table; struct pci_dev *pci_dev2; + struct i2c_algo_bit_data i2c_data; }; /************************************************************************** @@ -116,17 +120,8 @@ MODULE_PARM_DESC(rx_xon_thresh_bytes, "RX fifo XON threshold"); ************************************************************************** */ -/* DMA address mask (up to 46-bit, avoiding compiler warnings) - * - * Note that it is possible to have a platform with 64-bit longs and - * 32-bit DMA addresses, or vice versa. EFX_DMA_MASK takes care of the - * platform DMA mask. - */ -#if BITS_PER_LONG == 64 -#define FALCON_DMA_MASK EFX_DMA_MASK(0x00003fffffffffffUL) -#else -#define FALCON_DMA_MASK EFX_DMA_MASK(0x00003fffffffffffULL) -#endif +/* DMA address mask */ +#define FALCON_DMA_MASK DMA_BIT_MASK(46) /* TX DMA length mask (13-bit) */ #define FALCON_TX_DMA_MASK (4096 - 1) @@ -145,7 +140,7 @@ MODULE_PARM_DESC(rx_xon_thresh_bytes, "RX fifo XON threshold"); #define PCI_EXP_LNKSTA_LNK_WID_LBN 4 #define FALCON_IS_DUAL_FUNC(efx) \ - (FALCON_REV(efx) < FALCON_REV_B0) + (falcon_rev(efx) < FALCON_REV_B0) /************************************************************************** * @@ -184,39 +179,57 @@ static inline int falcon_event_present(efx_qword_t *event) * ************************************************************************** */ -static void falcon_setsdascl(struct efx_i2c_interface *i2c) +static void falcon_setsda(void *data, int state) +{ + struct efx_nic *efx = (struct efx_nic *)data; + efx_oword_t reg; + + falcon_read(efx, ®, GPIO_CTL_REG_KER); + EFX_SET_OWORD_FIELD(reg, GPIO3_OEN, !state); + falcon_write(efx, ®, GPIO_CTL_REG_KER); +} + +static void falcon_setscl(void *data, int state) { + struct efx_nic *efx = (struct efx_nic *)data; efx_oword_t reg; - falcon_read(i2c->efx, ®, GPIO_CTL_REG_KER); - EFX_SET_OWORD_FIELD(reg, GPIO0_OEN, (i2c->scl ? 0 : 1)); - EFX_SET_OWORD_FIELD(reg, GPIO3_OEN, (i2c->sda ? 0 : 1)); - falcon_write(i2c->efx, ®, GPIO_CTL_REG_KER); + falcon_read(efx, ®, GPIO_CTL_REG_KER); + EFX_SET_OWORD_FIELD(reg, GPIO0_OEN, !state); + falcon_write(efx, ®, GPIO_CTL_REG_KER); } -static int falcon_getsda(struct efx_i2c_interface *i2c) +static int falcon_getsda(void *data) { + struct efx_nic *efx = (struct efx_nic *)data; efx_oword_t reg; - falcon_read(i2c->efx, ®, GPIO_CTL_REG_KER); + falcon_read(efx, ®, GPIO_CTL_REG_KER); return EFX_OWORD_FIELD(reg, GPIO3_IN); } -static int falcon_getscl(struct efx_i2c_interface *i2c) +static int falcon_getscl(void *data) { + struct efx_nic *efx = (struct efx_nic *)data; efx_oword_t reg; - falcon_read(i2c->efx, ®, GPIO_CTL_REG_KER); - return EFX_DWORD_FIELD(reg, GPIO0_IN); + falcon_read(efx, ®, GPIO_CTL_REG_KER); + return EFX_OWORD_FIELD(reg, GPIO0_IN); } -static struct efx_i2c_bit_operations falcon_i2c_bit_operations = { - .setsda = falcon_setsdascl, - .setscl = falcon_setsdascl, +static struct i2c_algo_bit_data falcon_i2c_bit_operations = { + .setsda = falcon_setsda, + .setscl = falcon_setscl, .getsda = falcon_getsda, .getscl = falcon_getscl, - .udelay = 100, - .mdelay = 10, + .udelay = 5, + /* + * This is the number of system clock ticks after which + * i2c-algo-bit gives up waiting for SCL to become high. + * It must be at least 2 since the first tick can happen + * immediately after it starts waiting. + */ + .timeout = 2, }; /************************************************************************** @@ -465,7 +478,7 @@ int falcon_init_tx(struct efx_tx_queue *tx_queue) TX_DESCQ_TYPE, 0, TX_NON_IP_DROP_DIS_B0, 1); - if (FALCON_REV(efx) >= FALCON_REV_B0) { + if (falcon_rev(efx) >= FALCON_REV_B0) { int csum = !(efx->net_dev->features & NETIF_F_IP_CSUM); EFX_SET_OWORD_FIELD(tx_desc_ptr, TX_IP_CHKSM_DIS_B0, csum); EFX_SET_OWORD_FIELD(tx_desc_ptr, TX_TCP_CHKSM_DIS_B0, csum); @@ -474,7 +487,7 @@ int falcon_init_tx(struct efx_tx_queue *tx_queue) falcon_write_table(efx, &tx_desc_ptr, efx->type->txd_ptr_tbl_base, tx_queue->queue); - if (FALCON_REV(efx) < FALCON_REV_B0) { + if (falcon_rev(efx) < FALCON_REV_B0) { efx_oword_t reg; BUG_ON(tx_queue->queue >= 128); /* HW limit */ @@ -635,7 +648,7 @@ int falcon_init_rx(struct efx_rx_queue *rx_queue) efx_oword_t rx_desc_ptr; struct efx_nic *efx = rx_queue->efx; int rc; - int is_b0 = FALCON_REV(efx) >= FALCON_REV_B0; + int is_b0 = falcon_rev(efx) >= FALCON_REV_B0; int iscsi_digest_en = is_b0; EFX_LOG(efx, "RX queue %d ring in special buffers %d-%d\n", @@ -822,10 +835,10 @@ static inline void falcon_handle_tx_event(struct efx_channel *channel, tx_ev_q_label = EFX_QWORD_FIELD(*event, TX_EV_Q_LABEL); tx_queue = &efx->tx_queue[tx_ev_q_label]; - if (NET_DEV_REGISTERED(efx)) + if (efx_dev_registered(efx)) netif_tx_lock(efx->net_dev); falcon_notify_tx_desc(tx_queue); - if (NET_DEV_REGISTERED(efx)) + if (efx_dev_registered(efx)) netif_tx_unlock(efx->net_dev); } else if (EFX_QWORD_FIELD(*event, TX_EV_PKT_ERR) && EFX_WORKAROUND_10727(efx)) { @@ -884,7 +897,7 @@ static void falcon_handle_rx_not_ok(struct efx_rx_queue *rx_queue, RX_EV_TCP_UDP_CHKSUM_ERR); rx_ev_eth_crc_err = EFX_QWORD_FIELD(*event, RX_EV_ETH_CRC_ERR); rx_ev_frm_trunc = EFX_QWORD_FIELD(*event, RX_EV_FRM_TRUNC); - rx_ev_drib_nib = ((FALCON_REV(efx) >= FALCON_REV_B0) ? + rx_ev_drib_nib = ((falcon_rev(efx) >= FALCON_REV_B0) ? 0 : EFX_QWORD_FIELD(*event, RX_EV_DRIB_NIB)); rx_ev_pause_frm = EFX_QWORD_FIELD(*event, RX_EV_PAUSE_FRM_ERR); @@ -1065,7 +1078,7 @@ static void falcon_handle_global_event(struct efx_channel *channel, EFX_QWORD_FIELD(*event, XG_PHY_INTR)) is_phy_event = 1; - if ((FALCON_REV(efx) >= FALCON_REV_B0) && + if ((falcon_rev(efx) >= FALCON_REV_B0) && EFX_OWORD_FIELD(*event, XG_MNT_INTR_B0)) is_phy_event = 1; @@ -1405,7 +1418,7 @@ static inline void falcon_irq_ack_a1(struct efx_nic *efx) static irqreturn_t falcon_fatal_interrupt(struct efx_nic *efx) { struct falcon_nic_data *nic_data = efx->nic_data; - efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr; + efx_oword_t *int_ker = efx->irq_status.addr; efx_oword_t fatal_intr; int error, mem_perr; static int n_int_errors; @@ -1451,8 +1464,8 @@ out: */ static irqreturn_t falcon_legacy_interrupt_b0(int irq, void *dev_id) { - struct efx_nic *efx = (struct efx_nic *)dev_id; - efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr; + struct efx_nic *efx = dev_id; + efx_oword_t *int_ker = efx->irq_status.addr; struct efx_channel *channel; efx_dword_t reg; u32 queues; @@ -1489,8 +1502,8 @@ static irqreturn_t falcon_legacy_interrupt_b0(int irq, void *dev_id) static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id) { - struct efx_nic *efx = (struct efx_nic *)dev_id; - efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr; + struct efx_nic *efx = dev_id; + efx_oword_t *int_ker = efx->irq_status.addr; struct efx_channel *channel; int syserr; int queues; @@ -1542,9 +1555,9 @@ static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id) */ static irqreturn_t falcon_msi_interrupt(int irq, void *dev_id) { - struct efx_channel *channel = (struct efx_channel *)dev_id; + struct efx_channel *channel = dev_id; struct efx_nic *efx = channel->efx; - efx_oword_t *int_ker = (efx_oword_t *) efx->irq_status.addr; + efx_oword_t *int_ker = efx->irq_status.addr; int syserr; efx->last_irq_cpu = raw_smp_processor_id(); @@ -1572,7 +1585,7 @@ static void falcon_setup_rss_indir_table(struct efx_nic *efx) unsigned long offset; efx_dword_t dword; - if (FALCON_REV(efx) < FALCON_REV_B0) + if (falcon_rev(efx) < FALCON_REV_B0) return; for (offset = RX_RSS_INDIR_TBL_B0; @@ -1595,7 +1608,7 @@ int falcon_init_interrupt(struct efx_nic *efx) if (!EFX_INT_MODE_USE_MSI(efx)) { irq_handler_t handler; - if (FALCON_REV(efx) >= FALCON_REV_B0) + if (falcon_rev(efx) >= FALCON_REV_B0) handler = falcon_legacy_interrupt_b0; else handler = falcon_legacy_interrupt_a1; @@ -1636,12 +1649,13 @@ void falcon_fini_interrupt(struct efx_nic *efx) efx_oword_t reg; /* Disable MSI/MSI-X interrupts */ - efx_for_each_channel_with_interrupt(channel, efx) + efx_for_each_channel_with_interrupt(channel, efx) { if (channel->irq) free_irq(channel->irq, channel); + } /* ACK legacy interrupt */ - if (FALCON_REV(efx) >= FALCON_REV_B0) + if (falcon_rev(efx) >= FALCON_REV_B0) falcon_read(efx, ®, INT_ISR0_B0); else falcon_irq_ack_a1(efx); @@ -1732,7 +1746,7 @@ void falcon_drain_tx_fifo(struct efx_nic *efx) efx_oword_t temp; int count; - if ((FALCON_REV(efx) < FALCON_REV_B0) || + if ((falcon_rev(efx) < FALCON_REV_B0) || (efx->loopback_mode != LOOPBACK_NONE)) return; @@ -1785,7 +1799,7 @@ void falcon_deconfigure_mac_wrapper(struct efx_nic *efx) { efx_oword_t temp; - if (FALCON_REV(efx) < FALCON_REV_B0) + if (falcon_rev(efx) < FALCON_REV_B0) return; /* Isolate the MAC -> RX */ @@ -1823,7 +1837,7 @@ void falcon_reconfigure_mac_wrapper(struct efx_nic *efx) MAC_SPEED, link_speed); /* On B0, MAC backpressure can be disabled and packets get * discarded. */ - if (FALCON_REV(efx) >= FALCON_REV_B0) { + if (falcon_rev(efx) >= FALCON_REV_B0) { EFX_SET_OWORD_FIELD(reg, TXFIFO_DRAIN_EN_B0, !efx->link_up); } @@ -1841,7 +1855,7 @@ void falcon_reconfigure_mac_wrapper(struct efx_nic *efx) EFX_SET_OWORD_FIELD_VER(efx, reg, RX_XOFF_MAC_EN, tx_fc); /* Unisolate the MAC -> RX */ - if (FALCON_REV(efx) >= FALCON_REV_B0) + if (falcon_rev(efx) >= FALCON_REV_B0) EFX_SET_OWORD_FIELD(reg, RX_INGR_EN_B0, 1); falcon_write(efx, ®, RX_CFG_REG_KER); } @@ -1856,7 +1870,7 @@ int falcon_dma_stats(struct efx_nic *efx, unsigned int done_offset) return 0; /* Statistics fetch will fail if the MAC is in TX drain */ - if (FALCON_REV(efx) >= FALCON_REV_B0) { + if (falcon_rev(efx) >= FALCON_REV_B0) { efx_oword_t temp; falcon_read(efx, &temp, MAC0_CTRL_REG_KER); if (EFX_OWORD_FIELD(temp, TXFIFO_DRAIN_EN_B0)) @@ -1940,7 +1954,7 @@ static int falcon_gmii_wait(struct efx_nic *efx) static void falcon_mdio_write(struct net_device *net_dev, int phy_id, int addr, int value) { - struct efx_nic *efx = (struct efx_nic *)net_dev->priv; + struct efx_nic *efx = net_dev->priv; unsigned int phy_id2 = phy_id & FALCON_PHY_ID_ID_MASK; efx_oword_t reg; @@ -2008,7 +2022,7 @@ static void falcon_mdio_write(struct net_device *net_dev, int phy_id, * could be read, -1 will be returned. */ static int falcon_mdio_read(struct net_device *net_dev, int phy_id, int addr) { - struct efx_nic *efx = (struct efx_nic *)net_dev->priv; + struct efx_nic *efx = net_dev->priv; unsigned int phy_addr = phy_id & FALCON_PHY_ID_ID_MASK; efx_oword_t reg; int value = -1; @@ -2113,7 +2127,7 @@ int falcon_probe_port(struct efx_nic *efx) falcon_init_mdio(&efx->mii); /* Hardware flow ctrl. FalconA RX FIFO too small for pause generation */ - if (FALCON_REV(efx) >= FALCON_REV_B0) + if (falcon_rev(efx) >= FALCON_REV_B0) efx->flow_control = EFX_FC_RX | EFX_FC_TX; else efx->flow_control = EFX_FC_RX; @@ -2373,7 +2387,7 @@ static int falcon_probe_nic_variant(struct efx_nic *efx) return -ENODEV; } - switch (FALCON_REV(efx)) { + switch (falcon_rev(efx)) { case FALCON_REV_A0: case 0xff: EFX_ERR(efx, "Falcon rev A0 not supported\n"); @@ -2399,7 +2413,7 @@ static int falcon_probe_nic_variant(struct efx_nic *efx) break; default: - EFX_ERR(efx, "Unknown Falcon rev %d\n", FALCON_REV(efx)); + EFX_ERR(efx, "Unknown Falcon rev %d\n", falcon_rev(efx)); return -ENODEV; } @@ -2411,15 +2425,9 @@ int falcon_probe_nic(struct efx_nic *efx) struct falcon_nic_data *nic_data; int rc; - /* Initialise I2C interface state */ - efx->i2c.efx = efx; - efx->i2c.op = &falcon_i2c_bit_operations; - efx->i2c.sda = 1; - efx->i2c.scl = 1; - /* Allocate storage for hardware specific data */ nic_data = kzalloc(sizeof(*nic_data), GFP_KERNEL); - efx->nic_data = (void *) nic_data; + efx->nic_data = nic_data; /* Determine number of ports etc. */ rc = falcon_probe_nic_variant(efx); @@ -2467,6 +2475,18 @@ int falcon_probe_nic(struct efx_nic *efx) if (rc) goto fail5; + /* Initialise I2C adapter */ + efx->i2c_adap.owner = THIS_MODULE; + efx->i2c_adap.class = I2C_CLASS_HWMON; + nic_data->i2c_data = falcon_i2c_bit_operations; + nic_data->i2c_data.data = efx; + efx->i2c_adap.algo_data = &nic_data->i2c_data; + efx->i2c_adap.dev.parent = &efx->pci_dev->dev; + strcpy(efx->i2c_adap.name, "SFC4000 GPIO"); + rc = i2c_bit_add_bus(&efx->i2c_adap); + if (rc) + goto fail5; + return 0; fail5: @@ -2489,13 +2509,10 @@ int falcon_probe_nic(struct efx_nic *efx) */ int falcon_init_nic(struct efx_nic *efx) { - struct falcon_nic_data *data; efx_oword_t temp; unsigned thresh; int rc; - data = (struct falcon_nic_data *)efx->nic_data; - /* Set up the address region register. This is only needed * for the B0 FPGA, but since we are just pushing in the * reset defaults this may as well be unconditional. */ @@ -2562,7 +2579,7 @@ int falcon_init_nic(struct efx_nic *efx) /* Set number of RSS queues for receive path. */ falcon_read(efx, &temp, RX_FILTER_CTL_REG); - if (FALCON_REV(efx) >= FALCON_REV_B0) + if (falcon_rev(efx) >= FALCON_REV_B0) EFX_SET_OWORD_FIELD(temp, NUM_KER, 0); else EFX_SET_OWORD_FIELD(temp, NUM_KER, efx->rss_queues - 1); @@ -2600,7 +2617,7 @@ int falcon_init_nic(struct efx_nic *efx) /* Prefetch threshold 2 => fetch when descriptor cache half empty */ EFX_SET_OWORD_FIELD(temp, TX_PREF_THRESHOLD, 2); /* Squash TX of packets of 16 bytes or less */ - if (FALCON_REV(efx) >= FALCON_REV_B0 && EFX_WORKAROUND_9141(efx)) + if (falcon_rev(efx) >= FALCON_REV_B0 && EFX_WORKAROUND_9141(efx)) EFX_SET_OWORD_FIELD(temp, TX_FLUSH_MIN_LEN_EN_B0, 1); falcon_write(efx, &temp, TX_CFG2_REG_KER); @@ -2617,7 +2634,7 @@ int falcon_init_nic(struct efx_nic *efx) if (EFX_WORKAROUND_7575(efx)) EFX_SET_OWORD_FIELD_VER(efx, temp, RX_USR_BUF_SIZE, (3 * 4096) / 32); - if (FALCON_REV(efx) >= FALCON_REV_B0) + if (falcon_rev(efx) >= FALCON_REV_B0) EFX_SET_OWORD_FIELD(temp, RX_INGR_EN_B0, 1); /* RX FIFO flow control thresholds */ @@ -2633,7 +2650,7 @@ int falcon_init_nic(struct efx_nic *efx) falcon_write(efx, &temp, RX_CFG_REG_KER); /* Set destination of both TX and RX Flush events */ - if (FALCON_REV(efx) >= FALCON_REV_B0) { + if (falcon_rev(efx) >= FALCON_REV_B0) { EFX_POPULATE_OWORD_1(temp, FLS_EVQ_ID, 0); falcon_write(efx, &temp, DP_CTRL_REG); } @@ -2644,10 +2661,14 @@ int falcon_init_nic(struct efx_nic *efx) void falcon_remove_nic(struct efx_nic *efx) { struct falcon_nic_data *nic_data = efx->nic_data; + int rc; + + rc = i2c_del_adapter(&efx->i2c_adap); + BUG_ON(rc); falcon_free_buffer(efx, &efx->irq_status); - (void) falcon_reset_hw(efx, RESET_TYPE_ALL); + falcon_reset_hw(efx, RESET_TYPE_ALL); /* Release the second function after the reset */ if (nic_data->pci_dev2) { diff --git a/drivers/net/sfc/falcon.h b/drivers/net/sfc/falcon.h index 6117403b0c0..492f9bc2884 100644 --- a/drivers/net/sfc/falcon.h +++ b/drivers/net/sfc/falcon.h @@ -23,7 +23,10 @@ enum falcon_revision { FALCON_REV_B0 = 2, }; -#define FALCON_REV(efx) ((efx)->pci_dev->revision) +static inline int falcon_rev(struct efx_nic *efx) +{ + return efx->pci_dev->revision; +} extern struct efx_nic_type falcon_a_nic_type; extern struct efx_nic_type falcon_b_nic_type; diff --git a/drivers/net/sfc/falcon_hwdefs.h b/drivers/net/sfc/falcon_hwdefs.h index 06e2d68fc3d..6d003114eea 100644 --- a/drivers/net/sfc/falcon_hwdefs.h +++ b/drivers/net/sfc/falcon_hwdefs.h @@ -1125,7 +1125,7 @@ struct falcon_nvconfig_board_v2 { u8 port1_phy_type; __le16 asic_sub_revision; __le16 board_revision; -} __attribute__ ((packed)); +} __packed; #define NVCONFIG_BASE 0x300 #define NVCONFIG_BOARD_MAGIC_NUM 0xFA1C @@ -1144,6 +1144,6 @@ struct falcon_nvconfig { __le16 board_struct_ver; __le16 board_checksum; struct falcon_nvconfig_board_v2 board_v2; -} __attribute__ ((packed)); +} __packed; #endif /* EFX_FALCON_HWDEFS_H */ diff --git a/drivers/net/sfc/falcon_io.h b/drivers/net/sfc/falcon_io.h index ea08184ddfa..6670cdfc41a 100644 --- a/drivers/net/sfc/falcon_io.h +++ b/drivers/net/sfc/falcon_io.h @@ -56,14 +56,27 @@ #define FALCON_USE_QWORD_IO 1 #endif -#define _falcon_writeq(efx, value, reg) \ - __raw_writeq((__force u64) (value), (efx)->membase + (reg)) -#define _falcon_writel(efx, value, reg) \ - __raw_writel((__force u32) (value), (efx)->membase + (reg)) -#define _falcon_readq(efx, reg) \ - ((__force __le64) __raw_readq((efx)->membase + (reg))) -#define _falcon_readl(efx, reg) \ - ((__force __le32) __raw_readl((efx)->membase + (reg))) +#ifdef FALCON_USE_QWORD_IO +static inline void _falcon_writeq(struct efx_nic *efx, __le64 value, + unsigned int reg) +{ + __raw_writeq((__force u64)value, efx->membase + reg); +} +static inline __le64 _falcon_readq(struct efx_nic *efx, unsigned int reg) +{ + return (__force __le64)__raw_readq(efx->membase + reg); +} +#endif + +static inline void _falcon_writel(struct efx_nic *efx, __le32 value, + unsigned int reg) +{ + __raw_writel((__force u32)value, efx->membase + reg); +} +static inline __le32 _falcon_readl(struct efx_nic *efx, unsigned int reg) +{ + return (__force __le32)__raw_readl(efx->membase + reg); +} /* Writes to a normal 16-byte Falcon register, locking as appropriate. */ static inline void falcon_write(struct efx_nic *efx, efx_oword_t *value, diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c index a74b7931a3c..dbdcee4b0f8 100644 --- a/drivers/net/sfc/falcon_xmac.c +++ b/drivers/net/sfc/falcon_xmac.c @@ -221,7 +221,7 @@ static int falcon_xgmii_status(struct efx_nic *efx) { efx_dword_t reg; - if (FALCON_REV(efx) < FALCON_REV_B0) + if (falcon_rev(efx) < FALCON_REV_B0) return 1; /* The ISR latches, so clear it and re-read */ @@ -241,7 +241,7 @@ static void falcon_mask_status_intr(struct efx_nic *efx, int enable) { efx_dword_t reg; - if ((FALCON_REV(efx) < FALCON_REV_B0) || LOOPBACK_INTERNAL(efx)) + if ((falcon_rev(efx) < FALCON_REV_B0) || LOOPBACK_INTERNAL(efx)) return; /* Flush the ISR */ @@ -454,7 +454,7 @@ static int falcon_check_xaui_link_up(struct efx_nic *efx) EFX_LOG(efx, "%s Clobbering XAUI (%d tries left).\n", __func__, tries); - (void) falcon_reset_xaui(efx); + falcon_reset_xaui(efx); udelay(200); tries--; } @@ -572,7 +572,7 @@ int falcon_check_xmac(struct efx_nic *efx) xaui_link_ok = falcon_xaui_link_ok(efx); if (EFX_WORKAROUND_5147(efx) && !xaui_link_ok) - (void) falcon_reset_xaui(efx); + falcon_reset_xaui(efx); /* Call the PHY check_hw routine */ rc = efx->phy_op->check_hw(efx); @@ -639,7 +639,7 @@ int falcon_xmac_set_pause(struct efx_nic *efx, enum efx_fc_type flow_control) reset = ((flow_control & EFX_FC_TX) && !(efx->flow_control & EFX_FC_TX)); if (EFX_WORKAROUND_11482(efx) && reset) { - if (FALCON_REV(efx) >= FALCON_REV_B0) { + if (falcon_rev(efx) >= FALCON_REV_B0) { /* Recover by resetting the EM block */ if (efx->link_up) falcon_drain_tx_fifo(efx); diff --git a/drivers/net/sfc/i2c-direct.c b/drivers/net/sfc/i2c-direct.c deleted file mode 100644 index b6c62d0ed9c..00000000000 --- a/drivers/net/sfc/i2c-direct.c +++ /dev/null @@ -1,381 +0,0 @@ -/**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2005 Fen Systems Ltd. - * Copyright 2006-2008 Solarflare Communications Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation, incorporated herein by reference. - */ - -#include <linux/delay.h> -#include "net_driver.h" -#include "i2c-direct.h" - -/* - * I2C data (SDA) and clock (SCL) line read/writes with appropriate - * delays. - */ - -static inline void setsda(struct efx_i2c_interface *i2c, int state) -{ - udelay(i2c->op->udelay); - i2c->sda = state; - i2c->op->setsda(i2c); - udelay(i2c->op->udelay); -} - -static inline void setscl(struct efx_i2c_interface *i2c, int state) -{ - udelay(i2c->op->udelay); - i2c->scl = state; - i2c->op->setscl(i2c); - udelay(i2c->op->udelay); -} - -static inline int getsda(struct efx_i2c_interface *i2c) -{ - int sda; - - udelay(i2c->op->udelay); - sda = i2c->op->getsda(i2c); - udelay(i2c->op->udelay); - return sda; -} - -static inline int getscl(struct efx_i2c_interface *i2c) -{ - int scl; - - udelay(i2c->op->udelay); - scl = i2c->op->getscl(i2c); - udelay(i2c->op->udelay); - return scl; -} - -/* - * I2C low-level protocol operations - * - */ - -static inline void i2c_release(struct efx_i2c_interface *i2c) -{ - EFX_WARN_ON_PARANOID(!i2c->scl); - EFX_WARN_ON_PARANOID(!i2c->sda); - /* Devices may time out if operations do not end */ - setscl(i2c, 1); - setsda(i2c, 1); - EFX_BUG_ON_PARANOID(getsda(i2c) != 1); - EFX_BUG_ON_PARANOID(getscl(i2c) != 1); -} - -static inline void i2c_start(struct efx_i2c_interface *i2c) -{ - /* We may be restarting immediately after a {send,recv}_bit, - * so SCL will not necessarily already be high. - */ - EFX_WARN_ON_PARANOID(!i2c->sda); - setscl(i2c, 1); - setsda(i2c, 0); - setscl(i2c, 0); - setsda(i2c, 1); -} - -static inline void i2c_send_bit(struct efx_i2c_interface *i2c, int bit) -{ - EFX_WARN_ON_PARANOID(i2c->scl != 0); - setsda(i2c, bit); - setscl(i2c, 1); - setscl(i2c, 0); - setsda(i2c, 1); -} - -static inline int i2c_recv_bit(struct efx_i2c_interface *i2c) -{ - int bit; - - EFX_WARN_ON_PARANOID(i2c->scl != 0); - EFX_WARN_ON_PARANOID(!i2c->sda); - setscl(i2c, 1); - bit = getsda(i2c); - setscl(i2c, 0); - return bit; -} - -static inline void i2c_stop(struct efx_i2c_interface *i2c) -{ - EFX_WARN_ON_PARANOID(i2c->scl != 0); - setsda(i2c, 0); - setscl(i2c, 1); - setsda(i2c, 1); -} - -/* - * I2C mid-level protocol operations - * - */ - -/* Sends a byte via the I2C bus and checks for an acknowledgement from - * the slave device. - */ -static int i2c_send_byte(struct efx_i2c_interface *i2c, u8 byte) -{ - int i; - - /* Send byte */ - for (i = 0; i < 8; i++) { - i2c_send_bit(i2c, !!(byte & 0x80)); - byte <<= 1; - } - - /* Check for acknowledgement from slave */ - return (i2c_recv_bit(i2c) == 0 ? 0 : -EIO); -} - -/* Receives a byte via the I2C bus and sends ACK/NACK to the slave device. */ -static u8 i2c_recv_byte(struct efx_i2c_interface *i2c, int ack) -{ - u8 value = 0; - int i; - - /* Receive byte */ - for (i = 0; i < 8; i++) - value = (value << 1) | i2c_recv_bit(i2c); - - /* Send ACK/NACK */ - i2c_send_bit(i2c, (ack ? 0 : 1)); - - return value; -} - -/* Calculate command byte for a read operation */ -static inline u8 i2c_read_cmd(u8 device_id) -{ - return ((device_id << 1) | 1); -} - -/* Calculate command byte for a write operation */ -static inline u8 i2c_write_cmd(u8 device_id) -{ - return ((device_id << 1) | 0); -} - -int efx_i2c_check_presence(struct efx_i2c_interface *i2c, u8 device_id) -{ - int rc; - - /* If someone is driving the bus low we just give up. */ - if (getsda(i2c) == 0 || getscl(i2c) == 0) { - EFX_ERR(i2c->efx, "%s someone is holding the I2C bus low." - " Giving up.\n", __func__); - return -EFAULT; - } - - /* Pretend to initiate a device write */ - i2c_start(i2c); - rc = i2c_send_byte(i2c, i2c_write_cmd(device_id)); - if (rc) - goto out; - - out: - i2c_stop(i2c); - i2c_release(i2c); - - return rc; -} - -/* This performs a fast read of one or more consecutive bytes from an - * I2C device. Not all devices support consecutive reads of more than - * one byte; for these devices use efx_i2c_read() instead. - */ -int efx_i2c_fast_read(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, u8 *data, unsigned int len) -{ - int i; - int rc; - - EFX_WARN_ON_PARANOID(getsda(i2c) != 1); - EFX_WARN_ON_PARANOID(getscl(i2c) != 1); - EFX_WARN_ON_PARANOID(data == NULL); - EFX_WARN_ON_PARANOID(len < 1); - - /* Select device and starting offset */ - i2c_start(i2c); - rc = i2c_send_byte(i2c, i2c_write_cmd(device_id)); - if (rc) - goto out; - rc = i2c_send_byte(i2c, offset); - if (rc) - goto out; - - /* Read data from device */ - i2c_start(i2c); - rc = i2c_send_byte(i2c, i2c_read_cmd(device_id)); - if (rc) - goto out; - for (i = 0; i < (len - 1); i++) - /* Read and acknowledge all but the last byte */ - data[i] = i2c_recv_byte(i2c, 1); - /* Read last byte with no acknowledgement */ - data[i] = i2c_recv_byte(i2c, 0); - - out: - i2c_stop(i2c); - i2c_release(i2c); - - return rc; -} - -/* This performs a fast write of one or more consecutive bytes to an - * I2C device. Not all devices support consecutive writes of more - * than one byte; for these devices use efx_i2c_write() instead. - */ -int efx_i2c_fast_write(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, - const u8 *data, unsigned int len) -{ - int i; - int rc; - - EFX_WARN_ON_PARANOID(getsda(i2c) != 1); - EFX_WARN_ON_PARANOID(getscl(i2c) != 1); - EFX_WARN_ON_PARANOID(len < 1); - - /* Select device and starting offset */ - i2c_start(i2c); - rc = i2c_send_byte(i2c, i2c_write_cmd(device_id)); - if (rc) - goto out; - rc = i2c_send_byte(i2c, offset); - if (rc) - goto out; - - /* Write data to device */ - for (i = 0; i < len; i++) { - rc = i2c_send_byte(i2c, data[i]); - if (rc) - goto out; - } - - out: - i2c_stop(i2c); - i2c_release(i2c); - - return rc; -} - -/* I2C byte-by-byte read */ -int efx_i2c_read(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, u8 *data, unsigned int len) -{ - int rc; - - /* i2c_fast_read with length 1 is a single byte read */ - for (; len > 0; offset++, data++, len--) { - rc = efx_i2c_fast_read(i2c, device_id, offset, data, 1); - if (rc) - return rc; - } - - return 0; -} - -/* I2C byte-by-byte write */ -int efx_i2c_write(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, const u8 *data, unsigned int len) -{ - int rc; - - /* i2c_fast_write with length 1 is a single byte write */ - for (; len > 0; offset++, data++, len--) { - rc = efx_i2c_fast_write(i2c, device_id, offset, data, 1); - if (rc) - return rc; - mdelay(i2c->op->mdelay); - } - - return 0; -} - - -/* This is just a slightly neater wrapper round efx_i2c_fast_write - * in the case where the target doesn't take an offset - */ -int efx_i2c_send_bytes(struct efx_i2c_interface *i2c, - u8 device_id, const u8 *data, unsigned int len) -{ - return efx_i2c_fast_write(i2c, device_id, data[0], data + 1, len - 1); -} - -/* I2C receiving of bytes - does not send an offset byte */ -int efx_i2c_recv_bytes(struct efx_i2c_interface *i2c, u8 device_id, - u8 *bytes, unsigned int len) -{ - int i; - int rc; - - EFX_WARN_ON_PARANOID(getsda(i2c) != 1); - EFX_WARN_ON_PARANOID(getscl(i2c) != 1); - EFX_WARN_ON_PARANOID(len < 1); - - /* Select device */ - i2c_start(i2c); - - /* Read data from device */ - rc = i2c_send_byte(i2c, i2c_read_cmd(device_id)); - if (rc) - goto out; - - for (i = 0; i < (len - 1); i++) - /* Read and acknowledge all but the last byte */ - bytes[i] = i2c_recv_byte(i2c, 1); - /* Read last byte with no acknowledgement */ - bytes[i] = i2c_recv_byte(i2c, 0); - - out: - i2c_stop(i2c); - i2c_release(i2c); - - return rc; -} - -/* SMBus and some I2C devices will time out if the I2C clock is - * held low for too long. This is most likely to happen in virtualised - * systems (when the entire domain is descheduled) but could in - * principle happen due to preemption on any busy system (and given the - * potential length of an I2C operation turning preemption off is not - * a sensible option). The following functions deal with the failure by - * retrying up to a fixed number of times. - */ - -#define I2C_MAX_RETRIES (10) - -/* The timeout problem will result in -EIO. If the wrapped function - * returns any other error, pass this up and do not retry. */ -#define RETRY_WRAPPER(_f) \ - int retries = I2C_MAX_RETRIES; \ - int rc; \ - while (retries) { \ - rc = _f; \ - if (rc != -EIO) \ - return rc; \ - retries--; \ - } \ - return rc; \ - -int efx_i2c_check_presence_retry(struct efx_i2c_interface *i2c, u8 device_id) -{ - RETRY_WRAPPER(efx_i2c_check_presence(i2c, device_id)) -} - -int efx_i2c_read_retry(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, u8 *data, unsigned int len) -{ - RETRY_WRAPPER(efx_i2c_read(i2c, device_id, offset, data, len)) -} - -int efx_i2c_write_retry(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, const u8 *data, unsigned int len) -{ - RETRY_WRAPPER(efx_i2c_write(i2c, device_id, offset, data, len)) -} diff --git a/drivers/net/sfc/i2c-direct.h b/drivers/net/sfc/i2c-direct.h deleted file mode 100644 index 291e561071f..00000000000 --- a/drivers/net/sfc/i2c-direct.h +++ /dev/null @@ -1,91 +0,0 @@ -/**************************************************************************** - * Driver for Solarflare Solarstorm network controllers and boards - * Copyright 2005 Fen Systems Ltd. - * Copyright 2006 Solarflare Communications Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation, incorporated herein by reference. - */ - -#ifndef EFX_I2C_DIRECT_H -#define EFX_I2C_DIRECT_H - -#include "net_driver.h" - -/* - * Direct control of an I2C bus - */ - -struct efx_i2c_interface; - -/** - * struct efx_i2c_bit_operations - I2C bus direct control methods - * - * I2C bus direct control methods. - * - * @setsda: Set state of SDA line - * @setscl: Set state of SCL line - * @getsda: Get state of SDA line - * @getscl: Get state of SCL line - * @udelay: Delay between each bit operation - * @mdelay: Delay between each byte write - */ -struct efx_i2c_bit_operations { - void (*setsda) (struct efx_i2c_interface *i2c); - void (*setscl) (struct efx_i2c_interface *i2c); - int (*getsda) (struct efx_i2c_interface *i2c); - int (*getscl) (struct efx_i2c_interface *i2c); - unsigned int udelay; - unsigned int mdelay; -}; - -/** - * struct efx_i2c_interface - an I2C interface - * - * An I2C interface. - * - * @efx: Attached Efx NIC - * @op: I2C bus control methods - * @sda: Current output state of SDA line - * @scl: Current output state of SCL line - */ -struct efx_i2c_interface { - struct efx_nic *efx; - struct efx_i2c_bit_operations *op; - unsigned int sda:1; - unsigned int scl:1; -}; - -extern int efx_i2c_check_presence(struct efx_i2c_interface *i2c, u8 device_id); -extern int efx_i2c_fast_read(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, - u8 *data, unsigned int len); -extern int efx_i2c_fast_write(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, - const u8 *data, unsigned int len); -extern int efx_i2c_read(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, u8 *data, unsigned int len); -extern int efx_i2c_write(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, - const u8 *data, unsigned int len); - -extern int efx_i2c_send_bytes(struct efx_i2c_interface *i2c, u8 device_id, - const u8 *bytes, unsigned int len); - -extern int efx_i2c_recv_bytes(struct efx_i2c_interface *i2c, u8 device_id, - u8 *bytes, unsigned int len); - - -/* Versions of the API that retry on failure. */ -extern int efx_i2c_check_presence_retry(struct efx_i2c_interface *i2c, - u8 device_id); - -extern int efx_i2c_read_retry(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, u8 *data, unsigned int len); - -extern int efx_i2c_write_retry(struct efx_i2c_interface *i2c, - u8 device_id, u8 offset, - const u8 *data, unsigned int len); - -#endif /* EFX_I2C_DIRECT_H */ diff --git a/drivers/net/sfc/net_driver.h b/drivers/net/sfc/net_driver.h index 59f261b4171..d803b86c647 100644 --- a/drivers/net/sfc/net_driver.h +++ b/drivers/net/sfc/net_driver.h @@ -26,10 +26,10 @@ #include <linux/highmem.h> #include <linux/workqueue.h> #include <linux/inet_lro.h> +#include <linux/i2c.h> #include "enum.h" #include "bitfield.h" -#include "i2c-direct.h" #define EFX_MAX_LRO_DESCRIPTORS 8 #define EFX_MAX_LRO_AGGR MAX_SKB_FRAGS @@ -42,7 +42,7 @@ #ifndef EFX_DRIVER_NAME #define EFX_DRIVER_NAME "sfc" #endif -#define EFX_DRIVER_VERSION "2.2.0136" +#define EFX_DRIVER_VERSION "2.2" #ifdef EFX_ENABLE_DEBUG #define EFX_BUG_ON_PARANOID(x) BUG_ON(x) @@ -52,28 +52,19 @@ #define EFX_WARN_ON_PARANOID(x) do {} while (0) #endif -#define NET_DEV_REGISTERED(efx) \ - ((efx)->net_dev->reg_state == NETREG_REGISTERED) - -/* Include net device name in log messages if it has been registered. - * Use efx->name not efx->net_dev->name so that races with (un)registration - * are harmless. - */ -#define NET_DEV_NAME(efx) (NET_DEV_REGISTERED(efx) ? (efx)->name : "") - /* Un-rate-limited logging */ #define EFX_ERR(efx, fmt, args...) \ -dev_err(&((efx)->pci_dev->dev), "ERR: %s " fmt, NET_DEV_NAME(efx), ##args) +dev_err(&((efx)->pci_dev->dev), "ERR: %s " fmt, efx_dev_name(efx), ##args) #define EFX_INFO(efx, fmt, args...) \ -dev_info(&((efx)->pci_dev->dev), "INFO: %s " fmt, NET_DEV_NAME(efx), ##args) +dev_info(&((efx)->pci_dev->dev), "INFO: %s " fmt, efx_dev_name(efx), ##args) #ifdef EFX_ENABLE_DEBUG #define EFX_LOG(efx, fmt, args...) \ -dev_info(&((efx)->pci_dev->dev), "DBG: %s " fmt, NET_DEV_NAME(efx), ##args) +dev_info(&((efx)->pci_dev->dev), "DBG: %s " fmt, efx_dev_name(efx), ##args) #else #define EFX_LOG(efx, fmt, args...) \ -dev_dbg(&((efx)->pci_dev->dev), "DBG: %s " fmt, NET_DEV_NAME(efx), ##args) +dev_dbg(&((efx)->pci_dev->dev), "DBG: %s " fmt, efx_dev_name(efx), ##args) #endif #define EFX_TRACE(efx, fmt, args...) do {} while (0) @@ -90,11 +81,6 @@ do {if (net_ratelimit()) EFX_INFO(efx, fmt, ##args); } while (0) #define EFX_LOG_RL(efx, fmt, args...) \ do {if (net_ratelimit()) EFX_LOG(efx, fmt, ##args); } while (0) -/* Kernel headers may redefine inline anyway */ -#ifndef inline -#define inline inline __attribute__ ((always_inline)) -#endif - /************************************************************************** * * Efx data structures @@ -432,7 +418,10 @@ struct efx_blinker { * @init_leds: Sets up board LEDs * @set_fault_led: Turns the fault LED on or off * @blink: Starts/stops blinking + * @fini: Cleanup function * @blinker: used to blink LEDs in software + * @hwmon_client: I2C client for hardware monitor + * @ioexp_client: I2C client for power/port control */ struct efx_board { int type; @@ -445,7 +434,9 @@ struct efx_board { int (*init_leds)(struct efx_nic *efx); void (*set_fault_led) (struct efx_nic *efx, int state); void (*blink) (struct efx_nic *efx, int start); + void (*fini) (struct efx_nic *nic); struct efx_blinker blinker; + struct i2c_client *hwmon_client, *ioexp_client; }; #define STRING_TABLE_LOOKUP(val, member) \ @@ -632,7 +623,7 @@ union efx_multicast_hash { * @membase: Memory BAR value * @biu_lock: BIU (bus interface unit) lock * @interrupt_mode: Interrupt mode - * @i2c: I2C interface + * @i2c_adap: I2C adapter * @board_info: Board-level information * @state: Device state flag. Serialised by the rtnl_lock. * @reset_pending: Pending reset method (normally RESET_TYPE_NONE) @@ -695,12 +686,12 @@ struct efx_nic { struct workqueue_struct *workqueue; struct work_struct reset_work; struct delayed_work monitor_work; - unsigned long membase_phys; + resource_size_t membase_phys; void __iomem *membase; spinlock_t biu_lock; enum efx_int_mode interrupt_mode; - struct efx_i2c_interface i2c; + struct i2c_adapter i2c_adap; struct efx_board board_info; enum nic_state state; @@ -719,7 +710,7 @@ struct efx_nic { unsigned n_rx_nodesc_drop_cnt; - void *nic_data; + struct falcon_nic_data *nic_data; struct mutex mac_lock; int port_enabled; @@ -760,6 +751,20 @@ struct efx_nic { void *loopback_selftest; }; +static inline int efx_dev_registered(struct efx_nic *efx) +{ + return efx->net_dev->reg_state == NETREG_REGISTERED; +} + +/* Net device name, for inclusion in log messages if it has been registered. + * Use efx->name not efx->net_dev->name so that races with (un)registration + * are harmless. + */ +static inline const char *efx_dev_name(struct efx_nic *efx) +{ + return efx_dev_registered(efx) ? efx->name : ""; +} + /** * struct efx_nic_type - Efx device type definition * @mem_bar: Memory BAR number @@ -795,7 +800,7 @@ struct efx_nic_type { unsigned int txd_ring_mask; unsigned int rxd_ring_mask; unsigned int evq_size; - dma_addr_t max_dma_mask; + u64 max_dma_mask; unsigned int tx_dma_mask; unsigned bug5391_mask; diff --git a/drivers/net/sfc/rx.c b/drivers/net/sfc/rx.c index 670622373dd..601b001437c 100644 --- a/drivers/net/sfc/rx.c +++ b/drivers/net/sfc/rx.c @@ -86,14 +86,17 @@ static unsigned int rx_refill_limit = 95; */ #define EFX_RXD_HEAD_ROOM 2 -/* Macros for zero-order pages (potentially) containing multiple RX buffers */ -#define RX_DATA_OFFSET(_data) \ - (((unsigned long) (_data)) & (PAGE_SIZE-1)) -#define RX_BUF_OFFSET(_rx_buf) \ - RX_DATA_OFFSET((_rx_buf)->data) - -#define RX_PAGE_SIZE(_efx) \ - (PAGE_SIZE * (1u << (_efx)->rx_buffer_order)) +static inline unsigned int efx_rx_buf_offset(struct efx_rx_buffer *buf) +{ + /* Offset is always within one page, so we don't need to consider + * the page order. + */ + return (__force unsigned long) buf->data & (PAGE_SIZE - 1); +} +static inline unsigned int efx_rx_buf_size(struct efx_nic *efx) +{ + return PAGE_SIZE << efx->rx_buffer_order; +} /************************************************************************** @@ -106,7 +109,7 @@ static unsigned int rx_refill_limit = 95; static int efx_lro_get_skb_hdr(struct sk_buff *skb, void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags, void *priv) { - struct efx_channel *channel = (struct efx_channel *)priv; + struct efx_channel *channel = priv; struct iphdr *iph; struct tcphdr *th; @@ -131,12 +134,12 @@ static int efx_get_frag_hdr(struct skb_frag_struct *frag, void **mac_hdr, void **ip_hdr, void **tcpudp_hdr, u64 *hdr_flags, void *priv) { - struct efx_channel *channel = (struct efx_channel *)priv; + struct efx_channel *channel = priv; struct ethhdr *eh; struct iphdr *iph; /* We support EtherII and VLAN encapsulated IPv4 */ - eh = (struct ethhdr *)(page_address(frag->page) + frag->page_offset); + eh = page_address(frag->page) + frag->page_offset; *mac_hdr = eh; if (eh->h_proto == htons(ETH_P_IP)) { @@ -269,7 +272,7 @@ static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, return -ENOMEM; dma_addr = pci_map_page(efx->pci_dev, rx_buf->page, - 0, RX_PAGE_SIZE(efx), + 0, efx_rx_buf_size(efx), PCI_DMA_FROMDEVICE); if (unlikely(pci_dma_mapping_error(dma_addr))) { @@ -280,14 +283,14 @@ static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, rx_queue->buf_page = rx_buf->page; rx_queue->buf_dma_addr = dma_addr; - rx_queue->buf_data = ((char *) page_address(rx_buf->page) + + rx_queue->buf_data = (page_address(rx_buf->page) + EFX_PAGE_IP_ALIGN); } - offset = RX_DATA_OFFSET(rx_queue->buf_data); rx_buf->len = bytes; - rx_buf->dma_addr = rx_queue->buf_dma_addr + offset; rx_buf->data = rx_queue->buf_data; + offset = efx_rx_buf_offset(rx_buf); + rx_buf->dma_addr = rx_queue->buf_dma_addr + offset; /* Try to pack multiple buffers per page */ if (efx->rx_buffer_order == 0) { @@ -295,7 +298,7 @@ static inline int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue, rx_queue->buf_data += ((bytes + 0x1ff) & ~0x1ff); offset += ((bytes + 0x1ff) & ~0x1ff); - space = RX_PAGE_SIZE(efx) - offset; + space = efx_rx_buf_size(efx) - offset; if (space >= bytes) { /* Refs dropped on kernel releasing each skb */ get_page(rx_queue->buf_page); @@ -344,7 +347,8 @@ static inline void efx_unmap_rx_buffer(struct efx_nic *efx, EFX_BUG_ON_PARANOID(rx_buf->skb); if (rx_buf->unmap_addr) { pci_unmap_page(efx->pci_dev, rx_buf->unmap_addr, - RX_PAGE_SIZE(efx), PCI_DMA_FROMDEVICE); + efx_rx_buf_size(efx), + PCI_DMA_FROMDEVICE); rx_buf->unmap_addr = 0; } } else if (likely(rx_buf->skb)) { @@ -400,9 +404,10 @@ static int __efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, return 0; /* Record minimum fill level */ - if (unlikely(fill_level < rx_queue->min_fill)) + if (unlikely(fill_level < rx_queue->min_fill)) { if (fill_level) rx_queue->min_fill = fill_level; + } /* Acquire RX add lock. If this lock is contended, then a fast * fill must already be in progress (e.g. in the refill @@ -552,7 +557,7 @@ static inline void efx_rx_packet_lro(struct efx_channel *channel, struct skb_frag_struct frags; frags.page = rx_buf->page; - frags.page_offset = RX_BUF_OFFSET(rx_buf); + frags.page_offset = efx_rx_buf_offset(rx_buf); frags.size = rx_buf->len; lro_receive_frags(lro_mgr, &frags, rx_buf->len, @@ -597,7 +602,7 @@ static inline struct sk_buff *efx_rx_mk_skb(struct efx_rx_buffer *rx_buf, if (unlikely(rx_buf->len > hdr_len)) { struct skb_frag_struct *frag = skb_shinfo(skb)->frags; frag->page = rx_buf->page; - frag->page_offset = RX_BUF_OFFSET(rx_buf) + hdr_len; + frag->page_offset = efx_rx_buf_offset(rx_buf) + hdr_len; frag->size = skb->len - hdr_len; skb_shinfo(skb)->nr_frags = 1; skb->data_len = frag->size; @@ -851,7 +856,8 @@ void efx_fini_rx_queue(struct efx_rx_queue *rx_queue) /* For a page that is part-way through splitting into RX buffers */ if (rx_queue->buf_page != NULL) { pci_unmap_page(rx_queue->efx->pci_dev, rx_queue->buf_dma_addr, - RX_PAGE_SIZE(rx_queue->efx), PCI_DMA_FROMDEVICE); + efx_rx_buf_size(rx_queue->efx), + PCI_DMA_FROMDEVICE); __free_pages(rx_queue->buf_page, rx_queue->efx->rx_buffer_order); rx_queue->buf_page = NULL; diff --git a/drivers/net/sfc/selftest.c b/drivers/net/sfc/selftest.c index cbda15946e8..3b2de9fe7f2 100644 --- a/drivers/net/sfc/selftest.c +++ b/drivers/net/sfc/selftest.c @@ -290,7 +290,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx, payload = &state->payload; - received = (struct efx_loopback_payload *)(char *) buf_ptr; + received = (struct efx_loopback_payload *) buf_ptr; received->ip.saddr = payload->ip.saddr; received->ip.check = payload->ip.check; @@ -424,10 +424,10 @@ static int efx_tx_loopback(struct efx_tx_queue *tx_queue) * interrupt handler. */ smp_wmb(); - if (NET_DEV_REGISTERED(efx)) + if (efx_dev_registered(efx)) netif_tx_lock_bh(efx->net_dev); rc = efx_xmit(efx, tx_queue, skb); - if (NET_DEV_REGISTERED(efx)) + if (efx_dev_registered(efx)) netif_tx_unlock_bh(efx->net_dev); if (rc != NETDEV_TX_OK) { @@ -453,7 +453,7 @@ static int efx_rx_loopback(struct efx_tx_queue *tx_queue, int tx_done = 0, rx_good, rx_bad; int i, rc = 0; - if (NET_DEV_REGISTERED(efx)) + if (efx_dev_registered(efx)) netif_tx_lock_bh(efx->net_dev); /* Count the number of tx completions, and decrement the refcnt. Any @@ -465,7 +465,7 @@ static int efx_rx_loopback(struct efx_tx_queue *tx_queue, dev_kfree_skb_any(skb); } - if (NET_DEV_REGISTERED(efx)) + if (efx_dev_registered(efx)) netif_tx_unlock_bh(efx->net_dev); /* Check TX completion and received packet counts */ @@ -517,6 +517,8 @@ efx_test_loopback(struct efx_tx_queue *tx_queue, state->packet_count = min(1 << (i << 2), state->packet_count); state->skbs = kzalloc(sizeof(state->skbs[0]) * state->packet_count, GFP_KERNEL); + if (!state->skbs) + return -ENOMEM; state->flush = 0; EFX_LOG(efx, "TX queue %d testing %s loopback with %d " @@ -700,7 +702,7 @@ int efx_offline_test(struct efx_nic *efx, * "flushing" so all inflight packets are dropped */ BUG_ON(efx->loopback_selftest); state->flush = 1; - efx->loopback_selftest = (void *)state; + efx->loopback_selftest = state; rc = efx_test_loopbacks(efx, tests, loopback_modes); diff --git a/drivers/net/sfc/sfe4001.c b/drivers/net/sfc/sfe4001.c index 725d1a539c4..b2784952399 100644 --- a/drivers/net/sfc/sfe4001.c +++ b/drivers/net/sfc/sfe4001.c @@ -106,28 +106,27 @@ static const u8 xgphy_max_temperature = 90; -void sfe4001_poweroff(struct efx_nic *efx) +static void sfe4001_poweroff(struct efx_nic *efx) { - struct efx_i2c_interface *i2c = &efx->i2c; + struct i2c_client *ioexp_client = efx->board_info.ioexp_client; + struct i2c_client *hwmon_client = efx->board_info.hwmon_client; - u8 cfg, out, in; + /* Turn off all power rails and disable outputs */ + i2c_smbus_write_byte_data(ioexp_client, P0_OUT, 0xff); + i2c_smbus_write_byte_data(ioexp_client, P1_CONFIG, 0xff); + i2c_smbus_write_byte_data(ioexp_client, P0_CONFIG, 0xff); - EFX_INFO(efx, "%s\n", __func__); - - /* Turn off all power rails */ - out = 0xff; - (void) efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); - - /* Disable port 1 outputs on IO expander */ - cfg = 0xff; - (void) efx_i2c_write(i2c, PCA9539, P1_CONFIG, &cfg, 1); + /* Clear any over-temperature alert */ + i2c_smbus_read_byte_data(hwmon_client, RSL); +} - /* Disable port 0 outputs on IO expander */ - cfg = 0xff; - (void) efx_i2c_write(i2c, PCA9539, P0_CONFIG, &cfg, 1); +static void sfe4001_fini(struct efx_nic *efx) +{ + EFX_INFO(efx, "%s\n", __func__); - /* Clear any over-temperature alert */ - (void) efx_i2c_read(i2c, MAX6647, RSL, &in, 1); + sfe4001_poweroff(efx); + i2c_unregister_device(efx->board_info.ioexp_client); + i2c_unregister_device(efx->board_info.hwmon_client); } /* The P0_EN_3V3X line on SFE4001 boards (from A2 onward) is connected @@ -143,14 +142,26 @@ MODULE_PARM_DESC(phy_flash_cfg, * be turned on before the PHY can be used. * Context: Process context, rtnl lock held */ -int sfe4001_poweron(struct efx_nic *efx) +int sfe4001_init(struct efx_nic *efx) { - struct efx_i2c_interface *i2c = &efx->i2c; + struct i2c_client *hwmon_client, *ioexp_client; unsigned int count; int rc; - u8 out, in, cfg; + u8 out; efx_dword_t reg; + hwmon_client = i2c_new_dummy(&efx->i2c_adap, MAX6647); + if (!hwmon_client) + return -EIO; + efx->board_info.hwmon_client = hwmon_client; + + ioexp_client = i2c_new_dummy(&efx->i2c_adap, PCA9539); + if (!ioexp_client) { + rc = -EIO; + goto fail_hwmon; + } + efx->board_info.ioexp_client = ioexp_client; + /* 10Xpress has fixed-function LED pins, so there is no board-specific * blink code. */ efx->board_info.blink = tenxpress_phy_blink; @@ -166,44 +177,45 @@ int sfe4001_poweron(struct efx_nic *efx) falcon_xmac_writel(efx, ®, XX_PWR_RST_REG_MAC); udelay(10); + efx->board_info.fini = sfe4001_fini; + /* Set DSP over-temperature alert threshold */ EFX_INFO(efx, "DSP cut-out at %dC\n", xgphy_max_temperature); - rc = efx_i2c_write(i2c, MAX6647, WLHO, - &xgphy_max_temperature, 1); + rc = i2c_smbus_write_byte_data(hwmon_client, WLHO, + xgphy_max_temperature); if (rc) - goto fail1; + goto fail_ioexp; /* Read it back and verify */ - rc = efx_i2c_read(i2c, MAX6647, RLHN, &in, 1); - if (rc) - goto fail1; - if (in != xgphy_max_temperature) { + rc = i2c_smbus_read_byte_data(hwmon_client, RLHN); + if (rc < 0) + goto fail_ioexp; + if (rc != xgphy_max_temperature) { rc = -EFAULT; - goto fail1; + goto fail_ioexp; } /* Clear any previous over-temperature alert */ - rc = efx_i2c_read(i2c, MAX6647, RSL, &in, 1); - if (rc) - goto fail1; + rc = i2c_smbus_read_byte_data(hwmon_client, RSL); + if (rc < 0) + goto fail_ioexp; /* Enable port 0 and port 1 outputs on IO expander */ - cfg = 0x00; - rc = efx_i2c_write(i2c, PCA9539, P0_CONFIG, &cfg, 1); + rc = i2c_smbus_write_byte_data(ioexp_client, P0_CONFIG, 0x00); if (rc) - goto fail1; - cfg = 0xff & ~(1 << P1_SPARE_LBN); - rc = efx_i2c_write(i2c, PCA9539, P1_CONFIG, &cfg, 1); + goto fail_ioexp; + rc = i2c_smbus_write_byte_data(ioexp_client, P1_CONFIG, + 0xff & ~(1 << P1_SPARE_LBN)); if (rc) - goto fail2; + goto fail_on; /* Turn all power off then wait 1 sec. This ensures PHY is reset */ out = 0xff & ~((0 << P0_EN_1V2_LBN) | (0 << P0_EN_2V5_LBN) | (0 << P0_EN_3V3X_LBN) | (0 << P0_EN_5V_LBN) | (0 << P0_EN_1V0X_LBN)); - rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); + rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out); if (rc) - goto fail3; + goto fail_on; schedule_timeout_uninterruptible(HZ); count = 0; @@ -215,26 +227,26 @@ int sfe4001_poweron(struct efx_nic *efx) if (sfe4001_phy_flash_cfg) out |= 1 << P0_EN_3V3X_LBN; - rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); + rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out); if (rc) - goto fail3; + goto fail_on; msleep(10); /* Turn on 1V power rail */ out &= ~(1 << P0_EN_1V0X_LBN); - rc = efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); + rc = i2c_smbus_write_byte_data(ioexp_client, P0_OUT, out); if (rc) - goto fail3; + goto fail_on; EFX_INFO(efx, "waiting for power (attempt %d)...\n", count); schedule_timeout_uninterruptible(HZ); /* Check DSP is powered */ - rc = efx_i2c_read(i2c, PCA9539, P1_IN, &in, 1); - if (rc) - goto fail3; - if (in & (1 << P1_AFE_PWD_LBN)) + rc = i2c_smbus_read_byte_data(ioexp_client, P1_IN); + if (rc < 0) + goto fail_on; + if (rc & (1 << P1_AFE_PWD_LBN)) goto done; /* DSP doesn't look powered in flash config mode */ @@ -244,23 +256,17 @@ int sfe4001_poweron(struct efx_nic *efx) EFX_INFO(efx, "timed out waiting for power\n"); rc = -ETIMEDOUT; - goto fail3; + goto fail_on; done: EFX_INFO(efx, "PHY is powered on\n"); return 0; -fail3: - /* Turn off all power rails */ - out = 0xff; - (void) efx_i2c_write(i2c, PCA9539, P0_OUT, &out, 1); - /* Disable port 1 outputs on IO expander */ - out = 0xff; - (void) efx_i2c_write(i2c, PCA9539, P1_CONFIG, &out, 1); -fail2: - /* Disable port 0 outputs on IO expander */ - out = 0xff; - (void) efx_i2c_write(i2c, PCA9539, P0_CONFIG, &out, 1); -fail1: +fail_on: + sfe4001_poweroff(efx); +fail_ioexp: + i2c_unregister_device(ioexp_client); +fail_hwmon: + i2c_unregister_device(hwmon_client); return rc; } diff --git a/drivers/net/sfc/tenxpress.c b/drivers/net/sfc/tenxpress.c index b1cd6deec01..c0146061c32 100644 --- a/drivers/net/sfc/tenxpress.c +++ b/drivers/net/sfc/tenxpress.c @@ -211,6 +211,8 @@ static int tenxpress_phy_init(struct efx_nic *efx) int rc = 0; phy_data = kzalloc(sizeof(*phy_data), GFP_KERNEL); + if (!phy_data) + return -ENOMEM; efx->phy_data = phy_data; tenxpress_set_state(efx, TENXPRESS_STATUS_NORMAL); @@ -376,7 +378,7 @@ static void tenxpress_phy_reconfigure(struct efx_nic *efx) * perform a special software reset */ if ((phy_data->tx_disabled && !efx->tx_disabled) || loop_change) { - (void) tenxpress_special_reset(efx); + tenxpress_special_reset(efx); falcon_reset_xaui(efx); } diff --git a/drivers/net/sfc/tx.c b/drivers/net/sfc/tx.c index 9b436f5b488..5cdd082ab8f 100644 --- a/drivers/net/sfc/tx.c +++ b/drivers/net/sfc/tx.c @@ -387,7 +387,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) if (unlikely(tx_queue->stopped)) { fill_level = tx_queue->insert_count - tx_queue->read_count; if (fill_level < EFX_NETDEV_TX_THRESHOLD(tx_queue)) { - EFX_BUG_ON_PARANOID(!NET_DEV_REGISTERED(efx)); + EFX_BUG_ON_PARANOID(!efx_dev_registered(efx)); /* Do this under netif_tx_lock(), to avoid racing * with efx_xmit(). */ @@ -639,11 +639,12 @@ static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue, base_dma = tsoh->dma_addr & PAGE_MASK; p = &tx_queue->tso_headers_free; - while (*p != NULL) + while (*p != NULL) { if (((unsigned long)*p & PAGE_MASK) == base_kva) *p = (*p)->next; else p = &(*p)->next; + } pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma); } @@ -939,9 +940,10 @@ static inline int tso_start_new_packet(struct efx_tx_queue *tx_queue, /* Allocate a DMA-mapped header buffer. */ if (likely(TSOH_SIZE(st->p.header_length) <= TSOH_STD_SIZE)) { - if (tx_queue->tso_headers_free == NULL) + if (tx_queue->tso_headers_free == NULL) { if (efx_tsoh_block_alloc(tx_queue)) return -1; + } EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free); tsoh = tx_queue->tso_headers_free; tx_queue->tso_headers_free = tsoh->next; @@ -1106,9 +1108,10 @@ static void efx_fini_tso(struct efx_tx_queue *tx_queue) { unsigned i; - if (tx_queue->buffer) + if (tx_queue->buffer) { for (i = 0; i <= tx_queue->efx->type->txd_ring_mask; ++i) efx_tsoh_free(tx_queue, &tx_queue->buffer[i]); + } while (tx_queue->tso_headers_free != NULL) efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free, diff --git a/drivers/net/sfc/workarounds.h b/drivers/net/sfc/workarounds.h index dca62f19019..35ab19c27f8 100644 --- a/drivers/net/sfc/workarounds.h +++ b/drivers/net/sfc/workarounds.h @@ -16,7 +16,7 @@ */ #define EFX_WORKAROUND_ALWAYS(efx) 1 -#define EFX_WORKAROUND_FALCON_A(efx) (FALCON_REV(efx) <= FALCON_REV_A1) +#define EFX_WORKAROUND_FALCON_A(efx) (falcon_rev(efx) <= FALCON_REV_A1) /* XAUI resets if link not detected */ #define EFX_WORKAROUND_5147 EFX_WORKAROUND_ALWAYS diff --git a/drivers/net/sfc/xfp_phy.c b/drivers/net/sfc/xfp_phy.c index 3b9f9ddbc37..f3684ad2888 100644 --- a/drivers/net/sfc/xfp_phy.c +++ b/drivers/net/sfc/xfp_phy.c @@ -85,7 +85,9 @@ static int xfp_phy_init(struct efx_nic *efx) int rc; phy_data = kzalloc(sizeof(struct xfp_phy_data), GFP_KERNEL); - efx->phy_data = (void *) phy_data; + if (!phy_data) + return -ENOMEM; + efx->phy_data = phy_data; EFX_INFO(efx, "XFP: PHY ID reg %x (OUI %x model %x revision" " %x)\n", devid, MDIO_ID_OUI(devid), MDIO_ID_MODEL(devid), diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 4b0f0335877..c83406f4f2a 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -284,6 +284,86 @@ static void sky2_power_aux(struct sky2_hw *hw) PC_VAUX_ON | PC_VCC_OFF)); } +static void sky2_power_state(struct sky2_hw *hw, pci_power_t state) +{ + u16 power_control = sky2_pci_read16(hw, hw->pm_cap + PCI_PM_CTRL); + int pex = pci_find_capability(hw->pdev, PCI_CAP_ID_EXP); + u32 reg; + + sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); + + switch (state) { + case PCI_D0: + break; + + case PCI_D1: + power_control |= 1; + break; + + case PCI_D2: + power_control |= 2; + break; + + case PCI_D3hot: + case PCI_D3cold: + power_control |= 3; + if (hw->flags & SKY2_HW_ADV_POWER_CTL) { + /* additional power saving measurements */ + reg = sky2_pci_read32(hw, PCI_DEV_REG4); + + /* set gating core clock for LTSSM in L1 state */ + reg |= P_PEX_LTSSM_STAT(P_PEX_LTSSM_L1_STAT) | + /* auto clock gated scheme controlled by CLKREQ */ + P_ASPM_A1_MODE_SELECT | + /* enable Gate Root Core Clock */ + P_CLK_GATE_ROOT_COR_ENA; + + if (pex && (hw->flags & SKY2_HW_CLK_POWER)) { + /* enable Clock Power Management (CLKREQ) */ + u16 ctrl = sky2_pci_read16(hw, pex + PCI_EXP_DEVCTL); + + ctrl |= PCI_EXP_DEVCTL_AUX_PME; + sky2_pci_write16(hw, pex + PCI_EXP_DEVCTL, ctrl); + } else + /* force CLKREQ Enable in Our4 (A1b only) */ + reg |= P_ASPM_FORCE_CLKREQ_ENA; + + /* set Mask Register for Release/Gate Clock */ + sky2_pci_write32(hw, PCI_DEV_REG5, + P_REL_PCIE_EXIT_L1_ST | P_GAT_PCIE_ENTER_L1_ST | + P_REL_PCIE_RX_EX_IDLE | P_GAT_PCIE_RX_EL_IDLE | + P_REL_GPHY_LINK_UP | P_GAT_GPHY_LINK_DOWN); + } else + sky2_write8(hw, B28_Y2_ASF_STAT_CMD, Y2_ASF_CLK_HALT); + + /* put CPU into reset state */ + sky2_write8(hw, B28_Y2_ASF_STAT_CMD, HCU_CCSR_ASF_RESET); + if (hw->chip_id == CHIP_ID_YUKON_SUPR && hw->chip_rev == CHIP_REV_YU_SU_A0) + /* put CPU into halt state */ + sky2_write8(hw, B28_Y2_ASF_STAT_CMD, HCU_CCSR_ASF_HALTED); + + if (pex && !(hw->flags & SKY2_HW_RAM_BUFFER)) { + reg = sky2_pci_read32(hw, PCI_DEV_REG1); + /* force to PCIe L1 */ + reg |= PCI_FORCE_PEX_L1; + sky2_pci_write32(hw, PCI_DEV_REG1, reg); + } + break; + + default: + dev_warn(&hw->pdev->dev, PFX "Invalid power state (%d) ", + state); + return; + } + + power_control |= PCI_PM_CTRL_PME_ENABLE; + /* Finally, set the new power state. */ + sky2_pci_write32(hw, hw->pm_cap + PCI_PM_CTRL, power_control); + + sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); + sky2_pci_read32(hw, B0_CTST); +} + static void sky2_gmac_reset(struct sky2_hw *hw, unsigned port) { u16 reg; @@ -619,28 +699,71 @@ static void sky2_phy_init(struct sky2_hw *hw, unsigned port) gm_phy_write(hw, port, PHY_MARV_INT_MASK, PHY_M_DEF_MSK); } -static void sky2_phy_power(struct sky2_hw *hw, unsigned port, int onoff) +static const u32 phy_power[] = { PCI_Y2_PHY1_POWD, PCI_Y2_PHY2_POWD }; +static const u32 coma_mode[] = { PCI_Y2_PHY1_COMA, PCI_Y2_PHY2_COMA }; + +static void sky2_phy_power_up(struct sky2_hw *hw, unsigned port) { u32 reg1; - static const u32 phy_power[] = { PCI_Y2_PHY1_POWD, PCI_Y2_PHY2_POWD }; - static const u32 coma_mode[] = { PCI_Y2_PHY1_COMA, PCI_Y2_PHY2_COMA }; sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); reg1 = sky2_pci_read32(hw, PCI_DEV_REG1); - /* Turn on/off phy power saving */ - if (onoff) - reg1 &= ~phy_power[port]; - else - reg1 |= phy_power[port]; + reg1 &= ~phy_power[port]; - if (onoff && hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev > 1) + if (hw->chip_id == CHIP_ID_YUKON_XL && hw->chip_rev > 1) reg1 |= coma_mode[port]; sky2_pci_write32(hw, PCI_DEV_REG1, reg1); sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); sky2_pci_read32(hw, PCI_DEV_REG1); +} + +static void sky2_phy_power_down(struct sky2_hw *hw, unsigned port) +{ + u32 reg1; + u16 ctrl; + + /* release GPHY Control reset */ + sky2_write8(hw, SK_REG(port, GPHY_CTRL), GPC_RST_CLR); + + /* release GMAC reset */ + sky2_write8(hw, SK_REG(port, GMAC_CTRL), GMC_RST_CLR); - udelay(100); + if (hw->flags & SKY2_HW_NEWER_PHY) { + /* select page 2 to access MAC control register */ + gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 2); + + ctrl = gm_phy_read(hw, port, PHY_MARV_PHY_CTRL); + /* allow GMII Power Down */ + ctrl &= ~PHY_M_MAC_GMIF_PUP; + gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl); + + /* set page register back to 0 */ + gm_phy_write(hw, port, PHY_MARV_EXT_ADR, 0); + } + + /* setup General Purpose Control Register */ + gma_write16(hw, port, GM_GP_CTRL, + GM_GPCR_FL_PASS | GM_GPCR_SPEED_100 | GM_GPCR_AU_ALL_DIS); + + if (hw->chip_id != CHIP_ID_YUKON_EC) { + if (hw->chip_id == CHIP_ID_YUKON_EC_U) { + ctrl = gm_phy_read(hw, port, PHY_MARV_PHY_CTRL); + + /* enable Power Down */ + ctrl |= PHY_M_PC_POW_D_ENA; + gm_phy_write(hw, port, PHY_MARV_PHY_CTRL, ctrl); + } + + /* set IEEE compatible Power Down Mode (dev. #4.99) */ + gm_phy_write(hw, port, PHY_MARV_CTRL, PHY_CT_PDOWN); + } + + sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); + reg1 = sky2_pci_read32(hw, PCI_DEV_REG1); + reg1 |= phy_power[port]; /* set PHY to PowerDown/COMA Mode */ + sky2_pci_write32(hw, PCI_DEV_REG1, reg1); + sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } /* Force a renegotiation */ @@ -675,8 +798,11 @@ static void sky2_wol_init(struct sky2_port *sky2) sky2->advertising &= ~(ADVERTISED_1000baseT_Half|ADVERTISED_1000baseT_Full); sky2->flow_mode = FC_NONE; - sky2_phy_power(hw, port, 1); - sky2_phy_reinit(sky2); + + spin_lock_bh(&sky2->phy_lock); + sky2_phy_power_up(hw, port); + sky2_phy_init(hw, port); + spin_unlock_bh(&sky2->phy_lock); sky2->flow_mode = save_mode; sky2->advertising = ctrl; @@ -781,6 +907,7 @@ static void sky2_mac_init(struct sky2_hw *hw, unsigned port) sky2_write8(hw, SK_REG(port, GMAC_IRQ_MSK), GMAC_DEF_MSK); spin_lock_bh(&sky2->phy_lock); + sky2_phy_power_up(hw, port); sky2_phy_init(hw, port); spin_unlock_bh(&sky2->phy_lock); @@ -1159,17 +1286,9 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } #ifdef SKY2_VLAN_TAG_USED -static void sky2_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) +static void sky2_set_vlan_mode(struct sky2_hw *hw, u16 port, bool onoff) { - struct sky2_port *sky2 = netdev_priv(dev); - struct sky2_hw *hw = sky2->hw; - u16 port = sky2->port; - - netif_tx_lock_bh(dev); - napi_disable(&hw->napi); - - sky2->vlgrp = grp; - if (grp) { + if (onoff) { sky2_write32(hw, SK_REG(port, RX_GMF_CTRL_T), RX_VLAN_STRIP_ON); sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), @@ -1180,6 +1299,19 @@ static void sky2_vlan_rx_register(struct net_device *dev, struct vlan_group *grp sky2_write32(hw, SK_REG(port, TX_GMF_CTRL_T), TX_VLAN_TAG_OFF); } +} + +static void sky2_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) +{ + struct sky2_port *sky2 = netdev_priv(dev); + struct sky2_hw *hw = sky2->hw; + u16 port = sky2->port; + + netif_tx_lock_bh(dev); + napi_disable(&hw->napi); + + sky2->vlgrp = grp; + sky2_set_vlan_mode(hw, port, grp != NULL); sky2_read32(hw, B0_Y2_SP_LISR); napi_enable(&hw->napi); @@ -1380,8 +1512,6 @@ static int sky2_up(struct net_device *dev) if (!sky2->rx_ring) goto err_out; - sky2_phy_power(hw, port, 1); - sky2_mac_init(hw, port); /* Register is number of 4K blocks on internal RAM buffer. */ @@ -1418,6 +1548,10 @@ static int sky2_up(struct net_device *dev) sky2_prefetch_init(hw, txqaddr[port], sky2->tx_le_map, TX_RING_SIZE - 1); +#ifdef SKY2_VLAN_TAG_USED + sky2_set_vlan_mode(hw, port, sky2->vlgrp != NULL); +#endif + err = sky2_rx_start(sky2); if (err) goto err_out; @@ -1758,7 +1892,7 @@ static int sky2_down(struct net_device *dev) sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); sky2_write8(hw, SK_REG(port, TX_GMF_CTRL_T), GMF_RST_SET); - sky2_phy_power(hw, port, 0); + sky2_phy_power_down(hw, port); netif_carrier_off(dev); @@ -2732,6 +2866,10 @@ static int __devinit sky2_init(struct sky2_hw *hw) hw->flags = SKY2_HW_GIGABIT | SKY2_HW_NEWER_PHY | SKY2_HW_ADV_POWER_CTL; + + /* check for Rev. A1 dev 4200 */ + if (sky2_read16(hw, Q_ADDR(Q_XA1, Q_WM)) == 0) + hw->flags |= SKY2_HW_CLK_POWER; break; case CHIP_ID_YUKON_EX: @@ -2782,6 +2920,11 @@ static int __devinit sky2_init(struct sky2_hw *hw) if (hw->pmd_type == 'L' || hw->pmd_type == 'S' || hw->pmd_type == 'P') hw->flags |= SKY2_HW_FIBRE_PHY; + hw->pm_cap = pci_find_capability(hw->pdev, PCI_CAP_ID_PM); + if (hw->pm_cap == 0) { + dev_err(&hw->pdev->dev, "cannot find PowerManagement capability\n"); + return -EIO; + } hw->ports = 1; t8 = sky2_read8(hw, B2_Y2_HW_RES); @@ -4353,7 +4496,7 @@ static int sky2_suspend(struct pci_dev *pdev, pm_message_t state) pci_save_state(pdev); pci_enable_wake(pdev, pci_choose_state(pdev, state), wol); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); + sky2_power_state(hw, pci_choose_state(pdev, state)); return 0; } @@ -4366,9 +4509,7 @@ static int sky2_resume(struct pci_dev *pdev) if (!hw) return 0; - err = pci_set_power_state(pdev, PCI_D0); - if (err) - goto out; + sky2_power_state(hw, PCI_D0); err = pci_restore_state(pdev); if (err) @@ -4436,8 +4577,7 @@ static void sky2_shutdown(struct pci_dev *pdev) pci_enable_wake(pdev, PCI_D3cold, wol); pci_disable_device(pdev); - pci_set_power_state(pdev, PCI_D3hot); - + sky2_power_state(hw, PCI_D3hot); } static struct pci_driver sky2_driver = { diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h index c0a5eea2000..1fa82bf029d 100644 --- a/drivers/net/sky2.h +++ b/drivers/net/sky2.h @@ -28,6 +28,11 @@ enum pci_dev_reg_1 { PCI_Y2_PHY2_POWD = 1<<27, /* Set PHY 2 to Power Down (YUKON-2) */ PCI_Y2_PHY1_POWD = 1<<26, /* Set PHY 1 to Power Down (YUKON-2) */ PCI_Y2_PME_LEGACY= 1<<15, /* PCI Express legacy power management mode */ + + PCI_PHY_LNK_TIM_MSK= 3L<<8,/* Bit 9.. 8: GPHY Link Trigger Timer */ + PCI_ENA_L1_EVENT = 1<<7, /* Enable PEX L1 Event */ + PCI_ENA_GPHY_LNK = 1<<6, /* Enable PEX L1 on GPHY Link down */ + PCI_FORCE_PEX_L1 = 1<<5, /* Force to PEX L1 */ }; enum pci_dev_reg_2 { @@ -45,7 +50,11 @@ enum pci_dev_reg_2 { /* PCI_OUR_REG_4 32 bit Our Register 4 (Yukon-ECU only) */ enum pci_dev_reg_4 { - /* (Link Training & Status State Machine) */ + /* (Link Training & Status State Machine) */ + P_PEX_LTSSM_STAT_MSK = 0x7fL<<25, /* Bit 31..25: PEX LTSSM Mask */ +#define P_PEX_LTSSM_STAT(x) ((x << 25) & P_PEX_LTSSM_STAT_MSK) + P_PEX_LTSSM_L1_STAT = 0x34, + P_PEX_LTSSM_DET_STAT = 0x01, P_TIMER_VALUE_MSK = 0xffL<<16, /* Bit 23..16: Timer Value Mask */ /* (Active State Power Management) */ P_FORCE_ASPM_REQUEST = 1<<15, /* Force ASPM Request (A1 only) */ @@ -454,6 +463,9 @@ enum yukon_ex_rev { CHIP_REV_YU_EX_A0 = 1, CHIP_REV_YU_EX_B0 = 2, }; +enum yukon_supr_rev { + CHIP_REV_YU_SU_A0 = 0, +}; /* B2_Y2_CLK_GATE 8 bit Clock Gating (Yukon-2 only) */ @@ -1143,6 +1155,12 @@ enum { PHY_M_PC_ENA_AUTO = 3, /* 11 = Enable Automatic Crossover */ }; +/* for Yukon-EC Ultra Gigabit Ethernet PHY (88E1149 only) */ +enum { + PHY_M_PC_COP_TX_DIS = 1<<3, /* Copper Transmitter Disable */ + PHY_M_PC_POW_D_ENA = 1<<2, /* Power Down Enable */ +}; + /* for 10/100 Fast Ethernet PHY (88E3082 only) */ enum { PHY_M_PC_ENA_DTE_DT = 1<<15, /* Enable Data Terminal Equ. (DTE) Detect */ @@ -1411,6 +1429,7 @@ enum { /***** PHY_MARV_PHY_CTRL (page 2) 16 bit r/w MAC Specific Ctrl *****/ enum { PHY_M_MAC_MD_MSK = 7<<7, /* Bit 9.. 7: Mode Select Mask */ + PHY_M_MAC_GMIF_PUP = 1<<3, /* GMII Power Up (88E1149 only) */ PHY_M_MAC_MD_AUTO = 3,/* Auto Copper/1000Base-X */ PHY_M_MAC_MD_COPPER = 5,/* Copper only */ PHY_M_MAC_MD_1000BX = 7,/* 1000Base-X only */ @@ -2052,7 +2071,9 @@ struct sky2_hw { #define SKY2_HW_NEW_LE 0x00000020 /* new LSOv2 format */ #define SKY2_HW_AUTO_TX_SUM 0x00000040 /* new IP decode for Tx */ #define SKY2_HW_ADV_POWER_CTL 0x00000080 /* additional PHY power regs */ +#define SKY2_HW_CLK_POWER 0x00000100 /* clock power management */ + int pm_cap; u8 chip_id; u8 chip_rev; u8 pmd_type; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 07b3f77e762..d9f248f23b9 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -32,6 +32,8 @@ #include <linux/skbuff.h> #include <linux/ethtool.h> #include <linux/mii.h> +#include <linux/phy.h> +#include <linux/brcmphy.h> #include <linux/if_vlan.h> #include <linux/ip.h> #include <linux/tcp.h> @@ -64,8 +66,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.92" -#define DRV_MODULE_RELDATE "May 2, 2008" +#define DRV_MODULE_VERSION "3.93" +#define DRV_MODULE_RELDATE "May 22, 2008" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -203,6 +205,7 @@ static struct pci_device_id tg3_pci_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5723)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761)}, {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5761E)}, + {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5785)}, {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)}, {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)}, {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)}, @@ -804,6 +807,569 @@ static int tg3_writephy(struct tg3 *tp, int reg, u32 val) return ret; } +static int tg3_bmcr_reset(struct tg3 *tp) +{ + u32 phy_control; + int limit, err; + + /* OK, reset it, and poll the BMCR_RESET bit until it + * clears or we time out. + */ + phy_control = BMCR_RESET; + err = tg3_writephy(tp, MII_BMCR, phy_control); + if (err != 0) + return -EBUSY; + + limit = 5000; + while (limit--) { + err = tg3_readphy(tp, MII_BMCR, &phy_control); + if (err != 0) + return -EBUSY; + + if ((phy_control & BMCR_RESET) == 0) { + udelay(40); + break; + } + udelay(10); + } + if (limit <= 0) + return -EBUSY; + + return 0; +} + +static int tg3_mdio_read(struct mii_bus *bp, int mii_id, int reg) +{ + struct tg3 *tp = (struct tg3 *)bp->priv; + u32 val; + + if (tp->tg3_flags3 & TG3_FLG3_MDIOBUS_PAUSED) + return -EAGAIN; + + if (tg3_readphy(tp, reg, &val)) + return -EIO; + + return val; +} + +static int tg3_mdio_write(struct mii_bus *bp, int mii_id, int reg, u16 val) +{ + struct tg3 *tp = (struct tg3 *)bp->priv; + + if (tp->tg3_flags3 & TG3_FLG3_MDIOBUS_PAUSED) + return -EAGAIN; + + if (tg3_writephy(tp, reg, val)) + return -EIO; + + return 0; +} + +static int tg3_mdio_reset(struct mii_bus *bp) +{ + return 0; +} + +static void tg3_mdio_config(struct tg3 *tp) +{ + u32 val; + + if (tp->mdio_bus.phy_map[PHY_ADDR]->interface != + PHY_INTERFACE_MODE_RGMII) + return; + + val = tr32(MAC_PHYCFG1) & ~(MAC_PHYCFG1_RGMII_EXT_RX_DEC | + MAC_PHYCFG1_RGMII_SND_STAT_EN); + if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE) { + if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_RX_EN) + val |= MAC_PHYCFG1_RGMII_EXT_RX_DEC; + if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_TX_EN) + val |= MAC_PHYCFG1_RGMII_SND_STAT_EN; + } + tw32(MAC_PHYCFG1, val | MAC_PHYCFG1_RGMII_INT | MAC_PHYCFG1_TXC_DRV); + + val = tr32(MAC_PHYCFG2) & ~(MAC_PHYCFG2_INBAND_ENABLE); + if (!(tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE)) + val |= MAC_PHYCFG2_INBAND_ENABLE; + tw32(MAC_PHYCFG2, val); + + val = tr32(MAC_EXT_RGMII_MODE); + val &= ~(MAC_RGMII_MODE_RX_INT_B | + MAC_RGMII_MODE_RX_QUALITY | + MAC_RGMII_MODE_RX_ACTIVITY | + MAC_RGMII_MODE_RX_ENG_DET | + MAC_RGMII_MODE_TX_ENABLE | + MAC_RGMII_MODE_TX_LOWPWR | + MAC_RGMII_MODE_TX_RESET); + if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE) { + if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_RX_EN) + val |= MAC_RGMII_MODE_RX_INT_B | + MAC_RGMII_MODE_RX_QUALITY | + MAC_RGMII_MODE_RX_ACTIVITY | + MAC_RGMII_MODE_RX_ENG_DET; + if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_TX_EN) + val |= MAC_RGMII_MODE_TX_ENABLE | + MAC_RGMII_MODE_TX_LOWPWR | + MAC_RGMII_MODE_TX_RESET; + } + tw32(MAC_EXT_RGMII_MODE, val); +} + +static void tg3_mdio_start(struct tg3 *tp) +{ + if (tp->tg3_flags3 & TG3_FLG3_MDIOBUS_INITED) { + mutex_lock(&tp->mdio_bus.mdio_lock); + tp->tg3_flags3 &= ~TG3_FLG3_MDIOBUS_PAUSED; + mutex_unlock(&tp->mdio_bus.mdio_lock); + } + + tp->mi_mode &= ~MAC_MI_MODE_AUTO_POLL; + tw32_f(MAC_MI_MODE, tp->mi_mode); + udelay(80); + + if (tp->tg3_flags3 & TG3_FLG3_MDIOBUS_INITED) + tg3_mdio_config(tp); +} + +static void tg3_mdio_stop(struct tg3 *tp) +{ + if (tp->tg3_flags3 & TG3_FLG3_MDIOBUS_INITED) { + mutex_lock(&tp->mdio_bus.mdio_lock); + tp->tg3_flags3 |= TG3_FLG3_MDIOBUS_PAUSED; + mutex_unlock(&tp->mdio_bus.mdio_lock); + } +} + +static int tg3_mdio_init(struct tg3 *tp) +{ + int i; + u32 reg; + struct phy_device *phydev; + struct mii_bus *mdio_bus = &tp->mdio_bus; + + tg3_mdio_start(tp); + + if (!(tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) || + (tp->tg3_flags3 & TG3_FLG3_MDIOBUS_INITED)) + return 0; + + memset(mdio_bus, 0, sizeof(*mdio_bus)); + + mdio_bus->name = "tg3 mdio bus"; + snprintf(mdio_bus->id, MII_BUS_ID_SIZE, "%x", + (tp->pdev->bus->number << 8) | tp->pdev->devfn); + mdio_bus->priv = tp; + mdio_bus->dev = &tp->pdev->dev; + mdio_bus->read = &tg3_mdio_read; + mdio_bus->write = &tg3_mdio_write; + mdio_bus->reset = &tg3_mdio_reset; + mdio_bus->phy_mask = ~(1 << PHY_ADDR); + mdio_bus->irq = &tp->mdio_irq[0]; + + for (i = 0; i < PHY_MAX_ADDR; i++) + mdio_bus->irq[i] = PHY_POLL; + + /* The bus registration will look for all the PHYs on the mdio bus. + * Unfortunately, it does not ensure the PHY is powered up before + * accessing the PHY ID registers. A chip reset is the + * quickest way to bring the device back to an operational state.. + */ + if (tg3_readphy(tp, MII_BMCR, ®) || (reg & BMCR_PDOWN)) + tg3_bmcr_reset(tp); + + i = mdiobus_register(mdio_bus); + if (i) { + printk(KERN_WARNING "%s: mdiobus_reg failed (0x%x)\n", + tp->dev->name, i); + return i; + } + + tp->tg3_flags3 |= TG3_FLG3_MDIOBUS_INITED; + + phydev = tp->mdio_bus.phy_map[PHY_ADDR]; + + switch (phydev->phy_id) { + case TG3_PHY_ID_BCM50610: + phydev->interface = PHY_INTERFACE_MODE_RGMII; + if (tp->tg3_flags3 & TG3_FLG3_RGMII_STD_IBND_DISABLE) + phydev->dev_flags |= PHY_BRCM_STD_IBND_DISABLE; + if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_RX_EN) + phydev->dev_flags |= PHY_BRCM_EXT_IBND_RX_ENABLE; + if (tp->tg3_flags3 & TG3_FLG3_RGMII_EXT_IBND_TX_EN) + phydev->dev_flags |= PHY_BRCM_EXT_IBND_TX_ENABLE; + break; + case TG3_PHY_ID_BCMAC131: + phydev->interface = PHY_INTERFACE_MODE_MII; + break; + } + + tg3_mdio_config(tp); + + return 0; +} + +static void tg3_mdio_fini(struct tg3 *tp) +{ + if (tp->tg3_flags3 & TG3_FLG3_MDIOBUS_INITED) { + tp->tg3_flags3 &= ~TG3_FLG3_MDIOBUS_INITED; + mdiobus_unregister(&tp->mdio_bus); + tp->tg3_flags3 &= ~TG3_FLG3_MDIOBUS_PAUSED; + } +} + +/* tp->lock is held. */ +static void tg3_wait_for_event_ack(struct tg3 *tp) +{ + int i; + + /* Wait for up to 2.5 milliseconds */ + for (i = 0; i < 250000; i++) { + if (!(tr32(GRC_RX_CPU_EVENT) & GRC_RX_CPU_DRIVER_EVENT)) + break; + udelay(10); + } +} + +/* tp->lock is held. */ +static void tg3_ump_link_report(struct tg3 *tp) +{ + u32 reg; + u32 val; + + if (!(tp->tg3_flags2 & TG3_FLG2_5780_CLASS) || + !(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) + return; + + tg3_wait_for_event_ack(tp); + + tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_LINK_UPDATE); + + tg3_write_mem(tp, NIC_SRAM_FW_CMD_LEN_MBOX, 14); + + val = 0; + if (!tg3_readphy(tp, MII_BMCR, ®)) + val = reg << 16; + if (!tg3_readphy(tp, MII_BMSR, ®)) + val |= (reg & 0xffff); + tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX, val); + + val = 0; + if (!tg3_readphy(tp, MII_ADVERTISE, ®)) + val = reg << 16; + if (!tg3_readphy(tp, MII_LPA, ®)) + val |= (reg & 0xffff); + tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 4, val); + + val = 0; + if (!(tp->tg3_flags2 & TG3_FLG2_MII_SERDES)) { + if (!tg3_readphy(tp, MII_CTRL1000, ®)) + val = reg << 16; + if (!tg3_readphy(tp, MII_STAT1000, ®)) + val |= (reg & 0xffff); + } + tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 8, val); + + if (!tg3_readphy(tp, MII_PHYADDR, ®)) + val = reg << 16; + else + val = 0; + tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 12, val); + + val = tr32(GRC_RX_CPU_EVENT); + val |= GRC_RX_CPU_DRIVER_EVENT; + tw32_f(GRC_RX_CPU_EVENT, val); +} + +static void tg3_link_report(struct tg3 *tp) +{ + if (!netif_carrier_ok(tp->dev)) { + if (netif_msg_link(tp)) + printk(KERN_INFO PFX "%s: Link is down.\n", + tp->dev->name); + tg3_ump_link_report(tp); + } else if (netif_msg_link(tp)) { + printk(KERN_INFO PFX "%s: Link is up at %d Mbps, %s duplex.\n", + tp->dev->name, + (tp->link_config.active_speed == SPEED_1000 ? + 1000 : + (tp->link_config.active_speed == SPEED_100 ? + 100 : 10)), + (tp->link_config.active_duplex == DUPLEX_FULL ? + "full" : "half")); + + printk(KERN_INFO PFX + "%s: Flow control is %s for TX and %s for RX.\n", + tp->dev->name, + (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_TX) ? + "on" : "off", + (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_RX) ? + "on" : "off"); + tg3_ump_link_report(tp); + } +} + +static u16 tg3_advert_flowctrl_1000T(u8 flow_ctrl) +{ + u16 miireg; + + if ((flow_ctrl & TG3_FLOW_CTRL_TX) && (flow_ctrl & TG3_FLOW_CTRL_RX)) + miireg = ADVERTISE_PAUSE_CAP; + else if (flow_ctrl & TG3_FLOW_CTRL_TX) + miireg = ADVERTISE_PAUSE_ASYM; + else if (flow_ctrl & TG3_FLOW_CTRL_RX) + miireg = ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; + else + miireg = 0; + + return miireg; +} + +static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl) +{ + u16 miireg; + + if ((flow_ctrl & TG3_FLOW_CTRL_TX) && (flow_ctrl & TG3_FLOW_CTRL_RX)) + miireg = ADVERTISE_1000XPAUSE; + else if (flow_ctrl & TG3_FLOW_CTRL_TX) + miireg = ADVERTISE_1000XPSE_ASYM; + else if (flow_ctrl & TG3_FLOW_CTRL_RX) + miireg = ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM; + else + miireg = 0; + + return miireg; +} + +static u8 tg3_resolve_flowctrl_1000T(u16 lcladv, u16 rmtadv) +{ + u8 cap = 0; + + if (lcladv & ADVERTISE_PAUSE_CAP) { + if (lcladv & ADVERTISE_PAUSE_ASYM) { + if (rmtadv & LPA_PAUSE_CAP) + cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; + else if (rmtadv & LPA_PAUSE_ASYM) + cap = TG3_FLOW_CTRL_RX; + } else { + if (rmtadv & LPA_PAUSE_CAP) + cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; + } + } else if (lcladv & ADVERTISE_PAUSE_ASYM) { + if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) + cap = TG3_FLOW_CTRL_TX; + } + + return cap; +} + +static u8 tg3_resolve_flowctrl_1000X(u16 lcladv, u16 rmtadv) +{ + u8 cap = 0; + + if (lcladv & ADVERTISE_1000XPAUSE) { + if (lcladv & ADVERTISE_1000XPSE_ASYM) { + if (rmtadv & LPA_1000XPAUSE) + cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; + else if (rmtadv & LPA_1000XPAUSE_ASYM) + cap = TG3_FLOW_CTRL_RX; + } else { + if (rmtadv & LPA_1000XPAUSE) + cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; + } + } else if (lcladv & ADVERTISE_1000XPSE_ASYM) { + if ((rmtadv & LPA_1000XPAUSE) && (rmtadv & LPA_1000XPAUSE_ASYM)) + cap = TG3_FLOW_CTRL_TX; + } + + return cap; +} + +static void tg3_setup_flow_control(struct tg3 *tp, u32 lcladv, u32 rmtadv) +{ + u8 autoneg; + u8 flowctrl = 0; + u32 old_rx_mode = tp->rx_mode; + u32 old_tx_mode = tp->tx_mode; + + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) + autoneg = tp->mdio_bus.phy_map[PHY_ADDR]->autoneg; + else + autoneg = tp->link_config.autoneg; + + if (autoneg == AUTONEG_ENABLE && + (tp->tg3_flags & TG3_FLAG_PAUSE_AUTONEG)) { + if (tp->tg3_flags2 & TG3_FLG2_ANY_SERDES) + flowctrl = tg3_resolve_flowctrl_1000X(lcladv, rmtadv); + else + flowctrl = tg3_resolve_flowctrl_1000T(lcladv, rmtadv); + } else + flowctrl = tp->link_config.flowctrl; + + tp->link_config.active_flowctrl = flowctrl; + + if (flowctrl & TG3_FLOW_CTRL_RX) + tp->rx_mode |= RX_MODE_FLOW_CTRL_ENABLE; + else + tp->rx_mode &= ~RX_MODE_FLOW_CTRL_ENABLE; + + if (old_rx_mode != tp->rx_mode) + tw32_f(MAC_RX_MODE, tp->rx_mode); + + if (flowctrl & TG3_FLOW_CTRL_TX) + tp->tx_mode |= TX_MODE_FLOW_CTRL_ENABLE; + else + tp->tx_mode &= ~TX_MODE_FLOW_CTRL_ENABLE; + + if (old_tx_mode != tp->tx_mode) + tw32_f(MAC_TX_MODE, tp->tx_mode); +} + +static void tg3_adjust_link(struct net_device *dev) +{ + u8 oldflowctrl, linkmesg = 0; + u32 mac_mode, lcl_adv, rmt_adv; + struct tg3 *tp = netdev_priv(dev); + struct phy_device *phydev = tp->mdio_bus.phy_map[PHY_ADDR]; + + spin_lock(&tp->lock); + + mac_mode = tp->mac_mode & ~(MAC_MODE_PORT_MODE_MASK | + MAC_MODE_HALF_DUPLEX); + + oldflowctrl = tp->link_config.active_flowctrl; + + if (phydev->link) { + lcl_adv = 0; + rmt_adv = 0; + + if (phydev->speed == SPEED_100 || phydev->speed == SPEED_10) + mac_mode |= MAC_MODE_PORT_MODE_MII; + else + mac_mode |= MAC_MODE_PORT_MODE_GMII; + + if (phydev->duplex == DUPLEX_HALF) + mac_mode |= MAC_MODE_HALF_DUPLEX; + else { + lcl_adv = tg3_advert_flowctrl_1000T( + tp->link_config.flowctrl); + + if (phydev->pause) + rmt_adv = LPA_PAUSE_CAP; + if (phydev->asym_pause) + rmt_adv |= LPA_PAUSE_ASYM; + } + + tg3_setup_flow_control(tp, lcl_adv, rmt_adv); + } else + mac_mode |= MAC_MODE_PORT_MODE_GMII; + + if (mac_mode != tp->mac_mode) { + tp->mac_mode = mac_mode; + tw32_f(MAC_MODE, tp->mac_mode); + udelay(40); + } + + if (phydev->speed == SPEED_1000 && phydev->duplex == DUPLEX_HALF) + tw32(MAC_TX_LENGTHS, + ((2 << TX_LENGTHS_IPG_CRS_SHIFT) | + (6 << TX_LENGTHS_IPG_SHIFT) | + (0xff << TX_LENGTHS_SLOT_TIME_SHIFT))); + else + tw32(MAC_TX_LENGTHS, + ((2 << TX_LENGTHS_IPG_CRS_SHIFT) | + (6 << TX_LENGTHS_IPG_SHIFT) | + (32 << TX_LENGTHS_SLOT_TIME_SHIFT))); + + if ((phydev->link && tp->link_config.active_speed == SPEED_INVALID) || + (!phydev->link && tp->link_config.active_speed != SPEED_INVALID) || + phydev->speed != tp->link_config.active_speed || + phydev->duplex != tp->link_config.active_duplex || + oldflowctrl != tp->link_config.active_flowctrl) + linkmesg = 1; + + tp->link_config.active_speed = phydev->speed; + tp->link_config.active_duplex = phydev->duplex; + + spin_unlock(&tp->lock); + + if (linkmesg) + tg3_link_report(tp); +} + +static int tg3_phy_init(struct tg3 *tp) +{ + struct phy_device *phydev; + + if (tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED) + return 0; + + /* Bring the PHY back to a known state. */ + tg3_bmcr_reset(tp); + + phydev = tp->mdio_bus.phy_map[PHY_ADDR]; + + /* Attach the MAC to the PHY. */ + phydev = phy_connect(tp->dev, phydev->dev.bus_id, tg3_adjust_link, + phydev->dev_flags, phydev->interface); + if (IS_ERR(phydev)) { + printk(KERN_ERR "%s: Could not attach to PHY\n", tp->dev->name); + return PTR_ERR(phydev); + } + + tp->tg3_flags3 |= TG3_FLG3_PHY_CONNECTED; + + /* Mask with MAC supported features. */ + phydev->supported &= (PHY_GBIT_FEATURES | + SUPPORTED_Pause | + SUPPORTED_Asym_Pause); + + phydev->advertising = phydev->supported; + + printk(KERN_INFO + "%s: attached PHY driver [%s] (mii_bus:phy_addr=%s)\n", + tp->dev->name, phydev->drv->name, phydev->dev.bus_id); + + return 0; +} + +static void tg3_phy_start(struct tg3 *tp) +{ + struct phy_device *phydev; + + if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED)) + return; + + phydev = tp->mdio_bus.phy_map[PHY_ADDR]; + + if (tp->link_config.phy_is_low_power) { + tp->link_config.phy_is_low_power = 0; + phydev->speed = tp->link_config.orig_speed; + phydev->duplex = tp->link_config.orig_duplex; + phydev->autoneg = tp->link_config.orig_autoneg; + phydev->advertising = tp->link_config.orig_advertising; + } + + phy_start(phydev); + + phy_start_aneg(phydev); +} + +static void tg3_phy_stop(struct tg3 *tp) +{ + if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED)) + return; + + phy_stop(tp->mdio_bus.phy_map[PHY_ADDR]); +} + +static void tg3_phy_fini(struct tg3 *tp) +{ + if (tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED) { + phy_disconnect(tp->mdio_bus.phy_map[PHY_ADDR]); + tp->tg3_flags3 &= ~TG3_FLG3_PHY_CONNECTED; + } +} + static void tg3_phydsp_write(struct tg3 *tp, u32 reg, u32 val) { tg3_writephy(tp, MII_TG3_DSP_ADDRESS, reg); @@ -861,37 +1427,6 @@ static void tg3_phy_set_wirespeed(struct tg3 *tp) (val | (1 << 15) | (1 << 4))); } -static int tg3_bmcr_reset(struct tg3 *tp) -{ - u32 phy_control; - int limit, err; - - /* OK, reset it, and poll the BMCR_RESET bit until it - * clears or we time out. - */ - phy_control = BMCR_RESET; - err = tg3_writephy(tp, MII_BMCR, phy_control); - if (err != 0) - return -EBUSY; - - limit = 5000; - while (limit--) { - err = tg3_readphy(tp, MII_BMCR, &phy_control); - if (err != 0) - return -EBUSY; - - if ((phy_control & BMCR_RESET) == 0) { - udelay(40); - break; - } - udelay(10); - } - if (limit <= 0) - return -EBUSY; - - return 0; -} - static void tg3_phy_apply_otp(struct tg3 *tp) { u32 otp, phy; @@ -1115,8 +1650,6 @@ static int tg3_phy_reset_5703_4_5(struct tg3 *tp) return err; } -static void tg3_link_report(struct tg3 *); - /* This will reset the tigon3 PHY if there is no valid * link unless the FORCE argument is non-zero. */ @@ -1406,7 +1939,7 @@ static void tg3_power_down_phy(struct tg3 *tp) tw32_f(GRC_MISC_CFG, val | GRC_MISC_CFG_EPHY_IDDQ); udelay(40); return; - } else { + } else if (!(tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB)) { tg3_writephy(tp, MII_TG3_EXT_CTRL, MII_TG3_EXT_CTRL_FORCE_LED_OFF); tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x01b2); @@ -1480,7 +2013,7 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state) "requested.\n", tp->dev->name, state); return -EINVAL; - }; + } power_control |= PCI_PM_CTRL_PME_ENABLE; @@ -1488,18 +2021,55 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state) tw32(TG3PCI_MISC_HOST_CTRL, misc_host_ctrl | MISC_HOST_CTRL_MASK_PCI_INT); - if (tp->link_config.phy_is_low_power == 0) { - tp->link_config.phy_is_low_power = 1; - tp->link_config.orig_speed = tp->link_config.speed; - tp->link_config.orig_duplex = tp->link_config.duplex; - tp->link_config.orig_autoneg = tp->link_config.autoneg; - } + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) { + if ((tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED) && + !tp->link_config.phy_is_low_power) { + struct phy_device *phydev; + u32 advertising; + + phydev = tp->mdio_bus.phy_map[PHY_ADDR]; + + tp->link_config.phy_is_low_power = 1; + + tp->link_config.orig_speed = phydev->speed; + tp->link_config.orig_duplex = phydev->duplex; + tp->link_config.orig_autoneg = phydev->autoneg; + tp->link_config.orig_advertising = phydev->advertising; + + advertising = ADVERTISED_TP | + ADVERTISED_Pause | + ADVERTISED_Autoneg | + ADVERTISED_10baseT_Half; + + if ((tp->tg3_flags & TG3_FLAG_ENABLE_ASF) || + (tp->tg3_flags & TG3_FLAG_WOL_ENABLE)) { + if (tp->tg3_flags & TG3_FLAG_WOL_SPEED_100MB) + advertising |= + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full | + ADVERTISED_10baseT_Full; + else + advertising |= ADVERTISED_10baseT_Full; + } - if (!(tp->tg3_flags2 & TG3_FLG2_ANY_SERDES)) { - tp->link_config.speed = SPEED_10; - tp->link_config.duplex = DUPLEX_HALF; - tp->link_config.autoneg = AUTONEG_ENABLE; - tg3_setup_phy(tp, 0); + phydev->advertising = advertising; + + phy_start_aneg(phydev); + } + } else { + if (tp->link_config.phy_is_low_power == 0) { + tp->link_config.phy_is_low_power = 1; + tp->link_config.orig_speed = tp->link_config.speed; + tp->link_config.orig_duplex = tp->link_config.duplex; + tp->link_config.orig_autoneg = tp->link_config.autoneg; + } + + if (!(tp->tg3_flags2 & TG3_FLG2_ANY_SERDES)) { + tp->link_config.speed = SPEED_10; + tp->link_config.duplex = DUPLEX_HALF; + tp->link_config.autoneg = AUTONEG_ENABLE; + tg3_setup_phy(tp, 0); + } } if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { @@ -1530,8 +2100,10 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state) u32 mac_mode; if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)) { - tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x5a); - udelay(40); + if (!(tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB)) { + tg3_writephy(tp, MII_TG3_AUX_CTRL, 0x5a); + udelay(40); + } if (tp->tg3_flags2 & TG3_FLG2_MII_SERDES) mac_mode = MAC_MODE_PORT_MODE_GMII; @@ -1656,212 +2228,6 @@ static int tg3_set_power_state(struct tg3 *tp, pci_power_t state) return 0; } -/* tp->lock is held. */ -static void tg3_wait_for_event_ack(struct tg3 *tp) -{ - int i; - - /* Wait for up to 2.5 milliseconds */ - for (i = 0; i < 250000; i++) { - if (!(tr32(GRC_RX_CPU_EVENT) & GRC_RX_CPU_DRIVER_EVENT)) - break; - udelay(10); - } -} - -/* tp->lock is held. */ -static void tg3_ump_link_report(struct tg3 *tp) -{ - u32 reg; - u32 val; - - if (!(tp->tg3_flags2 & TG3_FLG2_5780_CLASS) || - !(tp->tg3_flags & TG3_FLAG_ENABLE_ASF)) - return; - - tg3_wait_for_event_ack(tp); - - tg3_write_mem(tp, NIC_SRAM_FW_CMD_MBOX, FWCMD_NICDRV_LINK_UPDATE); - - tg3_write_mem(tp, NIC_SRAM_FW_CMD_LEN_MBOX, 14); - - val = 0; - if (!tg3_readphy(tp, MII_BMCR, ®)) - val = reg << 16; - if (!tg3_readphy(tp, MII_BMSR, ®)) - val |= (reg & 0xffff); - tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX, val); - - val = 0; - if (!tg3_readphy(tp, MII_ADVERTISE, ®)) - val = reg << 16; - if (!tg3_readphy(tp, MII_LPA, ®)) - val |= (reg & 0xffff); - tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 4, val); - - val = 0; - if (!(tp->tg3_flags2 & TG3_FLG2_MII_SERDES)) { - if (!tg3_readphy(tp, MII_CTRL1000, ®)) - val = reg << 16; - if (!tg3_readphy(tp, MII_STAT1000, ®)) - val |= (reg & 0xffff); - } - tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 8, val); - - if (!tg3_readphy(tp, MII_PHYADDR, ®)) - val = reg << 16; - else - val = 0; - tg3_write_mem(tp, NIC_SRAM_FW_CMD_DATA_MBOX + 12, val); - - val = tr32(GRC_RX_CPU_EVENT); - val |= GRC_RX_CPU_DRIVER_EVENT; - tw32_f(GRC_RX_CPU_EVENT, val); -} - -static void tg3_link_report(struct tg3 *tp) -{ - if (!netif_carrier_ok(tp->dev)) { - if (netif_msg_link(tp)) - printk(KERN_INFO PFX "%s: Link is down.\n", - tp->dev->name); - tg3_ump_link_report(tp); - } else if (netif_msg_link(tp)) { - printk(KERN_INFO PFX "%s: Link is up at %d Mbps, %s duplex.\n", - tp->dev->name, - (tp->link_config.active_speed == SPEED_1000 ? - 1000 : - (tp->link_config.active_speed == SPEED_100 ? - 100 : 10)), - (tp->link_config.active_duplex == DUPLEX_FULL ? - "full" : "half")); - - printk(KERN_INFO PFX - "%s: Flow control is %s for TX and %s for RX.\n", - tp->dev->name, - (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_TX) ? - "on" : "off", - (tp->link_config.active_flowctrl & TG3_FLOW_CTRL_RX) ? - "on" : "off"); - tg3_ump_link_report(tp); - } -} - -static u16 tg3_advert_flowctrl_1000T(u8 flow_ctrl) -{ - u16 miireg; - - if ((flow_ctrl & TG3_FLOW_CTRL_TX) && (flow_ctrl & TG3_FLOW_CTRL_RX)) - miireg = ADVERTISE_PAUSE_CAP; - else if (flow_ctrl & TG3_FLOW_CTRL_TX) - miireg = ADVERTISE_PAUSE_ASYM; - else if (flow_ctrl & TG3_FLOW_CTRL_RX) - miireg = ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM; - else - miireg = 0; - - return miireg; -} - -static u16 tg3_advert_flowctrl_1000X(u8 flow_ctrl) -{ - u16 miireg; - - if ((flow_ctrl & TG3_FLOW_CTRL_TX) && (flow_ctrl & TG3_FLOW_CTRL_RX)) - miireg = ADVERTISE_1000XPAUSE; - else if (flow_ctrl & TG3_FLOW_CTRL_TX) - miireg = ADVERTISE_1000XPSE_ASYM; - else if (flow_ctrl & TG3_FLOW_CTRL_RX) - miireg = ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM; - else - miireg = 0; - - return miireg; -} - -static u8 tg3_resolve_flowctrl_1000T(u16 lcladv, u16 rmtadv) -{ - u8 cap = 0; - - if (lcladv & ADVERTISE_PAUSE_CAP) { - if (lcladv & ADVERTISE_PAUSE_ASYM) { - if (rmtadv & LPA_PAUSE_CAP) - cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; - else if (rmtadv & LPA_PAUSE_ASYM) - cap = TG3_FLOW_CTRL_RX; - } else { - if (rmtadv & LPA_PAUSE_CAP) - cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; - } - } else if (lcladv & ADVERTISE_PAUSE_ASYM) { - if ((rmtadv & LPA_PAUSE_CAP) && (rmtadv & LPA_PAUSE_ASYM)) - cap = TG3_FLOW_CTRL_TX; - } - - return cap; -} - -static u8 tg3_resolve_flowctrl_1000X(u16 lcladv, u16 rmtadv) -{ - u8 cap = 0; - - if (lcladv & ADVERTISE_1000XPAUSE) { - if (lcladv & ADVERTISE_1000XPSE_ASYM) { - if (rmtadv & LPA_1000XPAUSE) - cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; - else if (rmtadv & LPA_1000XPAUSE_ASYM) - cap = TG3_FLOW_CTRL_RX; - } else { - if (rmtadv & LPA_1000XPAUSE) - cap = TG3_FLOW_CTRL_TX | TG3_FLOW_CTRL_RX; - } - } else if (lcladv & ADVERTISE_1000XPSE_ASYM) { - if ((rmtadv & LPA_1000XPAUSE) && (rmtadv & LPA_1000XPAUSE_ASYM)) - cap = TG3_FLOW_CTRL_TX; - } - - return cap; -} - -static void tg3_setup_flow_control(struct tg3 *tp, u32 local_adv, u32 remote_adv) -{ - u8 new_tg3_flags = 0; - u32 old_rx_mode = tp->rx_mode; - u32 old_tx_mode = tp->tx_mode; - - if (tp->link_config.autoneg == AUTONEG_ENABLE && - (tp->tg3_flags & TG3_FLAG_PAUSE_AUTONEG)) { - if (tp->tg3_flags2 & TG3_FLG2_ANY_SERDES) - new_tg3_flags = tg3_resolve_flowctrl_1000X(local_adv, - remote_adv); - else - new_tg3_flags = tg3_resolve_flowctrl_1000T(local_adv, - remote_adv); - } else { - new_tg3_flags = tp->link_config.flowctrl; - } - - tp->link_config.active_flowctrl = new_tg3_flags; - - if (new_tg3_flags & TG3_FLOW_CTRL_RX) - tp->rx_mode |= RX_MODE_FLOW_CTRL_ENABLE; - else - tp->rx_mode &= ~RX_MODE_FLOW_CTRL_ENABLE; - - if (old_rx_mode != tp->rx_mode) { - tw32_f(MAC_RX_MODE, tp->rx_mode); - } - - if (new_tg3_flags & TG3_FLOW_CTRL_TX) - tp->tx_mode |= TX_MODE_FLOW_CTRL_ENABLE; - else - tp->tx_mode &= ~TX_MODE_FLOW_CTRL_ENABLE; - - if (old_tx_mode != tp->tx_mode) { - tw32_f(MAC_TX_MODE, tp->tx_mode); - } -} - static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *duplex) { switch (val & MII_TG3_AUX_STAT_SPDMASK) { @@ -1906,7 +2272,7 @@ static void tg3_aux_stat_to_speed_duplex(struct tg3 *tp, u32 val, u16 *speed, u8 *speed = SPEED_INVALID; *duplex = DUPLEX_INVALID; break; - }; + } } static void tg3_phy_copper_begin(struct tg3 *tp) @@ -2018,7 +2384,7 @@ static void tg3_phy_copper_begin(struct tg3 *tp) case SPEED_1000: bmcr |= TG3_BMCR_SPEED1000; break; - }; + } if (tp->link_config.duplex == DUPLEX_FULL) bmcr |= BMCR_FULLDPLX; @@ -2716,7 +3082,7 @@ static int tg3_fiber_aneg_smachine(struct tg3 *tp, default: ret = ANEG_FAILED; break; - }; + } return ret; } @@ -3558,7 +3924,7 @@ static int tg3_alloc_rx_skb(struct tg3 *tp, u32 opaque_key, default: return -EINVAL; - }; + } /* Do not overwrite any of the map or rp information * until we are sure we can commit to a new buffer. @@ -3618,7 +3984,7 @@ static void tg3_recycle_rx(struct tg3 *tp, u32 opaque_key, default: return; - }; + } dest_map->skb = src_map->skb; pci_unmap_addr_set(dest_map, mapping, @@ -3828,7 +4194,15 @@ static int tg3_poll_work(struct tg3 *tp, int work_done, int budget) sblk->status = SD_STATUS_UPDATED | (sblk->status & ~SD_STATUS_LINK_CHG); spin_lock(&tp->lock); - tg3_setup_phy(tp, 0); + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) { + tw32_f(MAC_STATUS, + (MAC_STATUS_SYNC_CHANGED | + MAC_STATUS_CFG_CHANGED | + MAC_STATUS_MI_COMPLETION | + MAC_STATUS_LNKSTATE_CHANGED)); + udelay(40); + } else + tg3_setup_phy(tp, 0); spin_unlock(&tp->lock); } } @@ -4116,6 +4490,7 @@ static void tg3_poll_controller(struct net_device *dev) static void tg3_reset_task(struct work_struct *work) { struct tg3 *tp = container_of(work, struct tg3, reset_task); + int err; unsigned int restart_timer; tg3_full_lock(tp, 0); @@ -4127,6 +4502,8 @@ static void tg3_reset_task(struct work_struct *work) tg3_full_unlock(tp); + tg3_phy_stop(tp); + tg3_netif_stop(tp); tg3_full_lock(tp, 1); @@ -4142,7 +4519,8 @@ static void tg3_reset_task(struct work_struct *work) } tg3_halt(tp, RESET_KIND_SHUTDOWN, 0); - if (tg3_init_hw(tp, 1)) + err = tg3_init_hw(tp, 1); + if (err) goto out; tg3_netif_start(tp); @@ -4152,6 +4530,9 @@ static void tg3_reset_task(struct work_struct *work) out: tg3_full_unlock(tp); + + if (!err) + tg3_phy_start(tp); } static void tg3_dump_short_state(struct tg3 *tp) @@ -4655,6 +5036,8 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) return 0; } + tg3_phy_stop(tp); + tg3_netif_stop(tp); tg3_full_lock(tp, 1); @@ -4670,6 +5053,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) tg3_full_unlock(tp); + if (!err) + tg3_phy_start(tp); + return err; } @@ -4961,7 +5347,7 @@ static int tg3_stop_block(struct tg3 *tp, unsigned long ofs, u32 enable_bit, int default: break; - }; + } } val = tr32(ofs); @@ -5203,7 +5589,7 @@ static void tg3_write_sig_pre_reset(struct tg3 *tp, int kind) default: break; - }; + } } if (kind == RESET_KIND_INIT || @@ -5228,7 +5614,7 @@ static void tg3_write_sig_post_reset(struct tg3 *tp, int kind) default: break; - }; + } } if (kind == RESET_KIND_SHUTDOWN) @@ -5257,7 +5643,7 @@ static void tg3_write_sig_legacy(struct tg3 *tp, int kind) default: break; - }; + } } } @@ -5379,6 +5765,8 @@ static int tg3_chip_reset(struct tg3 *tp) tg3_nvram_lock(tp); + tg3_mdio_stop(tp); + /* No matching tg3_nvram_unlock() after this because * chip reset below will undo the nvram lock. */ @@ -5394,7 +5782,8 @@ static int tg3_chip_reset(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) tw32(GRC_FASTBOOT_PC, 0); /* @@ -5530,6 +5919,8 @@ static int tg3_chip_reset(struct tg3 *tp) tw32_f(MAC_MODE, 0); udelay(40); + tg3_mdio_start(tp); + err = tg3_poll_fw(tp); if (err) return err; @@ -6609,7 +7000,8 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) tg3_abort_hw(tp, 1); } - if (reset_phy) + if (reset_phy && + !(tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB)) tg3_phy_reset(tp); err = tg3_chip_reset(tp); @@ -6685,7 +7077,8 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) return err; if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5784 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5761) { + GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5761 && + GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5785) { /* This value is determined during the probe time DMA * engine test, tg3_test_dma. */ @@ -6924,7 +7317,8 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) RDMAC_MODE_FIFOURUN_ENAB | RDMAC_MODE_FIFOOREAD_ENAB | RDMAC_MODE_LNGREAD_ENAB); - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784) + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) rdmac_mode |= RDMAC_MODE_BD_SBD_CRPT_ENAB | RDMAC_MODE_MBUF_RBD_CRPT_ENAB | RDMAC_MODE_MBUF_SBD_CRPT_ENAB; @@ -7092,8 +7486,9 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755) || (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787) || (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784) || - (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761)) - val |= (1 << 29); + (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) || + (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785)) + val |= WDMAC_MODE_STATUS_TAG_FIX; tw32_f(WDMAC_MODE, val); udelay(40); @@ -7154,23 +7549,14 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) tp->rx_mode = RX_MODE_ENABLE; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) tp->rx_mode |= RX_MODE_IPV6_CSUM_ENABLE; tw32_f(MAC_RX_MODE, tp->rx_mode); udelay(10); - if (tp->link_config.phy_is_low_power) { - tp->link_config.phy_is_low_power = 0; - tp->link_config.speed = tp->link_config.orig_speed; - tp->link_config.duplex = tp->link_config.orig_duplex; - tp->link_config.autoneg = tp->link_config.orig_autoneg; - } - - tp->mi_mode &= ~MAC_MI_MODE_AUTO_POLL; - tw32_f(MAC_MI_MODE, tp->mi_mode); - udelay(80); - tw32(MAC_LED_CTRL, tp->led_ctrl); tw32(MAC_MI_STAT, MAC_MI_STAT_LNKSTAT_ATTN_ENAB); @@ -7217,19 +7603,28 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) tw32(GRC_LOCAL_CTRL, tp->grc_local_ctrl); } - err = tg3_setup_phy(tp, 0); - if (err) - return err; + if (!(tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB)) { + if (tp->link_config.phy_is_low_power) { + tp->link_config.phy_is_low_power = 0; + tp->link_config.speed = tp->link_config.orig_speed; + tp->link_config.duplex = tp->link_config.orig_duplex; + tp->link_config.autoneg = tp->link_config.orig_autoneg; + } - if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906) { - u32 tmp; + err = tg3_setup_phy(tp, 0); + if (err) + return err; - /* Clear CRC stats. */ - if (!tg3_readphy(tp, MII_TG3_TEST1, &tmp)) { - tg3_writephy(tp, MII_TG3_TEST1, - tmp | MII_TG3_TEST1_CRC_EN); - tg3_readphy(tp, 0x14, &tmp); + if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) && + GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906) { + u32 tmp; + + /* Clear CRC stats. */ + if (!tg3_readphy(tp, MII_TG3_TEST1, &tmp)) { + tg3_writephy(tp, MII_TG3_TEST1, + tmp | MII_TG3_TEST1_CRC_EN); + tg3_readphy(tp, 0x14, &tmp); + } } } @@ -7282,7 +7677,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) default: break; - }; + } if (tp->tg3_flags3 & TG3_FLG3_ENABLE_APE) /* Write our heartbeat update interval to APE. */ @@ -7744,6 +8139,8 @@ static int tg3_open(struct net_device *dev) } } + tg3_phy_start(tp); + tg3_full_lock(tp, 0); add_timer(&tp->timer); @@ -8545,7 +8942,13 @@ static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct tg3 *tp = netdev_priv(dev); + struct tg3 *tp = netdev_priv(dev); + + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) { + if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED)) + return -EAGAIN; + return phy_ethtool_gset(tp->mdio_bus.phy_map[PHY_ADDR], cmd); + } cmd->supported = (SUPPORTED_Autoneg); @@ -8582,6 +8985,12 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) { struct tg3 *tp = netdev_priv(dev); + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) { + if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED)) + return -EAGAIN; + return phy_ethtool_sset(tp->mdio_bus.phy_map[PHY_ADDR], cmd); + } + if (tp->tg3_flags2 & TG3_FLG2_ANY_SERDES) { /* These are the only valid advertisement bits allowed. */ if (cmd->autoneg == AUTONEG_ENABLE && @@ -8614,7 +9023,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) tp->link_config.advertising = 0; tp->link_config.speed = cmd->speed; tp->link_config.duplex = cmd->duplex; - } + } tp->link_config.orig_speed = tp->link_config.speed; tp->link_config.orig_duplex = tp->link_config.duplex; @@ -8697,7 +9106,10 @@ static int tg3_set_tso(struct net_device *dev, u32 value) (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906)) { if (value) { dev->features |= NETIF_F_TSO6; - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 && + GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5784_AX) || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) dev->features |= NETIF_F_TSO_ECN; } else dev->features &= ~(NETIF_F_TSO6 | NETIF_F_TSO_ECN); @@ -8708,7 +9120,6 @@ static int tg3_set_tso(struct net_device *dev, u32 value) static int tg3_nway_reset(struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); - u32 bmcr; int r; if (!netif_running(dev)) @@ -8717,17 +9128,25 @@ static int tg3_nway_reset(struct net_device *dev) if (tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) return -EINVAL; - spin_lock_bh(&tp->lock); - r = -EINVAL; - tg3_readphy(tp, MII_BMCR, &bmcr); - if (!tg3_readphy(tp, MII_BMCR, &bmcr) && - ((bmcr & BMCR_ANENABLE) || - (tp->tg3_flags2 & TG3_FLG2_PARALLEL_DETECT))) { - tg3_writephy(tp, MII_BMCR, bmcr | BMCR_ANRESTART | - BMCR_ANENABLE); - r = 0; + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) { + if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED)) + return -EAGAIN; + r = phy_start_aneg(tp->mdio_bus.phy_map[PHY_ADDR]); + } else { + u32 bmcr; + + spin_lock_bh(&tp->lock); + r = -EINVAL; + tg3_readphy(tp, MII_BMCR, &bmcr); + if (!tg3_readphy(tp, MII_BMCR, &bmcr) && + ((bmcr & BMCR_ANENABLE) || + (tp->tg3_flags2 & TG3_FLG2_PARALLEL_DETECT))) { + tg3_writephy(tp, MII_BMCR, bmcr | BMCR_ANRESTART | + BMCR_ANENABLE); + r = 0; + } + spin_unlock_bh(&tp->lock); } - spin_unlock_bh(&tp->lock); return r; } @@ -8769,6 +9188,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e return -EINVAL; if (netif_running(dev)) { + tg3_phy_stop(tp); tg3_netif_stop(tp); irq_sync = 1; } @@ -8792,6 +9212,9 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e tg3_full_unlock(tp); + if (irq_sync && !err) + tg3_phy_start(tp); + return err; } @@ -8815,36 +9238,92 @@ static void tg3_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *epause) { struct tg3 *tp = netdev_priv(dev); - int irq_sync = 0, err = 0; + int err = 0; - if (netif_running(dev)) { - tg3_netif_stop(tp); - irq_sync = 1; - } + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) { + if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED)) + return -EAGAIN; - tg3_full_lock(tp, irq_sync); + if (epause->autoneg) { + u32 newadv; + struct phy_device *phydev; - if (epause->autoneg) - tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG; - else - tp->tg3_flags &= ~TG3_FLAG_PAUSE_AUTONEG; - if (epause->rx_pause) - tp->link_config.flowctrl |= TG3_FLOW_CTRL_RX; - else - tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_RX; - if (epause->tx_pause) - tp->link_config.flowctrl |= TG3_FLOW_CTRL_TX; - else - tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_TX; + phydev = tp->mdio_bus.phy_map[PHY_ADDR]; - if (netif_running(dev)) { - tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); - err = tg3_restart_hw(tp, 1); - if (!err) - tg3_netif_start(tp); - } + if (epause->rx_pause) { + if (epause->tx_pause) + newadv = ADVERTISED_Pause; + else + newadv = ADVERTISED_Pause | + ADVERTISED_Asym_Pause; + } else if (epause->tx_pause) { + newadv = ADVERTISED_Asym_Pause; + } else + newadv = 0; + + if (tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED) { + u32 oldadv = phydev->advertising & + (ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + if (oldadv != newadv) { + phydev->advertising &= + ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + phydev->advertising |= newadv; + err = phy_start_aneg(phydev); + } + } else { + tp->link_config.advertising &= + ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + tp->link_config.advertising |= newadv; + } + } else { + if (epause->rx_pause) + tp->link_config.flowctrl |= TG3_FLOW_CTRL_RX; + else + tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_RX; - tg3_full_unlock(tp); + if (epause->tx_pause) + tp->link_config.flowctrl |= TG3_FLOW_CTRL_TX; + else + tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_TX; + + if (netif_running(dev)) + tg3_setup_flow_control(tp, 0, 0); + } + } else { + int irq_sync = 0; + + if (netif_running(dev)) { + tg3_netif_stop(tp); + irq_sync = 1; + } + + tg3_full_lock(tp, irq_sync); + + if (epause->autoneg) + tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG; + else + tp->tg3_flags &= ~TG3_FLAG_PAUSE_AUTONEG; + if (epause->rx_pause) + tp->link_config.flowctrl |= TG3_FLOW_CTRL_RX; + else + tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_RX; + if (epause->tx_pause) + tp->link_config.flowctrl |= TG3_FLOW_CTRL_TX; + else + tp->link_config.flowctrl &= ~TG3_FLOW_CTRL_TX; + + if (netif_running(dev)) { + tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); + err = tg3_restart_hw(tp, 1); + if (!err) + tg3_netif_start(tp); + } + + tg3_full_unlock(tp); + } return err; } @@ -8888,7 +9367,8 @@ static int tg3_set_tx_csum(struct net_device *dev, u32 data) if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) ethtool_op_set_tx_ipv6_csum(dev, data); else ethtool_op_set_tx_csum(dev, data); @@ -9409,7 +9889,8 @@ static int tg3_test_memory(struct tg3 *tp) if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) mem_tbl = mem_tbl_5755; else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) mem_tbl = mem_tbl_5906; @@ -9616,7 +10097,8 @@ static int tg3_test_loopback(struct tg3 *tp) return TG3_LOOPBACK_FAILED; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) { + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) { int i; u32 status; @@ -9644,14 +10126,16 @@ static int tg3_test_loopback(struct tg3 *tp) err |= TG3_MAC_LOOPBACK_FAILED; if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) { + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) { tw32(TG3_CPMU_CTRL, cpmuctrl); /* Release the mutex */ tw32(TG3_CPMU_MUTEX_GNT, CPMU_MUTEX_GNT_DRIVER); } - if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES)) { + if (!(tp->tg3_flags2 & TG3_FLG2_PHY_SERDES) && + !(tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB)) { if (tg3_run_loopback(tp, TG3_PHY_LOOPBACK)) err |= TG3_PHY_LOOPBACK_FAILED; } @@ -9678,9 +10162,10 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, data[1] = 1; } if (etest->flags & ETH_TEST_FL_OFFLINE) { - int err, irq_sync = 0; + int err, err2 = 0, irq_sync = 0; if (netif_running(dev)) { + tg3_phy_stop(tp); tg3_netif_stop(tp); irq_sync = 1; } @@ -9721,11 +10206,15 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, tg3_halt(tp, RESET_KIND_SHUTDOWN, 1); if (netif_running(dev)) { tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE; - if (!tg3_restart_hw(tp, 1)) + err2 = tg3_restart_hw(tp, 1); + if (!err2) tg3_netif_start(tp); } tg3_full_unlock(tp); + + if (irq_sync && !err2) + tg3_phy_start(tp); } if (tp->link_config.phy_is_low_power) tg3_set_power_state(tp, PCI_D3hot); @@ -9738,6 +10227,12 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct tg3 *tp = netdev_priv(dev); int err; + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) { + if (!(tp->tg3_flags3 & TG3_FLG3_PHY_CONNECTED)) + return -EAGAIN; + return phy_mii_ioctl(tp->mdio_bus.phy_map[PHY_ADDR], data, cmd); + } + switch(cmd) { case SIOCGMIIPHY: data->phy_id = PHY_ADDR; @@ -10280,7 +10775,8 @@ static void __devinit tg3_nvram_init(struct tg3 *tp) else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755) tg3_get_5755_nvram_info(tp); else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784) + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) tg3_get_5787_nvram_info(tp); else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) tg3_get_5761_nvram_info(tp); @@ -10611,6 +11107,7 @@ static int tg3_nvram_write_block_buffered(struct tg3 *tp, u32 offset, u32 len, (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5787) && (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5784) && (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5761) && + (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5785) && (tp->nvram_jedecnum == JEDEC_ST) && (nvram_cmd & NVRAM_CMD_FIRST)) { @@ -10793,7 +11290,7 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) tg3_read_mem(tp, NIC_SRAM_DATA_SIG, &val); if (val == NIC_SRAM_DATA_SIG_MAGIC) { u32 nic_cfg, led_cfg; - u32 nic_phy_id, ver, cfg2 = 0, eeprom_phy_id; + u32 nic_phy_id, ver, cfg2 = 0, cfg4 = 0, eeprom_phy_id; int eeprom_phy_serdes = 0; tg3_read_mem(tp, NIC_SRAM_DATA_CFG, &nic_cfg); @@ -10807,6 +11304,9 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) (ver > 0) && (ver < 0x100)) tg3_read_mem(tp, NIC_SRAM_DATA_CFG_2, &cfg2); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) + tg3_read_mem(tp, NIC_SRAM_DATA_CFG_4, &cfg4); + if ((nic_cfg & NIC_SRAM_DATA_CFG_PHY_TYPE_MASK) == NIC_SRAM_DATA_CFG_PHY_TYPE_FIBER) eeprom_phy_serdes = 1; @@ -10879,7 +11379,7 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) LED_CTRL_MODE_PHY_2); break; - }; + } if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701) && @@ -10931,6 +11431,13 @@ static void __devinit tg3_get_eeprom_hw_cfg(struct tg3 *tp) if (cfg3 & NIC_SRAM_ASPM_DEBOUNCE) tp->tg3_flags |= TG3_FLAG_ASPM_WORKAROUND; } + + if (cfg4 & NIC_SRAM_RGMII_STD_IBND_DISABLE) + tp->tg3_flags3 |= TG3_FLG3_RGMII_STD_IBND_DISABLE; + if (cfg4 & NIC_SRAM_RGMII_EXT_IBND_RX_EN) + tp->tg3_flags3 |= TG3_FLG3_RGMII_EXT_IBND_RX_EN; + if (cfg4 & NIC_SRAM_RGMII_EXT_IBND_TX_EN) + tp->tg3_flags3 |= TG3_FLG3_RGMII_EXT_IBND_TX_EN; } } @@ -10989,6 +11496,9 @@ static int __devinit tg3_phy_probe(struct tg3 *tp) u32 hw_phy_id, hw_phy_id_masked; int err; + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) + return tg3_phy_init(tp); + /* Reading the PHY ID register can conflict with ASF * firwmare access to the PHY hardware. */ @@ -11511,6 +12021,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906 || (tp->tg3_flags2 & TG3_FLG2_5780_CLASS)) tp->tg3_flags2 |= TG3_FLG2_5750_PLUS; @@ -11532,6 +12043,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { tp->tg3_flags2 |= TG3_FLG2_HW_TSO_2; tp->tg3_flags2 |= TG3_FLG2_1SHOT_MSI; @@ -11544,14 +12056,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) } } - if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5705 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5750 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5752 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5755 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5787 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5784 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5761 && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906) + if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS) || + (tp->tg3_flags2 & TG3_FLG2_5780_CLASS)) tp->tg3_flags2 |= TG3_FLG2_JUMBO_CAPABLE; pcie_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_EXP); @@ -11740,7 +12246,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) } if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) { + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) { tp->tg3_flags |= TG3_FLAG_CPMU_PRESENT; if (tp->pci_chip_rev_id == CHIPREV_ID_5784_A0 || @@ -11824,7 +12331,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->tg3_flags2 |= TG3_FLG2_PHY_JITTER_BUG; if (tp->pdev->device == PCI_DEVICE_ID_TIGON3_5755M) tp->tg3_flags2 |= TG3_FLG2_PHY_ADJUST_TRIM; - } else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906) + } else if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906 && + GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5785) tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG; } @@ -11835,8 +12343,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->phy_otp = TG3_OTP_DEFAULT; } - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) + if (tp->tg3_flags & TG3_FLAG_CPMU_PRESENT) tp->mi_mode = MAC_MI_MODE_500KHZ_CONST; else tp->mi_mode = MAC_MI_MODE_BASE; @@ -11846,9 +12353,12 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_BX) tp->coalesce_mode |= HOSTCC_MODE_32BYTE; - /* Initialize MAC MI mode, polling disabled. */ - tw32_f(MAC_MI_MODE, tp->mi_mode); - udelay(80); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) + tp->tg3_flags3 |= TG3_FLG3_USE_PHYLIB; + + err = tg3_mdio_init(tp); + if (err) + return err; /* Initialize data/descriptor byte/word swapping. */ val = tr32(GRC_MODE); @@ -11929,6 +12439,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) printk(KERN_ERR PFX "(%s) phy probe failed, err %d\n", pci_name(tp->pdev), err); /* ... but do not return immediately ... */ + tg3_mdio_fini(tp); } tg3_read_partno(tp); @@ -11976,6 +12487,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) tp->dev->hard_start_xmit = tg3_start_xmit; else @@ -12178,7 +12690,7 @@ static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val) val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX | DMA_RWCTRL_WRITE_BNDRY_384_PCIX); break; - }; + } } else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) { switch (cacheline_size) { case 16: @@ -12195,7 +12707,7 @@ static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val) val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE; val |= DMA_RWCTRL_WRITE_BNDRY_128_PCIE; break; - }; + } } else { switch (cacheline_size) { case 16: @@ -12239,7 +12751,7 @@ static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val) val |= (DMA_RWCTRL_READ_BNDRY_1024 | DMA_RWCTRL_WRITE_BNDRY_1024); break; - }; + } } out: @@ -12599,7 +13111,7 @@ static char * __devinit tg3_phy_string(struct tg3 *tp) case PHY_ID_BCM8002: return "8002/serdes"; case 0: return "serdes"; default: return "unknown"; - }; + } } static char * __devinit tg3_bus_string(struct tg3 *tp, char *str) @@ -12900,7 +13412,10 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, if ((tp->tg3_flags2 & TG3_FLG2_HW_TSO_2) && (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906)) dev->features |= NETIF_F_TSO6; - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 && + GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5784_AX) || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) dev->features |= NETIF_F_TSO_ECN; } @@ -12966,7 +13481,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5755 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5787 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5784 || - GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761) + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5761 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5785) dev->features |= NETIF_F_IPV6_CSUM; tp->tg3_flags |= TG3_FLAG_RX_CHECKSUMS; @@ -13048,6 +13564,12 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev) struct tg3 *tp = netdev_priv(dev); flush_scheduled_work(); + + if (tp->tg3_flags3 & TG3_FLG3_USE_PHYLIB) { + tg3_phy_fini(tp); + tg3_mdio_fini(tp); + } + unregister_netdev(dev); if (tp->aperegs) { iounmap(tp->aperegs); @@ -13080,6 +13602,7 @@ static int tg3_suspend(struct pci_dev *pdev, pm_message_t state) return 0; flush_scheduled_work(); + tg3_phy_stop(tp); tg3_netif_stop(tp); del_timer_sync(&tp->timer); @@ -13097,10 +13620,13 @@ static int tg3_suspend(struct pci_dev *pdev, pm_message_t state) err = tg3_set_power_state(tp, pci_choose_state(pdev, state)); if (err) { + int err2; + tg3_full_lock(tp, 0); tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE; - if (tg3_restart_hw(tp, 1)) + err2 = tg3_restart_hw(tp, 1); + if (err2) goto out; tp->timer.expires = jiffies + tp->timer_offset; @@ -13111,6 +13637,9 @@ static int tg3_suspend(struct pci_dev *pdev, pm_message_t state) out: tg3_full_unlock(tp); + + if (!err2) + tg3_phy_start(tp); } return err; @@ -13148,6 +13677,9 @@ static int tg3_resume(struct pci_dev *pdev) out: tg3_full_unlock(tp); + if (!err) + tg3_phy_start(tp); + return err; } diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 0404f93baa2..df07842172b 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -128,6 +128,7 @@ #define ASIC_REV_USE_PROD_ID_REG 0x0f #define ASIC_REV_5784 0x5784 #define ASIC_REV_5761 0x5761 +#define ASIC_REV_5785 0x5785 #define GET_CHIP_REV(CHIP_REV_ID) ((CHIP_REV_ID) >> 8) #define CHIPREV_5700_AX 0x70 #define CHIPREV_5700_BX 0x71 @@ -528,7 +529,23 @@ #define MAC_SERDES_CFG 0x00000590 #define MAC_SERDES_CFG_EDGE_SELECT 0x00001000 #define MAC_SERDES_STAT 0x00000594 -/* 0x598 --> 0x5b0 unused */ +/* 0x598 --> 0x5a0 unused */ +#define MAC_PHYCFG1 0x000005a0 +#define MAC_PHYCFG1_RGMII_INT 0x00000001 +#define MAC_PHYCFG1_RGMII_EXT_RX_DEC 0x02000000 +#define MAC_PHYCFG1_RGMII_SND_STAT_EN 0x04000000 +#define MAC_PHYCFG1_TXC_DRV 0x20000000 +#define MAC_PHYCFG2 0x000005a4 +#define MAC_PHYCFG2_INBAND_ENABLE 0x00000001 +#define MAC_EXT_RGMII_MODE 0x000005a8 +#define MAC_RGMII_MODE_TX_ENABLE 0x00000001 +#define MAC_RGMII_MODE_TX_LOWPWR 0x00000002 +#define MAC_RGMII_MODE_TX_RESET 0x00000004 +#define MAC_RGMII_MODE_RX_INT_B 0x00000100 +#define MAC_RGMII_MODE_RX_QUALITY 0x00000200 +#define MAC_RGMII_MODE_RX_ACTIVITY 0x00000400 +#define MAC_RGMII_MODE_RX_ENG_DET 0x00000800 +/* 0x5ac --> 0x5b0 unused */ #define SERDES_RX_CTRL 0x000005b0 /* 5780/5714 only */ #define SERDES_RX_SIG_DETECT 0x00000400 #define SG_DIG_CTRL 0x000005b0 @@ -1109,6 +1126,7 @@ #define WDMAC_MODE_FIFOOREAD_ENAB 0x00000100 #define WDMAC_MODE_LNGREAD_ENAB 0x00000200 #define WDMAC_MODE_RX_ACCEL 0x00000400 +#define WDMAC_MODE_STATUS_TAG_FIX 0x20000000 #define WDMAC_STATUS 0x00004c04 #define WDMAC_STATUS_TGTABORT 0x00000004 #define WDMAC_STATUS_MSTABORT 0x00000008 @@ -1713,6 +1731,12 @@ #define NIC_SRAM_DATA_CFG_3 0x00000d3c #define NIC_SRAM_ASPM_DEBOUNCE 0x00000002 +#define NIC_SRAM_DATA_CFG_4 0x00000d60 +#define NIC_SRAM_GMII_MODE 0x00000002 +#define NIC_SRAM_RGMII_STD_IBND_DISABLE 0x00000004 +#define NIC_SRAM_RGMII_EXT_IBND_RX_EN 0x00000008 +#define NIC_SRAM_RGMII_EXT_IBND_TX_EN 0x00000010 + #define NIC_SRAM_RX_MINI_BUFFER_DESC 0x00001000 #define NIC_SRAM_DMA_DESC_POOL_BASE 0x00002000 @@ -2204,6 +2228,7 @@ struct tg3_link_config { u16 orig_speed; u8 orig_duplex; u8 orig_autoneg; + u32 orig_advertising; }; struct tg3_bufmgr_config { @@ -2479,6 +2504,13 @@ struct tg3 { #define TG3_FLG3_ENABLE_APE 0x00000002 #define TG3_FLG3_5761_5784_AX_FIXES 0x00000004 #define TG3_FLG3_5701_DMA_BUG 0x00000008 +#define TG3_FLG3_USE_PHYLIB 0x00000010 +#define TG3_FLG3_MDIOBUS_INITED 0x00000020 +#define TG3_FLG3_MDIOBUS_PAUSED 0x00000040 +#define TG3_FLG3_PHY_CONNECTED 0x00000080 +#define TG3_FLG3_RGMII_STD_IBND_DISABLE 0x00000100 +#define TG3_FLG3_RGMII_EXT_IBND_RX_EN 0x00000200 +#define TG3_FLG3_RGMII_EXT_IBND_TX_EN 0x00000400 struct timer_list timer; u16 timer_counter; @@ -2519,6 +2551,9 @@ struct tg3 { int msi_cap; int pcix_cap; + struct mii_bus mdio_bus; + int mdio_irq[PHY_MAX_ADDR]; + /* PHY info */ u32 phy_id; #define PHY_ID_MASK 0xfffffff0 @@ -2546,6 +2581,9 @@ struct tg3 { #define PHY_REV_BCM5401_B2 0x3 #define PHY_REV_BCM5401_C0 0x6 #define PHY_REV_BCM5411_X0 0x1 /* Found on Netgear GA302T */ +#define TG3_PHY_ID_BCM50610 0x143bd60 +#define TG3_PHY_ID_BCMAC131 0x143bc70 + u32 led_ctrl; u32 phy_otp; diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c index 0166407d706..85246ed7cb9 100644 --- a/drivers/net/tlan.c +++ b/drivers/net/tlan.c @@ -13,8 +13,6 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - ** This file is best viewed/edited with columns>=132. - * ** Useful (if not required) reading: * * Texas Instruments, ThunderLAN Programmer's Guide, @@ -218,9 +216,7 @@ static int bbuf; module_param(bbuf, int, 0); MODULE_PARM_DESC(bbuf, "ThunderLAN use big buffer (0-1)"); -static u8 *TLanPadBuffer; -static dma_addr_t TLanPadBufferDMA; -static char TLanSignature[] = "TLAN"; +static const char TLanSignature[] = "TLAN"; static const char tlan_banner[] = "ThunderLAN driver v1.15\n"; static int tlan_have_pci; static int tlan_have_eisa; @@ -238,9 +234,11 @@ static struct board { { "Compaq Netelligent 10 T PCI UTP", TLAN_ADAPTER_ACTIVITY_LED, 0x83 }, { "Compaq Netelligent 10/100 TX PCI UTP", TLAN_ADAPTER_ACTIVITY_LED, 0x83 }, { "Compaq Integrated NetFlex-3/P", TLAN_ADAPTER_NONE, 0x83 }, - { "Compaq NetFlex-3/P", TLAN_ADAPTER_UNMANAGED_PHY | TLAN_ADAPTER_BIT_RATE_PHY, 0x83 }, + { "Compaq NetFlex-3/P", + TLAN_ADAPTER_UNMANAGED_PHY | TLAN_ADAPTER_BIT_RATE_PHY, 0x83 }, { "Compaq NetFlex-3/P", TLAN_ADAPTER_NONE, 0x83 }, - { "Compaq Netelligent Integrated 10/100 TX UTP", TLAN_ADAPTER_ACTIVITY_LED, 0x83 }, + { "Compaq Netelligent Integrated 10/100 TX UTP", + TLAN_ADAPTER_ACTIVITY_LED, 0x83 }, { "Compaq Netelligent Dual 10/100 TX PCI UTP", TLAN_ADAPTER_NONE, 0x83 }, { "Compaq Netelligent 10/100 TX Embedded UTP", TLAN_ADAPTER_NONE, 0x83 }, { "Olicom OC-2183/2185", TLAN_ADAPTER_USE_INTERN_10, 0x83 }, @@ -248,8 +246,9 @@ static struct board { { "Olicom OC-2326", TLAN_ADAPTER_USE_INTERN_10, 0xF8 }, { "Compaq Netelligent 10/100 TX UTP", TLAN_ADAPTER_ACTIVITY_LED, 0x83 }, { "Compaq Netelligent 10 T/2 PCI UTP/Coax", TLAN_ADAPTER_NONE, 0x83 }, - { "Compaq NetFlex-3/E", TLAN_ADAPTER_ACTIVITY_LED | /* EISA card */ - TLAN_ADAPTER_UNMANAGED_PHY | TLAN_ADAPTER_BIT_RATE_PHY, 0x83 }, + { "Compaq NetFlex-3/E", + TLAN_ADAPTER_ACTIVITY_LED | /* EISA card */ + TLAN_ADAPTER_UNMANAGED_PHY | TLAN_ADAPTER_BIT_RATE_PHY, 0x83 }, { "Compaq NetFlex-3/E", TLAN_ADAPTER_ACTIVITY_LED, 0x83 }, /* EISA card */ }; @@ -294,12 +293,12 @@ static int TLan_Close( struct net_device *); static struct net_device_stats *TLan_GetStats( struct net_device *); static void TLan_SetMulticastList( struct net_device *); static int TLan_ioctl( struct net_device *dev, struct ifreq *rq, int cmd); -static int TLan_probe1( struct pci_dev *pdev, long ioaddr, int irq, int rev, const struct pci_device_id *ent); +static int TLan_probe1( struct pci_dev *pdev, long ioaddr, + int irq, int rev, const struct pci_device_id *ent); static void TLan_tx_timeout( struct net_device *dev); static void TLan_tx_timeout_work(struct work_struct *work); static int tlan_init_one( struct pci_dev *pdev, const struct pci_device_id *ent); -static u32 TLan_HandleInvalid( struct net_device *, u16 ); static u32 TLan_HandleTxEOF( struct net_device *, u16 ); static u32 TLan_HandleStatOverflow( struct net_device *, u16 ); static u32 TLan_HandleRxEOF( struct net_device *, u16 ); @@ -348,29 +347,27 @@ static void TLan_EeReceiveByte( u16, u8 *, int ); static int TLan_EeReadByte( struct net_device *, u8, u8 * ); -static void +static inline void TLan_StoreSKB( struct tlan_list_tag *tag, struct sk_buff *skb) { unsigned long addr = (unsigned long)skb; - tag->buffer[9].address = (u32)addr; - addr >>= 31; /* >>= 32 is undefined for 32bit arch, stupid C */ - addr >>= 1; - tag->buffer[8].address = (u32)addr; + tag->buffer[9].address = addr; + tag->buffer[8].address = upper_32_bits(addr); } -static struct sk_buff * -TLan_GetSKB( struct tlan_list_tag *tag) +static inline struct sk_buff * +TLan_GetSKB( const struct tlan_list_tag *tag) { - unsigned long addr = tag->buffer[8].address; - addr <<= 31; - addr <<= 1; - addr |= tag->buffer[9].address; + unsigned long addr; + + addr = tag->buffer[8].address; + addr |= (tag->buffer[9].address << 16) << 16; return (struct sk_buff *) addr; } static TLanIntVectorFunc *TLanIntVector[TLAN_INT_NUMBER_OF_INTS] = { - TLan_HandleInvalid, + NULL, TLan_HandleTxEOF, TLan_HandleStatOverflow, TLan_HandleRxEOF, @@ -444,7 +441,9 @@ static void __devexit tlan_remove_one( struct pci_dev *pdev) unregister_netdev( dev ); if ( priv->dmaStorage ) { - pci_free_consistent(priv->pciDev, priv->dmaSize, priv->dmaStorage, priv->dmaStorageDMA ); + pci_free_consistent(priv->pciDev, + priv->dmaSize, priv->dmaStorage, + priv->dmaStorageDMA ); } #ifdef CONFIG_PCI @@ -469,16 +468,6 @@ static int __init tlan_probe(void) printk(KERN_INFO "%s", tlan_banner); - TLanPadBuffer = (u8 *) pci_alloc_consistent(NULL, TLAN_MIN_FRAME_SIZE, &TLanPadBufferDMA); - - if (TLanPadBuffer == NULL) { - printk(KERN_ERR "TLAN: Could not allocate memory for pad buffer.\n"); - rc = -ENOMEM; - goto err_out; - } - - memset(TLanPadBuffer, 0, TLAN_MIN_FRAME_SIZE); - TLAN_DBG(TLAN_DEBUG_PROBE, "Starting PCI Probe....\n"); /* Use new style PCI probing. Now the kernel will @@ -506,8 +495,6 @@ static int __init tlan_probe(void) err_out_pci_unreg: pci_unregister_driver(&tlan_driver); err_out_pci_free: - pci_free_consistent(NULL, TLAN_MIN_FRAME_SIZE, TLanPadBuffer, TLanPadBufferDMA); -err_out: return rc; } @@ -539,7 +526,8 @@ static int __devinit tlan_init_one( struct pci_dev *pdev, **************************************************************/ static int __devinit TLan_probe1(struct pci_dev *pdev, - long ioaddr, int irq, int rev, const struct pci_device_id *ent ) + long ioaddr, int irq, int rev, + const struct pci_device_id *ent ) { struct net_device *dev; @@ -625,8 +613,10 @@ static int __devinit TLan_probe1(struct pci_dev *pdev, /* Kernel parameters */ if (dev->mem_start) { priv->aui = dev->mem_start & 0x01; - priv->duplex = ((dev->mem_start & 0x06) == 0x06) ? 0 : (dev->mem_start & 0x06) >> 1; - priv->speed = ((dev->mem_start & 0x18) == 0x18) ? 0 : (dev->mem_start & 0x18) >> 3; + priv->duplex = ((dev->mem_start & 0x06) == 0x06) ? 0 + : (dev->mem_start & 0x06) >> 1; + priv->speed = ((dev->mem_start & 0x18) == 0x18) ? 0 + : (dev->mem_start & 0x18) >> 3; if (priv->speed == 0x1) { priv->speed = TLAN_SPEED_10; @@ -706,7 +696,8 @@ static void TLan_Eisa_Cleanup(void) dev = TLan_Eisa_Devices; priv = netdev_priv(dev); if (priv->dmaStorage) { - pci_free_consistent(priv->pciDev, priv->dmaSize, priv->dmaStorage, priv->dmaStorageDMA ); + pci_free_consistent(priv->pciDev, priv->dmaSize, + priv->dmaStorage, priv->dmaStorageDMA ); } release_region( dev->base_addr, 0x10); unregister_netdev( dev ); @@ -724,8 +715,6 @@ static void __exit tlan_exit(void) if (tlan_have_eisa) TLan_Eisa_Cleanup(); - pci_free_consistent(NULL, TLAN_MIN_FRAME_SIZE, TLanPadBuffer, TLanPadBufferDMA); - } @@ -763,8 +752,10 @@ static void __init TLan_EisaProbe (void) /* Loop through all slots of the EISA bus */ for (ioaddr = 0x1000; ioaddr < 0x9000; ioaddr += 0x1000) { - TLAN_DBG(TLAN_DEBUG_PROBE,"EISA_ID 0x%4x: 0x%4x\n", (int) ioaddr + 0xC80, inw(ioaddr + EISA_ID)); - TLAN_DBG(TLAN_DEBUG_PROBE,"EISA_ID 0x%4x: 0x%4x\n", (int) ioaddr + 0xC82, inw(ioaddr + EISA_ID2)); + TLAN_DBG(TLAN_DEBUG_PROBE,"EISA_ID 0x%4x: 0x%4x\n", + (int) ioaddr + 0xC80, inw(ioaddr + EISA_ID)); + TLAN_DBG(TLAN_DEBUG_PROBE,"EISA_ID 0x%4x: 0x%4x\n", + (int) ioaddr + 0xC82, inw(ioaddr + EISA_ID2)); TLAN_DBG(TLAN_DEBUG_PROBE, "Probing for EISA adapter at IO: 0x%4x : ", @@ -874,7 +865,8 @@ static int TLan_Init( struct net_device *dev ) dma_size = ( TLAN_NUM_RX_LISTS + TLAN_NUM_TX_LISTS ) * ( sizeof(TLanList) ); } - priv->dmaStorage = pci_alloc_consistent(priv->pciDev, dma_size, &priv->dmaStorageDMA); + priv->dmaStorage = pci_alloc_consistent(priv->pciDev, + dma_size, &priv->dmaStorageDMA); priv->dmaSize = dma_size; if ( priv->dmaStorage == NULL ) { @@ -883,16 +875,19 @@ static int TLan_Init( struct net_device *dev ) return -ENOMEM; } memset( priv->dmaStorage, 0, dma_size ); - priv->rxList = (TLanList *) - ( ( ( (u32) priv->dmaStorage ) + 7 ) & 0xFFFFFFF8 ); - priv->rxListDMA = ( ( ( (u32) priv->dmaStorageDMA ) + 7 ) & 0xFFFFFFF8 ); + priv->rxList = (TLanList *) ALIGN((unsigned long)priv->dmaStorage, 8); + priv->rxListDMA = ALIGN(priv->dmaStorageDMA, 8); priv->txList = priv->rxList + TLAN_NUM_RX_LISTS; priv->txListDMA = priv->rxListDMA + sizeof(TLanList) * TLAN_NUM_RX_LISTS; + if ( bbuf ) { priv->rxBuffer = (u8 *) ( priv->txList + TLAN_NUM_TX_LISTS ); - priv->rxBufferDMA =priv->txListDMA + sizeof(TLanList) * TLAN_NUM_TX_LISTS; - priv->txBuffer = priv->rxBuffer + ( TLAN_NUM_RX_LISTS * TLAN_MAX_FRAME_SIZE ); - priv->txBufferDMA = priv->rxBufferDMA + ( TLAN_NUM_RX_LISTS * TLAN_MAX_FRAME_SIZE ); + priv->rxBufferDMA =priv->txListDMA + + sizeof(TLanList) * TLAN_NUM_TX_LISTS; + priv->txBuffer = priv->rxBuffer + + ( TLAN_NUM_RX_LISTS * TLAN_MAX_FRAME_SIZE ); + priv->txBufferDMA = priv->rxBufferDMA + + ( TLAN_NUM_RX_LISTS * TLAN_MAX_FRAME_SIZE ); } err = 0; @@ -952,10 +947,12 @@ static int TLan_Open( struct net_device *dev ) int err; priv->tlanRev = TLan_DioRead8( dev->base_addr, TLAN_DEF_REVISION ); - err = request_irq( dev->irq, TLan_HandleInterrupt, IRQF_SHARED, TLanSignature, dev ); + err = request_irq( dev->irq, TLan_HandleInterrupt, IRQF_SHARED, + dev->name, dev ); if ( err ) { - printk(KERN_ERR "TLAN: Cannot open %s because IRQ %d is already in use.\n", dev->name, dev->irq ); + pr_err("TLAN: Cannot open %s because IRQ %d is already in use.\n", + dev->name, dev->irq ); return err; } @@ -969,7 +966,8 @@ static int TLan_Open( struct net_device *dev ) TLan_ReadAndClearStats( dev, TLAN_IGNORE ); TLan_ResetAdapter( dev ); - TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Opened. TLAN Chip Rev: %x\n", dev->name, priv->tlanRev ); + TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Opened. TLAN Chip Rev: %x\n", + dev->name, priv->tlanRev ); return 0; @@ -1007,14 +1005,16 @@ static int TLan_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) case SIOCGMIIREG: /* Read MII PHY register. */ - TLan_MiiReadReg(dev, data->phy_id & 0x1f, data->reg_num & 0x1f, &data->val_out); + TLan_MiiReadReg(dev, data->phy_id & 0x1f, + data->reg_num & 0x1f, &data->val_out); return 0; case SIOCSMIIREG: /* Write MII PHY register. */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - TLan_MiiWriteReg(dev, data->phy_id & 0x1f, data->reg_num & 0x1f, data->val_in); + TLan_MiiWriteReg(dev, data->phy_id & 0x1f, + data->reg_num & 0x1f, data->val_in); return 0; default: return -EOPNOTSUPP; @@ -1096,20 +1096,25 @@ static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev ) TLanList *tail_list; dma_addr_t tail_list_phys; u8 *tail_buffer; - int pad; unsigned long flags; if ( ! priv->phyOnline ) { - TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: %s PHY is not ready\n", dev->name ); + TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: %s PHY is not ready\n", + dev->name ); dev_kfree_skb_any(skb); return 0; } + if (skb_padto(skb, TLAN_MIN_FRAME_SIZE)) + return 0; + tail_list = priv->txList + priv->txTail; tail_list_phys = priv->txListDMA + sizeof(TLanList) * priv->txTail; if ( tail_list->cStat != TLAN_CSTAT_UNUSED ) { - TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: %s is busy (Head=%d Tail=%d)\n", dev->name, priv->txHead, priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, + "TRANSMIT: %s is busy (Head=%d Tail=%d)\n", + dev->name, priv->txHead, priv->txTail ); netif_stop_queue(dev); priv->txBusyCount++; return 1; @@ -1121,37 +1126,34 @@ static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev ) tail_buffer = priv->txBuffer + ( priv->txTail * TLAN_MAX_FRAME_SIZE ); skb_copy_from_linear_data(skb, tail_buffer, skb->len); } else { - tail_list->buffer[0].address = pci_map_single(priv->pciDev, skb->data, skb->len, PCI_DMA_TODEVICE); + tail_list->buffer[0].address = pci_map_single(priv->pciDev, + skb->data, skb->len, + PCI_DMA_TODEVICE); TLan_StoreSKB(tail_list, skb); } - pad = TLAN_MIN_FRAME_SIZE - skb->len; - - if ( pad > 0 ) { - tail_list->frameSize = (u16) skb->len + pad; - tail_list->buffer[0].count = (u32) skb->len; - tail_list->buffer[1].count = TLAN_LAST_BUFFER | (u32) pad; - tail_list->buffer[1].address = TLanPadBufferDMA; - } else { - tail_list->frameSize = (u16) skb->len; - tail_list->buffer[0].count = TLAN_LAST_BUFFER | (u32) skb->len; - tail_list->buffer[1].count = 0; - tail_list->buffer[1].address = 0; - } + tail_list->frameSize = (u16) skb->len; + tail_list->buffer[0].count = TLAN_LAST_BUFFER | (u32) skb->len; + tail_list->buffer[1].count = 0; + tail_list->buffer[1].address = 0; spin_lock_irqsave(&priv->lock, flags); tail_list->cStat = TLAN_CSTAT_READY; if ( ! priv->txInProgress ) { priv->txInProgress = 1; - TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Starting TX on buffer %d\n", priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, + "TRANSMIT: Starting TX on buffer %d\n", priv->txTail ); outl( tail_list_phys, dev->base_addr + TLAN_CH_PARM ); outl( TLAN_HC_GO, dev->base_addr + TLAN_HOST_CMD ); } else { - TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Adding buffer %d to TX channel\n", priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Adding buffer %d to TX channel\n", + priv->txTail ); if ( priv->txTail == 0 ) { - ( priv->txList + ( TLAN_NUM_TX_LISTS - 1 ) )->forward = tail_list_phys; + ( priv->txList + ( TLAN_NUM_TX_LISTS - 1 ) )->forward + = tail_list_phys; } else { - ( priv->txList + ( priv->txTail - 1 ) )->forward = tail_list_phys; + ( priv->txList + ( priv->txTail - 1 ) )->forward + = tail_list_phys; } } spin_unlock_irqrestore(&priv->lock, flags); @@ -1191,33 +1193,31 @@ static int TLan_StartTx( struct sk_buff *skb, struct net_device *dev ) static irqreturn_t TLan_HandleInterrupt(int irq, void *dev_id) { - u32 ack; - struct net_device *dev; - u32 host_cmd; + struct net_device *dev = dev_id; + TLanPrivateInfo *priv = netdev_priv(dev); u16 host_int; - int type; - TLanPrivateInfo *priv; - - dev = dev_id; - priv = netdev_priv(dev); + u16 type; spin_lock(&priv->lock); host_int = inw( dev->base_addr + TLAN_HOST_INT ); - outw( host_int, dev->base_addr + TLAN_HOST_INT ); - type = ( host_int & TLAN_HI_IT_MASK ) >> 2; + if ( type ) { + u32 ack; + u32 host_cmd; - ack = TLanIntVector[type]( dev, host_int ); + outw( host_int, dev->base_addr + TLAN_HOST_INT ); + ack = TLanIntVector[type]( dev, host_int ); - if ( ack ) { - host_cmd = TLAN_HC_ACK | ack | ( type << 18 ); - outl( host_cmd, dev->base_addr + TLAN_HOST_CMD ); + if ( ack ) { + host_cmd = TLAN_HC_ACK | ack | ( type << 18 ); + outl( host_cmd, dev->base_addr + TLAN_HOST_CMD ); + } } spin_unlock(&priv->lock); - return IRQ_HANDLED; + return IRQ_RETVAL(type); } /* TLan_HandleInterrupts */ @@ -1286,8 +1286,10 @@ static struct net_device_stats *TLan_GetStats( struct net_device *dev ) /* Should only read stats if open ? */ TLan_ReadAndClearStats( dev, TLAN_RECORD ); - TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: %s EOC count = %d\n", dev->name, priv->rxEocCount ); - TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: %s Busy count = %d\n", dev->name, priv->txBusyCount ); + TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: %s EOC count = %d\n", dev->name, + priv->rxEocCount ); + TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: %s Busy count = %d\n", dev->name, + priv->txBusyCount ); if ( debug & TLAN_DEBUG_GNRL ) { TLan_PrintDio( dev->base_addr ); TLan_PhyPrint( dev ); @@ -1299,7 +1301,7 @@ static struct net_device_stats *TLan_GetStats( struct net_device *dev ) TLan_PrintList( priv->txList + i, "TX", i ); } - return ( &( (TLanPrivateInfo *) netdev_priv(dev) )->stats ); + return &dev->stats; } /* TLan_GetStats */ @@ -1337,10 +1339,12 @@ static void TLan_SetMulticastList( struct net_device *dev ) if ( dev->flags & IFF_PROMISC ) { tmp = TLan_DioRead8( dev->base_addr, TLAN_NET_CMD ); - TLan_DioWrite8( dev->base_addr, TLAN_NET_CMD, tmp | TLAN_NET_CMD_CAF ); + TLan_DioWrite8( dev->base_addr, + TLAN_NET_CMD, tmp | TLAN_NET_CMD_CAF ); } else { tmp = TLan_DioRead8( dev->base_addr, TLAN_NET_CMD ); - TLan_DioWrite8( dev->base_addr, TLAN_NET_CMD, tmp & ~TLAN_NET_CMD_CAF ); + TLan_DioWrite8( dev->base_addr, + TLAN_NET_CMD, tmp & ~TLAN_NET_CMD_CAF ); if ( dev->flags & IFF_ALLMULTI ) { for ( i = 0; i < 3; i++ ) TLan_SetMac( dev, i + 1, NULL ); @@ -1349,7 +1353,8 @@ static void TLan_SetMulticastList( struct net_device *dev ) } else { for ( i = 0; i < dev->mc_count; i++ ) { if ( i < 3 ) { - TLan_SetMac( dev, i + 1, (char *) &dmi->dmi_addr ); + TLan_SetMac( dev, i + 1, + (char *) &dmi->dmi_addr ); } else { offset = TLan_HashFunc( (u8 *) &dmi->dmi_addr ); if ( offset < 32 ) @@ -1383,31 +1388,6 @@ static void TLan_SetMulticastList( struct net_device *dev ) *****************************************************************************/ - /*************************************************************** - * TLan_HandleInvalid - * - * Returns: - * 0 - * Parms: - * dev Device assigned the IRQ that was - * raised. - * host_int The contents of the HOST_INT - * port. - * - * This function handles invalid interrupts. This should - * never happen unless some other adapter is trying to use - * the IRQ line assigned to the device. - * - **************************************************************/ - -static u32 TLan_HandleInvalid( struct net_device *dev, u16 host_int ) -{ - /* printk( "TLAN: Invalid interrupt on %s.\n", dev->name ); */ - return 0; - -} /* TLan_HandleInvalid */ - - /*************************************************************** @@ -1441,14 +1421,16 @@ static u32 TLan_HandleTxEOF( struct net_device *dev, u16 host_int ) u32 ack = 0; u16 tmpCStat; - TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Handling TX EOF (Head=%d Tail=%d)\n", priv->txHead, priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Handling TX EOF (Head=%d Tail=%d)\n", + priv->txHead, priv->txTail ); head_list = priv->txList + priv->txHead; while (((tmpCStat = head_list->cStat ) & TLAN_CSTAT_FRM_CMP) && (ack < 255)) { ack++; if ( ! bbuf ) { struct sk_buff *skb = TLan_GetSKB(head_list); - pci_unmap_single(priv->pciDev, head_list->buffer[0].address, skb->len, PCI_DMA_TODEVICE); + pci_unmap_single(priv->pciDev, head_list->buffer[0].address, + skb->len, PCI_DMA_TODEVICE); dev_kfree_skb_any(skb); head_list->buffer[8].address = 0; head_list->buffer[9].address = 0; @@ -1457,7 +1439,7 @@ static u32 TLan_HandleTxEOF( struct net_device *dev, u16 host_int ) if ( tmpCStat & TLAN_CSTAT_EOC ) eoc = 1; - priv->stats.tx_bytes += head_list->frameSize; + dev->stats.tx_bytes += head_list->frameSize; head_list->cStat = TLAN_CSTAT_UNUSED; netif_start_queue(dev); @@ -1469,7 +1451,9 @@ static u32 TLan_HandleTxEOF( struct net_device *dev, u16 host_int ) printk(KERN_INFO "TLAN: Received interrupt for uncompleted TX frame.\n"); if ( eoc ) { - TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Handling TX EOC (Head=%d Tail=%d)\n", priv->txHead, priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, + "TRANSMIT: Handling TX EOC (Head=%d Tail=%d)\n", + priv->txHead, priv->txTail ); head_list = priv->txList + priv->txHead; head_list_phys = priv->txListDMA + sizeof(TLanList) * priv->txHead; if ( ( head_list->cStat & TLAN_CSTAT_READY ) == TLAN_CSTAT_READY ) { @@ -1481,7 +1465,8 @@ static u32 TLan_HandleTxEOF( struct net_device *dev, u16 host_int ) } if ( priv->adapter->flags & TLAN_ADAPTER_ACTIVITY_LED ) { - TLan_DioWrite8( dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT ); + TLan_DioWrite8( dev->base_addr, + TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT ); if ( priv->timer.function == NULL ) { priv->timer.function = &TLan_Timer; priv->timer.data = (unsigned long) dev; @@ -1563,66 +1548,65 @@ static u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int ) TLanList *head_list; struct sk_buff *skb; TLanList *tail_list; - void *t; - u32 frameSize; u16 tmpCStat; dma_addr_t head_list_phys; - TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: Handling RX EOF (Head=%d Tail=%d)\n", priv->rxHead, priv->rxTail ); + TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: Handling RX EOF (Head=%d Tail=%d)\n", + priv->rxHead, priv->rxTail ); head_list = priv->rxList + priv->rxHead; head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead; while (((tmpCStat = head_list->cStat) & TLAN_CSTAT_FRM_CMP) && (ack < 255)) { - frameSize = head_list->frameSize; + dma_addr_t frameDma = head_list->buffer[0].address; + u32 frameSize = head_list->frameSize; ack++; if (tmpCStat & TLAN_CSTAT_EOC) eoc = 1; if (bbuf) { - skb = dev_alloc_skb(frameSize + 7); - if (skb == NULL) - printk(KERN_INFO "TLAN: Couldn't allocate memory for received data.\n"); - else { - head_buffer = priv->rxBuffer + (priv->rxHead * TLAN_MAX_FRAME_SIZE); - skb_reserve(skb, 2); - t = (void *) skb_put(skb, frameSize); - - priv->stats.rx_bytes += head_list->frameSize; - - memcpy( t, head_buffer, frameSize ); - skb->protocol = eth_type_trans( skb, dev ); - netif_rx( skb ); - } + skb = netdev_alloc_skb(dev, frameSize + 7); + if ( !skb ) + goto drop_and_reuse; + + head_buffer = priv->rxBuffer + + (priv->rxHead * TLAN_MAX_FRAME_SIZE); + skb_reserve(skb, 2); + pci_dma_sync_single_for_cpu(priv->pciDev, + frameDma, frameSize, + PCI_DMA_FROMDEVICE); + skb_copy_from_linear_data(skb, head_buffer, frameSize); + skb_put(skb, frameSize); + dev->stats.rx_bytes += frameSize; + + skb->protocol = eth_type_trans( skb, dev ); + netif_rx( skb ); } else { struct sk_buff *new_skb; - /* - * I changed the algorithm here. What we now do - * is allocate the new frame. If this fails we - * simply recycle the frame. - */ + new_skb = netdev_alloc_skb(dev, TLAN_MAX_FRAME_SIZE + 7 ); + if ( !new_skb ) + goto drop_and_reuse; - new_skb = dev_alloc_skb( TLAN_MAX_FRAME_SIZE + 7 ); + skb = TLan_GetSKB(head_list); + pci_unmap_single(priv->pciDev, frameDma, + TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); + skb_put( skb, frameSize ); - if ( new_skb != NULL ) { - skb = TLan_GetSKB(head_list); - pci_unmap_single(priv->pciDev, head_list->buffer[0].address, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); - skb_trim( skb, frameSize ); + dev->stats.rx_bytes += frameSize; - priv->stats.rx_bytes += frameSize; + skb->protocol = eth_type_trans( skb, dev ); + netif_rx( skb ); - skb->protocol = eth_type_trans( skb, dev ); - netif_rx( skb ); + skb_reserve( new_skb, NET_IP_ALIGN ); + head_list->buffer[0].address = pci_map_single(priv->pciDev, + new_skb->data, + TLAN_MAX_FRAME_SIZE, + PCI_DMA_FROMDEVICE); - skb_reserve( new_skb, 2 ); - t = (void *) skb_put( new_skb, TLAN_MAX_FRAME_SIZE ); - head_list->buffer[0].address = pci_map_single(priv->pciDev, new_skb->data, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); - head_list->buffer[8].address = (u32) t; - TLan_StoreSKB(head_list, new_skb); - } else - printk(KERN_WARNING "TLAN: Couldn't allocate memory for received data.\n" ); - } + TLan_StoreSKB(head_list, new_skb); + } +drop_and_reuse: head_list->forward = 0; head_list->cStat = 0; tail_list = priv->rxList + priv->rxTail; @@ -1638,10 +1622,10 @@ static u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int ) printk(KERN_INFO "TLAN: Received interrupt for uncompleted RX frame.\n"); - - if ( eoc ) { - TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: Handling RX EOC (Head=%d Tail=%d)\n", priv->rxHead, priv->rxTail ); + TLAN_DBG( TLAN_DEBUG_RX, + "RECEIVE: Handling RX EOC (Head=%d Tail=%d)\n", + priv->rxHead, priv->rxTail ); head_list = priv->rxList + priv->rxHead; head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead; outl(head_list_phys, dev->base_addr + TLAN_CH_PARM ); @@ -1650,7 +1634,8 @@ static u32 TLan_HandleRxEOF( struct net_device *dev, u16 host_int ) } if ( priv->adapter->flags & TLAN_ADAPTER_ACTIVITY_LED ) { - TLan_DioWrite8( dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT ); + TLan_DioWrite8( dev->base_addr, + TLAN_LED_REG, TLAN_LED_LINK | TLAN_LED_ACT ); if ( priv->timer.function == NULL ) { priv->timer.function = &TLan_Timer; priv->timer.data = (unsigned long) dev; @@ -1728,7 +1713,9 @@ static u32 TLan_HandleTxEOC( struct net_device *dev, u16 host_int ) host_int = 0; if ( priv->tlanRev < 0x30 ) { - TLAN_DBG( TLAN_DEBUG_TX, "TRANSMIT: Handling TX EOC (Head=%d Tail=%d) -- IRQ\n", priv->txHead, priv->txTail ); + TLAN_DBG( TLAN_DEBUG_TX, + "TRANSMIT: Handling TX EOC (Head=%d Tail=%d) -- IRQ\n", + priv->txHead, priv->txTail ); head_list = priv->txList + priv->txHead; head_list_phys = priv->txListDMA + sizeof(TLanList) * priv->txHead; if ( ( head_list->cStat & TLAN_CSTAT_READY ) == TLAN_CSTAT_READY ) { @@ -1796,15 +1783,18 @@ static u32 TLan_HandleStatusCheck( struct net_device *dev, u16 host_int ) net_sts = TLan_DioRead8( dev->base_addr, TLAN_NET_STS ); if ( net_sts ) { TLan_DioWrite8( dev->base_addr, TLAN_NET_STS, net_sts ); - TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Net_Sts = %x\n", dev->name, (unsigned) net_sts ); + TLAN_DBG( TLAN_DEBUG_GNRL, "%s: Net_Sts = %x\n", + dev->name, (unsigned) net_sts ); } if ( ( net_sts & TLAN_NET_STS_MIRQ ) && ( priv->phyNum == 0 ) ) { TLan_MiiReadReg( dev, phy, TLAN_TLPHY_STS, &tlphy_sts ); TLan_MiiReadReg( dev, phy, TLAN_TLPHY_CTL, &tlphy_ctl ); - if ( ! ( tlphy_sts & TLAN_TS_POLOK ) && ! ( tlphy_ctl & TLAN_TC_SWAPOL ) ) { + if ( ! ( tlphy_sts & TLAN_TS_POLOK ) && + ! ( tlphy_ctl & TLAN_TC_SWAPOL ) ) { tlphy_ctl |= TLAN_TC_SWAPOL; TLan_MiiWriteReg( dev, phy, TLAN_TLPHY_CTL, tlphy_ctl); - } else if ( ( tlphy_sts & TLAN_TS_POLOK ) && ( tlphy_ctl & TLAN_TC_SWAPOL ) ) { + } else if ( ( tlphy_sts & TLAN_TS_POLOK ) + && ( tlphy_ctl & TLAN_TC_SWAPOL ) ) { tlphy_ctl &= ~TLAN_TC_SWAPOL; TLan_MiiWriteReg( dev, phy, TLAN_TLPHY_CTL, tlphy_ctl); } @@ -1849,7 +1839,9 @@ static u32 TLan_HandleRxEOC( struct net_device *dev, u16 host_int ) u32 ack = 1; if ( priv->tlanRev < 0x30 ) { - TLAN_DBG( TLAN_DEBUG_RX, "RECEIVE: Handling RX EOC (Head=%d Tail=%d) -- IRQ\n", priv->rxHead, priv->rxTail ); + TLAN_DBG( TLAN_DEBUG_RX, + "RECEIVE: Handling RX EOC (Head=%d Tail=%d) -- IRQ\n", + priv->rxHead, priv->rxTail ); head_list_phys = priv->rxListDMA + sizeof(TLanList) * priv->rxHead; outl( head_list_phys, dev->base_addr + TLAN_CH_PARM ); ack |= TLAN_HC_GO | TLAN_HC_RT; @@ -1940,10 +1932,12 @@ static void TLan_Timer( unsigned long data ) if ( priv->timer.function == NULL ) { elapsed = jiffies - priv->timerSetAt; if ( elapsed >= TLAN_TIMER_ACT_DELAY ) { - TLan_DioWrite8( dev->base_addr, TLAN_LED_REG, TLAN_LED_LINK ); + TLan_DioWrite8( dev->base_addr, + TLAN_LED_REG, TLAN_LED_LINK ); } else { priv->timer.function = &TLan_Timer; - priv->timer.expires = priv->timerSetAt + TLAN_TIMER_ACT_DELAY; + priv->timer.expires = priv->timerSetAt + + TLAN_TIMER_ACT_DELAY; spin_unlock_irqrestore(&priv->lock, flags); add_timer( &priv->timer ); break; @@ -1998,7 +1992,8 @@ static void TLan_ResetLists( struct net_device *dev ) list = priv->txList + i; list->cStat = TLAN_CSTAT_UNUSED; if ( bbuf ) { - list->buffer[0].address = priv->txBufferDMA + ( i * TLAN_MAX_FRAME_SIZE ); + list->buffer[0].address = priv->txBufferDMA + + ( i * TLAN_MAX_FRAME_SIZE ); } else { list->buffer[0].address = 0; } @@ -2017,28 +2012,32 @@ static void TLan_ResetLists( struct net_device *dev ) list->frameSize = TLAN_MAX_FRAME_SIZE; list->buffer[0].count = TLAN_MAX_FRAME_SIZE | TLAN_LAST_BUFFER; if ( bbuf ) { - list->buffer[0].address = priv->rxBufferDMA + ( i * TLAN_MAX_FRAME_SIZE ); + list->buffer[0].address = priv->rxBufferDMA + + ( i * TLAN_MAX_FRAME_SIZE ); } else { - skb = dev_alloc_skb( TLAN_MAX_FRAME_SIZE + 7 ); - if ( skb == NULL ) { - printk( "TLAN: Couldn't allocate memory for received data.\n" ); - /* If this ever happened it would be a problem */ - } else { - skb->dev = dev; - skb_reserve( skb, 2 ); - t = (void *) skb_put( skb, TLAN_MAX_FRAME_SIZE ); + skb = netdev_alloc_skb(dev, TLAN_MAX_FRAME_SIZE + 7 ); + if ( !skb ) { + pr_err("TLAN: out of memory for received data.\n" ); + break; } - list->buffer[0].address = pci_map_single(priv->pciDev, t, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); - list->buffer[8].address = (u32) t; + + skb_reserve( skb, NET_IP_ALIGN ); + list->buffer[0].address = pci_map_single(priv->pciDev, t, + TLAN_MAX_FRAME_SIZE, + PCI_DMA_FROMDEVICE); TLan_StoreSKB(list, skb); } list->buffer[1].count = 0; list->buffer[1].address = 0; - if ( i < TLAN_NUM_RX_LISTS - 1 ) - list->forward = list_phys + sizeof(TLanList); - else - list->forward = 0; + list->forward = list_phys + sizeof(TLanList); + } + + /* in case ran out of memory early, clear bits */ + while (i < TLAN_NUM_RX_LISTS) { + TLan_StoreSKB(priv->rxList + i, NULL); + ++i; } + list->forward = 0; } /* TLan_ResetLists */ @@ -2055,7 +2054,9 @@ static void TLan_FreeLists( struct net_device *dev ) list = priv->txList + i; skb = TLan_GetSKB(list); if ( skb ) { - pci_unmap_single(priv->pciDev, list->buffer[0].address, skb->len, PCI_DMA_TODEVICE); + pci_unmap_single(priv->pciDev, + list->buffer[0].address, skb->len, + PCI_DMA_TODEVICE); dev_kfree_skb_any( skb ); list->buffer[8].address = 0; list->buffer[9].address = 0; @@ -2066,7 +2067,10 @@ static void TLan_FreeLists( struct net_device *dev ) list = priv->rxList + i; skb = TLan_GetSKB(list); if ( skb ) { - pci_unmap_single(priv->pciDev, list->buffer[0].address, TLAN_MAX_FRAME_SIZE, PCI_DMA_FROMDEVICE); + pci_unmap_single(priv->pciDev, + list->buffer[0].address, + TLAN_MAX_FRAME_SIZE, + PCI_DMA_FROMDEVICE); dev_kfree_skb_any( skb ); list->buffer[8].address = 0; list->buffer[9].address = 0; @@ -2097,7 +2101,8 @@ static void TLan_PrintDio( u16 io_base ) u32 data0, data1; int i; - printk( "TLAN: Contents of internal registers for io base 0x%04hx.\n", io_base ); + printk( "TLAN: Contents of internal registers for io base 0x%04hx.\n", + io_base ); printk( "TLAN: Off. +0 +4\n" ); for ( i = 0; i < 0x4C; i+= 8 ) { data0 = TLan_DioRead32( io_base, i ); @@ -2131,13 +2136,14 @@ static void TLan_PrintList( TLanList *list, char *type, int num) { int i; - printk( "TLAN: %s List %d at 0x%08x\n", type, num, (u32) list ); + printk( "TLAN: %s List %d at %p\n", type, num, list ); printk( "TLAN: Forward = 0x%08x\n", list->forward ); printk( "TLAN: CSTAT = 0x%04hx\n", list->cStat ); printk( "TLAN: Frame Size = 0x%04hx\n", list->frameSize ); /* for ( i = 0; i < 10; i++ ) { */ for ( i = 0; i < 2; i++ ) { - printk( "TLAN: Buffer[%d].count, addr = 0x%08x, 0x%08x\n", i, list->buffer[i].count, list->buffer[i].address ); + printk( "TLAN: Buffer[%d].count, addr = 0x%08x, 0x%08x\n", + i, list->buffer[i].count, list->buffer[i].address ); } } /* TLan_PrintList */ @@ -2165,7 +2171,6 @@ static void TLan_PrintList( TLanList *list, char *type, int num) static void TLan_ReadAndClearStats( struct net_device *dev, int record ) { - TLanPrivateInfo *priv = netdev_priv(dev); u32 tx_good, tx_under; u32 rx_good, rx_over; u32 def_tx, crc, code; @@ -2202,18 +2207,18 @@ static void TLan_ReadAndClearStats( struct net_device *dev, int record ) loss = inb( dev->base_addr + TLAN_DIO_DATA + 2 ); if ( record ) { - priv->stats.rx_packets += rx_good; - priv->stats.rx_errors += rx_over + crc + code; - priv->stats.tx_packets += tx_good; - priv->stats.tx_errors += tx_under + loss; - priv->stats.collisions += multi_col + single_col + excess_col + late_col; + dev->stats.rx_packets += rx_good; + dev->stats.rx_errors += rx_over + crc + code; + dev->stats.tx_packets += tx_good; + dev->stats.tx_errors += tx_under + loss; + dev->stats.collisions += multi_col + single_col + excess_col + late_col; - priv->stats.rx_over_errors += rx_over; - priv->stats.rx_crc_errors += crc; - priv->stats.rx_frame_errors += code; + dev->stats.rx_over_errors += rx_over; + dev->stats.rx_crc_errors += crc; + dev->stats.rx_frame_errors += code; - priv->stats.tx_aborted_errors += tx_under; - priv->stats.tx_carrier_errors += loss; + dev->stats.tx_aborted_errors += tx_under; + dev->stats.tx_carrier_errors += loss; } } /* TLan_ReadAndClearStats */ @@ -2354,14 +2359,16 @@ TLan_FinishReset( struct net_device *dev ) TLan_MiiReadReg( dev, phy, MII_GEN_ID_HI, &tlphy_id1 ); TLan_MiiReadReg( dev, phy, MII_GEN_ID_LO, &tlphy_id2 ); - if ( ( priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY ) || ( priv->aui ) ) { + if ( ( priv->adapter->flags & TLAN_ADAPTER_UNMANAGED_PHY ) || + ( priv->aui ) ) { status = MII_GS_LINK; printk( "TLAN: %s: Link forced.\n", dev->name ); } else { TLan_MiiReadReg( dev, phy, MII_GEN_STS, &status ); udelay( 1000 ); TLan_MiiReadReg( dev, phy, MII_GEN_STS, &status ); - if ( (status & MII_GS_LINK) && /* We only support link info on Nat.Sem. PHY's */ + if ( (status & MII_GS_LINK) && + /* We only support link info on Nat.Sem. PHY's */ (tlphy_id1 == NAT_SEM_ID1) && (tlphy_id2 == NAT_SEM_ID2) ) { TLan_MiiReadReg( dev, phy, MII_AN_LPA, &partner ); @@ -2370,12 +2377,12 @@ TLan_FinishReset( struct net_device *dev ) printk( "TLAN: %s: Link active with ", dev->name ); if (!(tlphy_par & TLAN_PHY_AN_EN_STAT)) { printk( "forced 10%sMbps %s-Duplex\n", - tlphy_par & TLAN_PHY_SPEED_100 ? "" : "0", - tlphy_par & TLAN_PHY_DUPLEX_FULL ? "Full" : "Half"); + tlphy_par & TLAN_PHY_SPEED_100 ? "" : "0", + tlphy_par & TLAN_PHY_DUPLEX_FULL ? "Full" : "Half"); } else { printk( "AutoNegotiation enabled, at 10%sMbps %s-Duplex\n", - tlphy_par & TLAN_PHY_SPEED_100 ? "" : "0", - tlphy_par & TLAN_PHY_DUPLEX_FULL ? "Full" : "Half"); + tlphy_par & TLAN_PHY_SPEED_100 ? "" : "0", + tlphy_par & TLAN_PHY_DUPLEX_FULL ? "Full" : "Half"); printk("TLAN: Partner capability: "); for (i = 5; i <= 10; i++) if (partner & (1<<i)) @@ -2416,7 +2423,8 @@ TLan_FinishReset( struct net_device *dev ) outl( TLAN_HC_GO | TLAN_HC_RT, dev->base_addr + TLAN_HOST_CMD ); netif_carrier_on(dev); } else { - printk( "TLAN: %s: Link inactive, will retry in 10 secs...\n", dev->name ); + printk( "TLAN: %s: Link inactive, will retry in 10 secs...\n", + dev->name ); TLan_SetTimer( dev, (10*HZ), TLAN_TIMER_FINISH_RESET ); return; } @@ -2456,10 +2464,12 @@ static void TLan_SetMac( struct net_device *dev, int areg, char *mac ) if ( mac != NULL ) { for ( i = 0; i < 6; i++ ) - TLan_DioWrite8( dev->base_addr, TLAN_AREG_0 + areg + i, mac[i] ); + TLan_DioWrite8( dev->base_addr, + TLAN_AREG_0 + areg + i, mac[i] ); } else { for ( i = 0; i < 6; i++ ) - TLan_DioWrite8( dev->base_addr, TLAN_AREG_0 + areg + i, 0 ); + TLan_DioWrite8( dev->base_addr, + TLAN_AREG_0 + areg + i, 0 ); } } /* TLan_SetMac */ @@ -2565,9 +2575,13 @@ static void TLan_PhyDetect( struct net_device *dev ) TLan_MiiReadReg( dev, phy, MII_GEN_CTL, &control ); TLan_MiiReadReg( dev, phy, MII_GEN_ID_HI, &hi ); TLan_MiiReadReg( dev, phy, MII_GEN_ID_LO, &lo ); - if ( ( control != 0xFFFF ) || ( hi != 0xFFFF ) || ( lo != 0xFFFF ) ) { - TLAN_DBG( TLAN_DEBUG_GNRL, "PHY found at %02x %04x %04x %04x\n", phy, control, hi, lo ); - if ( ( priv->phy[1] == TLAN_PHY_NONE ) && ( phy != TLAN_PHY_MAX_ADDR ) ) { + if ( ( control != 0xFFFF ) || + ( hi != 0xFFFF ) || ( lo != 0xFFFF ) ) { + TLAN_DBG( TLAN_DEBUG_GNRL, + "PHY found at %02x %04x %04x %04x\n", + phy, control, hi, lo ); + if ( ( priv->phy[1] == TLAN_PHY_NONE ) && + ( phy != TLAN_PHY_MAX_ADDR ) ) { priv->phy[1] = phy; } } @@ -2595,7 +2609,9 @@ static void TLan_PhyPowerDown( struct net_device *dev ) value = MII_GC_PDOWN | MII_GC_LOOPBK | MII_GC_ISOLATE; TLan_MiiSync( dev->base_addr ); TLan_MiiWriteReg( dev, priv->phy[priv->phyNum], MII_GEN_CTL, value ); - if ( ( priv->phyNum == 0 ) && ( priv->phy[1] != TLAN_PHY_NONE ) && ( ! ( priv->adapter->flags & TLAN_ADAPTER_USE_INTERN_10 ) ) ) { + if ( ( priv->phyNum == 0 ) && + ( priv->phy[1] != TLAN_PHY_NONE ) && + ( ! ( priv->adapter->flags & TLAN_ADAPTER_USE_INTERN_10 ) ) ) { TLan_MiiSync( dev->base_addr ); TLan_MiiWriteReg( dev, priv->phy[1], MII_GEN_CTL, value ); } @@ -2768,10 +2784,10 @@ static void TLan_PhyFinishAutoNeg( struct net_device *dev ) * more time. Perhaps we should fail after a while. */ if (!priv->neg_be_verbose++) { - printk(KERN_INFO "TLAN: Giving autonegotiation more time.\n"); - printk(KERN_INFO "TLAN: Please check that your adapter has\n"); - printk(KERN_INFO "TLAN: been properly connected to a HUB or Switch.\n"); - printk(KERN_INFO "TLAN: Trying to establish link in the background...\n"); + pr_info("TLAN: Giving autonegotiation more time.\n"); + pr_info("TLAN: Please check that your adapter has\n"); + pr_info("TLAN: been properly connected to a HUB or Switch.\n"); + pr_info("TLAN: Trying to establish link in the background...\n"); } TLan_SetTimer( dev, (8*HZ), TLAN_TIMER_PHY_FINISH_AN ); return; @@ -2787,7 +2803,9 @@ static void TLan_PhyFinishAutoNeg( struct net_device *dev ) priv->tlanFullDuplex = TRUE; } - if ( ( ! ( mode & 0x0180 ) ) && ( priv->adapter->flags & TLAN_ADAPTER_USE_INTERN_10 ) && ( priv->phyNum != 0 ) ) { + if ( ( ! ( mode & 0x0180 ) ) && + ( priv->adapter->flags & TLAN_ADAPTER_USE_INTERN_10 ) && + ( priv->phyNum != 0 ) ) { priv->phyNum = 0; data = TLAN_NET_CFG_1FRAG | TLAN_NET_CFG_1CHAN | TLAN_NET_CFG_PHY_EN; TLan_DioWrite16( dev->base_addr, TLAN_NET_CONFIG, data ); @@ -2796,12 +2814,14 @@ static void TLan_PhyFinishAutoNeg( struct net_device *dev ) } if ( priv->phyNum == 0 ) { - if ( ( priv->duplex == TLAN_DUPLEX_FULL ) || ( an_adv & an_lpa & 0x0040 ) ) { - TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, MII_GC_AUTOENB | MII_GC_DUPLEX ); - printk( "TLAN: Starting internal PHY with FULL-DUPLEX\n" ); + if ( ( priv->duplex == TLAN_DUPLEX_FULL ) || + ( an_adv & an_lpa & 0x0040 ) ) { + TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, + MII_GC_AUTOENB | MII_GC_DUPLEX ); + pr_info("TLAN: Starting internal PHY with FULL-DUPLEX\n" ); } else { TLan_MiiWriteReg( dev, phy, MII_GEN_CTL, MII_GC_AUTOENB ); - printk( "TLAN: Starting internal PHY with HALF-DUPLEX\n" ); + pr_info( "TLAN: Starting internal PHY with HALF-DUPLEX\n" ); } } @@ -3209,7 +3229,8 @@ static int TLan_EeSendByte( u16 io_base, u8 data, int stop ) TLan_SetBit( TLAN_NET_SIO_ETXEN, sio ); if ( ( ! err ) && stop ) { - TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); /* STOP, raise data while clock is high */ + /* STOP, raise data while clock is high */ + TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); TLan_SetBit( TLAN_NET_SIO_ECLOK, sio ); TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); } @@ -3272,7 +3293,8 @@ static void TLan_EeReceiveByte( u16 io_base, u8 *data, int stop ) TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); /* No ack = 1 (?) */ TLan_SetBit( TLAN_NET_SIO_ECLOK, sio ); TLan_ClearBit( TLAN_NET_SIO_ECLOK, sio ); - TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); /* STOP, raise data while clock is high */ + /* STOP, raise data while clock is high */ + TLan_ClearBit( TLAN_NET_SIO_EDATA, sio ); TLan_SetBit( TLAN_NET_SIO_ECLOK, sio ); TLan_SetBit( TLAN_NET_SIO_EDATA, sio ); } diff --git a/drivers/net/tlan.h b/drivers/net/tlan.h index 41ce0b66593..4b82f283e98 100644 --- a/drivers/net/tlan.h +++ b/drivers/net/tlan.h @@ -13,8 +13,6 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - ** This file is best viewed/edited with tabstop=4, colums>=132 - * * * Dec 10, 1999 Torben Mathiasen <torben.mathiasen@compaq.com> * New Maintainer @@ -45,7 +43,9 @@ #define TLAN_IGNORE 0 #define TLAN_RECORD 1 -#define TLAN_DBG(lvl, format, args...) if (debug&lvl) printk(KERN_DEBUG "TLAN: " format, ##args ); +#define TLAN_DBG(lvl, format, args...) \ + do { if (debug&lvl) printk(KERN_DEBUG "TLAN: " format, ##args ); } while(0) + #define TLAN_DEBUG_GNRL 0x0001 #define TLAN_DEBUG_TX 0x0002 #define TLAN_DEBUG_RX 0x0004 @@ -194,7 +194,6 @@ typedef struct tlan_private_tag { u32 timerSetAt; u32 timerType; struct timer_list timer; - struct net_device_stats stats; struct board *adapter; u32 adapterRev; u32 aui; @@ -205,7 +204,6 @@ typedef struct tlan_private_tag { u32 speed; u8 tlanRev; u8 tlanFullDuplex; - char devName[8]; spinlock_t lock; u8 link; u8 is_eisa; @@ -517,12 +515,18 @@ static inline void TLan_DioWrite32(u16 base_addr, u16 internal_addr, u32 data) * xor( a, xor( b, xor( c, xor( d, xor( e, xor( f, xor( g, h ) ) ) ) ) ) ) * #define DA( a, bit ) ( ( (u8) a[bit/8] ) & ( (u8) ( 1 << bit%8 ) ) ) * - * hash = XOR8( DA(a,0), DA(a, 6), DA(a,12), DA(a,18), DA(a,24), DA(a,30), DA(a,36), DA(a,42) ); - * hash |= XOR8( DA(a,1), DA(a, 7), DA(a,13), DA(a,19), DA(a,25), DA(a,31), DA(a,37), DA(a,43) ) << 1; - * hash |= XOR8( DA(a,2), DA(a, 8), DA(a,14), DA(a,20), DA(a,26), DA(a,32), DA(a,38), DA(a,44) ) << 2; - * hash |= XOR8( DA(a,3), DA(a, 9), DA(a,15), DA(a,21), DA(a,27), DA(a,33), DA(a,39), DA(a,45) ) << 3; - * hash |= XOR8( DA(a,4), DA(a,10), DA(a,16), DA(a,22), DA(a,28), DA(a,34), DA(a,40), DA(a,46) ) << 4; - * hash |= XOR8( DA(a,5), DA(a,11), DA(a,17), DA(a,23), DA(a,29), DA(a,35), DA(a,41), DA(a,47) ) << 5; + * hash = XOR8( DA(a,0), DA(a, 6), DA(a,12), DA(a,18), DA(a,24), + * DA(a,30), DA(a,36), DA(a,42) ); + * hash |= XOR8( DA(a,1), DA(a, 7), DA(a,13), DA(a,19), DA(a,25), + * DA(a,31), DA(a,37), DA(a,43) ) << 1; + * hash |= XOR8( DA(a,2), DA(a, 8), DA(a,14), DA(a,20), DA(a,26), + * DA(a,32), DA(a,38), DA(a,44) ) << 2; + * hash |= XOR8( DA(a,3), DA(a, 9), DA(a,15), DA(a,21), DA(a,27), + * DA(a,33), DA(a,39), DA(a,45) ) << 3; + * hash |= XOR8( DA(a,4), DA(a,10), DA(a,16), DA(a,22), DA(a,28), + * DA(a,34), DA(a,40), DA(a,46) ) << 4; + * hash |= XOR8( DA(a,5), DA(a,11), DA(a,17), DA(a,23), DA(a,29), + * DA(a,35), DA(a,41), DA(a,47) ) << 5; * */ static inline u32 TLan_HashFunc( const u8 *a ) diff --git a/drivers/net/tokenring/3c359.h b/drivers/net/tokenring/3c359.h index 0f8b4ec8695..66b1ff60323 100644 --- a/drivers/net/tokenring/3c359.h +++ b/drivers/net/tokenring/3c359.h @@ -264,7 +264,7 @@ struct xl_private { u16 asb; u8 __iomem *xl_mmio; - char *xl_card_name; + const char *xl_card_name; struct pci_dev *pdev ; spinlock_t xl_lock ; diff --git a/drivers/net/tokenring/olympic.h b/drivers/net/tokenring/olympic.h index c91956310fb..10fbba08978 100644 --- a/drivers/net/tokenring/olympic.h +++ b/drivers/net/tokenring/olympic.h @@ -254,7 +254,7 @@ struct olympic_private { u8 __iomem *olympic_mmio; u8 __iomem *olympic_lap; struct pci_dev *pdev ; - char *olympic_card_name ; + const char *olympic_card_name; spinlock_t olympic_lock ; diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c index c028facd934..febfaee44fe 100644 --- a/drivers/net/tsi108_eth.c +++ b/drivers/net/tsi108_eth.c @@ -803,7 +803,8 @@ static int tsi108_refill_rx(struct net_device *dev, int budget) int rx = data->rxhead; struct sk_buff *skb; - data->rxskbs[rx] = skb = dev_alloc_skb(TSI108_RXBUF_SIZE + 2); + data->rxskbs[rx] = skb = netdev_alloc_skb(dev, + TSI108_RXBUF_SIZE + 2); if (!skb) break; @@ -1352,8 +1353,9 @@ static int tsi108_open(struct net_device *dev) data->rxhead = 0; for (i = 0; i < TSI108_RXRING_LEN; i++) { - struct sk_buff *skb = dev_alloc_skb(TSI108_RXBUF_SIZE + NET_IP_ALIGN); + struct sk_buff *skb; + skb = netdev_alloc_skb(dev, TSI108_RXBUF_SIZE + NET_IP_ALIGN); if (!skb) { /* Bah. No memory for now, but maybe we'll get * some more later. diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c index 2511ca7a12a..e9e62862163 100644 --- a/drivers/net/tulip/uli526x.c +++ b/drivers/net/tulip/uli526x.c @@ -225,6 +225,9 @@ static void uli526x_set_filter_mode(struct net_device *); static const struct ethtool_ops netdev_ethtool_ops; static u16 read_srom_word(long, int); static irqreturn_t uli526x_interrupt(int, void *); +#ifdef CONFIG_NET_POLL_CONTROLLER +static void uli526x_poll(struct net_device *dev); +#endif static void uli526x_descriptor_init(struct uli526x_board_info *, unsigned long); static void allocate_rx_buffer(struct uli526x_board_info *); static void update_cr6(u32, unsigned long); @@ -339,6 +342,9 @@ static int __devinit uli526x_init_one (struct pci_dev *pdev, dev->get_stats = &uli526x_get_stats; dev->set_multicast_list = &uli526x_set_filter_mode; dev->ethtool_ops = &netdev_ethtool_ops; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = &uli526x_poll; +#endif spin_lock_init(&db->lock); @@ -681,8 +687,9 @@ static irqreturn_t uli526x_interrupt(int irq, void *dev_id) db->cr5_data = inl(ioaddr + DCR5); outl(db->cr5_data, ioaddr + DCR5); if ( !(db->cr5_data & 0x180c1) ) { - spin_unlock_irqrestore(&db->lock, flags); + /* Restore CR7 to enable interrupt mask */ outl(db->cr7_data, ioaddr + DCR7); + spin_unlock_irqrestore(&db->lock, flags); return IRQ_HANDLED; } @@ -715,6 +722,13 @@ static irqreturn_t uli526x_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +#ifdef CONFIG_NET_POLL_CONTROLLER +static void uli526x_poll(struct net_device *dev) +{ + /* ISR grabs the irqsave lock, so this should be safe */ + uli526x_interrupt(dev->irq, dev); +} +#endif /* * Free TX resource after TX complete diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c index ca0bdac07a7..fb0b918e5cc 100644 --- a/drivers/net/ucc_geth.c +++ b/drivers/net/ucc_geth.c @@ -237,7 +237,7 @@ static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth, skb->dev = ugeth->dev; out_be32(&((struct qe_bd __iomem *)bd)->buf, - dma_map_single(NULL, + dma_map_single(&ugeth->dev->dev, skb->data, ugeth->ug_info->uf_info.max_rx_buf_length + UCC_GETH_RX_DATA_BUF_ALIGNMENT, @@ -2158,7 +2158,7 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth) continue; for (j = 0; j < ugeth->ug_info->bdRingLenTx[i]; j++) { if (ugeth->tx_skbuff[i][j]) { - dma_unmap_single(NULL, + dma_unmap_single(&ugeth->dev->dev, in_be32(&((struct qe_bd __iomem *)bd)->buf), (in_be32((u32 __iomem *)bd) & BD_LENGTH_MASK), @@ -2186,7 +2186,7 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth) bd = ugeth->p_rx_bd_ring[i]; for (j = 0; j < ugeth->ug_info->bdRingLenRx[i]; j++) { if (ugeth->rx_skbuff[i][j]) { - dma_unmap_single(NULL, + dma_unmap_single(&ugeth->dev->dev, in_be32(&((struct qe_bd __iomem *)bd)->buf), ugeth->ug_info-> uf_info.max_rx_buf_length + @@ -3406,7 +3406,8 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev) /* set up the buffer descriptor */ out_be32(&((struct qe_bd __iomem *)bd)->buf, - dma_map_single(NULL, skb->data, skb->len, DMA_TO_DEVICE)); + dma_map_single(&ugeth->dev->dev, skb->data, + skb->len, DMA_TO_DEVICE)); /* printk(KERN_DEBUG"skb->data is 0x%x\n",skb->data); */ diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index dc6f097062d..37ecf845edf 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ -1440,6 +1440,10 @@ static const struct usb_device_id products [] = { // Belkin F5D5055 USB_DEVICE(0x050d, 0x5055), .driver_info = (unsigned long) &ax88178_info, +}, { + // Apple USB Ethernet Adapter + USB_DEVICE(0x05ac, 0x1402), + .driver_info = (unsigned long) &ax88772_info, }, { }, // END }; diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c index 76752d84a30..22c17bbacb6 100644 --- a/drivers/net/usb/catc.c +++ b/drivers/net/usb/catc.c @@ -423,7 +423,10 @@ static int catc_hard_start_xmit(struct sk_buff *skb, struct net_device *netdev) catc->tx_ptr = (((catc->tx_ptr - 1) >> 6) + 1) << 6; tx_buf = catc->tx_buf[catc->tx_idx] + catc->tx_ptr; - *((u16*)tx_buf) = (catc->is_f5u011) ? cpu_to_be16((u16)skb->len) : cpu_to_le16((u16)skb->len); + if (catc->is_f5u011) + *(__be16 *)tx_buf = cpu_to_be16(skb->len); + else + *(__le16 *)tx_buf = cpu_to_le16(skb->len); skb_copy_from_linear_data(skb, tx_buf + 2, skb->len); catc->tx_ptr += skb->len + 2; diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c index 21a7785cb8b..ae467f182c4 100644 --- a/drivers/net/usb/rndis_host.c +++ b/drivers/net/usb/rndis_host.c @@ -194,7 +194,7 @@ int rndis_command(struct usbnet *dev, struct rndis_msg_hdr *buf) dev_dbg(&info->control->dev, "rndis response error, code %d\n", retval); } - msleep(2); + msleep(20); } dev_dbg(&info->control->dev, "rndis response timeout\n"); return -ETIMEDOUT; @@ -283,8 +283,8 @@ generic_rndis_bind(struct usbnet *dev, struct usb_interface *intf, int flags) struct rndis_set_c *set_c; struct rndis_halt *halt; } u; - u32 tmp, phym_unspec; - __le32 *phym; + u32 tmp; + __le32 phym_unspec, *phym; int reply_len; unsigned char *bp; diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c index 6b8d882d197..bcbf2fa9b94 100644 --- a/drivers/net/via-velocity.c +++ b/drivers/net/via-velocity.c @@ -1495,24 +1495,18 @@ static inline void velocity_rx_csum(struct rx_desc *rd, struct sk_buff *skb) * enough. This function returns a negative value if the received * packet is too big or if memory is exhausted. */ -static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size, - struct velocity_info *vptr) +static int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size, + struct velocity_info *vptr) { int ret = -1; - if (pkt_size < rx_copybreak) { struct sk_buff *new_skb; - new_skb = dev_alloc_skb(pkt_size + 2); + new_skb = netdev_alloc_skb(vptr->dev, pkt_size + 2); if (new_skb) { - new_skb->dev = vptr->dev; new_skb->ip_summed = rx_skb[0]->ip_summed; - - if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN) - skb_reserve(new_skb, 2); - - skb_copy_from_linear_data(rx_skb[0], new_skb->data, - pkt_size); + skb_reserve(new_skb, 2); + skb_copy_from_linear_data(*rx_skb, new_skb->data, pkt_size); *rx_skb = new_skb; ret = 0; } @@ -1533,12 +1527,8 @@ static inline int velocity_rx_copy(struct sk_buff **rx_skb, int pkt_size, static inline void velocity_iph_realign(struct velocity_info *vptr, struct sk_buff *skb, int pkt_size) { - /* FIXME - memmove ? */ if (vptr->flags & VELOCITY_FLAGS_IP_ALIGN) { - int i; - - for (i = pkt_size; i >= 0; i--) - *(skb->data + i + 2) = *(skb->data + i); + memmove(skb->data + 2, skb->data, pkt_size); skb_reserve(skb, 2); } } @@ -1629,7 +1619,7 @@ static int velocity_alloc_rx_buf(struct velocity_info *vptr, int idx) struct rx_desc *rd = &(vptr->rd_ring[idx]); struct velocity_rd_info *rd_info = &(vptr->rd_info[idx]); - rd_info->skb = dev_alloc_skb(vptr->rx_buf_sz + 64); + rd_info->skb = netdev_alloc_skb(vptr->dev, vptr->rx_buf_sz + 64); if (rd_info->skb == NULL) return -ENOMEM; @@ -1638,7 +1628,6 @@ static int velocity_alloc_rx_buf(struct velocity_info *vptr, int idx) * 64byte alignment. */ skb_reserve(rd_info->skb, (unsigned long) rd_info->skb->data & 63); - rd_info->skb->dev = vptr->dev; rd_info->skb_dma = pci_map_single(vptr->pdev, rd_info->skb->data, vptr->rx_buf_sz, PCI_DMA_FROMDEVICE); /* diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index f926b5ab3d0..fe7cdf2a2a2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -470,8 +470,7 @@ static void virtnet_remove(struct virtio_device *vdev) kfree_skb(skb); vi->num--; } - while ((skb = __skb_dequeue(&vi->send)) != NULL) - kfree_skb(skb); + __skb_queue_purge(&vi->send); BUG_ON(vi->num != 0); diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index 9a83c9d5b8c..7f984895b0d 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -43,8 +43,7 @@ static const char* version = "HDLC support module revision 1.22"; #undef DEBUG_LINK -static struct hdlc_proto *first_proto = NULL; - +static struct hdlc_proto *first_proto; static int hdlc_change_mtu(struct net_device *dev, int new_mtu) { @@ -314,21 +313,25 @@ void detach_hdlc_protocol(struct net_device *dev) void register_hdlc_protocol(struct hdlc_proto *proto) { + rtnl_lock(); proto->next = first_proto; first_proto = proto; + rtnl_unlock(); } void unregister_hdlc_protocol(struct hdlc_proto *proto) { - struct hdlc_proto **p = &first_proto; - while (*p) { - if (*p == proto) { - *p = proto->next; - return; - } + struct hdlc_proto **p; + + rtnl_lock(); + p = &first_proto; + while (*p != proto) { + BUG_ON(!*p); p = &((*p)->next); } + *p = proto->next; + rtnl_unlock(); } diff --git a/drivers/net/wan/hdlc_cisco.c b/drivers/net/wan/hdlc_cisco.c index 7133c688cf2..762d21c1c70 100644 --- a/drivers/net/wan/hdlc_cisco.c +++ b/drivers/net/wan/hdlc_cisco.c @@ -56,6 +56,7 @@ struct cisco_state { cisco_proto settings; struct timer_list timer; + spinlock_t lock; unsigned long last_poll; int up; int request_sent; @@ -158,6 +159,7 @@ static int cisco_rx(struct sk_buff *skb) { struct net_device *dev = skb->dev; hdlc_device *hdlc = dev_to_hdlc(dev); + struct cisco_state *st = state(hdlc); struct hdlc_header *data = (struct hdlc_header*)skb->data; struct cisco_packet *cisco_data; struct in_device *in_dev; @@ -220,11 +222,12 @@ static int cisco_rx(struct sk_buff *skb) goto rx_error; case CISCO_KEEPALIVE_REQ: - state(hdlc)->rxseq = ntohl(cisco_data->par1); - if (state(hdlc)->request_sent && - ntohl(cisco_data->par2) == state(hdlc)->txseq) { - state(hdlc)->last_poll = jiffies; - if (!state(hdlc)->up) { + spin_lock(&st->lock); + st->rxseq = ntohl(cisco_data->par1); + if (st->request_sent && + ntohl(cisco_data->par2) == st->txseq) { + st->last_poll = jiffies; + if (!st->up) { u32 sec, min, hrs, days; sec = ntohl(cisco_data->time) / 1000; min = sec / 60; sec -= min * 60; @@ -232,12 +235,12 @@ static int cisco_rx(struct sk_buff *skb) days = hrs / 24; hrs -= days * 24; printk(KERN_INFO "%s: Link up (peer " "uptime %ud%uh%um%us)\n", - dev->name, days, hrs, - min, sec); + dev->name, days, hrs, min, sec); netif_dormant_off(dev); - state(hdlc)->up = 1; + st->up = 1; } } + spin_unlock(&st->lock); dev_kfree_skb_any(skb); return NET_RX_SUCCESS; @@ -261,24 +264,25 @@ static void cisco_timer(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; hdlc_device *hdlc = dev_to_hdlc(dev); + struct cisco_state *st = state(hdlc); - if (state(hdlc)->up && - time_after(jiffies, state(hdlc)->last_poll + - state(hdlc)->settings.timeout * HZ)) { - state(hdlc)->up = 0; + spin_lock(&st->lock); + if (st->up && + time_after(jiffies, st->last_poll + st->settings.timeout * HZ)) { + st->up = 0; printk(KERN_INFO "%s: Link down\n", dev->name); netif_dormant_on(dev); } - cisco_keepalive_send(dev, CISCO_KEEPALIVE_REQ, - htonl(++state(hdlc)->txseq), - htonl(state(hdlc)->rxseq)); - state(hdlc)->request_sent = 1; - state(hdlc)->timer.expires = jiffies + - state(hdlc)->settings.interval * HZ; - state(hdlc)->timer.function = cisco_timer; - state(hdlc)->timer.data = arg; - add_timer(&state(hdlc)->timer); + cisco_keepalive_send(dev, CISCO_KEEPALIVE_REQ, htonl(++st->txseq), + htonl(st->rxseq)); + st->request_sent = 1; + spin_unlock(&st->lock); + + st->timer.expires = jiffies + st->settings.interval * HZ; + st->timer.function = cisco_timer; + st->timer.data = arg; + add_timer(&st->timer); } @@ -286,15 +290,20 @@ static void cisco_timer(unsigned long arg) static void cisco_start(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - state(hdlc)->up = 0; - state(hdlc)->request_sent = 0; - state(hdlc)->txseq = state(hdlc)->rxseq = 0; - - init_timer(&state(hdlc)->timer); - state(hdlc)->timer.expires = jiffies + HZ; /*First poll after 1s*/ - state(hdlc)->timer.function = cisco_timer; - state(hdlc)->timer.data = (unsigned long)dev; - add_timer(&state(hdlc)->timer); + struct cisco_state *st = state(hdlc); + unsigned long flags; + + spin_lock_irqsave(&st->lock, flags); + st->up = 0; + st->request_sent = 0; + st->txseq = st->rxseq = 0; + spin_unlock_irqrestore(&st->lock, flags); + + init_timer(&st->timer); + st->timer.expires = jiffies + HZ; /* First poll after 1 s */ + st->timer.function = cisco_timer; + st->timer.data = (unsigned long)dev; + add_timer(&st->timer); } @@ -302,10 +311,16 @@ static void cisco_start(struct net_device *dev) static void cisco_stop(struct net_device *dev) { hdlc_device *hdlc = dev_to_hdlc(dev); - del_timer_sync(&state(hdlc)->timer); + struct cisco_state *st = state(hdlc); + unsigned long flags; + + del_timer_sync(&st->timer); + + spin_lock_irqsave(&st->lock, flags); netif_dormant_on(dev); - state(hdlc)->up = 0; - state(hdlc)->request_sent = 0; + st->up = 0; + st->request_sent = 0; + spin_unlock_irqrestore(&st->lock, flags); } @@ -367,6 +382,7 @@ static int cisco_ioctl(struct net_device *dev, struct ifreq *ifr) return result; memcpy(&state(hdlc)->settings, &new_settings, size); + spin_lock_init(&state(hdlc)->lock); dev->hard_start_xmit = hdlc->xmit; dev->header_ops = &cisco_header_ops; dev->type = ARPHRD_CISCO; diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 8a78283e860..17ced37e55e 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2669,6 +2669,7 @@ static struct net_device *init_wifidev(struct airo_info *ai, dev->irq = ethdev->irq; dev->base_addr = ethdev->base_addr; dev->wireless_data = ethdev->wireless_data; + SET_NETDEV_DEV(dev, ethdev->dev.parent); memcpy(dev->dev_addr, ethdev->dev_addr, dev->addr_len); err = register_netdev(dev); if (err<0) { @@ -2905,7 +2906,7 @@ EXPORT_SYMBOL(init_airo_card); static int waitbusy (struct airo_info *ai) { int delay = 0; - while ((IN4500 (ai, COMMAND) & COMMAND_BUSY) & (delay < 10000)) { + while ((IN4500 (ai, COMMAND) & COMMAND_BUSY) && (delay < 10000)) { udelay (10); if ((++delay % 20) == 0) OUT4500(ai, EVACK, EV_CLEARCOMMANDBUSY); diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c index 437a9bcc9bd..ed4317a17cb 100644 --- a/drivers/net/wireless/hostap/hostap_cs.c +++ b/drivers/net/wireless/hostap/hostap_cs.c @@ -833,6 +833,7 @@ static struct pcmcia_device_id hostap_cs_ids[] = { PCMCIA_DEVICE_MANF_CARD(0x50c2, 0x0001), PCMCIA_DEVICE_MANF_CARD(0x50c2, 0x7300), /* PCMCIA_DEVICE_MANF_CARD(0xc00f, 0x0000), conflict with pcnet_cs */ + PCMCIA_DEVICE_MANF_CARD(0xc250, 0x0002), PCMCIA_DEVICE_MANF_CARD(0xd601, 0x0002), PCMCIA_DEVICE_MANF_CARD(0xd601, 0x0005), PCMCIA_DEVICE_MANF_CARD(0xd601, 0x0010), diff --git a/drivers/net/wireless/hostap/hostap_hw.c b/drivers/net/wireless/hostap/hostap_hw.c index 7be68db6f30..cdf90c40f11 100644 --- a/drivers/net/wireless/hostap/hostap_hw.c +++ b/drivers/net/wireless/hostap/hostap_hw.c @@ -3276,11 +3276,6 @@ while (0) } printk(KERN_INFO "%s: Registered netdevice %s\n", dev_info, dev->name); -#ifndef PRISM2_NO_PROCFS_DEBUG - create_proc_read_entry("registers", 0, local->proc, - prism2_registers_proc_read, local); -#endif /* PRISM2_NO_PROCFS_DEBUG */ - hostap_init_data(local); return dev; @@ -3307,6 +3302,10 @@ static int hostap_hw_ready(struct net_device *dev) netif_carrier_off(local->ddev); } hostap_init_proc(local); +#ifndef PRISM2_NO_PROCFS_DEBUG + create_proc_read_entry("registers", 0, local->proc, + prism2_registers_proc_read, local); +#endif /* PRISM2_NO_PROCFS_DEBUG */ hostap_init_ap_proc(local); return 0; } diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index fa87c5c2ae0..d74c061994a 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -11584,6 +11584,7 @@ static int ipw_prom_alloc(struct ipw_priv *priv) priv->prom_net_dev->hard_start_xmit = ipw_prom_hard_start_xmit; priv->prom_priv->ieee->iw_mode = IW_MODE_MONITOR; + SET_NETDEV_DEV(priv->prom_net_dev, &priv->pci_dev->dev); rc = register_netdev(priv->prom_net_dev); if (rc) { diff --git a/drivers/net/wireless/libertas/ethtool.c b/drivers/net/wireless/libertas/ethtool.c index dcfdb404678..688d60de55c 100644 --- a/drivers/net/wireless/libertas/ethtool.c +++ b/drivers/net/wireless/libertas/ethtool.c @@ -73,8 +73,8 @@ out: return ret; } -static void lbs_ethtool_get_stats(struct net_device * dev, - struct ethtool_stats * stats, u64 * data) +static void lbs_ethtool_get_stats(struct net_device *dev, + struct ethtool_stats *stats, uint64_t *data) { struct lbs_private *priv = dev->priv; struct cmd_ds_mesh_access mesh_access; @@ -83,12 +83,12 @@ static void lbs_ethtool_get_stats(struct net_device * dev, lbs_deb_enter(LBS_DEB_ETHTOOL); /* Get Mesh Statistics */ - ret = lbs_prepare_and_send_command(priv, - CMD_MESH_ACCESS, CMD_ACT_MESH_GET_STATS, - CMD_OPTION_WAITFORRSP, 0, &mesh_access); + ret = lbs_mesh_access(priv, CMD_ACT_MESH_GET_STATS, &mesh_access); - if (ret) + if (ret) { + memset(data, 0, MESH_STATS_NUM*(sizeof(uint64_t))); return; + } priv->mstats.fwd_drop_rbt = le32_to_cpu(mesh_access.data[0]); priv->mstats.fwd_drop_ttl = le32_to_cpu(mesh_access.data[1]); @@ -111,19 +111,18 @@ static void lbs_ethtool_get_stats(struct net_device * dev, lbs_deb_enter(LBS_DEB_ETHTOOL); } -static int lbs_ethtool_get_sset_count(struct net_device * dev, int sset) +static int lbs_ethtool_get_sset_count(struct net_device *dev, int sset) { - switch (sset) { - case ETH_SS_STATS: + struct lbs_private *priv = dev->priv; + + if (sset == ETH_SS_STATS && dev == priv->mesh_dev) return MESH_STATS_NUM; - default: - return -EOPNOTSUPP; - } + + return -EOPNOTSUPP; } static void lbs_ethtool_get_strings(struct net_device *dev, - u32 stringset, - u8 * s) + uint32_t stringset, uint8_t *s) { int i; diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c index 039e09a8b02..b7ab3590b58 100644 --- a/drivers/net/wireless/libertas/main.c +++ b/drivers/net/wireless/libertas/main.c @@ -792,6 +792,7 @@ static int lbs_thread(void *data) priv->reset_card(priv); } else { priv->cur_cmd = NULL; + priv->dnld_sent = DNLD_RES_RECEIVED; lbs_pr_info("requeueing command 0x%04x due " "to timeout (#%d)\n", le16_to_cpu(cmdnode->cmdbuf->command), @@ -1602,6 +1603,7 @@ static int lbs_add_rtap(struct lbs_private *priv) rtap_dev->get_stats = lbs_rtap_get_stats; rtap_dev->hard_start_xmit = lbs_rtap_hard_start_xmit; rtap_dev->priv = priv; + SET_NETDEV_DEV(rtap_dev, priv->dev->dev.parent); ret = register_netdev(rtap_dev); if (ret) { diff --git a/drivers/net/wireless/orinoco_cs.c b/drivers/net/wireless/orinoco_cs.c index 8b7f5768a10..1c216e015f6 100644 --- a/drivers/net/wireless/orinoco_cs.c +++ b/drivers/net/wireless/orinoco_cs.c @@ -461,6 +461,7 @@ static struct pcmcia_device_id orinoco_cs_ids[] = { PCMCIA_DEVICE_MANF_CARD(0x028a, 0x0673), /* Linksys WCF12 Wireless CompactFlash Card */ PCMCIA_DEVICE_MANF_CARD(0x02aa, 0x0002), /* ASUS SpaceLink WL-100 */ PCMCIA_DEVICE_MANF_CARD(0x02ac, 0x0002), /* SpeedStream SS1021 Wireless Adapter */ + PCMCIA_DEVICE_MANF_CARD(0x02ac, 0x3021), /* SpeedStream Wireless Adapter */ PCMCIA_DEVICE_MANF_CARD(0x14ea, 0xb001), /* PLANEX RoadLannerWave GW-NS11H */ PCMCIA_DEVICE_MANF_CARD(0x50c2, 0x7300), /* Airvast WN-100 */ PCMCIA_DEVICE_MANF_CARD(0x9005, 0x0021), /* Adaptec Ultra Wireless ANW-8030 */ diff --git a/drivers/net/wireless/rtl8187_dev.c b/drivers/net/wireless/rtl8187_dev.c index b581ef8a637..0078c7e9918 100644 --- a/drivers/net/wireless/rtl8187_dev.c +++ b/drivers/net/wireless/rtl8187_dev.c @@ -92,6 +92,7 @@ static void rtl8187_iowrite_async(struct rtl8187_priv *priv, __le16 addr, u8 data[4]; struct usb_ctrlrequest dr; } *buf; + int rc; buf = kmalloc(sizeof(*buf), GFP_ATOMIC); if (!buf) @@ -116,7 +117,11 @@ static void rtl8187_iowrite_async(struct rtl8187_priv *priv, __le16 addr, usb_fill_control_urb(urb, priv->udev, usb_sndctrlpipe(priv->udev, 0), (unsigned char *)dr, buf, len, rtl8187_iowrite_async_cb, buf); - usb_submit_urb(urb, GFP_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); + if (rc < 0) { + kfree(buf); + usb_free_urb(urb); + } } static inline void rtl818x_iowrite32_async(struct rtl8187_priv *priv, @@ -164,6 +169,7 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) struct urb *urb; __le16 rts_dur = 0; u32 flags; + int rc; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { @@ -197,7 +203,11 @@ static int rtl8187_tx(struct ieee80211_hw *dev, struct sk_buff *skb) info->driver_data[1] = urb; usb_fill_bulk_urb(urb, priv->udev, usb_sndbulkpipe(priv->udev, 2), hdr, skb->len, rtl8187_tx_cb, skb); - usb_submit_urb(urb, GFP_ATOMIC); + rc = usb_submit_urb(urb, GFP_ATOMIC); + if (rc < 0) { + usb_free_urb(urb); + kfree_skb(skb); + } return 0; } diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c index edb1aefb4ad..d2378d083a3 100644 --- a/drivers/net/wireless/zd1211rw/zd_mac.c +++ b/drivers/net/wireless/zd1211rw/zd_mac.c @@ -735,7 +735,7 @@ void zd_process_intr(struct work_struct *work) u16 int_status; struct zd_mac *mac = container_of(work, struct zd_mac, process_intr); - int_status = le16_to_cpu(*(u16 *)(mac->intr_buffer+4)); + int_status = le16_to_cpu(*(__le16 *)(mac->intr_buffer+4)); if (int_status & INT_CFG_NEXT_BCN) { if (net_ratelimit()) dev_dbg_f(zd_mac_dev(mac), "INT_CFG_NEXT_BCN\n"); diff --git a/drivers/net/wireless/zd1211rw/zd_usb.c b/drivers/net/wireless/zd1211rw/zd_usb.c index 6a51ae419e6..1ccff240bf9 100644 --- a/drivers/net/wireless/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zd1211rw/zd_usb.c @@ -353,7 +353,7 @@ static inline void handle_regs_int(struct urb *urb) ZD_ASSERT(in_interrupt()); spin_lock(&intr->lock); - int_num = le16_to_cpu(*(u16 *)(urb->transfer_buffer+2)); + int_num = le16_to_cpu(*(__le16 *)(urb->transfer_buffer+2)); if (int_num == CR_INTERRUPT) { struct zd_mac *mac = zd_hw_mac(zd_usb_to_hw(urb->context)); memcpy(&mac->intr_buffer, urb->transfer_buffer, diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 8bddff150c7..d26f69b0184 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -946,8 +946,7 @@ err: work_done++; } - while ((skb = __skb_dequeue(&errq))) - kfree_skb(skb); + __skb_queue_purge(&errq); work_done -= handle_incoming_queue(dev, &rxq); @@ -1079,8 +1078,7 @@ static void xennet_release_rx_bufs(struct netfront_info *np) } } - while ((skb = __skb_dequeue(&free_list)) != NULL) - dev_kfree_skb(skb); + __skb_queue_purge(&free_list); spin_unlock_bh(&np->rx_lock); } diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b7d81b2a904..54a4cfb50ed 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -92,7 +92,6 @@ header-y += if_slip.h header-y += if_strip.h header-y += if_tun.h header-y += if_tunnel.h -header-y += in6.h header-y += in_route.h header-y += ioctl.h header-y += ip6_tunnel.h @@ -236,6 +235,7 @@ unifdef-y += if_vlan.h unifdef-y += igmp.h unifdef-y += inet_diag.h unifdef-y += in.h +unifdef-y += in6.h unifdef-y += inotify.h unifdef-y += input.h unifdef-y += ip.h diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h new file mode 100644 index 00000000000..9b64b6d6787 --- /dev/null +++ b/include/linux/brcmphy.h @@ -0,0 +1,6 @@ +#define PHY_BRCM_WIRESPEED_ENABLE 0x00000001 +#define PHY_BRCM_AUTO_PWRDWN_ENABLE 0x00000002 +#define PHY_BRCM_APD_CLK125_ENABLE 0x00000004 +#define PHY_BRCM_STD_IBND_DISABLE 0x00000008 +#define PHY_BRCM_EXT_IBND_RX_ENABLE 0x00000010 +#define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00000020 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b11e6e19e96..f27fd200933 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -745,6 +745,9 @@ struct net_device /* rtnetlink link ops */ const struct rtnl_link_ops *rtnl_link_ops; + /* VLAN feature mask */ + unsigned long vlan_features; + /* for setting kernel sock attribute on TCP connection setup */ #define GSO_MAX_SIZE 65536 unsigned int gso_max_size; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index e4c66593b5c..0c5eb7ed8b3 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -3,7 +3,6 @@ #ifdef __KERNEL__ #include <linux/init.h> -#include <linux/types.h> #include <linux/skbuff.h> #include <linux/net.h> #include <linux/netdevice.h> @@ -14,6 +13,7 @@ #include <linux/list.h> #include <net/net_namespace.h> #endif +#include <linux/types.h> #include <linux/compiler.h> /* Responses from hook functions. */ diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 0a383ac083c..759bc043dc6 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -81,6 +81,7 @@ enum ctattr_protoinfo { CTA_PROTOINFO_UNSPEC, CTA_PROTOINFO_TCP, CTA_PROTOINFO_DCCP, + CTA_PROTOINFO_SCTP, __CTA_PROTOINFO_MAX }; #define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) @@ -103,6 +104,15 @@ enum ctattr_protoinfo_dccp { }; #define CTA_PROTOINFO_DCCP_MAX (__CTA_PROTOINFO_DCCP_MAX - 1) +enum ctattr_protoinfo_sctp { + CTA_PROTOINFO_SCTP_UNSPEC, + CTA_PROTOINFO_SCTP_STATE, + CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, + CTA_PROTOINFO_SCTP_VTAG_REPLY, + __CTA_PROTOINFO_SCTP_MAX +}; +#define CTA_PROTOINFO_SCTP_MAX (__CTA_PROTOINFO_SCTP_MAX - 1) + enum ctattr_counters { CTA_COUNTERS_UNSPEC, CTA_COUNTERS_PACKETS, /* old 64bit counters */ diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index dd9c97f2d43..590ac3d6d5d 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -11,11 +11,11 @@ #ifdef __KERNEL__ #include <linux/if.h> -#include <linux/types.h> #include <linux/in.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #endif +#include <linux/types.h> #include <linux/compiler.h> #include <linux/netfilter_arp.h> diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h new file mode 100644 index 00000000000..2273c3ae33c --- /dev/null +++ b/include/linux/netfilter_bridge/ebt_ip6.h @@ -0,0 +1,40 @@ +/* + * ebt_ip6 + * + * Authors: + * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> + * Manohar Castelino <manohar.r.castelino@intel.com> + * + * Jan 11, 2008 + * + */ + +#ifndef __LINUX_BRIDGE_EBT_IP6_H +#define __LINUX_BRIDGE_EBT_IP6_H + +#define EBT_IP6_SOURCE 0x01 +#define EBT_IP6_DEST 0x02 +#define EBT_IP6_TCLASS 0x04 +#define EBT_IP6_PROTO 0x08 +#define EBT_IP6_SPORT 0x10 +#define EBT_IP6_DPORT 0x20 +#define EBT_IP6_MASK (EBT_IP6_SOURCE | EBT_IP6_DEST | EBT_IP6_TCLASS |\ + EBT_IP6_PROTO | EBT_IP6_SPORT | EBT_IP6_DPORT) +#define EBT_IP6_MATCH "ip6" + +/* the same values are used for the invflags */ +struct ebt_ip6_info +{ + struct in6_addr saddr; + struct in6_addr daddr; + struct in6_addr smsk; + struct in6_addr dmsk; + uint8_t tclass; + uint8_t protocol; + uint8_t bitmask; + uint8_t invflags; + uint16_t sport[2]; + uint16_t dport[2]; +}; + +#endif diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h index 96e231ae755..b76e653157e 100644 --- a/include/linux/netfilter_bridge/ebt_log.h +++ b/include/linux/netfilter_bridge/ebt_log.h @@ -4,7 +4,8 @@ #define EBT_LOG_IP 0x01 /* if the frame is made by ip, log the ip information */ #define EBT_LOG_ARP 0x02 #define EBT_LOG_NFLOG 0x04 -#define EBT_LOG_MASK (EBT_LOG_IP | EBT_LOG_ARP) +#define EBT_LOG_IP6 0x08 +#define EBT_LOG_MASK (EBT_LOG_IP | EBT_LOG_ARP | EBT_LOG_IP6) #define EBT_LOG_PREFIX_SIZE 30 #define EBT_LOG_WATCHER "log" diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 650318b0c40..29c7727ff0e 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -60,6 +60,7 @@ enum nf_ip_hook_priorities { NF_IP_PRI_MANGLE = -150, NF_IP_PRI_NAT_DST = -100, NF_IP_PRI_FILTER = 0, + NF_IP_PRI_SECURITY = 50, NF_IP_PRI_NAT_SRC = 100, NF_IP_PRI_SELINUX_LAST = 225, NF_IP_PRI_CONNTRACK_CONFIRM = INT_MAX, diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index bfc889f9027..092bd50581a 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -17,11 +17,11 @@ #ifdef __KERNEL__ #include <linux/if.h> -#include <linux/types.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/skbuff.h> #endif +#include <linux/types.h> #include <linux/compiler.h> #include <linux/netfilter_ipv4.h> diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 3475a65dae9..fd50988b83e 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -64,6 +64,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_MANGLE = -150, NF_IP6_PRI_NAT_DST = -100, NF_IP6_PRI_FILTER = 0, + NF_IP6_PRI_SECURITY = 50, NF_IP6_PRI_NAT_SRC = 100, NF_IP6_PRI_SELINUX_LAST = 225, NF_IP6_PRI_LAST = INT_MAX, diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index f2507dcc575..1089e33cf63 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -17,11 +17,11 @@ #ifdef __KERNEL__ #include <linux/if.h> -#include <linux/types.h> #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/skbuff.h> #endif +#include <linux/types.h> #include <linux/compiler.h> #include <linux/netfilter_ipv6.h> diff --git a/include/linux/netlink.h b/include/linux/netlink.h index bec1062a25a..9ff1b54908f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -193,7 +193,7 @@ extern int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ struct sock *netlink_getsockbyfilp(struct file *filp); -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); int netlink_sendskb(struct sock *sk, struct sk_buff *skb); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index cf6dbd75939..72c038560e7 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1979,6 +1979,7 @@ #define PCI_DEVICE_ID_TIGON3_5787M 0x1693 #define PCI_DEVICE_ID_TIGON3_5782 0x1696 #define PCI_DEVICE_ID_TIGON3_5784 0x1698 +#define PCI_DEVICE_ID_TIGON3_5785 0x1699 #define PCI_DEVICE_ID_TIGON3_5786 0x169a #define PCI_DEVICE_ID_TIGON3_5787 0x169b #define PCI_DEVICE_ID_TIGON3_5788 0x169c diff --git a/include/linux/tcp.h b/include/linux/tcp.h index d96d9b12230..9881295f385 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -296,10 +296,9 @@ struct tcp_sock { u32 rcv_ssthresh; /* Current window clamp */ u32 frto_highmark; /* snd_nxt when RTO occurred */ - u8 reordering; /* Packet reordering metric. */ + u16 advmss; /* Advertised MSS */ u8 frto_counter; /* Number of new acks after RTO */ u8 nonagle; /* Disable Nagle algorithm? */ - u8 keepalive_probes; /* num of allowed keep alive probes */ /* RTT measurement */ u32 srtt; /* smoothed round trip time << 3 */ @@ -310,6 +309,10 @@ struct tcp_sock { u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ + + u16 urg_data; /* Saved octet of OOB data and control flags */ + u8 urg_mode; /* In urgent mode */ + u8 ecn_flags; /* ECN status bits. */ /* * Options received (usually on last packet, some only on SYN packets). */ @@ -325,13 +328,24 @@ struct tcp_sock { u32 snd_cwnd_used; u32 snd_cwnd_stamp; - struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ - u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ u32 pushed_seq; /* Last pushed seq, required to talk to windows */ + u32 lost_out; /* Lost packets */ + u32 sacked_out; /* SACK'd packets */ + u32 fackets_out; /* FACK'd packets */ + u32 tso_deferred; + u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */ -/* SACKs data */ + /* from STCP, retrans queue hinting */ + struct sk_buff* lost_skb_hint; + struct sk_buff *scoreboard_skb_hint; + struct sk_buff *retransmit_skb_hint; + struct sk_buff *forward_skb_hint; + + struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ + + /* SACKs data, these 2 need to be together (see tcp_build_and_update_options) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ @@ -342,23 +356,14 @@ struct tcp_sock { * sacked_out > 0) */ - /* from STCP, retrans queue hinting */ - struct sk_buff* lost_skb_hint; - - struct sk_buff *scoreboard_skb_hint; - struct sk_buff *retransmit_skb_hint; - struct sk_buff *forward_skb_hint; - int lost_cnt_hint; int retransmit_cnt_hint; u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ - u16 advmss; /* Advertised MSS */ - u16 prior_ssthresh; /* ssthresh saved at recovery start */ - u32 lost_out; /* Lost packets */ - u32 sacked_out; /* SACK'd packets */ - u32 fackets_out; /* FACK'd packets */ + u8 reordering; /* Packet reordering metric. */ + u8 keepalive_probes; /* num of allowed keep alive probes */ + u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ u32 retrans_stamp; /* Timestamp of the last retransmit, @@ -366,25 +371,18 @@ struct tcp_sock { * the first SYN. */ u32 undo_marker; /* tracking retrans started here. */ int undo_retrans; /* number of undoable retransmissions. */ + u32 total_retrans; /* Total retransmits for entire connection */ + u32 urg_seq; /* Seq of received urgent pointer */ - u16 urg_data; /* Saved octet of OOB data and control flags */ - u8 urg_mode; /* In urgent mode */ - u8 ecn_flags; /* ECN status bits. */ u32 snd_up; /* Urgent pointer */ - u32 total_retrans; /* Total retransmits for entire connection */ - u32 bytes_acked; /* Appropriate Byte Counting - RFC3465 */ - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ - int linger2; struct tcp_deferred_accept_info defer_tcp_accept; unsigned long last_synq_overflow; - u32 tso_deferred; - /* Receiver side RTT estimation */ struct { u32 rtt; @@ -412,6 +410,8 @@ struct tcp_sock { /* TCP MD5 Signagure Option information */ struct tcp_md5sig_info *md5sig_info; #endif + + int linger2; }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h index b0c916d1f37..2bc6fa4adeb 100644 --- a/include/linux/tipc_config.h +++ b/include/linux/tipc_config.h @@ -2,7 +2,7 @@ * include/linux/tipc_config.h: Include file for TIPC configuration interface * * Copyright (c) 2003-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -136,6 +136,14 @@ #define TIPC_CMD_SET_NETID 0x800B /* tx unsigned, rx none */ /* + * Reserved commands: + * May not be issued by any process. + * Used internally by TIPC. + */ + +#define TIPC_CMD_NOT_NET_ADMIN 0xC001 /* tx none, rx none */ + +/* * TLV types defined for TIPC */ diff --git a/include/linux/wanrouter.h b/include/linux/wanrouter.h index 3add87465b1..e0aa39612eb 100644 --- a/include/linux/wanrouter.h +++ b/include/linux/wanrouter.h @@ -522,7 +522,7 @@ extern int wanrouter_proc_init(void); extern void wanrouter_proc_cleanup(void); extern int wanrouter_proc_add(struct wan_device *wandev); extern int wanrouter_proc_delete(struct wan_device *wandev); -extern int wanrouter_ioctl( struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); +extern long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* Public Data */ /* list of registered devices */ diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 6512d85f11b..3780592ebe8 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -19,7 +19,6 @@ struct ip6_tnl { struct ip6_tnl *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ - struct net_device_stats stat; /* statistics for tunnel device */ int recursion; /* depth of hard_start_xmit recursion */ struct ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ diff --git a/include/net/ipip.h b/include/net/ipip.h index 633ed4def8e..a85bda64b85 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -11,7 +11,6 @@ struct ip_tunnel { struct ip_tunnel *next; struct net_device *dev; - struct net_device_stats stat; int recursion; /* Depth of hard_start_xmit recursion */ int err_count; /* Number of arrived ICMP errors */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c80e3be8f79..1196de85f8d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1608,13 +1608,16 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); void ieee80211_scan_completed(struct ieee80211_hw *hw); /** - * ieee80211_iterate_active_interfaces - iterate active interfaces + * ieee80211_iterate_active_interfaces- iterate active interfaces * * This function iterates over the interfaces associated with a given * hardware that are currently active and calls the callback for them. + * This function allows the iterator function to sleep, when the iterator + * function is atomic @ieee80211_iterate_active_interfaces_atomic can + * be used. * * @hw: the hardware struct of which the interfaces should be iterated over - * @iterator: the iterator function to call, cannot sleep + * @iterator: the iterator function to call * @data: first argument of the iterator function */ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw, @@ -1623,6 +1626,24 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw, void *data); /** + * ieee80211_iterate_active_interfaces_atomic - iterate active interfaces + * + * This function iterates over the interfaces associated with a given + * hardware that are currently active and calls the callback for them. + * This function requires the iterator callback function to be atomic, + * if that is not desired, use @ieee80211_iterate_active_interfaces instead. + * + * @hw: the hardware struct of which the interfaces should be iterated over + * @iterator: the iterator function to call, cannot sleep + * @data: first argument of the iterator function + */ +void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw, + void (*iterator)(void *data, + u8 *mac, + struct ieee80211_vif *vif), + void *data); + +/** * ieee80211_start_tx_ba_session - Start a tx Block Ack session. * @hw: pointer as obtained from ieee80211_alloc_hw(). * @ra: receiver address of the BA session recipient diff --git a/include/net/ndisc.h b/include/net/ndisc.h index 9c451ff2f4f..a01b7c4dc76 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -129,6 +129,10 @@ extern int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, void __user *buffer, size_t *lenp, loff_t *ppos); +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen); #endif extern void inet6_ifinfo_notify(int event, diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index aa540e6be50..8df751b3be5 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -201,8 +201,11 @@ extern void unregister_pernet_gen_device(int id, struct pernet_operations *); struct ctl_path; struct ctl_table; struct ctl_table_header; + extern struct ctl_table_header *register_net_sysctl_table(struct net *net, const struct ctl_path *path, struct ctl_table *table); +extern struct ctl_table_header *register_net_sysctl_rotable( + const struct ctl_path *path, struct ctl_table *table); extern void unregister_net_sysctl_table(struct ctl_table_header *header); #endif /* __NET_NET_NAMESPACE_H */ diff --git a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h index 9bf059817ae..7573d52a434 100644 --- a/include/net/netfilter/ipv4/nf_conntrack_ipv4.h +++ b/include/net/netfilter/ipv4/nf_conntrack_ipv4.h @@ -9,8 +9,6 @@ #ifndef _NF_CONNTRACK_IPV4_H #define _NF_CONNTRACK_IPV4_H -/* Returns new sk_buff, or NULL */ -struct sk_buff *nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb); extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2dbd6c015b9..d77dec768dc 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -223,6 +223,25 @@ static inline void nf_ct_refresh(struct nf_conn *ct, __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); } +extern void __nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + int do_acct); + +/* kill conntrack and do accounting */ +static inline void nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb) +{ + __nf_ct_kill_acct(ct, ctinfo, skb, 1); +} + +/* kill conntrack without accounting */ +static inline void nf_ct_kill(struct nf_conn *ct) +{ + __nf_ct_kill_acct(ct, 0, NULL, 0); +} + /* These are for NAT. Icky. */ /* Update TCP window tracking data when NAT mangles the packet */ extern void nf_conntrack_tcp_update(const struct sk_buff *skb, diff --git a/include/net/netlink.h b/include/net/netlink.h index a5506c42f03..112dcdf7e34 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -772,12 +772,13 @@ static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype, const struct nla_policy *policy, int len) { - if (nla_len(nla) < len) + int nested_len = nla_len(nla) - NLA_ALIGN(len); + + if (nested_len < 0) return -1; - if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr)) - return nla_parse_nested(tb, maxtype, - nla_data(nla) + NLA_ALIGN(len), - policy); + if (nested_len >= nla_attr_size(0)) + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), + nested_len, policy); memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); return 0; } diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 34ee348a2cf..6ef90b5fafb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -36,6 +36,7 @@ struct netns_ipv4 { struct xt_table *iptable_mangle; struct xt_table *iptable_raw; struct xt_table *arptable_filter; + struct xt_table *iptable_security; #endif int sysctl_icmp_echo_ignore_all; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index ac053be6c25..5bacd838e88 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -35,6 +35,7 @@ struct netns_ipv6 { struct xt_table *ip6table_filter; struct xt_table *ip6table_mangle; struct xt_table *ip6table_raw; + struct xt_table *ip6table_security; #endif struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; diff --git a/ipc/mqueue.c b/ipc/mqueue.c index b3b69fd5133..3e84b958186 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1054,7 +1054,7 @@ retry: } timeo = MAX_SCHEDULE_TIMEOUT; - ret = netlink_attachskb(sock, nc, 0, &timeo, NULL); + ret = netlink_attachskb(sock, nc, &timeo, NULL); if (ret == 1) goto retry; if (ret) { diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 2a739adaa92..ab2225da0ee 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -382,6 +382,18 @@ static void vlan_sync_address(struct net_device *dev, memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN); } +static void vlan_transfer_features(struct net_device *dev, + struct net_device *vlandev) +{ + unsigned long old_features = vlandev->features; + + vlandev->features &= ~dev->vlan_features; + vlandev->features |= dev->features & dev->vlan_features; + + if (old_features != vlandev->features) + netdev_features_change(vlandev); +} + static void __vlan_device_event(struct net_device *dev, unsigned long event) { switch (event) { @@ -410,10 +422,8 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, int i, flgs; struct net_device *vlandev; - if (is_vlan_dev(dev)) { + if (is_vlan_dev(dev)) __vlan_device_event(dev, event); - goto out; - } grp = __vlan_find_group(dev); if (!grp) @@ -450,6 +460,18 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, } break; + case NETDEV_FEAT_CHANGE: + /* Propagate device features to underlying device */ + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + vlandev = vlan_group_get_device(grp, i); + if (!vlandev) + continue; + + vlan_transfer_features(dev, vlandev); + } + + break; + case NETDEV_DOWN: /* Put all VLANs for this dev in the down state too. */ for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index c961f082600..5d055c242ed 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -663,6 +663,8 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); + dev->features |= real_dev->features & real_dev->vlan_features; + /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index bf7787395fe..626c7795ae3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -21,12 +21,6 @@ #include <asm/uaccess.h> #include "br_private.h" -static struct net_device_stats *br_dev_get_stats(struct net_device *dev) -{ - struct net_bridge *br = netdev_priv(dev); - return &br->statistics; -} - /* net device transmit always called with no BH (preempt_disabled) */ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -34,8 +28,8 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) const unsigned char *dest = skb->data; struct net_bridge_fdb_entry *dst; - br->statistics.tx_packets++; - br->statistics.tx_bytes += skb->len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; skb_reset_mac_header(skb); skb_pull(skb, ETH_HLEN); @@ -161,7 +155,6 @@ void br_dev_setup(struct net_device *dev) ether_setup(dev); dev->do_ioctl = br_dev_ioctl; - dev->get_stats = br_dev_get_stats; dev->hard_start_xmit = br_dev_xmit; dev->open = br_dev_open; dev->set_multicast_list = br_dev_set_multicast_list; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index bdd7c35c3c7..a4711674b3d 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -115,7 +115,7 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2; if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->statistics.tx_dropped++; + br->dev->stats.tx_dropped++; kfree_skb(skb); return; } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 255c00f60ce..fa0f5711a99 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -24,13 +24,13 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; static void br_pass_frame_up(struct net_bridge *br, struct sk_buff *skb) { - struct net_device *indev; + struct net_device *indev, *brdev = br->dev; - br->statistics.rx_packets++; - br->statistics.rx_bytes += skb->len; + brdev->stats.rx_packets++; + brdev->stats.rx_bytes += skb->len; indev = skb->dev; - skb->dev = br->dev; + skb->dev = brdev; NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, netif_receive_skb); @@ -64,7 +64,7 @@ int br_handle_frame_finish(struct sk_buff *skb) dst = NULL; if (is_multicast_ether_addr(dest)) { - br->statistics.multicast++; + br->dev->stats.multicast++; skb2 = skb; } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { skb2 = skb; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c11b554fd10..0243cb489ed 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -90,7 +90,6 @@ struct net_bridge spinlock_t lock; struct list_head port_list; struct net_device *dev; - struct net_device_stats statistics; spinlock_t hash_lock; struct hlist_head hash[BR_HASH_SIZE]; struct list_head age_list; diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 7beeefa0f9c..fb684c2ff8b 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -83,6 +83,15 @@ config BRIDGE_EBT_IP To compile it as a module, choose M here. If unsure, say N. +config BRIDGE_EBT_IP6 + tristate "ebt: IP6 filter support" + depends on BRIDGE_NF_EBTABLES + help + This option adds the IP6 match, which allows basic IPV6 header field + filtering. + + To compile it as a module, choose M here. If unsure, say N. + config BRIDGE_EBT_LIMIT tristate "ebt: limit match support" depends on BRIDGE_NF_EBTABLES diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index 83715d73a50..dd960645b41 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_BRIDGE_EBT_802_3) += ebt_802_3.o obj-$(CONFIG_BRIDGE_EBT_AMONG) += ebt_among.o obj-$(CONFIG_BRIDGE_EBT_ARP) += ebt_arp.o obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip.o +obj-$(CONFIG_BRIDGE_EBT_IP) += ebt_ip6.o obj-$(CONFIG_BRIDGE_EBT_LIMIT) += ebt_limit.o obj-$(CONFIG_BRIDGE_EBT_MARK) += ebt_mark_m.o obj-$(CONFIG_BRIDGE_EBT_PKTTYPE) += ebt_pkttype.o diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c new file mode 100644 index 00000000000..36efb3a7524 --- /dev/null +++ b/net/bridge/netfilter/ebt_ip6.c @@ -0,0 +1,144 @@ +/* + * ebt_ip6 + * + * Authors: + * Manohar Castelino <manohar.r.castelino@intel.com> + * Kuo-Lang Tseng <kuo-lang.tseng@intel.com> + * Jan Engelhardt <jengelh@computergmbh.de> + * + * Summary: + * This is just a modification of the IPv4 code written by + * Bart De Schuymer <bdschuym@pandora.be> + * with the changes required to support IPv6 + * + * Jan, 2008 + */ + +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip6.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in.h> +#include <linux/module.h> +#include <net/dsfield.h> + +struct tcpudphdr { + __be16 src; + __be16 dst; +}; + +static int ebt_filter_ip6(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, const void *data, + unsigned int datalen) +{ + const struct ebt_ip6_info *info = (struct ebt_ip6_info *)data; + const struct ipv6hdr *ih6; + struct ipv6hdr _ip6h; + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; + struct in6_addr tmp_addr; + int i; + + ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); + if (ih6 == NULL) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP6_TCLASS && + FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) + return EBT_NOMATCH; + for (i = 0; i < 4; i++) + tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] & + info->smsk.in6_u.u6_addr32[i]; + if (info->bitmask & EBT_IP6_SOURCE && + FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0), + EBT_IP6_SOURCE)) + return EBT_NOMATCH; + for (i = 0; i < 4; i++) + tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] & + info->dmsk.in6_u.u6_addr32[i]; + if (info->bitmask & EBT_IP6_DEST && + FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST)) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP6_PROTO) { + uint8_t nexthdr = ih6->nexthdr; + int offset_ph; + + offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr); + if (offset_ph == -1) + return EBT_NOMATCH; + if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) + return EBT_NOMATCH; + if (!(info->bitmask & EBT_IP6_DPORT) && + !(info->bitmask & EBT_IP6_SPORT)) + return EBT_MATCH; + pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), + &_ports); + if (pptr == NULL) + return EBT_NOMATCH; + if (info->bitmask & EBT_IP6_DPORT) { + u32 dst = ntohs(pptr->dst); + if (FWINV(dst < info->dport[0] || + dst > info->dport[1], EBT_IP6_DPORT)) + return EBT_NOMATCH; + } + if (info->bitmask & EBT_IP6_SPORT) { + u32 src = ntohs(pptr->src); + if (FWINV(src < info->sport[0] || + src > info->sport[1], EBT_IP6_SPORT)) + return EBT_NOMATCH; + } + return EBT_MATCH; + } + return EBT_MATCH; +} + +static int ebt_ip6_check(const char *tablename, unsigned int hookmask, + const struct ebt_entry *e, void *data, unsigned int datalen) +{ + struct ebt_ip6_info *info = (struct ebt_ip6_info *)data; + + if (datalen != EBT_ALIGN(sizeof(struct ebt_ip6_info))) + return -EINVAL; + if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) + return -EINVAL; + if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) + return -EINVAL; + if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { + if (info->invflags & EBT_IP6_PROTO) + return -EINVAL; + if (info->protocol != IPPROTO_TCP && + info->protocol != IPPROTO_UDP && + info->protocol != IPPROTO_UDPLITE && + info->protocol != IPPROTO_SCTP && + info->protocol != IPPROTO_DCCP) + return -EINVAL; + } + if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) + return -EINVAL; + if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) + return -EINVAL; + return 0; +} + +static struct ebt_match filter_ip6 = +{ + .name = EBT_IP6_MATCH, + .match = ebt_filter_ip6, + .check = ebt_ip6_check, + .me = THIS_MODULE, +}; + +static int __init ebt_ip6_init(void) +{ + return ebt_register_match(&filter_ip6); +} + +static void __exit ebt_ip6_fini(void) +{ + ebt_unregister_match(&filter_ip6); +} + +module_init(ebt_ip6_init); +module_exit(ebt_ip6_fini); +MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match"); +MODULE_LICENSE("GPL"); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 0b209e4aad0..c883ec8a28b 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -18,6 +18,9 @@ #include <linux/if_arp.h> #include <linux/spinlock.h> #include <net/netfilter/nf_log.h> +#include <linux/ipv6.h> +#include <net/ipv6.h> +#include <linux/in6.h> static DEFINE_SPINLOCK(ebt_log_lock); @@ -58,6 +61,27 @@ static void print_MAC(const unsigned char *p) printk("%02x%c", *p, i == ETH_ALEN - 1 ? ' ':':'); } +static void +print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) +{ + if (protocol == IPPROTO_TCP || + protocol == IPPROTO_UDP || + protocol == IPPROTO_UDPLITE || + protocol == IPPROTO_SCTP || + protocol == IPPROTO_DCCP) { + const struct tcpudphdr *pptr; + struct tcpudphdr _ports; + + pptr = skb_header_pointer(skb, offset, + sizeof(_ports), &_ports); + if (pptr == NULL) { + printk(" INCOMPLETE TCP/UDP header"); + return; + } + printk(" SPT=%u DPT=%u", ntohs(pptr->src), ntohs(pptr->dst)); + } +} + #define myNIPQUAD(a) a[0], a[1], a[2], a[3] static void ebt_log_packet(unsigned int pf, unsigned int hooknum, @@ -95,23 +119,31 @@ ebt_log_packet(unsigned int pf, unsigned int hooknum, printk(" IP SRC=%u.%u.%u.%u IP DST=%u.%u.%u.%u, IP " "tos=0x%02X, IP proto=%d", NIPQUAD(ih->saddr), NIPQUAD(ih->daddr), ih->tos, ih->protocol); - if (ih->protocol == IPPROTO_TCP || - ih->protocol == IPPROTO_UDP || - ih->protocol == IPPROTO_UDPLITE || - ih->protocol == IPPROTO_SCTP || - ih->protocol == IPPROTO_DCCP) { - const struct tcpudphdr *pptr; - struct tcpudphdr _ports; - - pptr = skb_header_pointer(skb, ih->ihl*4, - sizeof(_ports), &_ports); - if (pptr == NULL) { - printk(" INCOMPLETE TCP/UDP header"); - goto out; - } - printk(" SPT=%u DPT=%u", ntohs(pptr->src), - ntohs(pptr->dst)); + print_ports(skb, ih->protocol, ih->ihl*4); + goto out; + } + + if ((bitmask & EBT_LOG_IP6) && eth_hdr(skb)->h_proto == + htons(ETH_P_IPV6)) { + const struct ipv6hdr *ih; + struct ipv6hdr _iph; + uint8_t nexthdr; + int offset_ph; + + ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); + if (ih == NULL) { + printk(" INCOMPLETE IPv6 header"); + goto out; } + printk(" IPv6 SRC=%x:%x:%x:%x:%x:%x:%x:%x " + "IPv6 DST=%x:%x:%x:%x:%x:%x:%x:%x, IPv6 " + "priority=0x%01X, Next Header=%d", NIP6(ih->saddr), + NIP6(ih->daddr), ih->priority, ih->nexthdr); + nexthdr = ih->nexthdr; + offset_ph = ipv6_skip_exthdr(skb, sizeof(_iph), &nexthdr); + if (offset_ph == -1) + goto out; + print_ports(skb, nexthdr, offset_ph); goto out; } diff --git a/net/core/dev.c b/net/core/dev.c index ce88c0d3e35..58296307787 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3141,7 +3141,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST) + if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST) dev->change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 90e2177af08..dccd737ea2e 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -242,11 +242,11 @@ static ssize_t netstat_show(const struct device *d, offset % sizeof(unsigned long) != 0); read_lock(&dev_base_lock); - if (dev_isalive(dev) && dev->get_stats && - (stats = (*dev->get_stats)(dev))) + if (dev_isalive(dev)) { + stats = dev->get_stats(dev); ret = sprintf(buf, fmt_ulong, *(unsigned long *)(((u8 *) stats) + offset)); - + } read_unlock(&dev_base_lock); return ret; } @@ -457,8 +457,7 @@ int netdev_register_kobject(struct net_device *net) strlcpy(dev->bus_id, net->name, BUS_ID_SIZE); #ifdef CONFIG_SYSFS - if (net->get_stats) - *groups++ = &netstat_group; + *groups++ = &netstat_group; #ifdef CONFIG_WIRELESS_EXT if (net->wireless_handlers && net->wireless_handlers->get_wireless_stats) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8dca2111049..fdf537707e5 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -390,6 +390,7 @@ struct pktgen_thread { int cpu; wait_queue_head_t queue; + struct completion start_done; }; #define REMOVE 1 @@ -3414,6 +3415,7 @@ static int pktgen_thread_worker(void *arg) BUG_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); + complete(&t->start_done); pr_debug("pktgen: starting pktgen/%d: pid=%d\n", cpu, task_pid_nr(current)); @@ -3615,6 +3617,7 @@ static int __init pktgen_create_thread(int cpu) INIT_LIST_HEAD(&t->if_list); list_add_tail(&t->th_list, &pktgen_threads); + init_completion(&t->start_done); p = kthread_create(pktgen_thread_worker, t, "kpktgend_%d", cpu); if (IS_ERR(p)) { @@ -3639,6 +3642,7 @@ static int __init pktgen_create_thread(int cpu) } wake_up_process(p); + wait_for_completion(&t->start_done); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf857c4dc7b..ca32ddb8ad1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -606,6 +606,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, { struct ifinfomsg *ifm; struct nlmsghdr *nlh; + struct net_device_stats *stats; + struct nlattr *attr; nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); if (nlh == NULL) @@ -652,19 +654,13 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); } - if (dev->get_stats) { - struct net_device_stats *stats = dev->get_stats(dev); - if (stats) { - struct nlattr *attr; + attr = nla_reserve(skb, IFLA_STATS, + sizeof(struct rtnl_link_stats)); + if (attr == NULL) + goto nla_put_failure; - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); - if (attr == NULL) - goto nla_put_failure; - - copy_rtnl_link_stats(nla_data(attr), stats); - } - } + stats = dev->get_stats(dev); + copy_rtnl_link_stats(nla_data(attr), stats); if (dev->rtnl_link_ops) { if (rtnl_link_fill(skb, dev) < 0) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 5fc80105724..a570e2af22c 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -125,14 +125,6 @@ static struct ctl_table net_core_table[] = { #endif /* CONFIG_XFRM */ #endif /* CONFIG_NET */ { - .ctl_name = NET_CORE_SOMAXCONN, - .procname = "somaxconn", - .data = &init_net.core.sysctl_somaxconn, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = NET_CORE_BUDGET, .procname = "netdev_budget", .data = &netdev_budget, @@ -151,6 +143,18 @@ static struct ctl_table net_core_table[] = { { .ctl_name = 0 } }; +static struct ctl_table netns_core_table[] = { + { + .ctl_name = NET_CORE_SOMAXCONN, + .procname = "somaxconn", + .data = &init_net.core.sysctl_somaxconn, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .ctl_name = 0 } +}; + static __net_initdata struct ctl_path net_core_path[] = { { .procname = "net", .ctl_name = CTL_NET, }, { .procname = "core", .ctl_name = NET_CORE, }, @@ -159,23 +163,17 @@ static __net_initdata struct ctl_path net_core_path[] = { static __net_init int sysctl_core_net_init(struct net *net) { - struct ctl_table *tbl, *tmp; + struct ctl_table *tbl; net->core.sysctl_somaxconn = SOMAXCONN; - tbl = net_core_table; + tbl = netns_core_table; if (net != &init_net) { - tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL); + tbl = kmemdup(tbl, sizeof(netns_core_table), GFP_KERNEL); if (tbl == NULL) goto err_dup; - for (tmp = tbl; tmp->procname; tmp++) { - if (tmp->data >= (void *)&init_net && - tmp->data < (void *)(&init_net + 1)) - tmp->data += (char *)net - (char *)&init_net; - else - tmp->mode &= ~0222; - } + tbl[0].data = &net->core.sysctl_somaxconn; } net->core.sysctl_hdr = register_net_sysctl_table(net, @@ -186,7 +184,7 @@ static __net_init int sysctl_core_net_init(struct net *net) return 0; err_reg: - if (tbl != net_core_table) + if (tbl != netns_core_table) kfree(tbl); err_dup: return -ENOMEM; @@ -198,7 +196,7 @@ static __net_exit void sysctl_core_net_exit(struct net *net) tbl = net->core.sysctl_hdr->ctl_table_arg; unregister_net_sysctl_table(net->core.sysctl_hdr); - BUG_ON(tbl == net_core_table); + BUG_ON(tbl == netns_core_table); kfree(tbl); } @@ -209,6 +207,7 @@ static __net_initdata struct pernet_operations sysctl_core_ops = { static __init int sysctl_core_init(void) { + register_net_sysctl_rotable(net_core_path, net_core_table); return register_pernet_subsys(&sysctl_core_ops); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 418862f1bf2..9b539fa9fe1 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1288,7 +1288,6 @@ static void arp_format_neigh_entry(struct seq_file *seq, struct neighbour *n) { char hbuffer[HBUFFERLEN]; - const char hexbuf[] = "0123456789ABCDEF"; int k, j; char tbuf[16]; struct net_device *dev = n->dev; @@ -1302,8 +1301,8 @@ static void arp_format_neigh_entry(struct seq_file *seq, else { #endif for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < dev->addr_len; j++) { - hbuffer[k++] = hexbuf[(n->ha[j] >> 4) & 15]; - hbuffer[k++] = hexbuf[n->ha[j] & 15]; + hbuffer[k++] = hex_asc_hi(n->ha[j]); + hbuffer[k++] = hex_asc_lo(n->ha[j]); hbuffer[k++] = ':'; } hbuffer[--k] = 0; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cd6ce6ac635..be1cb89a8d5 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -598,7 +598,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) #ifdef CONFIG_SYSCTL static int zero; -static struct ctl_table ip4_frags_ctl_table[] = { +static struct ctl_table ip4_frags_ns_ctl_table[] = { { .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", @@ -624,6 +624,10 @@ static struct ctl_table ip4_frags_ctl_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies }, + { } +}; + +static struct ctl_table ip4_frags_ctl_table[] = { { .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", @@ -644,22 +648,20 @@ static struct ctl_table ip4_frags_ctl_table[] = { { } }; -static int ip4_frags_ctl_register(struct net *net) +static int ip4_frags_ns_ctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; - table = ip4_frags_ctl_table; + table = ip4_frags_ns_ctl_table; if (net != &init_net) { - table = kmemdup(table, sizeof(ip4_frags_ctl_table), GFP_KERNEL); + table = kmemdup(table, sizeof(ip4_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; table[0].data = &net->ipv4.frags.high_thresh; table[1].data = &net->ipv4.frags.low_thresh; table[2].data = &net->ipv4.frags.timeout; - table[3].mode &= ~0222; - table[4].mode &= ~0222; } hdr = register_net_sysctl_table(net, net_ipv4_ctl_path, table); @@ -676,7 +678,7 @@ err_alloc: return -ENOMEM; } -static void ip4_frags_ctl_unregister(struct net *net) +static void ip4_frags_ns_ctl_unregister(struct net *net) { struct ctl_table *table; @@ -684,13 +686,22 @@ static void ip4_frags_ctl_unregister(struct net *net) unregister_net_sysctl_table(net->ipv4.frags_hdr); kfree(table); } + +static void ip4_frags_ctl_register(void) +{ + register_net_sysctl_rotable(net_ipv4_ctl_path, ip4_frags_ctl_table); +} #else -static inline int ip4_frags_ctl_register(struct net *net) +static inline int ip4_frags_ns_ctl_register(struct net *net) { return 0; } -static inline void ip4_frags_ctl_unregister(struct net *net) +static inline void ip4_frags_ns_ctl_unregister(struct net *net) +{ +} + +static inline void ip4_frags_ctl_register(void) { } #endif @@ -714,12 +725,12 @@ static int ipv4_frags_init_net(struct net *net) inet_frags_init_net(&net->ipv4.frags); - return ip4_frags_ctl_register(net); + return ip4_frags_ns_ctl_register(net); } static void ipv4_frags_exit_net(struct net *net) { - ip4_frags_ctl_unregister(net); + ip4_frags_ns_ctl_unregister(net); inet_frags_exit_net(&net->ipv4.frags, &ip4_frags); } @@ -730,6 +741,7 @@ static struct pernet_operations ip4_frags_ops = { void __init ipfrag_init(void) { + ip4_frags_ctl_register(); register_pernet_subsys(&ip4_frags_ops); ip4_frags.hashfn = ip4_hashfn; ip4_frags.constructor = ip4_frag_init; diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2ada033406d..2a61158ea72 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -313,9 +313,8 @@ static void ipgre_tunnel_uninit(struct net_device *dev) static void ipgre_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. @@ -398,149 +397,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipgre_lock); return; -#else - struct iphdr *iph = (struct iphdr*)dp; - struct iphdr *eiph; - __be16 *p = (__be16*)(dp+(iph->ihl<<2)); - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - __be16 flags; - int grehlen = (iph->ihl<<2) + 4; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (p[1] != htons(ETH_P_IP)) - return; - - flags = p[0]; - if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { - if (flags&(GRE_VERSION|GRE_ROUTING)) - return; - if (flags&GRE_CSUM) - grehlen += 4; - if (flags&GRE_KEY) - grehlen += 4; - if (flags&GRE_SEQ) - grehlen += 4; - } - if (len < grehlen + sizeof(struct iphdr)) - return; - eiph = (struct iphdr*)(dp + grehlen); - - switch (type) { - default: - return; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < (iph->ihl<<2)) - return; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - n -= grehlen; - rel_info = htonl(n << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < grehlen+68) - return; - n -= grehlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_dst = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_GRE; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl)) { - kfree_skb(skb2); - return; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_dst = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_IPGRE) { - ip_rt_put(rt); - kfree_skb(skb2); - return; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_IPGRE) { - kfree_skb(skb2); - return; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); -#endif } static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) @@ -617,6 +473,8 @@ static int ipgre_rcv(struct sk_buff *skb) read_lock(&ipgre_lock); if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr, key)) != NULL) { + struct net_device_stats *stats = &tunnel->dev->stats; + secpath_reset(skb); skb->protocol = *(__be16*)(h + 2); @@ -641,28 +499,28 @@ static int ipgre_rcv(struct sk_buff *skb) /* Looped back packet, drop it! */ if (skb->rtable->fl.iif == 0) goto drop; - tunnel->stat.multicast++; + stats->multicast++; skb->pkt_type = PACKET_BROADCAST; } #endif if (((flags&GRE_CSUM) && csum) || (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { - tunnel->stat.rx_crc_errors++; - tunnel->stat.rx_errors++; + stats->rx_crc_errors++; + stats->rx_errors++; goto drop; } if (tunnel->parms.i_flags&GRE_SEQ) { if (!(flags&GRE_SEQ) || (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) { - tunnel->stat.rx_fifo_errors++; - tunnel->stat.rx_errors++; + stats->rx_fifo_errors++; + stats->rx_errors++; goto drop; } tunnel->i_seqno = seqno + 1; } - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + stats->rx_packets++; + stats->rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -684,7 +542,7 @@ drop_nolock: static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *old_iph = ip_hdr(skb); struct iphdr *tiph; u8 tos; @@ -698,7 +556,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -714,7 +572,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) /* NBMA tunnel */ if (skb->dst == NULL) { - tunnel->stat.tx_fifo_errors++; + stats->tx_fifo_errors++; goto tx_error; } @@ -765,7 +623,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .proto = IPPROTO_GRE }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error; } } @@ -773,7 +631,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -1098,11 +956,6 @@ done: return err; } -static struct net_device_stats *ipgre_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); @@ -1228,7 +1081,6 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->uninit = ipgre_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipgre_tunnel_xmit; - dev->get_stats = ipgre_tunnel_get_stats; dev->do_ioctl = ipgre_tunnel_ioctl; dev->change_mtu = ipgre_tunnel_change_mtu; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 149111f08e8..86d8836551b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -278,9 +278,8 @@ static void ipip_tunnel_uninit(struct net_device *dev) static int ipip_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ @@ -337,133 +336,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct iphdr *eiph; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; - struct sk_buff *skb2; - struct flowi fl; - struct rtable *rt; - - if (len < hlen + sizeof(struct iphdr)) - return 0; - eiph = (struct iphdr*)(dp + hlen); - - switch (type) { - default: - return 0; - case ICMP_PARAMETERPROB: - n = ntohl(icmp_hdr(skb)->un.gateway) >> 24; - if (n < hlen) - return 0; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMP_PARAMETERPROB; - rel_info = htonl((n - hlen) << 24); - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return 0; - case ICMP_FRAG_NEEDED: - /* And it is the only really necessary thing :-) */ - n = ntohs(icmp_hdr(skb)->un.frag.mtu); - if (n < hlen+68) - return 0; - n -= hlen; - /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) - return 0; - rel_info = htonl(n); - break; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return 0; - break; - } - - /* Prepare fake skb to feed it to icmp_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)eiph); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - memset(&fl, 0, sizeof(fl)); - fl.fl4_daddr = eiph->saddr; - fl.fl4_tos = RT_TOS(eiph->tos); - fl.proto = IPPROTO_IPIP; - if (ip_route_output_key(dev_net(skb->dev), &rt, &key)) { - kfree_skb(skb2); - return 0; - } - skb2->dev = rt->u.dst.dev; - - /* route "incoming" packet */ - if (rt->rt_flags&RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; - fl.fl4_daddr = eiph->daddr; - fl.fl4_src = eiph->saddr; - fl.fl4_tos = eiph->tos; - if (ip_route_output_key(dev_net(skb->dev), &rt, &fl) || - rt->u.dst.dev->type != ARPHRD_TUNNEL) { - ip_rt_put(rt); - kfree_skb(skb2); - return 0; - } - } else { - ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || - skb2->dst->dev->type != ARPHRD_TUNNEL) { - kfree_skb(skb2); - return 0; - } - } - - /* change mtu on this route */ - if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { - kfree_skb(skb2); - return 0; - } - skb2->dst->ops->update_pmtu(skb2->dst, n); - } else if (type == ICMP_TIME_EXCEEDED) { - struct ip_tunnel *t = netdev_priv(skb2->dev); - if (t->parms.iph.ttl) { - rel_type = ICMP_DEST_UNREACH; - rel_code = ICMP_HOST_UNREACH; - } - } - - icmp_send(skb2, rel_type, rel_code, rel_info); - kfree_skb(skb2); - return 0; -#endif } static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, @@ -496,8 +368,8 @@ static int ipip_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + tunnel->dev->stats.rx_packets++; + tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -520,7 +392,7 @@ static int ipip_rcv(struct sk_buff *skb) static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *tiph = &tunnel->parms.iph; u8 tos = tunnel->parms.iph.tos; __be16 df = tiph->frag_off; @@ -533,7 +405,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int mtu; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -546,7 +418,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!dst) { /* NBMA tunnel */ if ((rt = skb->rtable) == NULL) { - tunnel->stat.tx_fifo_errors++; + stats->tx_fifo_errors++; goto tx_error; } if ((dst = rt->rt_gateway) == 0) @@ -561,7 +433,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .tos = RT_TOS(tos) } }, .proto = IPPROTO_IPIP }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } } @@ -569,7 +441,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -579,7 +451,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; if (mtu < 68) { - tunnel->stat.collisions++; + stats->collisions++; ip_rt_put(rt); goto tx_error; } @@ -813,11 +685,6 @@ done: return err; } -static struct net_device_stats *ipip_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) @@ -830,7 +697,6 @@ static void ipip_tunnel_setup(struct net_device *dev) { dev->uninit = ipip_tunnel_uninit; dev->hard_start_xmit = ipip_tunnel_xmit; - dev->get_stats = ipip_tunnel_get_stats; dev->do_ioctl = ipip_tunnel_ioctl; dev->change_mtu = ipip_tunnel_change_mtu; dev->destructor = free_netdev; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 11700a4dcd9..a34da4977c7 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -181,26 +181,20 @@ static int reg_vif_num = -1; static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats*)netdev_priv(dev))->tx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(dev))->tx_packets++; + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; } -static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) -{ - return (struct net_device_stats*)netdev_priv(dev); -} - static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 8; dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; - dev->get_stats = reg_vif_get_stats; dev->destructor = free_netdev; } @@ -209,8 +203,7 @@ static struct net_device *ipmr_reg_vif(void) struct net_device *dev; struct in_device *in_dev; - dev = alloc_netdev(sizeof(struct net_device_stats), "pimreg", - reg_vif_setup); + dev = alloc_netdev(0, "pimreg", reg_vif_setup); if (dev == NULL) return NULL; @@ -1170,8 +1163,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out+=skb->len; - ((struct net_device_stats*)netdev_priv(vif->dev))->tx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(vif->dev))->tx_packets++; + vif->dev->stats.tx_bytes += skb->len; + vif->dev->stats.tx_packets++; ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); kfree_skb(skb); return; @@ -1230,8 +1223,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (vif->flags & VIFF_TUNNEL) { ip_encap(skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_packets++; - ((struct ip_tunnel *)netdev_priv(vif->dev))->stat.tx_bytes+=skb->len; + vif->dev->stats.tx_packets++; + vif->dev->stats.tx_bytes += skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1487,8 +1480,8 @@ int pim_rcv_v1(struct sk_buff * skb) skb->pkt_type = PACKET_HOST; dst_release(skb->dst); skb->dst = NULL; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; nf_reset(skb); netif_rx(skb); dev_put(reg_dev); @@ -1542,8 +1535,8 @@ static int pim_rcv(struct sk_buff * skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats*)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2767841a8ce..6e251402506 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -365,6 +365,18 @@ config IP_NF_RAW If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +# security table for MAC policy +config IP_NF_SECURITY + tristate "Security table" + depends on IP_NF_IPTABLES + depends on SECURITY + default m if NETFILTER_ADVANCED=n + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d9b92fbf557..3f31291f37c 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -42,6 +42,7 @@ obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o +obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 26a37cedcf2..aa33a4a7a71 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -156,7 +156,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); - data_len = 0; break; case IPQ_COPY_PACKET: @@ -224,8 +223,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: - if (skb) - kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip_queue: error creating packet message\n"); return NULL; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c new file mode 100644 index 00000000000..2b472ac2263 --- /dev/null +++ b/net/ipv4/netfilter/iptable_security.c @@ -0,0 +1,180 @@ +/* + * "security" table + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/ip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("iptables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[3]; + struct ipt_error term; +} initial_table __initdata = { + .repl = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), + .hook_entry = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ipt_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + .underflow = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ipt_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2, + }, + }, + .entries = { + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IPT_ERROR_INIT, /* ERROR */ +}; + +static struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .lock = __RW_LOCK_UNLOCKED(security_table.lock), + .me = THIS_MODULE, + .af = AF_INET, +}; + +static unsigned int +ipt_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_local_in_net(in, out)->ipv4.iptable_security); +} + +static unsigned int +ipt_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(skb, hook, in, out, + nf_forward_net(in, out)->ipv4.iptable_security); +} + +static unsigned int +ipt_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* Somebody is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) + || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk(KERN_INFO "iptable_security: ignoring short " + "SOCK_RAW packet.\n"); + return NF_ACCEPT; + } + return ipt_do_table(skb, hook, in, out, + nf_local_out_net(in, out)->ipv4.iptable_security); +} + +static struct nf_hook_ops ipt_ops[] __read_mostly = { + { + .hook = ipt_local_in_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_SECURITY, + }, + { + .hook = ipt_forward_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP_PRI_SECURITY, + }, + { + .hook = ipt_local_out_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_SECURITY, + }, +}; + +static int __net_init iptable_security_net_init(struct net *net) +{ + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, &initial_table.repl); + + if (IS_ERR(net->ipv4.iptable_security)) + return PTR_ERR(net->ipv4.iptable_security); + + return 0; +} + +static void __net_exit iptable_security_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.iptable_security); +} + +static struct pernet_operations iptable_security_net_ops = { + .init = iptable_security_net_init, + .exit = iptable_security_net_exit, +}; + +static int __init iptable_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + return ret; + + ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + if (ret < 0) + goto cleanup_table; + + return ret; + +cleanup_table: + unregister_pernet_subsys(&iptable_security_net_ops); + return ret; +} + +static void __exit iptable_security_fini(void) +{ + nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops)); + unregister_pernet_subsys(&iptable_security_net_ops); +} + +module_init(iptable_security_init); +module_exit(iptable_security_fini); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 78ab19accac..97791048fa9 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -87,9 +87,8 @@ static int icmp_packet(struct nf_conn *ct, means this will only run once even if count hits zero twice (theoretically possible with SMP) */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + if (atomic_dec_and_test(&ct->proto.icmp.count)) + nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 92f90ae46f4..df41026b60d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -160,7 +160,7 @@ static struct dst_ops ipv4_dst_ops = { .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, - .local_out = ip_local_out, + .local_out = __ip_local_out, .entry_size = sizeof(struct rtable), .entries = ATOMIC_INIT(0), }; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index debf2358160..e399bde7813 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1836,7 +1836,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - unsigned int cur_mss = tcp_current_mss(sk, 0); + unsigned int cur_mss; int err; /* Inconslusive MTU probe */ @@ -1858,6 +1858,11 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return -ENOMEM; } + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + + cur_mss = tcp_current_mss(sk, 0); + /* If receiver has shrunk his window, and skb is out of * new window, do not retransmit it. The exception is the * case, when window is shrunk to zero. In this case @@ -1884,9 +1889,6 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); - if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) - return -EHOSTUNREACH; /* Routing failure or similar. */ - /* Some Solaris stacks overoptimize and ignore the FIN on a * retransmit when old data is attached. So strip it off * since it is cheap to do so and saves bytes on the network. diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e591e09e5e4..3a835578fd1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1764,14 +1764,16 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * 2) Configure prefixes with the auto flag set */ - /* Avoid arithmetic overflow. Really, we could - save rt_expires in seconds, likely valid_lft, - but it would require division in fib gc, that it - not good. - */ - if (valid_lft >= 0x7FFFFFFF/HZ) + if (valid_lft == INFINITY_LIFE_TIME) + rt_expires = ~0UL; + else if (valid_lft >= 0x7FFFFFFF/HZ) { + /* Avoid arithmetic overflow. Really, we could + * save rt_expires in seconds, likely valid_lft, + * but it would require division in fib gc, that it + * not good. + */ rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); - else + } else rt_expires = valid_lft * HZ; /* @@ -1779,7 +1781,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * Avoid arithmetic overflow there as well. * Overflow can happen only if HZ < USER_HZ. */ - if (HZ < USER_HZ && rt_expires > 0x7FFFFFFF / USER_HZ) + if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ) rt_expires = 0x7FFFFFFF / USER_HZ; if (pinfo->onlink) { @@ -1788,17 +1790,28 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) dev->ifindex, 1); if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { - if (rt->rt6i_flags&RTF_EXPIRES) { - if (valid_lft == 0) { - ip6_del_rt(rt); - rt = NULL; - } else { - rt->rt6i_expires = jiffies + rt_expires; - } + /* Autoconf prefix route */ + if (valid_lft == 0) { + ip6_del_rt(rt); + rt = NULL; + } else if (~rt_expires) { + /* not infinity */ + rt->rt6i_expires = jiffies + rt_expires; + rt->rt6i_flags |= RTF_EXPIRES; + } else { + rt->rt6i_flags &= ~RTF_EXPIRES; + rt->rt6i_expires = 0; } } else if (valid_lft) { + int flags = RTF_ADDRCONF | RTF_PREFIX_RT; + clock_t expires = 0; + if (~rt_expires) { + /* not infinity */ + flags |= RTF_EXPIRES; + expires = jiffies_to_clock_t(rt_expires); + } addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len, - dev, jiffies_to_clock_t(rt_expires), RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT); + dev, expires, flags); } if (rt) dst_release(&rt->u.dst); @@ -2021,7 +2034,8 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, struct inet6_dev *idev; struct net_device *dev; int scope; - u32 flags = RTF_EXPIRES; + u32 flags; + clock_t expires; ASSERT_RTNL(); @@ -2041,8 +2055,13 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, if (valid_lft == INFINITY_LIFE_TIME) { ifa_flags |= IFA_F_PERMANENT; flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + expires = 0; + } else { + if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + flags = RTF_EXPIRES; + expires = jiffies_to_clock_t(valid_lft * HZ); + } if (prefered_lft == 0) ifa_flags |= IFA_F_DEPRECATED; @@ -2060,7 +2079,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, spin_unlock_bh(&ifp->lock); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - jiffies_to_clock_t(valid_lft * HZ), flags); + expires, flags); /* * Note that section 3.1 of RFC 4429 indicates * that the Optimistic flag should not be set for @@ -3148,7 +3167,8 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, u32 prefered_lft, u32 valid_lft) { - u32 flags = RTF_EXPIRES; + u32 flags; + clock_t expires; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; @@ -3156,8 +3176,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, if (valid_lft == INFINITY_LIFE_TIME) { ifa_flags |= IFA_F_PERMANENT; flags = 0; - } else if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + expires = 0; + } else { + if (valid_lft >= 0x7FFFFFFF/HZ) + valid_lft = 0x7FFFFFFF/HZ; + flags = RTF_EXPIRES; + expires = jiffies_to_clock_t(valid_lft * HZ); + } if (prefered_lft == 0) ifa_flags |= IFA_F_DEPRECATED; @@ -3176,7 +3201,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, ipv6_ifa_notify(0, ifp); addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - jiffies_to_clock_t(valid_lft * HZ), flags); + expires, flags); addrconf_verify(0); return 0; @@ -4242,7 +4267,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev) neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6, NET_IPV6_NEIGH, "ipv6", &ndisc_ifinfo_sysctl_change, - NULL); + ndisc_ifinfo_sysctl_strategy); __addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name, idev->dev->ifindex, idev, &idev->cnf); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 2bda3ba100b..37814810ac4 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -711,7 +711,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } if (!ip6_tnl_rcv_ctl(t)) { - t->stat.rx_dropped++; + t->dev->stats.rx_dropped++; read_unlock(&ip6_tnl_lock); goto discard; } @@ -728,8 +728,8 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, dscp_ecn_decapsulate(t, ipv6h, skb); - t->stat.rx_packets++; - t->stat.rx_bytes += skb->len; + t->dev->stats.rx_packets++; + t->dev->stats.rx_bytes += skb->len; netif_rx(skb); read_unlock(&ip6_tnl_lock); return 0; @@ -849,7 +849,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, __u32 *pmtu) { struct ip6_tnl *t = netdev_priv(dev); - struct net_device_stats *stats = &t->stat; + struct net_device_stats *stats = &t->dev->stats; struct ipv6hdr *ipv6h = ipv6_hdr(skb); struct ipv6_tel_txoption opt; struct dst_entry *dst; @@ -1043,11 +1043,11 @@ static int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net_device_stats *stats = &t->stat; + struct net_device_stats *stats = &t->dev->stats; int ret; if (t->recursion++) { - t->stat.collisions++; + stats->collisions++; goto tx_err; } @@ -1289,19 +1289,6 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } /** - * ip6_tnl_get_stats - return the stats for tunnel device - * @dev: virtual device associated with tunnel - * - * Return: stats for device - **/ - -static struct net_device_stats * -ip6_tnl_get_stats(struct net_device *dev) -{ - return &(((struct ip6_tnl *)netdev_priv(dev))->stat); -} - -/** * ip6_tnl_change_mtu - change mtu manually for tunnel device * @dev: virtual device associated with tunnel * @new_mtu: the new mtu @@ -1334,7 +1321,6 @@ static void ip6_tnl_dev_setup(struct net_device *dev) dev->uninit = ip6_tnl_dev_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ip6_tnl_xmit; - dev->get_stats = ip6_tnl_get_stats; dev->do_ioctl = ip6_tnl_ioctl; dev->change_mtu = ip6_tnl_change_mtu; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 2de3c464fe7..bf268b38696 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -388,8 +388,8 @@ static int pim6_rcv(struct sk_buff *skb) skb->ip_summed = 0; skb->pkt_type = PACKET_HOST; dst_release(skb->dst); - ((struct net_device_stats *)netdev_priv(reg_dev))->rx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(reg_dev))->rx_packets++; + reg_dev->stats.rx_bytes += skb->len; + reg_dev->stats.rx_packets++; skb->dst = NULL; nf_reset(skb); netif_rx(skb); @@ -409,26 +409,20 @@ static struct inet6_protocol pim6_protocol = { static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); - ((struct net_device_stats *)netdev_priv(dev))->tx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(dev))->tx_packets++; + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; } -static struct net_device_stats *reg_vif_get_stats(struct net_device *dev) -{ - return (struct net_device_stats *)netdev_priv(dev); -} - static void reg_vif_setup(struct net_device *dev) { dev->type = ARPHRD_PIMREG; dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8; dev->flags = IFF_NOARP; dev->hard_start_xmit = reg_vif_xmit; - dev->get_stats = reg_vif_get_stats; dev->destructor = free_netdev; } @@ -436,9 +430,7 @@ static struct net_device *ip6mr_reg_vif(void) { struct net_device *dev; - dev = alloc_netdev(sizeof(struct net_device_stats), "pim6reg", - reg_vif_setup); - + dev = alloc_netdev(0, "pim6reg", reg_vif_setup); if (dev == NULL) return NULL; @@ -1377,8 +1369,8 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi) if (vif->flags & MIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; - ((struct net_device_stats *)netdev_priv(vif->dev))->tx_bytes += skb->len; - ((struct net_device_stats *)netdev_priv(vif->dev))->tx_packets++; + vif->dev->stats.tx_bytes += skb->len; + vif->dev->stats.tx_packets++; ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT); kfree_skb(skb); return 0; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index a55fc05b812..282fdb31f8e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1727,10 +1727,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f return ret; } -static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, - int nlen, void __user *oldval, - size_t __user *oldlenp, - void __user *newval, size_t newlen) +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, + int nlen, void __user *oldval, + size_t __user *oldlenp, + void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6cae5475737..689dec899c5 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -208,5 +208,17 @@ config IP6_NF_RAW If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +# security table for MAC policy +config IP6_NF_SECURITY + tristate "Security table" + depends on IP6_NF_IPTABLES + depends on SECURITY + default m if NETFILTER_ADVANCED=n + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + endmenu diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fbf2c14ed88..3f17c948eef 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o +obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o # objects for l3 independent conntrack nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 2eff3ae8977..1b8815f6153 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -159,7 +159,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) case IPQ_COPY_META: case IPQ_COPY_NONE: size = NLMSG_SPACE(sizeof(*pmsg)); - data_len = 0; break; case IPQ_COPY_PACKET: @@ -226,8 +225,6 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) return skb; nlmsg_failure: - if (skb) - kfree_skb(skb); *errp = -EINVAL; printk(KERN_ERR "ip6_queue: error creating packet message\n"); return NULL; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c new file mode 100644 index 00000000000..063a3d9c3c6 --- /dev/null +++ b/net/ipv6/netfilter/ip6table_security.c @@ -0,0 +1,172 @@ +/* + * "security" table for IPv6 + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("ip6tables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static struct +{ + struct ip6t_replace repl; + struct ip6t_standard entries[3]; + struct ip6t_error term; +} initial_table __initdata = { + .repl = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .num_entries = 4, + .size = sizeof(struct ip6t_standard) * 3 + sizeof(struct ip6t_error), + .hook_entry = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, + }, + .underflow = { + [NF_INET_LOCAL_IN] = 0, + [NF_INET_FORWARD] = sizeof(struct ip6t_standard), + [NF_INET_LOCAL_OUT] = sizeof(struct ip6t_standard) * 2, + }, + }, + .entries = { + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* FORWARD */ + IP6T_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */ + }, + .term = IP6T_ERROR_INIT, /* ERROR */ +}; + +static struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .lock = __RW_LOCK_UNLOCKED(security_table.lock), + .me = THIS_MODULE, + .af = AF_INET6, +}; + +static unsigned int +ip6t_local_in_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + init_net.ipv6.ip6table_security); +} + +static unsigned int +ip6t_forward_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + init_net.ipv6.ip6table_security); +} + +static unsigned int +ip6t_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* TBD: handle short packets via raw socket */ + return ip6t_do_table(skb, hook, in, out, + init_net.ipv6.ip6table_security); +} + +static struct nf_hook_ops ip6t_ops[] __read_mostly = { + { + .hook = ip6t_local_in_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_SECURITY, + }, + { + .hook = ip6t_forward_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_FORWARD, + .priority = NF_IP6_PRI_SECURITY, + }, + { + .hook = ip6t_local_out_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_SECURITY, + }, +}; + +static int __net_init ip6table_security_net_init(struct net *net) +{ + net->ipv6.ip6table_security = + ip6t_register_table(net, &security_table, &initial_table.repl); + + if (IS_ERR(net->ipv6.ip6table_security)) + return PTR_ERR(net->ipv6.ip6table_security); + + return 0; +} + +static void __net_exit ip6table_security_net_exit(struct net *net) +{ + ip6t_unregister_table(net->ipv6.ip6table_security); +} + +static struct pernet_operations ip6table_security_net_ops = { + .init = ip6table_security_net_init, + .exit = ip6table_security_net_exit, +}; + +static int __init ip6table_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&ip6table_security_net_ops); + if (ret < 0) + return ret; + + ret = nf_register_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + if (ret < 0) + goto cleanup_table; + + return ret; + +cleanup_table: + unregister_pernet_subsys(&ip6table_security_net_ops); + return ret; +} + +static void __exit ip6table_security_fini(void) +{ + nf_unregister_hooks(ip6t_ops, ARRAY_SIZE(ip6t_ops)); + unregister_pernet_subsys(&ip6table_security_net_ops); +} + +module_init(ip6table_security_init); +module_exit(ip6table_security_fini); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index ee713b03e9e..14d47d83354 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -89,9 +89,8 @@ static int icmpv6_packet(struct nf_conn *ct, means this will only run once even if count hits zero twice (theoretically possible with SMP) */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + if (atomic_dec_and_test(&ct->proto.icmp.count)) + nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 798cabc7535..9391a6949b9 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -632,7 +632,7 @@ static struct inet6_protocol frag_protocol = }; #ifdef CONFIG_SYSCTL -static struct ctl_table ip6_frags_ctl_table[] = { +static struct ctl_table ip6_frags_ns_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", @@ -658,6 +658,10 @@ static struct ctl_table ip6_frags_ctl_table[] = { .proc_handler = &proc_dointvec_jiffies, .strategy = &sysctl_jiffies, }, + { } +}; + +static struct ctl_table ip6_frags_ctl_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", @@ -670,21 +674,20 @@ static struct ctl_table ip6_frags_ctl_table[] = { { } }; -static int ip6_frags_sysctl_register(struct net *net) +static int ip6_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; - table = ip6_frags_ctl_table; + table = ip6_frags_ns_ctl_table; if (net != &init_net) { - table = kmemdup(table, sizeof(ip6_frags_ctl_table), GFP_KERNEL); + table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); if (table == NULL) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; table[2].data = &net->ipv6.frags.timeout; - table[3].mode &= ~0222; } hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); @@ -701,7 +704,7 @@ err_alloc: return -ENOMEM; } -static void ip6_frags_sysctl_unregister(struct net *net) +static void ip6_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; @@ -709,13 +712,36 @@ static void ip6_frags_sysctl_unregister(struct net *net) unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); kfree(table); } + +static struct ctl_table_header *ip6_ctl_header; + +static int ip6_frags_sysctl_register(void) +{ + ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path, + ip6_frags_ctl_table); + return ip6_ctl_header == NULL ? -ENOMEM : 0; +} + +static void ip6_frags_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_ctl_header); +} #else -static inline int ip6_frags_sysctl_register(struct net *net) +static inline int ip6_frags_ns_sysctl_register(struct net *net) { return 0; } -static inline void ip6_frags_sysctl_unregister(struct net *net) +static inline void ip6_frags_ns_sysctl_unregister(struct net *net) +{ +} + +static inline int ip6_frags_sysctl_register(void) +{ + return 0; +} + +static inline void ip6_frags_sysctl_unregister(void) { } #endif @@ -728,12 +754,12 @@ static int ipv6_frags_init_net(struct net *net) inet_frags_init_net(&net->ipv6.frags); - return ip6_frags_sysctl_register(net); + return ip6_frags_ns_sysctl_register(net); } static void ipv6_frags_exit_net(struct net *net) { - ip6_frags_sysctl_unregister(net); + ip6_frags_ns_sysctl_unregister(net); inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); } @@ -750,7 +776,13 @@ int __init ipv6_frag_init(void) if (ret) goto out; - register_pernet_subsys(&ip6_frags_ops); + ret = ip6_frags_sysctl_register(); + if (ret) + goto err_sysctl; + + ret = register_pernet_subsys(&ip6_frags_ops); + if (ret) + goto err_pernet; ip6_frags.hashfn = ip6_hashfn; ip6_frags.constructor = ip6_frag_init; @@ -763,11 +795,18 @@ int __init ipv6_frag_init(void) inet_frags_init(&ip6_frags); out: return ret; + +err_pernet: + ip6_frags_sysctl_unregister(); +err_sysctl: + inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); + goto out; } void ipv6_frag_exit(void) { inet_frags_fini(&ip6_frags); + ip6_frags_sysctl_unregister(); unregister_pernet_subsys(&ip6_frags_ops); inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 12bba088034..48534c6c073 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -109,7 +109,7 @@ static struct dst_ops ip6_dst_ops_template = { .negative_advice = ip6_negative_advice, .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, - .local_out = ip6_local_out, + .local_out = __ip6_local_out, .entry_size = sizeof(struct rt6_info), .entries = ATOMIC_INIT(0), }; @@ -475,7 +475,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, lifetime = ntohl(rinfo->lifetime); if (lifetime == 0xffffffff) { /* infinity */ - } else if (lifetime > 0x7fffffff/HZ) { + } else if (lifetime > 0x7fffffff/HZ - 1) { /* Avoid arithmetic overflow */ lifetime = 0x7fffffff/HZ - 1; } @@ -1106,7 +1106,9 @@ int ip6_route_add(struct fib6_config *cfg) } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); + rt->rt6i_expires = (cfg->fc_flags & RTF_EXPIRES) ? + jiffies + clock_t_to_jiffies(cfg->fc_expires) : + 0; if (cfg->fc_protocol == RTPROT_UNSPEC) cfg->fc_protocol = RTPROT_BOOT; @@ -2200,7 +2202,9 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); - expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0; + expires = (rt->rt6i_flags & RTF_EXPIRES) ? + rt->rt6i_expires - jiffies : 0; + if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0, expires, rt->u.dst.error) < 0) goto nla_put_failure; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 5a6fab95569..6b8f0583b63 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -403,9 +403,8 @@ static void ipip6_tunnel_uninit(struct net_device *dev) static int ipip6_err(struct sk_buff *skb, u32 info) { -#ifndef I_WISH_WORLD_WERE_PERFECT -/* It is not :-( All the routers (except for Linux) return only +/* All the routers (except for Linux) return only 8 bytes of packet payload. It means, that precise relaying of ICMP in the real Internet is absolutely infeasible. */ @@ -462,92 +461,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) out: read_unlock(&ipip6_lock); return err; -#else - struct iphdr *iph = (struct iphdr*)dp; - int hlen = iph->ihl<<2; - struct ipv6hdr *iph6; - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - int rel_type = 0; - int rel_code = 0; - int rel_info = 0; - struct sk_buff *skb2; - struct rt6_info *rt6i; - - if (len < hlen + sizeof(struct ipv6hdr)) - return; - iph6 = (struct ipv6hdr*)(dp + hlen); - - switch (type) { - default: - return; - case ICMP_PARAMETERPROB: - if (icmp_hdr(skb)->un.gateway < hlen) - return; - - /* So... This guy found something strange INSIDE encapsulated - packet. Well, he is fool, but what can we do ? - */ - rel_type = ICMPV6_PARAMPROB; - rel_info = icmp_hdr(skb)->un.gateway - hlen; - break; - - case ICMP_DEST_UNREACH: - switch (code) { - case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: - /* Impossible event. */ - return; - case ICMP_FRAG_NEEDED: - /* Too complicated case ... */ - return; - default: - /* All others are translated to HOST_UNREACH. - rfc2003 contains "deep thoughts" about NET_UNREACH, - I believe, it is just ether pollution. --ANK - */ - rel_type = ICMPV6_DEST_UNREACH; - rel_code = ICMPV6_ADDR_UNREACH; - break; - } - break; - case ICMP_TIME_EXCEEDED: - if (code != ICMP_EXC_TTL) - return; - rel_type = ICMPV6_TIME_EXCEED; - rel_code = ICMPV6_EXC_HOPLIMIT; - break; - } - - /* Prepare fake skb to feed it to icmpv6_send */ - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 == NULL) - return 0; - dst_release(skb2->dst); - skb2->dst = NULL; - skb_pull(skb2, skb->data - (u8*)iph6); - skb_reset_network_header(skb2); - - /* Try to guess incoming interface */ - rt6i = rt6_lookup(dev_net(skb->dev), &iph6->saddr, NULL, NULL, 0); - if (rt6i && rt6i->rt6i_dev) { - skb2->dev = rt6i->rt6i_dev; - - rt6i = rt6_lookup(dev_net(skb->dev), - &iph6->daddr, &iph6->saddr, NULL, 0); - - if (rt6i && rt6i->rt6i_dev && rt6i->rt6i_dev->type == ARPHRD_SIT) { - struct ip_tunnel *t = netdev_priv(rt6i->rt6i_dev); - if (rel_type == ICMPV6_TIME_EXCEED && t->parms.iph.ttl) { - rel_type = ICMPV6_DEST_UNREACH; - rel_code = ICMPV6_ADDR_UNREACH; - } - icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev); - } - } - kfree_skb(skb2); - return 0; -#endif } static inline void ipip6_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb) @@ -578,13 +491,13 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { - tunnel->stat.rx_errors++; + tunnel->dev->stats.rx_errors++; read_unlock(&ipip6_lock); kfree_skb(skb); return 0; } - tunnel->stat.rx_packets++; - tunnel->stat.rx_bytes += skb->len; + tunnel->dev->stats.rx_packets++; + tunnel->dev->stats.rx_bytes += skb->len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; @@ -624,7 +537,7 @@ static inline __be32 try_6to4(struct in6_addr *v6dst) static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - struct net_device_stats *stats = &tunnel->stat; + struct net_device_stats *stats = &tunnel->dev->stats; struct iphdr *tiph = &tunnel->parms.iph; struct ipv6hdr *iph6 = ipv6_hdr(skb); u8 tos = tunnel->parms.iph.tos; @@ -638,7 +551,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) int addr_type; if (tunnel->recursion++) { - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -705,20 +618,20 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) .oif = tunnel->parms.link, .proto = IPPROTO_IPV6 }; if (ip_route_output_key(dev_net(dev), &rt, &fl)) { - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } } if (rt->rt_type != RTN_UNICAST) { ip_rt_put(rt); - tunnel->stat.tx_carrier_errors++; + stats->tx_carrier_errors++; goto tx_error_icmp; } tdev = rt->u.dst.dev; if (tdev == dev) { ip_rt_put(rt); - tunnel->stat.collisions++; + stats->collisions++; goto tx_error; } @@ -728,7 +641,7 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; if (mtu < 68) { - tunnel->stat.collisions++; + stats->collisions++; ip_rt_put(rt); goto tx_error; } @@ -1003,11 +916,6 @@ done: return err; } -static struct net_device_stats *ipip6_tunnel_get_stats(struct net_device *dev) -{ - return &(((struct ip_tunnel*)netdev_priv(dev))->stat); -} - static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < IPV6_MIN_MTU || new_mtu > 0xFFF8 - sizeof(struct iphdr)) @@ -1021,7 +929,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->uninit = ipip6_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipip6_tunnel_xmit; - dev->get_stats = ipip6_tunnel_get_stats; dev->do_ioctl = ipip6_tunnel_ioctl; dev->change_mtu = ipip6_tunnel_change_mtu; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 3804dcbbfab..5c99274558b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -37,6 +37,10 @@ static ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = &proc_dointvec }, + { .ctl_name = 0 } +}; + +static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_MLD_MAX_MSF, .procname = "mld_max_msf", @@ -80,12 +84,6 @@ static int ipv6_sysctl_net_init(struct net *net) ipv6_table[2].data = &net->ipv6.sysctl.bindv6only; - /* We don't want this value to be per namespace, it should be global - to all namespaces, so make it read-only when we are not in the - init network namespace */ - if (net != &init_net) - ipv6_table[3].mode = 0444; - net->ipv6.sysctl.table = register_net_sysctl_table(net, net_ipv6_ctl_path, ipv6_table); if (!net->ipv6.sysctl.table) @@ -126,12 +124,29 @@ static struct pernet_operations ipv6_sysctl_net_ops = { .exit = ipv6_sysctl_net_exit, }; +static struct ctl_table_header *ip6_header; + int ipv6_sysctl_register(void) { - return register_pernet_subsys(&ipv6_sysctl_net_ops); + int err = -ENOMEM;; + + ip6_header = register_net_sysctl_rotable(net_ipv6_ctl_path, ipv6_table); + if (ip6_header == NULL) + goto out; + + err = register_pernet_subsys(&ipv6_sysctl_net_ops); + if (err) + goto err_pernet; +out: + return err; + +err_pernet: + unregister_net_sysctl_table(ip6_header); + goto out; } void ipv6_sysctl_unregister(void) { + unregister_net_sysctl_table(ip6_header); unregister_pernet_subsys(&ipv6_sysctl_net_ops); } diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index e0eab5927c4..f6e54fa97f4 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -628,8 +628,8 @@ dev_irnet_poll(struct file * file, * This is the way pppd configure us and control us while the PPP * instance is active. */ -static int -dev_irnet_ioctl(struct inode * inode, +static long +dev_irnet_ioctl( struct file * file, unsigned int cmd, unsigned long arg) @@ -660,6 +660,7 @@ dev_irnet_ioctl(struct inode * inode, { DEBUG(FS_INFO, "Entering PPP discipline.\n"); /* PPP channel setup (ap->chan in configued in dev_irnet_open())*/ + lock_kernel(); err = ppp_register_channel(&ap->chan); if(err == 0) { @@ -672,12 +673,14 @@ dev_irnet_ioctl(struct inode * inode, } else DERROR(FS_ERROR, "Can't setup PPP channel...\n"); + unlock_kernel(); } else { /* In theory, should be N_TTY */ DEBUG(FS_INFO, "Exiting PPP discipline.\n"); /* Disconnect from the generic PPP layer */ + lock_kernel(); if(ap->ppp_open) { ap->ppp_open = 0; @@ -686,24 +689,20 @@ dev_irnet_ioctl(struct inode * inode, else DERROR(FS_ERROR, "Channel not registered !\n"); err = 0; + unlock_kernel(); } break; /* Query PPP channel and unit number */ case PPPIOCGCHAN: - if(!ap->ppp_open) - break; - if(put_user(ppp_channel_index(&ap->chan), (int __user *)argp)) - break; - DEBUG(FS_INFO, "Query channel.\n"); - err = 0; + if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), + (int __user *)argp)) + err = 0; break; case PPPIOCGUNIT: - if(!ap->ppp_open) - break; - if(put_user(ppp_unit_number(&ap->chan), (int __user *)argp)) - break; - DEBUG(FS_INFO, "Query unit number.\n"); + lock_kernel(); + if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), + (int __user *)argp)) err = 0; break; @@ -723,34 +722,39 @@ dev_irnet_ioctl(struct inode * inode, DEBUG(FS_INFO, "Standard PPP ioctl.\n"); if(!capable(CAP_NET_ADMIN)) err = -EPERM; - else + else { + lock_kernel(); err = ppp_irnet_ioctl(&ap->chan, cmd, arg); + unlock_kernel(); + } break; /* TTY IOCTLs : Pretend that we are a tty, to keep pppd happy */ /* Get termios */ case TCGETS: DEBUG(FS_INFO, "Get termios.\n"); + lock_kernel(); #ifndef TCGETS2 - if(kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) - break; + if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) + err = 0; #else if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios)) - break; + err = 0; #endif - err = 0; + unlock_kernel(); break; /* Set termios */ case TCSETSF: DEBUG(FS_INFO, "Set termios.\n"); + lock_kernel(); #ifndef TCGETS2 - if(user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) - break; + if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) + err = 0; #else - if(user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) - break; + if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) + err = 0; #endif - err = 0; + unlock_kernel(); break; /* Set DTR/RTS */ @@ -773,7 +777,9 @@ dev_irnet_ioctl(struct inode * inode, * We should also worry that we don't accept junk here and that * we get rid of our own buffers */ #ifdef FLUSH_TO_PPP + lock_kernel(); ppp_output_wakeup(&ap->chan); + unlock_kernel(); #endif /* FLUSH_TO_PPP */ err = 0; break; @@ -788,7 +794,7 @@ dev_irnet_ioctl(struct inode * inode, default: DERROR(FS_ERROR, "Unsupported ioctl (0x%X)\n", cmd); - err = -ENOIOCTLCMD; + err = -ENOTTY; } DEXIT(FS_TRACE, " - err = 0x%X\n", err); diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h index d2beb7df8f7..d9f8bd4ebd0 100644 --- a/net/irda/irnet/irnet_ppp.h +++ b/net/irda/irnet/irnet_ppp.h @@ -76,9 +76,8 @@ static ssize_t static unsigned int dev_irnet_poll(struct file *, poll_table *); -static int - dev_irnet_ioctl(struct inode *, - struct file *, +static long + dev_irnet_ioctl(struct file *, unsigned int, unsigned long); /* ------------------------ PPP INTERFACE ------------------------ */ @@ -102,7 +101,7 @@ static struct file_operations irnet_device_fops = .read = dev_irnet_read, .write = dev_irnet_write, .poll = dev_irnet_poll, - .ioctl = dev_irnet_ioctl, + .unlocked_ioctl = dev_irnet_ioctl, .open = dev_irnet_open, .release = dev_irnet_close /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 7b0038f45b1..58e4aee3e69 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -644,6 +644,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, } txmsg.class = 0; + memcpy(&txmsg.class, skb->data, skb->len >= 4 ? 4 : skb->len); txmsg.tag = iucv->send_tag++; memcpy(skb->cb, &txmsg.tag, 4); skb_queue_tail(&iucv->send_skb_q, skb); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 91897076213..531a206ce7a 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -474,14 +474,14 @@ static void iucv_setmask_mp(void) { int cpu; - preempt_disable(); + get_online_cpus(); for_each_online_cpu(cpu) /* Enable all cpus with a declared buffer. */ if (cpu_isset(cpu, iucv_buffer_cpumask) && !cpu_isset(cpu, iucv_irq_cpumask)) smp_call_function_single(cpu, iucv_allow_cpu, NULL, 0, 1); - preempt_enable(); + put_online_cpus(); } /** @@ -521,16 +521,17 @@ static int iucv_enable(void) goto out; /* Declare per cpu buffers. */ rc = -EIO; - preempt_disable(); + get_online_cpus(); for_each_online_cpu(cpu) smp_call_function_single(cpu, iucv_declare_cpu, NULL, 0, 1); - preempt_enable(); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out_path; + put_online_cpus(); return 0; out_path: + put_online_cpus(); kfree(iucv_path_table); out: return rc; @@ -545,7 +546,9 @@ out: */ static void iucv_disable(void) { + get_online_cpus(); on_each_cpu(iucv_retrieve_cpu, NULL, 0, 1); + put_online_cpus(); kfree(iucv_path_table); } @@ -598,7 +601,7 @@ static int __cpuinit iucv_cpu_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata iucv_cpu_notifier = { +static struct notifier_block __refdata iucv_cpu_notifier = { .notifier_call = iucv_cpu_notify, }; diff --git a/net/key/af_key.c b/net/key/af_key.c index 9e7236ff6bc..9bba7ac5fee 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1251,7 +1251,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, x->sel.prefixlen_s = addr->sadb_address_prefixlen; } - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (!x->sel.family) x->sel.family = x->props.family; if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) { diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d30c11337b0..adbc1c804dd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -721,7 +721,17 @@ static void ieee80211_send_assoc(struct net_device *dev, capab |= WLAN_CAPABILITY_PRIVACY; if (bss->wmm_ie) wmm = 1; + + /* get all rates supported by the device and the AP as + * some APs don't like getting a superset of their rates + * in the association request (e.g. D-Link DAP 1353 in + * b-only mode) */ + rates_len = ieee80211_compatible_rates(bss, sband, &rates); + ieee80211_rx_bss_put(dev, bss); + } else { + rates = ~0; + rates_len = sband->n_bitrates; } mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); @@ -752,10 +762,7 @@ static void ieee80211_send_assoc(struct net_device *dev, *pos++ = ifsta->ssid_len; memcpy(pos, ifsta->ssid, ifsta->ssid_len); - /* all supported rates should be added here but some APs - * (e.g. D-Link DAP 1353 in b-only mode) don't like that - * Therefore only add rates the AP supports */ - rates_len = ieee80211_compatible_rates(bss, sband, &rates); + /* add all rates which were marked to be used above */ supp_rates_len = rates_len; if (supp_rates_len > 8) supp_rates_len = 8; @@ -3476,21 +3483,17 @@ static int ieee80211_sta_config_auth(struct net_device *dev, struct ieee80211_sta_bss *bss, *selected = NULL; int top_rssi = 0, freq; - if (!(ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | - IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL))) { - ifsta->state = IEEE80211_AUTHENTICATE; - ieee80211_sta_reset_auth(dev, ifsta); - return 0; - } - spin_lock_bh(&local->sta_bss_lock); freq = local->oper_channel->center_freq; list_for_each_entry(bss, &local->sta_bss_list, list) { if (!(bss->capability & WLAN_CAPABILITY_ESS)) continue; - if (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ - !!sdata->default_key) + if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL)) && + (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ + !!sdata->default_key)) continue; if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 4f7180b287d..5a77e2c01f4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -384,6 +384,41 @@ void ieee80211_iterate_active_interfaces( struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; + rtnl_lock(); + + list_for_each_entry(sdata, &local->interfaces, list) { + switch (sdata->vif.type) { + case IEEE80211_IF_TYPE_INVALID: + case IEEE80211_IF_TYPE_MNTR: + case IEEE80211_IF_TYPE_VLAN: + continue; + case IEEE80211_IF_TYPE_AP: + case IEEE80211_IF_TYPE_STA: + case IEEE80211_IF_TYPE_IBSS: + case IEEE80211_IF_TYPE_WDS: + case IEEE80211_IF_TYPE_MESH_POINT: + break; + } + if (sdata->dev == local->mdev) + continue; + if (netif_running(sdata->dev)) + iterator(data, sdata->dev->dev_addr, + &sdata->vif); + } + + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); + +void ieee80211_iterate_active_interfaces_atomic( + struct ieee80211_hw *hw, + void (*iterator)(void *data, u8 *mac, + struct ieee80211_vif *vif), + void *data) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { @@ -408,4 +443,4 @@ void ieee80211_iterate_active_interfaces( rcu_read_unlock(); } -EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); +EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 6a342a9a40c..c2e2378af08 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -221,7 +221,6 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->num_frequency = c; IW_EVENT_CAPA_SET_KERNEL(range->event_capa); - IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWTHRSPY); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c4b1799da5d..e6d645221d5 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -848,6 +848,25 @@ acct: } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); +void __nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb, + int do_acct) +{ +#ifdef CONFIG_NF_CT_ACCT + if (do_acct) { + spin_lock_bh(&nf_conntrack_lock); + ct->counters[CTINFO2DIR(ctinfo)].packets++; + ct->counters[CTINFO2DIR(ctinfo)].bytes += + skb->len - skb_network_offset(skb); + spin_unlock_bh(&nf_conntrack_lock); + } +#endif + if (del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); +} +EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include <linux/netfilter/nfnetlink.h> diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index bcc19fa4ed1..ba1c4915e9e 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -88,13 +88,11 @@ void *__nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) newlen = newoff + t->len; rcu_read_unlock(); - if (newlen >= ksize(ct->ext)) { - new = kmalloc(newlen, gfp); - if (!new) - return NULL; - - memcpy(new, ct->ext, ct->ext->len); + new = krealloc(ct->ext, newlen, gfp); + if (!new) + return NULL; + if (new != ct->ext) { for (i = 0; i < NF_CT_EXT_NUM; i++) { if (!nf_ct_ext_exist(ct, i)) continue; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0edefcfc594..63c4e1f299b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@ * (C) 2001 by Jay Schulist <jschlst@samba.org> * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org> * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005-2007 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org> * * Initial connection tracking via netlink development funded and * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -475,14 +475,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (ctnetlink_dump_id(skb, ct) < 0) goto nla_put_failure; + if (ctnetlink_dump_status(skb, ct) < 0) + goto nla_put_failure; + if (events & IPCT_DESTROY) { if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nla_put_failure; } else { - if (ctnetlink_dump_status(skb, ct) < 0) - goto nla_put_failure; - if (ctnetlink_dump_timeout(skb, ct) < 0) goto nla_put_failure; @@ -812,9 +812,8 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, return -ENOENT; } } - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill(ct); nf_ct_put(ct); return 0; @@ -891,20 +890,19 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) /* unchangeable */ - return -EINVAL; + return -EBUSY; if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) /* SEEN_REPLY bit can only be set */ - return -EINVAL; - + return -EBUSY; if (d & IPS_ASSURED && !(status & IPS_ASSURED)) /* ASSURED bit can only be set */ - return -EINVAL; + return -EBUSY; if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { #ifndef CONFIG_NF_NAT_NEEDED - return -EINVAL; + return -EOPNOTSUPP; #else struct nf_nat_range range; @@ -945,7 +943,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) /* don't change helper of sibling connections */ if (ct->master) - return -EINVAL; + return -EBUSY; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); if (err < 0) @@ -963,7 +961,7 @@ ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) helper = __nf_conntrack_helper_find_byname(helpname); if (helper == NULL) - return -EINVAL; + return -EOPNOTSUPP; if (help) { if (help->helper == helper) @@ -1258,12 +1256,12 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { /* we only allow nat config for new conntracks */ if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = -EINVAL; + err = -EOPNOTSUPP; goto out_unlock; } /* can't link an existing conntrack to a master */ if (cda[CTA_TUPLE_MASTER]) { - err = -EINVAL; + err = -EOPNOTSUPP; goto out_unlock; } err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), @@ -1608,7 +1606,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, h = __nf_conntrack_helper_find_byname(name); if (!h) { spin_unlock_bh(&nf_conntrack_lock); - return -EINVAL; + return -EOPNOTSUPP; } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index afb4a1861d2..e7866dd3cde 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -475,8 +475,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, if (type == DCCP_PKT_RESET && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* Tear down connection immediately if only reply is a RESET */ - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index cbf2e27a22b..41183a4d2d6 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -463,6 +463,82 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, + const struct nf_conn *ct) +{ + struct nlattr *nest_parms; + + read_lock_bh(&sctp_lock); + nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); + if (!nest_parms) + goto nla_put_failure; + + NLA_PUT_U8(skb, CTA_PROTOINFO_SCTP_STATE, ct->proto.sctp.state); + + NLA_PUT_BE32(skb, + CTA_PROTOINFO_SCTP_VTAG_ORIGINAL, + htonl(ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL])); + + NLA_PUT_BE32(skb, + CTA_PROTOINFO_SCTP_VTAG_REPLY, + htonl(ct->proto.sctp.vtag[IP_CT_DIR_REPLY])); + + read_unlock_bh(&sctp_lock); + + nla_nest_end(skb, nest_parms); + + return 0; + +nla_put_failure: + read_unlock_bh(&sctp_lock); + return -1; +} + +static const struct nla_policy sctp_nla_policy[CTA_PROTOINFO_SCTP_MAX+1] = { + [CTA_PROTOINFO_SCTP_STATE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] = { .type = NLA_U32 }, + [CTA_PROTOINFO_SCTP_VTAG_REPLY] = { .type = NLA_U32 }, +}; + +static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) +{ + struct nlattr *attr = cda[CTA_PROTOINFO_SCTP]; + struct nlattr *tb[CTA_PROTOINFO_SCTP_MAX+1]; + int err; + + /* updates may not contain the internal protocol info, skip parsing */ + if (!attr) + return 0; + + err = nla_parse_nested(tb, + CTA_PROTOINFO_SCTP_MAX, + attr, + sctp_nla_policy); + if (err < 0) + return err; + + if (!tb[CTA_PROTOINFO_SCTP_STATE] || + !tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL] || + !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) + return -EINVAL; + + write_lock_bh(&sctp_lock); + ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); + ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = + ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL])); + ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = + ntohl(nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])); + write_unlock_bh(&sctp_lock); + + return 0; +} +#endif + #ifdef CONFIG_SYSCTL static unsigned int sctp_sysctl_table_users; static struct ctl_table_header *sctp_sysctl_header; @@ -591,6 +667,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .new = sctp_new, .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nlattr = sctp_to_nlattr, + .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, @@ -617,6 +695,8 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .new = sctp_new, .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .to_nlattr = sctp_to_nlattr, + .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index ba94004fe32..8db13fba10b 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -843,8 +843,7 @@ static int tcp_packet(struct nf_conn *ct, /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ write_unlock_bh(&tcp_lock); - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill(ct); return -NF_REPEAT; } /* Fall through */ @@ -877,8 +876,7 @@ static int tcp_packet(struct nf_conn *ct, if (LOG_INVALID(IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill(ct); return -NF_DROP; } ct->proto.tcp.last_index = index; @@ -961,8 +959,7 @@ static int tcp_packet(struct nf_conn *ct, problem case, so we can delete the conntrack immediately. --RR */ if (th->rst) { - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); + nf_ct_kill_acct(ct, ctinfo, skb); return NF_ACCEPT; } } else if (!test_bit(IPS_ASSURED_BIT, &ct->status) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 3447025ce06..04e9c965f8c 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -243,7 +243,6 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, switch ((enum nfqnl_config_mode)queue->copy_mode) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: - data_len = 0; break; case NFQNL_COPY_PACKET: diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 211189eb2b6..76ca1f2421e 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -8,7 +8,7 @@ * Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com> * by Henrik Nordstrom <hno@marasystems.com> * - * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com> + * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -94,6 +94,12 @@ connsecmark_tg_check(const char *tablename, const void *entry, { const struct xt_connsecmark_target_info *info = targinfo; + if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) { + printk(KERN_INFO PFX "target only valid in the \'mangle\' " + "or \'security\' tables, not \'%s\'.\n", tablename); + return false; + } + switch (info->mode) { case CONNSECMARK_SAVE: case CONNSECMARK_RESTORE: @@ -126,7 +132,6 @@ static struct xt_target connsecmark_tg_reg[] __read_mostly = { .destroy = connsecmark_tg_destroy, .target = connsecmark_tg, .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", .me = THIS_MODULE, }, { @@ -136,7 +141,6 @@ static struct xt_target connsecmark_tg_reg[] __read_mostly = { .destroy = connsecmark_tg_destroy, .target = connsecmark_tg, .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", .me = THIS_MODULE, }, }; diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index c0284856ccd..94f87ee7552 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -5,7 +5,7 @@ * Based on the nfmark match by: * (C) 1999-2001 Marc Boucher <marc@mbsi.ca> * - * (C) 2006 Red Hat, Inc., James Morris <jmorris@redhat.com> + * (C) 2006,2008 Red Hat, Inc., James Morris <jmorris@redhat.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -89,6 +89,12 @@ secmark_tg_check(const char *tablename, const void *entry, { struct xt_secmark_target_info *info = targinfo; + if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) { + printk(KERN_INFO PFX "target only valid in the \'mangle\' " + "or \'security\' tables, not \'%s\'.\n", tablename); + return false; + } + if (mode && mode != info->mode) { printk(KERN_INFO PFX "mode already set to %hu cannot mix with " "rules for mode %hu\n", mode, info->mode); @@ -127,7 +133,6 @@ static struct xt_target secmark_tg_reg[] __read_mostly = { .destroy = secmark_tg_destroy, .target = secmark_tg, .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", .me = THIS_MODULE, }, { @@ -137,7 +142,6 @@ static struct xt_target secmark_tg_reg[] __read_mostly = { .destroy = secmark_tg_destroy, .target = secmark_tg, .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", .me = THIS_MODULE, }, }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9b97f8006c9..6507c02dbe0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -759,7 +759,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 0: continue * 1: repeat lookup - reference dropped while waiting for socket memory. */ -int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, +int netlink_attachskb(struct sock *sk, struct sk_buff *skb, long *timeo, struct sock *ssk) { struct netlink_sock *nlk; @@ -892,7 +892,7 @@ retry: return err; } - err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk); + err = netlink_attachskb(sk, skb, &timeo, ssk); if (err == 1) goto retry; if (err) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1086df7478b..9360fc81e8c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -220,7 +220,7 @@ replay: tp = kzalloc(sizeof(*tp), GFP_KERNEL); if (tp == NULL) goto errout; - err = -EINVAL; + err = -ENOENT; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); if (tp_ops == NULL) { #ifdef CONFIG_KMOD diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 81600eea05d..253e5ea7e1e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2471,7 +2471,7 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, (trans->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_ENABLE; } - if (params.sack_delay == 1) { + if (params.sack_freq == 1) { trans->param_flags = (trans->param_flags & ~SPP_SACKDELAY) | SPP_SACKDELAY_DISABLE; @@ -3536,7 +3536,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->pathmaxrxt = sctp_max_retrans_path; sp->pathmtu = 0; // allow default discovery sp->sackdelay = sctp_sack_timeout; - sp->sackfreq = 3; + sp->sackfreq = 2; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; diff --git a/net/sysctl_net.c b/net/sysctl_net.c index b4f0525f91a..d8e79162724 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -40,6 +40,27 @@ static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, }; +static LIST_HEAD(net_sysctl_ro_tables); +static struct list_head *net_ctl_ro_header_lookup(struct ctl_table_root *root, + struct nsproxy *namespaces) +{ + return &net_sysctl_ro_tables; +} + +static int net_ctl_ro_header_perms(struct ctl_table_root *root, + struct nsproxy *namespaces, struct ctl_table *table) +{ + if (namespaces->net_ns == &init_net) + return table->mode; + else + return table->mode & ~0222; +} + +static struct ctl_table_root net_sysctl_ro_root = { + .lookup = net_ctl_ro_header_lookup, + .permissions = net_ctl_ro_header_perms, +}; + static int sysctl_net_init(struct net *net) { INIT_LIST_HEAD(&net->sysctl_table_headers); @@ -64,6 +85,7 @@ static __init int sysctl_init(void) if (ret) goto out; register_sysctl_root(&net_sysctl_root); + register_sysctl_root(&net_sysctl_ro_root); out: return ret; } @@ -80,6 +102,14 @@ struct ctl_table_header *register_net_sysctl_table(struct net *net, } EXPORT_SYMBOL_GPL(register_net_sysctl_table); +struct ctl_table_header *register_net_sysctl_rotable(const + struct ctl_path *path, struct ctl_table *table) +{ + return __register_sysctl_paths(&net_sysctl_ro_root, + &init_nsproxy, path, table); +} +EXPORT_SYMBOL_GPL(register_net_sysctl_rotable); + void unregister_net_sysctl_table(struct ctl_table_header *header) { unregister_sysctl_table(header); diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e7880172ef1..a5883b1452f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -276,7 +276,7 @@ static void bclink_send_nack(struct node *n_ptr) if (buf) { msg = buf_msg(buf); msg_init(msg, BCAST_PROTOCOL, STATE_MSG, - TIPC_OK, INT_H_SIZE, n_ptr->addr); + INT_H_SIZE, n_ptr->addr); msg_set_mc_netid(msg, tipc_net_id); msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); msg_set_bcgap_after(msg, n_ptr->bclink.gap_after); @@ -571,7 +571,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf, assert(tipc_cltr_bcast_nodes.count != 0); bcbuf_set_acks(buf, tipc_cltr_bcast_nodes.count); msg = buf_msg(buf); - msg_set_non_seq(msg); + msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); } diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index 4bb3404f610..bc1db474fe0 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -238,7 +238,7 @@ static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest) if (buf) { msg = buf_msg(buf); memset((char *)msg, 0, size); - msg_init(msg, ROUTE_DISTRIBUTOR, 0, TIPC_OK, INT_H_SIZE, dest); + msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest); } return buf; } diff --git a/net/tipc/config.c b/net/tipc/config.c index 91d56f8fee9..ca3544d030c 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -2,7 +2,7 @@ * net/tipc/config.c: TIPC configuration management code * * Copyright (c) 2002-2006, Ericsson AB - * Copyright (c) 2004-2006, Wind River Systems + * Copyright (c) 2004-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -293,7 +293,6 @@ static struct sk_buff *cfg_set_own_addr(void) if (tipc_mode == TIPC_NET_MODE) return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (cannot change node address once assigned)"); - tipc_own_addr = addr; /* * Must release all spinlocks before calling start_net() because @@ -306,7 +305,7 @@ static struct sk_buff *cfg_set_own_addr(void) */ spin_unlock_bh(&config_lock); - tipc_core_start_net(); + tipc_core_start_net(addr); spin_lock_bh(&config_lock); return tipc_cfg_reply_none(); } @@ -602,6 +601,10 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area case TIPC_CMD_GET_NETID: rep_tlv_buf = tipc_cfg_reply_unsigned(tipc_net_id); break; + case TIPC_CMD_NOT_NET_ADMIN: + rep_tlv_buf = + tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); + break; default: rep_tlv_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED " (unknown command)"); diff --git a/net/tipc/core.c b/net/tipc/core.c index 3d97386af09..3256bd7d398 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -117,11 +117,11 @@ void tipc_core_stop_net(void) * start_net - start TIPC networking sub-systems */ -int tipc_core_start_net(void) +int tipc_core_start_net(unsigned long addr) { int res; - if ((res = tipc_net_start()) || + if ((res = tipc_net_start(addr)) || (res = tipc_eth_media_start())) { tipc_core_stop_net(); } diff --git a/net/tipc/core.h b/net/tipc/core.h index bd78d1705c0..a881f92a853 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -202,7 +202,7 @@ extern atomic_t tipc_user_count; extern int tipc_core_start(void); extern void tipc_core_stop(void); -extern int tipc_core_start_net(void); +extern int tipc_core_start_net(unsigned long addr); extern void tipc_core_stop_net(void); extern int tipc_handler_start(void); extern void tipc_handler_stop(void); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 5d643e5721e..1657f0e795f 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -120,9 +120,8 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, if (buf) { msg = buf_msg(buf); - msg_init(msg, LINK_CONFIG, type, TIPC_OK, DSC_H_SIZE, - dest_domain); - msg_set_non_seq(msg); + msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain); + msg_set_non_seq(msg, 1); msg_set_req_links(msg, req_links); msg_set_dest_domain(msg, dest_domain); msg_set_bc_netid(msg, tipc_net_id); @@ -156,11 +155,11 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr, /** * tipc_disc_recv_msg - handle incoming link setup message (request or response) * @buf: buffer containing message + * @b_ptr: bearer that message arrived on */ -void tipc_disc_recv_msg(struct sk_buff *buf) +void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr) { - struct bearer *b_ptr = (struct bearer *)TIPC_SKB_CB(buf)->handle; struct link *link; struct tipc_media_addr media_addr; struct tipc_msg *msg = buf_msg(buf); @@ -200,9 +199,8 @@ void tipc_disc_recv_msg(struct sk_buff *buf) dbg(" in own cluster\n"); if (n_ptr == NULL) { n_ptr = tipc_node_create(orig); - } - if (n_ptr == NULL) { - return; + if (!n_ptr) + return; } spin_lock_bh(&n_ptr->lock); link = n_ptr->links[b_ptr->identity]; diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 9fd7587b143..c36eaeb7d5d 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -48,7 +48,7 @@ struct link_req *tipc_disc_init_link_req(struct bearer *b_ptr, void tipc_disc_update_link_req(struct link_req *req); void tipc_disc_stop_link_req(struct link_req *req); -void tipc_disc_recv_msg(struct sk_buff *buf); +void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr); void tipc_disc_link_event(u32 addr, char *name, int up); #if 0 diff --git a/net/tipc/link.c b/net/tipc/link.c index bd206ebe4ee..9784a8e963b 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -51,6 +51,12 @@ /* + * Out-of-range value for link session numbers + */ + +#define INVALID_SESSION 0x10000 + +/* * Limit for deferred reception queue: */ @@ -462,9 +468,9 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg; msg = l_ptr->pmsg; - msg_init(msg, LINK_PROTOCOL, RESET_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr); msg_set_size(msg, sizeof(l_ptr->proto_msg)); - msg_set_session(msg, tipc_random); + msg_set_session(msg, (tipc_random & 0xffff)); msg_set_bearer_id(msg, b_ptr->identity); strcpy((char *)msg_data(msg), if_name); @@ -705,10 +711,10 @@ void tipc_link_reset(struct link *l_ptr) u32 checkpoint = l_ptr->next_in_no; int was_active_link = tipc_link_is_active(l_ptr); - msg_set_session(l_ptr->pmsg, msg_session(l_ptr->pmsg) + 1); + msg_set_session(l_ptr->pmsg, ((msg_session(l_ptr->pmsg) + 1) & 0xffff)); - /* Link is down, accept any session: */ - l_ptr->peer_session = 0; + /* Link is down, accept any session */ + l_ptr->peer_session = INVALID_SESSION; /* Prepare for max packet size negotiation */ link_init_max_pkt(l_ptr); @@ -1122,7 +1128,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) if (bundler) { msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG, - TIPC_OK, INT_H_SIZE, l_ptr->addr); + INT_H_SIZE, l_ptr->addr); skb_copy_to_linear_data(bundler, &bundler_hdr, INT_H_SIZE); skb_trim(bundler, INT_H_SIZE); @@ -1386,7 +1392,7 @@ again: msg_dbg(hdr, ">FRAGMENTING>"); msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - TIPC_OK, INT_H_SIZE, msg_destnode(hdr)); + INT_H_SIZE, msg_destnode(hdr)); msg_set_link_selector(&fragm_hdr, sender->publ.ref); msg_set_size(&fragm_hdr, max_pkt); msg_set_fragm_no(&fragm_hdr, 1); @@ -1760,21 +1766,6 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf, l_ptr->retransm_queue_head = l_ptr->retransm_queue_size = 0; } -/* - * link_recv_non_seq: Receive packets which are outside - * the link sequence flow - */ - -static void link_recv_non_seq(struct sk_buff *buf) -{ - struct tipc_msg *msg = buf_msg(buf); - - if (msg_user(msg) == LINK_CONFIG) - tipc_disc_recv_msg(buf); - else - tipc_bclink_recv_pkt(buf); -} - /** * link_insert_deferred_queue - insert deferred messages back into receive chain */ @@ -1851,7 +1842,7 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) { read_lock_bh(&tipc_net_lock); while (head) { - struct bearer *b_ptr; + struct bearer *b_ptr = (struct bearer *)tb_ptr; struct node *n_ptr; struct link *l_ptr; struct sk_buff *crs; @@ -1862,9 +1853,6 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) u32 released = 0; int type; - b_ptr = (struct bearer *)tb_ptr; - TIPC_SKB_CB(buf)->handle = b_ptr; - head = head->next; /* Ensure message is well-formed */ @@ -1883,7 +1871,10 @@ void tipc_recv_msg(struct sk_buff *head, struct tipc_bearer *tb_ptr) msg = buf_msg(buf); if (unlikely(msg_non_seq(msg))) { - link_recv_non_seq(buf); + if (msg_user(msg) == LINK_CONFIG) + tipc_disc_recv_msg(buf, b_ptr); + else + tipc_bclink_recv_pkt(buf); continue; } @@ -1990,8 +1981,6 @@ deliver: if (link_recv_changeover_msg(&l_ptr, &buf)) { msg = buf_msg(buf); seq_no = msg_seqno(msg); - TIPC_SKB_CB(buf)->handle - = b_ptr; if (type == ORIGINAL_MSG) goto deliver; goto protocol_check; @@ -2275,7 +2264,8 @@ static void link_recv_proto_msg(struct link *l_ptr, struct sk_buff *buf) switch (msg_type(msg)) { case RESET_MSG: - if (!link_working_unknown(l_ptr) && l_ptr->peer_session) { + if (!link_working_unknown(l_ptr) && + (l_ptr->peer_session != INVALID_SESSION)) { if (msg_session(msg) == l_ptr->peer_session) { dbg("Duplicate RESET: %u<->%u\n", msg_session(msg), l_ptr->peer_session); @@ -2436,7 +2426,7 @@ void tipc_link_changeover(struct link *l_ptr) } msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - ORIGINAL_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); msg_set_msgcnt(&tunnel_hdr, msgcount); dbg("Link changeover requires %u tunnel messages\n", msgcount); @@ -2491,7 +2481,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) struct tipc_msg tunnel_hdr; msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL, - DUPLICATE_MSG, TIPC_OK, INT_H_SIZE, l_ptr->addr); + DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr); msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size); msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id); iter = l_ptr->first_out; @@ -2684,10 +2674,12 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) u32 pack_sz = link_max_pkt(l_ptr); u32 fragm_sz = pack_sz - INT_H_SIZE; u32 fragm_no = 1; - u32 destaddr = msg_destnode(inmsg); + u32 destaddr; if (msg_short(inmsg)) destaddr = l_ptr->addr; + else + destaddr = msg_destnode(inmsg); if (msg_routed(inmsg)) msg_set_prevnode(inmsg, tipc_own_addr); @@ -2695,7 +2687,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) /* Prepare reusable fragment header: */ msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT, - TIPC_OK, INT_H_SIZE, destaddr); + INT_H_SIZE, destaddr); msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg)); msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++)); msg_set_fragm_no(&fragm_hdr, fragm_no); diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 38abebaae88..73dcd00d674 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -230,13 +230,10 @@ void tipc_msg_dbg(struct print_buf *buf, struct tipc_msg *msg, const char *str) switch (usr) { case CONN_MANAGER: - case NAME_DISTRIBUTOR: case TIPC_LOW_IMPORTANCE: case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: - if (msg_short(msg)) - break; /* No error */ switch (msg_errcode(msg)) { case TIPC_OK: break; diff --git a/net/tipc/msg.h b/net/tipc/msg.h index ad487e8abcc..7ee6ae23814 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -2,7 +2,7 @@ * net/tipc/msg.h: Include file for TIPC message header routines * * Copyright (c) 2000-2007, Ericsson AB - * Copyright (c) 2005-2007, Wind River Systems + * Copyright (c) 2005-2008, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -75,6 +75,14 @@ static inline void msg_set_bits(struct tipc_msg *m, u32 w, m->hdr[w] |= htonl(val); } +static inline void msg_swap_words(struct tipc_msg *msg, u32 a, u32 b) +{ + u32 temp = msg->hdr[a]; + + msg->hdr[a] = msg->hdr[b]; + msg->hdr[b] = temp; +} + /* * Word 0 */ @@ -119,9 +127,9 @@ static inline int msg_non_seq(struct tipc_msg *m) return msg_bits(m, 0, 20, 1); } -static inline void msg_set_non_seq(struct tipc_msg *m) +static inline void msg_set_non_seq(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 0, 20, 1, 1); + msg_set_bits(m, 0, 20, 1, n); } static inline int msg_dest_droppable(struct tipc_msg *m) @@ -224,6 +232,25 @@ static inline void msg_set_seqno(struct tipc_msg *m, u32 n) msg_set_bits(m, 2, 0, 0xffff, n); } +/* + * TIPC may utilize the "link ack #" and "link seq #" fields of a short + * message header to hold the destination node for the message, since the + * normal "dest node" field isn't present. This cache is only referenced + * when required, so populating the cache of a longer message header is + * harmless (as long as the header has the two link sequence fields present). + * + * Note: Host byte order is OK here, since the info never goes off-card. + */ + +static inline u32 msg_destnode_cache(struct tipc_msg *m) +{ + return m->hdr[2]; +} + +static inline void msg_set_destnode_cache(struct tipc_msg *m, u32 dnode) +{ + m->hdr[2] = dnode; +} /* * Words 3-10 @@ -325,7 +352,7 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ w0:|vers |msg usr|hdr sz |n|resrv| packet size | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - w1:|m typ|rsv=0| sequence gap | broadcast ack no | + w1:|m typ| sequence gap | broadcast ack no | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ w2:| link level ack no/bc_gap_from | seq no / bcast_gap_to | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ @@ -388,12 +415,12 @@ static inline struct tipc_msg *msg_get_wrapped(struct tipc_msg *m) static inline u32 msg_seq_gap(struct tipc_msg *m) { - return msg_bits(m, 1, 16, 0xff); + return msg_bits(m, 1, 16, 0x1fff); } static inline void msg_set_seq_gap(struct tipc_msg *m, u32 n) { - msg_set_bits(m, 1, 16, 0xff, n); + msg_set_bits(m, 1, 16, 0x1fff, n); } static inline u32 msg_req_links(struct tipc_msg *m) @@ -696,7 +723,7 @@ static inline u32 msg_tot_importance(struct tipc_msg *m) static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, - u32 err, u32 hsize, u32 destnode) + u32 hsize, u32 destnode) { memset(m, 0, hsize); msg_set_version(m); @@ -705,7 +732,6 @@ static inline void msg_init(struct tipc_msg *m, u32 user, u32 type, msg_set_size(m, hsize); msg_set_prevnode(m, tipc_own_addr); msg_set_type(m, type); - msg_set_errcode(m, err); if (!msg_short(m)) { msg_set_orignode(m, tipc_own_addr); msg_set_destnode(m, destnode); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index aecba5cd87d..10a69894e2f 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -103,8 +103,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) if (buf != NULL) { msg = buf_msg(buf); - msg_init(msg, NAME_DISTRIBUTOR, type, TIPC_OK, - LONG_H_SIZE, dest); + msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest); msg_set_size(msg, LONG_H_SIZE + size); } return buf; diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 892373e498e..096f7bd240a 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -74,7 +74,7 @@ struct sub_seq { * @first_free: array index of first unused sub-sequence entry * @ns_list: links to adjacent name sequences in hash chain * @subscriptions: list of subscriptions for this 'type' - * @lock: spinlock controlling access to name sequence structure + * @lock: spinlock controlling access to publication lists of all sub-sequences */ struct name_seq { @@ -905,6 +905,9 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth, struct sub_seq *sseq; char typearea[11]; + if (seq->first_free == 0) + return; + sprintf(typearea, "%-10u", seq->type); if (depth == 1) { @@ -915,7 +918,9 @@ static void nameseq_list(struct name_seq *seq, struct print_buf *buf, u32 depth, for (sseq = seq->sseqs; sseq != &seq->sseqs[seq->first_free]; sseq++) { if ((lowbound <= sseq->upper) && (upbound >= sseq->lower)) { tipc_printf(buf, "%s ", typearea); + spin_lock_bh(&seq->lock); subseq_list(sseq, buf, depth, index); + spin_unlock_bh(&seq->lock); sprintf(typearea, "%10s", " "); } } diff --git a/net/tipc/net.c b/net/tipc/net.c index c39c76201e8..cc51fa48367 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -266,7 +266,7 @@ void tipc_net_route_msg(struct sk_buff *buf) tipc_link_send(buf, dnode, msg_link_selector(msg)); } -int tipc_net_start(void) +int tipc_net_start(u32 addr) { char addr_string[16]; int res; @@ -274,6 +274,10 @@ int tipc_net_start(void) if (tipc_mode != TIPC_NODE_MODE) return -ENOPROTOOPT; + tipc_subscr_stop(); + tipc_cfg_stop(); + + tipc_own_addr = addr; tipc_mode = TIPC_NET_MODE; tipc_named_reinit(); tipc_port_reinit(); @@ -284,10 +288,10 @@ int tipc_net_start(void) (res = tipc_bclink_init())) { return res; } - tipc_subscr_stop(); - tipc_cfg_stop(); + tipc_k_signal((Handler)tipc_subscr_start, 0); tipc_k_signal((Handler)tipc_cfg_init, 0); + info("Started in network mode\n"); info("Own node address %s, network identity %u\n", addr_string_fill(addr_string, tipc_own_addr), tipc_net_id); diff --git a/net/tipc/net.h b/net/tipc/net.h index a6a0e9976ac..d154ac2bda9 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -58,7 +58,7 @@ void tipc_net_route_msg(struct sk_buff *buf); struct node *tipc_net_select_remote_node(u32 addr, u32 ref); u32 tipc_net_select_router(u32 addr, u32 ref); -int tipc_net_start(void); +int tipc_net_start(u32 addr); void tipc_net_stop(void); #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 6a7f7b4c259..c387217bb23 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -2,7 +2,7 @@ * net/tipc/netlink.c: TIPC configuration handling * * Copyright (c) 2005-2006, Ericsson AB - * Copyright (c) 2005, Wind River Systems + * Copyright (c) 2005-2007, Wind River Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -45,15 +45,17 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) struct nlmsghdr *req_nlh = info->nlhdr; struct tipc_genlmsghdr *req_userhdr = info->userhdr; int hdr_space = NLMSG_SPACE(GENL_HDRLEN + TIPC_GENL_HDRLEN); + u16 cmd; if ((req_userhdr->cmd & 0xC000) && (!capable(CAP_NET_ADMIN))) - rep_buf = tipc_cfg_reply_error_string(TIPC_CFG_NOT_NET_ADMIN); + cmd = TIPC_CMD_NOT_NET_ADMIN; else - rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, - req_userhdr->cmd, - NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, - NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), - hdr_space); + cmd = req_userhdr->cmd; + + rep_buf = tipc_cfg_do_cmd(req_userhdr->dest, cmd, + NLMSG_DATA(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN, + NLMSG_PAYLOAD(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN), + hdr_space); if (rep_buf) { skb_push(rep_buf, hdr_space); diff --git a/net/tipc/node.c b/net/tipc/node.c index 598f4d3a009..34e9a2bb7c1 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -52,16 +52,40 @@ static void node_established_contact(struct node *n_ptr); struct node *tipc_nodes = NULL; /* sorted list of nodes within cluster */ +static DEFINE_SPINLOCK(node_create_lock); + u32 tipc_own_tag = 0; +/** + * tipc_node_create - create neighboring node + * + * Currently, this routine is called by neighbor discovery code, which holds + * net_lock for reading only. We must take node_create_lock to ensure a node + * isn't created twice if two different bearers discover the node at the same + * time. (It would be preferable to switch to holding net_lock in write mode, + * but this is a non-trivial change.) + */ + struct node *tipc_node_create(u32 addr) { struct cluster *c_ptr; struct node *n_ptr; struct node **curr_node; + spin_lock_bh(&node_create_lock); + + for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { + if (addr < n_ptr->addr) + break; + if (addr == n_ptr->addr) { + spin_unlock_bh(&node_create_lock); + return n_ptr; + } + } + n_ptr = kzalloc(sizeof(*n_ptr),GFP_ATOMIC); if (!n_ptr) { + spin_unlock_bh(&node_create_lock); warn("Node creation failed, no memory\n"); return NULL; } @@ -71,6 +95,7 @@ struct node *tipc_node_create(u32 addr) c_ptr = tipc_cltr_create(addr); } if (!c_ptr) { + spin_unlock_bh(&node_create_lock); kfree(n_ptr); return NULL; } @@ -91,6 +116,7 @@ struct node *tipc_node_create(u32 addr) } } (*curr_node) = n_ptr; + spin_unlock_bh(&node_create_lock); return n_ptr; } diff --git a/net/tipc/port.c b/net/tipc/port.c index 757de38fe6a..2e0cff408ff 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -244,12 +244,8 @@ u32 tipc_createport_raw(void *usr_handle, p_ptr->publ.max_pkt = MAX_PKT_DEFAULT; p_ptr->publ.ref = ref; msg = &p_ptr->publ.phdr; - msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG, TIPC_OK, LONG_H_SIZE, - 0); - msg_set_orignode(msg, tipc_own_addr); - msg_set_prevnode(msg, tipc_own_addr); + msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0); msg_set_origport(msg, ref); - msg_set_importance(msg,importance); p_ptr->last_in_seqno = 41; p_ptr->sent = 1; INIT_LIST_HEAD(&p_ptr->wait_list); @@ -404,10 +400,10 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, buf = buf_acquire(LONG_H_SIZE); if (buf) { msg = buf_msg(buf); - msg_init(msg, usr, type, err, LONG_H_SIZE, destnode); + msg_init(msg, usr, type, LONG_H_SIZE, destnode); + msg_set_errcode(msg, err); msg_set_destport(msg, destport); msg_set_origport(msg, origport); - msg_set_destnode(msg, destnode); msg_set_orignode(msg, orignode); msg_set_transp_seqno(msg, seqno); msg_set_msgcnt(msg, ack); @@ -448,17 +444,19 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) return data_sz; } rmsg = buf_msg(rbuf); - msg_init(rmsg, imp, msg_type(msg), err, hdr_sz, msg_orignode(msg)); + msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg)); + msg_set_errcode(rmsg, err); msg_set_destport(rmsg, msg_origport(msg)); - msg_set_prevnode(rmsg, tipc_own_addr); msg_set_origport(rmsg, msg_destport(msg)); - if (msg_short(msg)) + if (msg_short(msg)) { msg_set_orignode(rmsg, tipc_own_addr); - else + /* leave name type & instance as zeroes */ + } else { msg_set_orignode(rmsg, msg_destnode(msg)); + msg_set_nametype(rmsg, msg_nametype(msg)); + msg_set_nameinst(rmsg, msg_nameinst(msg)); + } msg_set_size(rmsg, data_sz + hdr_sz); - msg_set_nametype(rmsg, msg_nametype(msg)); - msg_set_nameinst(rmsg, msg_nameinst(msg)); skb_copy_to_linear_data_offset(rbuf, hdr_sz, msg_data(msg), data_sz); /* send self-abort message when rejecting on a connected port */ @@ -780,6 +778,7 @@ void tipc_port_reinit(void) msg = &p_ptr->publ.phdr; if (msg_orignode(msg) == tipc_own_addr) break; + msg_set_prevnode(msg, tipc_own_addr); msg_set_orignode(msg, tipc_own_addr); } spin_unlock_bh(&tipc_port_list_lock); @@ -840,16 +839,13 @@ static void port_dispatcher_sigh(void *dummy) u32 peer_node = port_peernode(p_ptr); tipc_port_unlock(p_ptr); + if (unlikely(!cb)) + goto reject; if (unlikely(!connected)) { - if (unlikely(published)) + if (tipc_connect2port(dref, &orig)) goto reject; - tipc_connect2port(dref,&orig); - } - if (unlikely(msg_origport(msg) != peer_port)) - goto reject; - if (unlikely(msg_orignode(msg) != peer_node)) - goto reject; - if (unlikely(!cb)) + } else if ((msg_origport(msg) != peer_port) || + (msg_orignode(msg) != peer_node)) goto reject; if (unlikely(++p_ptr->publ.conn_unacked >= TIPC_FLOW_CONTROL_WIN)) @@ -864,9 +860,7 @@ static void port_dispatcher_sigh(void *dummy) tipc_msg_event cb = up_ptr->msg_cb; tipc_port_unlock(p_ptr); - if (unlikely(connected)) - goto reject; - if (unlikely(!cb)) + if (unlikely(!cb || connected)) goto reject; skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), @@ -879,11 +873,7 @@ static void port_dispatcher_sigh(void *dummy) tipc_named_msg_event cb = up_ptr->named_msg_cb; tipc_port_unlock(p_ptr); - if (unlikely(connected)) - goto reject; - if (unlikely(!cb)) - goto reject; - if (unlikely(!published)) + if (unlikely(!cb || connected || !published)) goto reject; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); @@ -910,11 +900,10 @@ err: u32 peer_node = port_peernode(p_ptr); tipc_port_unlock(p_ptr); - if (!connected || !cb) - break; - if (msg_origport(msg) != peer_port) + if (!cb || !connected) break; - if (msg_orignode(msg) != peer_node) + if ((msg_origport(msg) != peer_port) || + (msg_orignode(msg) != peer_node)) break; tipc_disconnect(dref); skb_pull(buf, msg_hdr_sz(msg)); @@ -926,7 +915,7 @@ err: tipc_msg_err_event cb = up_ptr->err_cb; tipc_port_unlock(p_ptr); - if (connected || !cb) + if (!cb || connected) break; skb_pull(buf, msg_hdr_sz(msg)); cb(usr_handle, dref, &buf, msg_data(msg), @@ -939,7 +928,7 @@ err: up_ptr->named_err_cb; tipc_port_unlock(p_ptr); - if (connected || !cb) + if (!cb || connected) break; dseq.type = msg_nametype(msg); dseq.lower = msg_nameinst(msg); diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 9ab31a3ce3a..b210a88d096 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -350,9 +350,9 @@ __be16 wanrouter_type_trans(struct sk_buff *skb, struct net_device *dev) * o execute requested action or pass command to the device driver */ -int wanrouter_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) +long wanrouter_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { + struct inode *inode = file->f_path.dentry->d_inode; int err = 0; struct proc_dir_entry *dent; struct wan_device *wandev; @@ -372,6 +372,7 @@ int wanrouter_ioctl(struct inode *inode, struct file *file, if (wandev->magic != ROUTER_MAGIC) return -EINVAL; + lock_kernel(); switch (cmd) { case ROUTER_SETUP: err = wanrouter_device_setup(wandev, data); @@ -403,6 +404,7 @@ int wanrouter_ioctl(struct inode *inode, struct file *file, err = wandev->ioctl(wandev, cmd, arg); else err = -EINVAL; } + unlock_kernel(); return err; } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 5bebe40bf4e..267f7ff4982 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -278,7 +278,7 @@ static const struct file_operations wandev_fops = { .read = seq_read, .llseek = seq_lseek, .release = single_release, - .ioctl = wanrouter_ioctl, + .unlocked_ioctl = wanrouter_ioctl, }; /* diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index a1b0fbe3ea3..b976d9ed10e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -50,19 +50,8 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) switch (type) { case XFRMA_ALG_AUTH: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "digest_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_CRYPT: - if (!algp->alg_key_len && - strcmp(algp->alg_name, "cipher_null") != 0) - return -EINVAL; - break; - case XFRMA_ALG_COMP: - /* Zero length keys are legal. */ break; default: |