From 241fc4367b3ca5d407b043599ed980304a70b91f Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:35:54 +0100 Subject: [SERIAL] Expose 8250_pci setup/removal/suspend/resume functions Re-jig the setup/removal/suspend/resume of 8250 pci ports so that they know slightly less about how they're attached to a PCI device. Expose this as the new interface for registering PCI serial ports, as well as the pciserial_board structure and associated flag definitions. Signed-off-by: Russell King --- include/linux/8250_pci.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h index 5f3ab21b339..192c0ff7a77 100644 --- a/include/linux/8250_pci.h +++ b/include/linux/8250_pci.h @@ -1,2 +1,40 @@ +/* + * Definitions for PCI support. + */ +#define FL_BASE_MASK 0x0007 +#define FL_BASE0 0x0000 +#define FL_BASE1 0x0001 +#define FL_BASE2 0x0002 +#define FL_BASE3 0x0003 +#define FL_BASE4 0x0004 +#define FL_GET_BASE(x) (x & FL_BASE_MASK) + +/* Use successive BARs (PCI base address registers), + else use offset into some specified BAR */ +#define FL_BASE_BARS 0x0008 + +/* do not assign an irq */ +#define FL_NOIRQ 0x0080 + +/* Use the Base address register size to cap number of ports */ +#define FL_REGION_SZ_CAP 0x0100 + +struct pciserial_board { + unsigned int flags; + unsigned int num_ports; + unsigned int base_baud; + unsigned int uart_offset; + unsigned int reg_shift; + unsigned int first_offset; +}; + +struct serial_private; + +struct serial_private * +pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board); +void pciserial_remove_ports(struct serial_private *priv); +void pciserial_suspend_ports(struct serial_private *priv); +void pciserial_resume_ports(struct serial_private *priv); + int pci_siig10x_fn(struct pci_dev *dev, int enable); int pci_siig20x_fn(struct pci_dev *dev, int enable); -- cgit v1.2.3 From 
05caac585f8abd6c0113856bc8858e3ef214d8a6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 27 Jul 2005 11:41:18 +0100 Subject: [SERIAL] Convert parport_serial to use new 8250_pci interfaces Convert parport_serial to use the new 8250_pci interface, converting the table to a pciserial_board table. This also unuses the SPCI_* definitions in serialP.h, which can now be removed. Signed-off-by: Russell King --- include/linux/8250_pci.h | 3 --- include/linux/serialP.h | 40 ---------------------------------------- 2 files changed, 43 deletions(-) (limited to 'include') diff --git a/include/linux/8250_pci.h b/include/linux/8250_pci.h index 192c0ff7a77..3209dd46ea7 100644 --- a/include/linux/8250_pci.h +++ b/include/linux/8250_pci.h @@ -35,6 +35,3 @@ pciserial_init_ports(struct pci_dev *dev, struct pciserial_board *board); void pciserial_remove_ports(struct serial_private *priv); void pciserial_suspend_ports(struct serial_private *priv); void pciserial_resume_ports(struct serial_private *priv); - -int pci_siig10x_fn(struct pci_dev *dev, int enable); -int pci_siig20x_fn(struct pci_dev *dev, int enable); diff --git a/include/linux/serialP.h b/include/linux/serialP.h index 2b2f35a64d7..2b9e6b9554d 100644 --- a/include/linux/serialP.h +++ b/include/linux/serialP.h @@ -140,44 +140,4 @@ struct rs_multiport_struct { #define ALPHA_KLUDGE_MCR 0 #endif -/* - * Definitions for PCI support. 
- */ -#define SPCI_FL_BASE_MASK 0x0007 -#define SPCI_FL_BASE0 0x0000 -#define SPCI_FL_BASE1 0x0001 -#define SPCI_FL_BASE2 0x0002 -#define SPCI_FL_BASE3 0x0003 -#define SPCI_FL_BASE4 0x0004 -#define SPCI_FL_GET_BASE(x) (x & SPCI_FL_BASE_MASK) - -#define SPCI_FL_IRQ_MASK (0x0007 << 4) -#define SPCI_FL_IRQBASE0 (0x0000 << 4) -#define SPCI_FL_IRQBASE1 (0x0001 << 4) -#define SPCI_FL_IRQBASE2 (0x0002 << 4) -#define SPCI_FL_IRQBASE3 (0x0003 << 4) -#define SPCI_FL_IRQBASE4 (0x0004 << 4) -#define SPCI_FL_GET_IRQBASE(x) ((x & SPCI_FL_IRQ_MASK) >> 4) - -/* Use successive BARs (PCI base address registers), - else use offset into some specified BAR */ -#define SPCI_FL_BASE_TABLE 0x0100 - -/* Use successive entries in the irq resource table */ -#define SPCI_FL_IRQ_TABLE 0x0200 - -/* Use the irq resource table instead of dev->irq */ -#define SPCI_FL_IRQRESOURCE 0x0400 - -/* Use the Base address register size to cap number of ports */ -#define SPCI_FL_REGION_SZ_CAP 0x0800 - -/* Do not use irq sharing for this device */ -#define SPCI_FL_NO_SHIRQ 0x1000 - -/* This is a PNP device */ -#define SPCI_FL_ISPNP 0x2000 - -#define SPCI_FL_PNPDEFAULT (SPCI_FL_IRQRESOURCE|SPCI_FL_ISPNP) - #endif /* _LINUX_SERIAL_H */ -- cgit v1.2.3 From 00db8189d984d6c51226dafbbe4a667ce9b7d5da Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Sat, 30 Jul 2005 19:31:23 -0400 Subject: This patch adds a PHY Abstraction Layer to the Linux Kernel, enabling ethernet drivers to remain as ignorant as is reasonable of the connected PHY's design and operation details. 
Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- include/linux/ethtool.h | 4 + include/linux/mii.h | 9 +- include/linux/phy.h | 378 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 390 insertions(+), 1 deletion(-) create mode 100644 include/linux/phy.h (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index a0ab26aab45..d7021c391b2 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -408,6 +408,8 @@ struct ethtool_ops { #define SUPPORTED_FIBRE (1 << 10) #define SUPPORTED_BNC (1 << 11) #define SUPPORTED_10000baseT_Full (1 << 12) +#define SUPPORTED_Pause (1 << 13) +#define SUPPORTED_Asym_Pause (1 << 14) /* Indicates what features are advertised by the interface. */ #define ADVERTISED_10baseT_Half (1 << 0) @@ -423,6 +425,8 @@ struct ethtool_ops { #define ADVERTISED_FIBRE (1 << 10) #define ADVERTISED_BNC (1 << 11) #define ADVERTISED_10000baseT_Full (1 << 12) +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) /* The following are all involved in forcing a particular link * mode for the device for setting things. When getting the diff --git a/include/linux/mii.h b/include/linux/mii.h index 374b615ea9e..9b8d0476988 100644 --- a/include/linux/mii.h +++ b/include/linux/mii.h @@ -22,6 +22,7 @@ #define MII_EXPANSION 0x06 /* Expansion register */ #define MII_CTRL1000 0x09 /* 1000BASE-T control */ #define MII_STAT1000 0x0a /* 1000BASE-T status */ +#define MII_ESTATUS 0x0f /* Extended Status */ #define MII_DCOUNTER 0x12 /* Disconnect counter */ #define MII_FCSCOUNTER 0x13 /* False carrier counter */ #define MII_NWAYTEST 0x14 /* N-way auto-neg test reg */ @@ -54,7 +55,10 @@ #define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ #define BMSR_RFAULT 0x0010 /* Remote fault detected */ #define BMSR_ANEGCOMPLETE 0x0020 /* Auto-negotiation complete */ -#define BMSR_RESV 0x07c0 /* Unused... */ +#define BMSR_RESV 0x00c0 /* Unused... 
*/ +#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ +#define BMSR_100FULL2 0x0200 /* Can do 100BASE-T2 HDX */ +#define BMSR_100HALF2 0x0400 /* Can do 100BASE-T2 FDX */ #define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ #define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ #define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ @@ -114,6 +118,9 @@ #define EXPANSION_MFAULTS 0x0010 /* Multiple faults detected */ #define EXPANSION_RESV 0xffe0 /* Unused... */ +#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ +#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ + /* N-way test register. */ #define NWAYTEST_RESV1 0x00ff /* Unused... */ #define NWAYTEST_LOOPBACK 0x0100 /* Enable loopback for N-way */ diff --git a/include/linux/phy.h b/include/linux/phy.h new file mode 100644 index 00000000000..3404804dc22 --- /dev/null +++ b/include/linux/phy.h @@ -0,0 +1,378 @@ +/* + * include/linux/phy.h + * + * Framework and drivers for configuring and reading different PHYs + * Based on code in sungem_phy.c and gianfar_phy.c + * + * Author: Andy Fleming + * + * Copyright (c) 2004 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __PHY_H +#define __PHY_H + +#include +#include + +#define PHY_BASIC_FEATURES (SUPPORTED_10baseT_Half | \ + SUPPORTED_10baseT_Full | \ + SUPPORTED_100baseT_Half | \ + SUPPORTED_100baseT_Full | \ + SUPPORTED_Autoneg | \ + SUPPORTED_TP | \ + SUPPORTED_MII) + +#define PHY_GBIT_FEATURES (PHY_BASIC_FEATURES | \ + SUPPORTED_1000baseT_Half | \ + SUPPORTED_1000baseT_Full) + +/* Set phydev->irq to PHY_POLL if interrupts are not supported, + * or not desired for this PHY. 
Set to PHY_IGNORE_INTERRUPT if + * the attached driver handles the interrupt + */ +#define PHY_POLL -1 +#define PHY_IGNORE_INTERRUPT -2 + +#define PHY_HAS_INTERRUPT 0x00000001 +#define PHY_HAS_MAGICANEG 0x00000002 + +#define MII_BUS_MAX 4 + + +#define PHY_INIT_TIMEOUT 100000 +#define PHY_STATE_TIME 1 +#define PHY_FORCE_TIMEOUT 10 +#define PHY_AN_TIMEOUT 10 + +#define PHY_MAX_ADDR 32 + +/* The Bus class for PHYs. Devices which provide access to + * PHYs should register using this structure */ +struct mii_bus { + const char *name; + int id; + void *priv; + int (*read)(struct mii_bus *bus, int phy_id, int regnum); + int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val); + int (*reset)(struct mii_bus *bus); + + /* A lock to ensure that only one thing can read/write + * the MDIO bus at a time */ + spinlock_t mdio_lock; + + struct device *dev; + + /* list of all PHYs on bus */ + struct phy_device *phy_map[PHY_MAX_ADDR]; + + /* Pointer to an array of interrupts, each PHY's + * interrupt at the index matching its address */ + int *irq; +}; + +#define PHY_INTERRUPT_DISABLED 0x0 +#define PHY_INTERRUPT_ENABLED 0x80000000 + +/* PHY state machine states: + * + * DOWN: PHY device and driver are not ready for anything. probe + * should be called if and only if the PHY is in this state, + * given that the PHY device exists. + * - PHY driver probe function will, depending on the PHY, set + * the state to STARTING or READY + * + * STARTING: PHY device is coming up, and the ethernet driver is + * not ready. PHY drivers may set this in the probe function. + * If they do, they are responsible for making sure the state is + * eventually set to indicate whether the PHY is UP or READY, + * depending on the state when the PHY is done starting up. + * - PHY driver will set the state to READY + * - start will set the state to PENDING + * + * READY: PHY is ready to send and receive packets, but the + * controller is not. 
By default, PHYs which do not implement + * probe will be set to this state by phy_probe(). If the PHY + * driver knows the PHY is ready, and the PHY state is STARTING, + * then it sets this STATE. + * - start will set the state to UP + * + * PENDING: PHY device is coming up, but the ethernet driver is + * ready. phy_start will set this state if the PHY state is + * STARTING. + * - PHY driver will set the state to UP when the PHY is ready + * + * UP: The PHY and attached device are ready to do work. + * Interrupts should be started here. + * - timer moves to AN + * + * AN: The PHY is currently negotiating the link state. Link is + * therefore down for now. phy_timer will set this state when it + * detects the state is UP. config_aneg will set this state + * whenever called with phydev->autoneg set to AUTONEG_ENABLE. + * - If autonegotiation finishes, but there's no link, it sets + * the state to NOLINK. + * - If aneg finishes with link, it sets the state to RUNNING, + * and calls adjust_link + * - If autonegotiation did not finish after an arbitrary amount + * of time, autonegotiation should be tried again if the PHY + * supports "magic" autonegotiation (back to AN) + * - If it didn't finish, and no magic_aneg, move to FORCING. + * + * NOLINK: PHY is up, but not currently plugged in. 
+ * - If the timer notes that the link comes back, we move to RUNNING + * - config_aneg moves to AN + * - phy_stop moves to HALTED + * + * FORCING: PHY is being configured with forced settings + * - if link is up, move to RUNNING + * - If link is down, we drop to the next highest setting, and + * retry (FORCING) after a timeout + * - phy_stop moves to HALTED + * + * RUNNING: PHY is currently up, running, and possibly sending + * and/or receiving packets + * - timer will set CHANGELINK if we're polling (this ensures the + * link state is polled every other cycle of this state machine, + * which makes it every other second) + * - irq will set CHANGELINK + * - config_aneg will set AN + * - phy_stop moves to HALTED + * + * CHANGELINK: PHY experienced a change in link state + * - timer moves to RUNNING if link + * - timer moves to NOLINK if the link is down + * - phy_stop moves to HALTED + * + * HALTED: PHY is up, but no polling or interrupts are done. Or + * PHY is in an error state. + * + * - phy_start moves to RESUMING + * + * RESUMING: PHY was halted, but now wants to run again. + * - If we are forcing, or aneg is done, timer moves to RUNNING + * - If aneg is not done, timer moves to AN + * - phy_stop moves to HALTED + */ +enum phy_state { + PHY_DOWN=0, + PHY_STARTING, + PHY_READY, + PHY_PENDING, + PHY_UP, + PHY_AN, + PHY_RUNNING, + PHY_NOLINK, + PHY_FORCING, + PHY_CHANGELINK, + PHY_HALTED, + PHY_RESUMING +}; + +/* phy_device: An instance of a PHY + * + * drv: Pointer to the driver for this PHY instance + * bus: Pointer to the bus this PHY is on + * dev: driver model device structure for this PHY + * phy_id: UID for this device found during discovery + * state: state of the PHY for management purposes + * dev_flags: Device-specific flags used by the PHY driver. 
+ * addr: Bus address of PHY + * link_timeout: The number of timer firings to wait before + * giving up on the current attempt at acquiring a link + * irq: IRQ number of the PHY's interrupt (-1 if none) + * phy_timer: The timer for handling the state machine + * phy_queue: A work_queue for the interrupt + * attached_dev: The attached enet driver's device instance ptr + * adjust_link: Callback for the enet controller to respond to + * changes in the link state. + * adjust_state: Callback for the enet driver to respond to + * changes in the state machine. + * + * speed, duplex, pause, supported, advertising, and + * autoneg are used like in mii_if_info + * + * interrupts currently only supports enabled or disabled, + * but could be changed in the future to support enabling + * and disabling specific interrupts + * + * Contains some infrastructure for polling and interrupt + * handling, as well as handling shifts in PHY hardware state + */ +struct phy_device { + /* Information about the PHY type */ + /* And management functions */ + struct phy_driver *drv; + + struct mii_bus *bus; + + struct device dev; + + u32 phy_id; + + enum phy_state state; + + u32 dev_flags; + + /* Bus address of the PHY (0-31) */ + int addr; + + /* forced speed & duplex (no autoneg) + * partner speed & duplex & pause (autoneg) + */ + int speed; + int duplex; + int pause; + int asym_pause; + + /* The most recently read link state */ + int link; + + /* Enabled Interrupts */ + u32 interrupts; + + /* Union of PHY and Attached devices' supported modes */ + /* See mii.h for more info */ + u32 supported; + u32 advertising; + + int autoneg; + + int link_timeout; + + /* Interrupt number for this PHY + * -1 means no interrupt */ + int irq; + + /* private data pointer */ + /* For use by PHYs to maintain extra state */ + void *priv; + + /* Interrupt and Polling infrastructure */ + struct work_struct phy_queue; + struct timer_list phy_timer; + + spinlock_t lock; + + struct net_device *attached_dev; + +
void (*adjust_link)(struct net_device *dev); + + void (*adjust_state)(struct net_device *dev); +}; +#define to_phy_device(d) container_of(d, struct phy_device, dev) + +/* struct phy_driver: Driver structure for a particular PHY type + * + * phy_id: The result of reading the UID registers of this PHY + * type, and ANDing them with the phy_id_mask. This driver + * only works for PHYs with IDs which match this field + * name: The friendly name of this PHY type + * phy_id_mask: Defines the important bits of the phy_id + * features: A list of features (speed, duplex, etc) supported + * by this PHY + * flags: A bitfield defining certain other features this PHY + * supports (like interrupts) + * + * The drivers must implement config_aneg and read_status. All + * other functions are optional. Note that none of these + * functions should be called from interrupt time. The goal is + * for the bus read/write functions to be able to block when the + * bus transaction is happening, and be freed up by an interrupt + * (The MPC85xx has this ability, though it is not currently + * supported in the driver). + */ +struct phy_driver { + u32 phy_id; + char *name; + unsigned int phy_id_mask; + u32 features; + u32 flags; + + /* Called to initialize the PHY, + * including after a reset */ + int (*config_init)(struct phy_device *phydev); + + /* Called during discovery. 
Used to set + * up device-specific structures, if any */ + int (*probe)(struct phy_device *phydev); + + /* PHY Power Management */ + int (*suspend)(struct phy_device *phydev); + int (*resume)(struct phy_device *phydev); + + /* Configures the advertisement and resets + * autonegotiation if phydev->autoneg is on, + * forces the speed to the current settings in phydev + * if phydev->autoneg is off */ + int (*config_aneg)(struct phy_device *phydev); + + /* Determines the negotiated speed and duplex */ + int (*read_status)(struct phy_device *phydev); + + /* Clears any pending interrupts */ + int (*ack_interrupt)(struct phy_device *phydev); + + /* Enables or disables interrupts */ + int (*config_intr)(struct phy_device *phydev); + + /* Clears up any memory if needed */ + void (*remove)(struct phy_device *phydev); + + struct device_driver driver; +}; +#define to_phy_driver(d) container_of(d, struct phy_driver, driver) + +int phy_read(struct phy_device *phydev, u16 regnum); +int phy_write(struct phy_device *phydev, u16 regnum, u16 val); +struct phy_device* get_phy_device(struct mii_bus *bus, int addr); +int phy_clear_interrupt(struct phy_device *phydev); +int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +struct phy_device * phy_attach(struct net_device *dev, + const char *phy_id, u32 flags); +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags); +void phy_disconnect(struct phy_device *phydev); +void phy_detach(struct phy_device *phydev); +void phy_start(struct phy_device *phydev); +void phy_stop(struct phy_device *phydev); +int phy_start_aneg(struct phy_device *phydev); + +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void phy_sanitize_settings(struct phy_device *phydev); +int phy_stop_interrupts(struct phy_device *phydev); + +static inline int phy_read_status(struct phy_device *phydev) { + return phydev->drv->read_status(phydev); +} + 
+int genphy_config_advert(struct phy_device *phydev); +int genphy_setup_forced(struct phy_device *phydev); +int genphy_restart_aneg(struct phy_device *phydev); +int genphy_config_aneg(struct phy_device *phydev); +int genphy_update_link(struct phy_device *phydev); +int genphy_read_status(struct phy_device *phydev); +void phy_driver_unregister(struct phy_driver *drv); +int phy_driver_register(struct phy_driver *new_driver); +void phy_prepare_link(struct phy_device *phydev, + void (*adjust_link)(struct net_device *)); +void phy_start_machine(struct phy_device *phydev, + void (*handler)(struct net_device *)); +void phy_stop_machine(struct phy_device *phydev); +int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); +int phy_mii_ioctl(struct phy_device *phydev, + struct mii_ioctl_data *mii_data, int cmd); +int phy_start_interrupts(struct phy_device *phydev); +void phy_print_status(struct phy_device *phydev); + +extern struct bus_type mdio_bus_type; +extern struct phy_driver genphy_driver; +#endif /* __PHY_H */ -- cgit v1.2.3 From 541134cfe7af179f45458b68421ee1da7bab9cba Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Sun, 3 Jul 2005 13:44:39 +0100 Subject: [PATCH] sata_nv: Support MCP51/MCP55 device IDs This is a multi-part message in MIME format. 
Signed-off-by: Jeff Garzik --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index bc4cc10fabe..639291fe8ac 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1249,6 +1249,7 @@ #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA 0x0266 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2 0x0267 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_IDE 0x036E +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA 0x036F #define PCI_DEVICE_ID_NVIDIA_NVENET_12 0x0268 #define PCI_DEVICE_ID_NVIDIA_NVENET_13 0x0269 #define PCI_DEVICE_ID_NVIDIA_MCP51_AUDIO 0x026B -- cgit v1.2.3 From 8a60a07129fad60bba779a2a4038c7518b167fc7 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 31 Jul 2005 13:13:24 -0400 Subject: libata: trim trailing whitespace. Also, fixup a tabs-to-spaces block of code in ata_piix. --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 6cd9ba63563..85b0aaee0ef 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -644,7 +644,7 @@ static inline void scr_write(struct ata_port *ap, unsigned int reg, u32 val) ap->ops->scr_write(ap, reg, val); } -static inline void scr_write_flush(struct ata_port *ap, unsigned int reg, +static inline void scr_write_flush(struct ata_port *ap, unsigned int reg, u32 val) { ap->ops->scr_write(ap, reg, val); -- cgit v1.2.3 From 67c4f3fa25502ce7ed82fb0307e09cf36f1f81da Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 11 Aug 2005 02:07:25 -0400 Subject: Fix numerous minor problems with new phy subsystem. Includes fixes for problems noted by Adrian Bunk, Andrew Morton, and one other person lost in the annals of history (and email folders). 
--- include/linux/phy.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 3404804dc22..72cb67b66e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -374,5 +374,4 @@ int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; -extern struct phy_driver genphy_driver; #endif /* __PHY_H */ -- cgit v1.2.3 From 2bf69b5fe90b3246ab50064c5a690a363e8c53e2 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Thu, 11 Aug 2005 02:47:54 -0400 Subject: phy subsystem: more cleanups - unexport symbols never used outside of home module - remove dead code - remove CONFIG_PHYCONTROL, make it unconditionally enabled --- include/linux/phy.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 72cb67b66e0..4f2b5effc16 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -334,26 +334,11 @@ int phy_write(struct phy_device *phydev, u16 regnum, u16 val); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); -struct phy_device * phy_attach(struct net_device *dev, - const char *phy_id, u32 flags); -struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, - void (*handler)(struct net_device *), u32 flags); -void phy_disconnect(struct phy_device *phydev); -void phy_detach(struct phy_device *phydev); -void phy_start(struct phy_device *phydev); -void phy_stop(struct phy_device *phydev); -int phy_start_aneg(struct phy_device *phydev); - -int mdiobus_register(struct mii_bus *bus); -void mdiobus_unregister(struct mii_bus *bus); -void phy_sanitize_settings(struct phy_device *phydev); -int phy_stop_interrupts(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { 
return phydev->drv->read_status(phydev); } -int genphy_config_advert(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); @@ -370,8 +355,6 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct mii_ioctl_data *mii_data, int cmd); -int phy_start_interrupts(struct phy_device *phydev); -void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; #endif /* __PHY_H */ -- cgit v1.2.3 From 972dcafb6d743a6c7611a2e4681ed814e30d6230 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Thu, 11 Aug 2005 03:35:53 -0400 Subject: [libata scsi] add START STOP UNIT translation --- include/linux/ata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index ca5fcadf998..9d25e9886d6 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -108,6 +108,8 @@ enum { /* ATA device commands */ ATA_CMD_CHK_POWER = 0xE5, /* check power mode */ + ATA_CMD_STANDBY = 0xE2, /* place in standby power mode */ + ATA_CMD_IDLE = 0xE3, /* place in idle power mode */ ATA_CMD_EDD = 0x90, /* execute device diagnostic */ ATA_CMD_FLUSH = 0xE7, ATA_CMD_FLUSH_EXT = 0xEA, -- cgit v1.2.3 From 323cdfc191b7c1597dc748175062c368568d6af4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Aug 2005 10:10:46 +0100 Subject: [MFD] Add SA11x0 MCP platform device support Add platform device data for the SA11x0 MCP device. This allows platforms to customise the configuration of the SA11x0 MCP device according to their needs. 
Signed-off-by: Russell King --- include/asm-arm/arch-sa1100/mcp.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/asm-arm/arch-sa1100/mcp.h (limited to 'include') diff --git a/include/asm-arm/arch-sa1100/mcp.h b/include/asm-arm/arch-sa1100/mcp.h new file mode 100644 index 00000000000..f58a22755c6 --- /dev/null +++ b/include/asm-arm/arch-sa1100/mcp.h @@ -0,0 +1,21 @@ +/* + * linux/include/asm-arm/arch-sa1100/mcp.h + * + * Copyright (C) 2005 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_ARM_ARCH_MCP_H +#define __ASM_ARM_ARCH_MCP_H + +#include + +struct mcp_plat_data { + u32 mccr0; + u32 mccr1; + unsigned int sclk_rate; +}; + +#endif -- cgit v1.2.3 From f27ecacc54cc0e5397c9b35f6c25065f07c4448d Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 18 Aug 2005 21:31:00 +0100 Subject: [ARM] Add support for ARM GIC Add support for the ARM Generic Interrupt Controller. Signed-off-by: Russell King --- include/asm-arm/hardware/gic.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/asm-arm/hardware/gic.h (limited to 'include') diff --git a/include/asm-arm/hardware/gic.h b/include/asm-arm/hardware/gic.h new file mode 100644 index 00000000000..3fa5eb70f64 --- /dev/null +++ b/include/asm-arm/hardware/gic.h @@ -0,0 +1,41 @@ +/* + * linux/include/asm-arm/hardware/gic.h + * + * Copyright (C) 2002 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#ifndef __ASM_ARM_HARDWARE_GIC_H +#define __ASM_ARM_HARDWARE_GIC_H + +#include + +#define GIC_CPU_CTRL 0x00 +#define GIC_CPU_PRIMASK 0x04 +#define GIC_CPU_BINPOINT 0x08 +#define GIC_CPU_INTACK 0x0c +#define GIC_CPU_EOI 0x10 +#define GIC_CPU_RUNNINGPRI 0x14 +#define GIC_CPU_HIGHPRI 0x18 + +#define GIC_DIST_CTRL 0x000 +#define GIC_DIST_CTR 0x004 +#define GIC_DIST_ENABLE_SET 0x100 +#define GIC_DIST_ENABLE_CLEAR 0x180 +#define GIC_DIST_PENDING_SET 0x200 +#define GIC_DIST_PENDING_CLEAR 0x280 +#define GIC_DIST_ACTIVE_BIT 0x300 +#define GIC_DIST_PRI 0x400 +#define GIC_DIST_TARGET 0x800 +#define GIC_DIST_CONFIG 0xc00 +#define GIC_DIST_SOFTINT 0xf00 + +#ifndef __ASSEMBLY__ +void gic_dist_init(void __iomem *base); +void gic_cpu_init(void __iomem *base); +void gic_raise_softirq(cpumask_t cpumask, unsigned int irq); +#endif + +#endif -- cgit v1.2.3 From d366b6436386875b1310ce8f70e3f9dea4647bac Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:40:08 +0100 Subject: [MMC] Add mmc_hostname() macro mmc_hostname() returns a pointer to the hostname for the mmc_host. Signed-off-by: Russell King --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f90f674eb3b..30786230859 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -97,6 +97,7 @@ extern void mmc_free_host(struct mmc_host *); #define mmc_priv(x) ((void *)((x) + 1)) #define mmc_dev(x) ((x)->dev) +#define mmc_hostname(x) ((x)->host_name) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); -- cgit v1.2.3 From 00b137cfda5276b3d2c87d44236fe4c5ee68b405 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:41:24 +0100 Subject: [MMC] Add MMC class devices Create a mmc_host class to allow enumeration of MMC host controllers even though they have no card(s) inserted. Patch based on work by Pierre Ossman. 
Signed-off-by: Russell King --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 30786230859..a74a810a130 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -63,6 +63,7 @@ struct device; struct mmc_host { struct device *dev; + struct class_device class_dev; struct mmc_host_ops *ops; unsigned int f_min; unsigned int f_max; -- cgit v1.2.3 From 1ad434d7cf5f490c71cfbbb2fb91076c01c8704e Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:42:21 +0100 Subject: [MMC] Use class device name for mmc host name There's no point in having the host name duplicated between the mmc_host structure and the encapsulated class device structure. Signed-off-by: Russell King --- include/linux/mmc/host.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index a74a810a130..113cc27865f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -68,7 +68,6 @@ struct mmc_host { unsigned int f_min; unsigned int f_max; u32 ocr_avail; - char host_name[8]; /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ @@ -98,7 +97,7 @@ extern void mmc_free_host(struct mmc_host *); #define mmc_priv(x) ((void *)((x) + 1)) #define mmc_dev(x) ((x)->dev) -#define mmc_hostname(x) ((x)->host_name) +#define mmc_hostname(x) ((x)->class_dev.class_id) extern int mmc_suspend_host(struct mmc_host *, pm_message_t); extern int mmc_resume_host(struct mmc_host *); -- cgit v1.2.3 From dce773771834221817e2d359a7e07a618ba08807 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 19 Aug 2005 09:42:52 +0100 Subject: [MMC] Use an IDR for host name indices Signed-off-by: Russell King --- include/linux/mmc/host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 113cc27865f..9a0893f3249 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -64,6 +64,7 @@ struct device; struct mmc_host { struct device *dev; struct class_device class_dev; + int index; struct mmc_host_ops *ops; unsigned int f_min; unsigned int f_max; -- cgit v1.2.3 From c1389503710ef4b4e5d21bea284afde19e9619cf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 22 Aug 2005 14:59:24 +0900 Subject: [PATCH] fix atapi_packet_task vs. intr race (take 2) Interrupts from devices sharing the same IRQ could cause ata_host_intr to finish commands being processed by atapi_packet_task if the commands are using ATA_PROT_ATAPI_NODATA or ATA_PROT_ATAPI_DMA protocol. This is because libata interrupt handler is unaware that interrupts are not expected during that period. This patch adds ATA_FLAG_NOINTR flag to tell the interrupt handler that we're not expecting interrupts. Note that once proper HSM is implemented for interrupt-driven PIO, this should be merged into it and this flag will be removed. ahci.c is a different kind of beast, so it's left alone. * The following drivers use ata_qc_issue_prot and ata_interrupt, so changes in libata core will do. ata_piix sata_sil sata_svw sata_via sata_sis sata_uli * The following drivers use ata_qc_issue_prot and custom intr handler. They need this change to work correctly. sata_nv sata_vsc * The following drivers use custom issue function and intr handler. Currently all custom issue functions don't support ATAPI, so this change is irrelevant, updated for consistency and to avoid later mistakes. 
sata_promise sata_qstor sata_sx4 Signed-off-by: Tejun Heo Signed-off-by: Jeff Garzik --- include/linux/libata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 85b0aaee0ef..724b7d1c18e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -113,6 +113,8 @@ enum { ATA_FLAG_MMIO = (1 << 6), /* use MMIO, not PIO */ ATA_FLAG_SATA_RESET = (1 << 7), /* use COMRESET */ ATA_FLAG_PIO_DMA = (1 << 8), /* PIO cmds via DMA */ + ATA_FLAG_NOINTR = (1 << 9), /* FIXME: Remove this once + * proper HSM is in place. */ ATA_QCFLAG_ACTIVE = (1 << 1), /* cmd not yet ack'd to scsi lyer */ ATA_QCFLAG_SG = (1 << 3), /* have s/g table? */ -- cgit v1.2.3 From b73fc89f6d1f84326e5e897ad249d00a9f218fd7 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 26 Aug 2005 16:03:19 +0100 Subject: [PATCH] libata: regularize dma_start/stop arguments Needed for a few PATA drivers. Also fix up a wrong comment. Signed-off-by: Jeff Garzik --- include/linux/libata.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 724b7d1c18e..33f3ab4eb82 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -365,7 +365,7 @@ struct ata_port_operations { void (*host_stop) (struct ata_host_set *host_set); - void (*bmdma_stop) (struct ata_port *ap); + void (*bmdma_stop) (struct ata_queued_cmd *qc); u8 (*bmdma_status) (struct ata_port *ap); }; @@ -424,9 +424,10 @@ extern unsigned int ata_dev_classify(struct ata_taskfile *tf); extern void ata_dev_id_string(u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_dev_config(struct ata_port *ap, unsigned int i); +extern void ata_dev_set_protocol(struct ata_device *dev); extern void ata_bmdma_setup (struct ata_queued_cmd *qc); extern void ata_bmdma_start (struct ata_queued_cmd *qc); -extern void ata_bmdma_stop(struct ata_port *ap); +extern void 
ata_bmdma_stop(struct ata_queued_cmd *qc); extern u8 ata_bmdma_status(struct ata_port *ap); extern void ata_bmdma_irq_clear(struct ata_port *ap); extern void ata_qc_complete(struct ata_queued_cmd *qc, u8 drv_stat); -- cgit v1.2.3 From a4d61e84804f3b14cc35c5e2af768a07c0f64ef6 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 25 Aug 2005 13:40:04 -0700 Subject: [PATCH] IB: move include files to include/rdma Move the InfiniBand headers from drivers/infiniband/include to include/rdma. This allows InfiniBand-using code to live elsewhere, and lets us remove the ugly EXTRA_CFLAGS include path from the InfiniBand Makefiles. Signed-off-by: Roland Dreier --- include/rdma/ib_cache.h | 105 +++ include/rdma/ib_cm.h | 568 ++++++++++++++++ include/rdma/ib_fmr_pool.h | 93 +++ include/rdma/ib_mad.h | 579 +++++++++++++++++ include/rdma/ib_pack.h | 245 +++++++ include/rdma/ib_sa.h | 373 +++++++++++ include/rdma/ib_smi.h | 94 +++ include/rdma/ib_user_cm.h | 328 ++++++++++ include/rdma/ib_user_mad.h | 137 ++++ include/rdma/ib_user_verbs.h | 422 ++++++++++++ include/rdma/ib_verbs.h | 1461 ++++++++++++++++++++++++++++++++++++++++++ 11 files changed, 4405 insertions(+) create mode 100644 include/rdma/ib_cache.h create mode 100644 include/rdma/ib_cm.h create mode 100644 include/rdma/ib_fmr_pool.h create mode 100644 include/rdma/ib_mad.h create mode 100644 include/rdma/ib_pack.h create mode 100644 include/rdma/ib_sa.h create mode 100644 include/rdma/ib_smi.h create mode 100644 include/rdma/ib_user_cm.h create mode 100644 include/rdma/ib_user_mad.h create mode 100644 include/rdma/ib_user_verbs.h create mode 100644 include/rdma/ib_verbs.h (limited to 'include') diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h new file mode 100644 index 00000000000..5bf9834f7dc --- /dev/null +++ b/include/rdma/ib_cache.h @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Intel Corporation. All rights reserved. 
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_cache.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef _IB_CACHE_H +#define _IB_CACHE_H + +#include + +/** + * ib_get_cached_gid - Returns a cached GID table entry + * @device: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached GID table to query. + * @gid: The GID value found at the specified index. + * + * ib_get_cached_gid() fetches the specified GID table entry stored in + * the local software cache. 
+ */ +int ib_get_cached_gid(struct ib_device *device, + u8 port_num, + int index, + union ib_gid *gid); + +/** + * ib_find_cached_gid - Returns the port number and GID table index where + * a specified GID value occurs. + * @device: The device to query. + * @gid: The GID value to search for. + * @port_num: The port number of the device where the GID value was found. + * @index: The index into the cached GID table where the GID was found. This + * parameter may be NULL. + * + * ib_find_cached_gid() searches for the specified GID value in + * the local software cache. + */ +int ib_find_cached_gid(struct ib_device *device, + union ib_gid *gid, + u8 *port_num, + u16 *index); + +/** + * ib_get_cached_pkey - Returns a cached PKey table entry + * @device_handle: The device to query. + * @port_num: The port number of the device to query. + * @index: The index into the cached PKey table to query. + * @pkey: The PKey value found at the specified index. + * + * ib_get_cached_pkey() fetches the specified PKey table entry stored in + * the local software cache. + */ +int ib_get_cached_pkey(struct ib_device *device_handle, + u8 port_num, + int index, + u16 *pkey); + +/** + * ib_find_cached_pkey - Returns the PKey table index where a specified + * PKey value occurs. + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the cached PKey table where the PKey was found. + * + * ib_find_cached_pkey() searches the specified PKey table in + * the local software cache. + */ +int ib_find_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index); + +#endif /* _IB_CACHE_H */ diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h new file mode 100644 index 00000000000..77fe9039209 --- /dev/null +++ b/include/rdma/ib_cm.h @@ -0,0 +1,568 @@ +/* + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation.
All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_cm.h 2730 2005-06-28 16:43:03Z sean.hefty $ + */ +#if !defined(IB_CM_H) +#define IB_CM_H + +#include +#include + +enum ib_cm_state { + IB_CM_IDLE, + IB_CM_LISTEN, + IB_CM_REQ_SENT, + IB_CM_REQ_RCVD, + IB_CM_MRA_REQ_SENT, + IB_CM_MRA_REQ_RCVD, + IB_CM_REP_SENT, + IB_CM_REP_RCVD, + IB_CM_MRA_REP_SENT, + IB_CM_MRA_REP_RCVD, + IB_CM_ESTABLISHED, + IB_CM_DREQ_SENT, + IB_CM_DREQ_RCVD, + IB_CM_TIMEWAIT, + IB_CM_SIDR_REQ_SENT, + IB_CM_SIDR_REQ_RCVD +}; + +enum ib_cm_lap_state { + IB_CM_LAP_IDLE, + IB_CM_LAP_SENT, + IB_CM_LAP_RCVD, + IB_CM_MRA_LAP_SENT, + IB_CM_MRA_LAP_RCVD, +}; + +enum ib_cm_event_type { + IB_CM_REQ_ERROR, + IB_CM_REQ_RECEIVED, + IB_CM_REP_ERROR, + IB_CM_REP_RECEIVED, + IB_CM_RTU_RECEIVED, + IB_CM_USER_ESTABLISHED, + IB_CM_DREQ_ERROR, + IB_CM_DREQ_RECEIVED, + IB_CM_DREP_RECEIVED, + IB_CM_TIMEWAIT_EXIT, + IB_CM_MRA_RECEIVED, + IB_CM_REJ_RECEIVED, + IB_CM_LAP_ERROR, + IB_CM_LAP_RECEIVED, + IB_CM_APR_RECEIVED, + IB_CM_SIDR_REQ_ERROR, + IB_CM_SIDR_REQ_RECEIVED, + IB_CM_SIDR_REP_RECEIVED +}; + +enum ib_cm_data_size { + IB_CM_REQ_PRIVATE_DATA_SIZE = 92, + IB_CM_MRA_PRIVATE_DATA_SIZE = 222, + IB_CM_REJ_PRIVATE_DATA_SIZE = 148, + IB_CM_REP_PRIVATE_DATA_SIZE = 196, + IB_CM_RTU_PRIVATE_DATA_SIZE = 224, + IB_CM_DREQ_PRIVATE_DATA_SIZE = 220, + IB_CM_DREP_PRIVATE_DATA_SIZE = 224, + IB_CM_REJ_ARI_LENGTH = 72, + IB_CM_LAP_PRIVATE_DATA_SIZE = 168, + IB_CM_APR_PRIVATE_DATA_SIZE = 148, + IB_CM_APR_INFO_LENGTH = 72, + IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE = 216, + IB_CM_SIDR_REP_PRIVATE_DATA_SIZE = 136, + IB_CM_SIDR_REP_INFO_LENGTH = 72 +}; + +struct ib_cm_id; + +struct ib_cm_req_event_param { + struct ib_cm_id *listen_id; + struct ib_device *device; + u8 port; + + struct ib_sa_path_rec *primary_path; + struct ib_sa_path_rec *alternate_path; + + __be64 remote_ca_guid; + u32 remote_qkey; + u32 remote_qpn; + enum ib_qp_type qp_type; + + u32 starting_psn; + u8 responder_resources; + u8 initiator_depth; + unsigned int local_cm_response_timeout:5; + unsigned int 
flow_control:1; + unsigned int remote_cm_response_timeout:5; + unsigned int retry_count:3; + unsigned int rnr_retry_count:3; + unsigned int srq:1; +}; + +struct ib_cm_rep_event_param { + __be64 remote_ca_guid; + u32 remote_qkey; + u32 remote_qpn; + u32 starting_psn; + u8 responder_resources; + u8 initiator_depth; + unsigned int target_ack_delay:5; + unsigned int failover_accepted:2; + unsigned int flow_control:1; + unsigned int rnr_retry_count:3; + unsigned int srq:1; +}; + +enum ib_cm_rej_reason { + IB_CM_REJ_NO_QP = 1, + IB_CM_REJ_NO_EEC = 2, + IB_CM_REJ_NO_RESOURCES = 3, + IB_CM_REJ_TIMEOUT = 4, + IB_CM_REJ_UNSUPPORTED = 5, + IB_CM_REJ_INVALID_COMM_ID = 6, + IB_CM_REJ_INVALID_COMM_INSTANCE = 7, + IB_CM_REJ_INVALID_SERVICE_ID = 8, + IB_CM_REJ_INVALID_TRANSPORT_TYPE = 9, + IB_CM_REJ_STALE_CONN = 10, + IB_CM_REJ_RDC_NOT_EXIST = 11, + IB_CM_REJ_INVALID_GID = 12, + IB_CM_REJ_INVALID_LID = 13, + IB_CM_REJ_INVALID_SL = 14, + IB_CM_REJ_INVALID_TRAFFIC_CLASS = 15, + IB_CM_REJ_INVALID_HOP_LIMIT = 16, + IB_CM_REJ_INVALID_PACKET_RATE = 17, + IB_CM_REJ_INVALID_ALT_GID = 18, + IB_CM_REJ_INVALID_ALT_LID = 19, + IB_CM_REJ_INVALID_ALT_SL = 20, + IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS = 21, + IB_CM_REJ_INVALID_ALT_HOP_LIMIT = 22, + IB_CM_REJ_INVALID_ALT_PACKET_RATE = 23, + IB_CM_REJ_PORT_CM_REDIRECT = 24, + IB_CM_REJ_PORT_REDIRECT = 25, + IB_CM_REJ_INVALID_MTU = 26, + IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES = 27, + IB_CM_REJ_CONSUMER_DEFINED = 28, + IB_CM_REJ_INVALID_RNR_RETRY = 29, + IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID = 30, + IB_CM_REJ_INVALID_CLASS_VERSION = 31, + IB_CM_REJ_INVALID_FLOW_LABEL = 32, + IB_CM_REJ_INVALID_ALT_FLOW_LABEL = 33 +}; + +struct ib_cm_rej_event_param { + enum ib_cm_rej_reason reason; + void *ari; + u8 ari_length; +}; + +struct ib_cm_mra_event_param { + u8 service_timeout; +}; + +struct ib_cm_lap_event_param { + struct ib_sa_path_rec *alternate_path; +}; + +enum ib_cm_apr_status { + IB_CM_APR_SUCCESS, + IB_CM_APR_INVALID_COMM_ID, + IB_CM_APR_UNSUPPORTED, + 
IB_CM_APR_REJECT, + IB_CM_APR_REDIRECT, + IB_CM_APR_IS_CURRENT, + IB_CM_APR_INVALID_QPN_EECN, + IB_CM_APR_INVALID_LID, + IB_CM_APR_INVALID_GID, + IB_CM_APR_INVALID_FLOW_LABEL, + IB_CM_APR_INVALID_TCLASS, + IB_CM_APR_INVALID_HOP_LIMIT, + IB_CM_APR_INVALID_PACKET_RATE, + IB_CM_APR_INVALID_SL +}; + +struct ib_cm_apr_event_param { + enum ib_cm_apr_status ap_status; + void *apr_info; + u8 info_len; +}; + +struct ib_cm_sidr_req_event_param { + struct ib_cm_id *listen_id; + struct ib_device *device; + u8 port; + u16 pkey; +}; + +enum ib_cm_sidr_status { + IB_SIDR_SUCCESS, + IB_SIDR_UNSUPPORTED, + IB_SIDR_REJECT, + IB_SIDR_NO_QP, + IB_SIDR_REDIRECT, + IB_SIDR_UNSUPPORTED_VERSION +}; + +struct ib_cm_sidr_rep_event_param { + enum ib_cm_sidr_status status; + u32 qkey; + u32 qpn; + void *info; + u8 info_len; + +}; + +struct ib_cm_event { + enum ib_cm_event_type event; + union { + struct ib_cm_req_event_param req_rcvd; + struct ib_cm_rep_event_param rep_rcvd; + /* No data for RTU received events. */ + struct ib_cm_rej_event_param rej_rcvd; + struct ib_cm_mra_event_param mra_rcvd; + struct ib_cm_lap_event_param lap_rcvd; + struct ib_cm_apr_event_param apr_rcvd; + /* No data for DREQ/DREP received events. */ + struct ib_cm_sidr_req_event_param sidr_req_rcvd; + struct ib_cm_sidr_rep_event_param sidr_rep_rcvd; + enum ib_wc_status send_status; + } param; + + void *private_data; +}; + +/** + * ib_cm_handler - User-defined callback to process communication events. + * @cm_id: Communication identifier associated with the reported event. + * @event: Information about the communication event. + * + * IB_CM_REQ_RECEIVED and IB_CM_SIDR_REQ_RECEIVED communication events + * generated as a result of listen requests result in the allocation of a + * new @cm_id. The new @cm_id is returned to the user through this callback. + * Clients are responsible for destroying the new @cm_id. 
For peer-to-peer + * IB_CM_REQ_RECEIVED and all other events, the returned @cm_id corresponds + * to a user's existing communication identifier. + * + * Users may not call ib_destroy_cm_id while in the context of this callback; + * however, returning a non-zero value instructs the communication manager to + * destroy the @cm_id after the callback completes. + */ +typedef int (*ib_cm_handler)(struct ib_cm_id *cm_id, + struct ib_cm_event *event); + +struct ib_cm_id { + ib_cm_handler cm_handler; + void *context; + __be64 service_id; + __be64 service_mask; + enum ib_cm_state state; /* internal CM/debug use */ + enum ib_cm_lap_state lap_state; /* internal CM/debug use */ + __be32 local_id; + __be32 remote_id; +}; + +/** + * ib_create_cm_id - Allocate a communication identifier. + * @cm_handler: Callback invoked to notify the user of CM events. + * @context: User specified context associated with the communication + * identifier. + * + * Communication identifiers are used to track connection states, service + * ID resolution requests, and listen requests. + */ +struct ib_cm_id *ib_create_cm_id(ib_cm_handler cm_handler, + void *context); + +/** + * ib_destroy_cm_id - Destroy a connection identifier. + * @cm_id: Connection identifier to destroy. + * + * This call blocks until the connection identifier is destroyed. + */ +void ib_destroy_cm_id(struct ib_cm_id *cm_id); + +#define IB_SERVICE_ID_AGN_MASK __constant_cpu_to_be64(0xFF00000000000000ULL) +#define IB_CM_ASSIGN_SERVICE_ID __constant_cpu_to_be64(0x0200000000000000ULL) + +/** + * ib_cm_listen - Initiates listening on the specified service ID for + * connection and service ID resolution requests. + * @cm_id: Connection identifier associated with the listen request. + * @service_id: Service identifier matched against incoming connection + * and service ID resolution requests. The service ID should be specified + * in network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will + * assign a service ID to the caller.
+ * @service_mask: Mask applied to service ID used to listen across a + * range of service IDs. If set to 0, the service ID is matched + * exactly. This parameter is ignored if %service_id is set to + * IB_CM_ASSIGN_SERVICE_ID. + */ +int ib_cm_listen(struct ib_cm_id *cm_id, + __be64 service_id, + __be64 service_mask); + +struct ib_cm_req_param { + struct ib_sa_path_rec *primary_path; + struct ib_sa_path_rec *alternate_path; + __be64 service_id; + u32 qp_num; + enum ib_qp_type qp_type; + u32 starting_psn; + const void *private_data; + u8 private_data_len; + u8 peer_to_peer; + u8 responder_resources; + u8 initiator_depth; + u8 remote_cm_response_timeout; + u8 flow_control; + u8 local_cm_response_timeout; + u8 retry_count; + u8 rnr_retry_count; + u8 max_cm_retries; + u8 srq; +}; + +/** + * ib_send_cm_req - Sends a connection request to the remote node. + * @cm_id: Connection identifier that will be associated with the + * connection request. + * @param: Connection request information needed to establish the + * connection. + */ +int ib_send_cm_req(struct ib_cm_id *cm_id, + struct ib_cm_req_param *param); + +struct ib_cm_rep_param { + u32 qp_num; + u32 starting_psn; + const void *private_data; + u8 private_data_len; + u8 responder_resources; + u8 initiator_depth; + u8 target_ack_delay; + u8 failover_accepted; + u8 flow_control; + u8 rnr_retry_count; + u8 srq; +}; + +/** + * ib_send_cm_rep - Sends a connection reply in response to a connection + * request. + * @cm_id: Connection identifier that will be associated with the + * connection request. + * @param: Connection reply information needed to establish the + * connection. + */ +int ib_send_cm_rep(struct ib_cm_id *cm_id, + struct ib_cm_rep_param *param); + +/** + * ib_send_cm_rtu - Sends a connection ready to use message in response + * to a connection reply message. + * @cm_id: Connection identifier associated with the connection request. 
+ * @private_data: Optional user-defined private data sent with the + * ready to use message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_rtu(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_dreq - Sends a disconnection request for an existing + * connection. + * @cm_id: Connection identifier associated with the connection being + * released. + * @private_data: Optional user-defined private data sent with the + * disconnection request message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_dreq(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_drep - Sends a disconnection reply to a disconnection request. + * @cm_id: Connection identifier associated with the connection being + * released. + * @private_data: Optional user-defined private data sent with the + * disconnection reply message. + * @private_data_len: Size of the private data buffer, in bytes. + * + * If the cm_id is in the correct state, the CM will transition the connection + * to the timewait state, even if an error occurs sending the DREP message. + */ +int ib_send_cm_drep(struct ib_cm_id *cm_id, + const void *private_data, + u8 private_data_len); + +/** + * ib_cm_establish - Forces a connection state to established. + * @cm_id: Connection identifier to transition to established. + * + * This routine should be invoked by users who receive messages on a + * connected QP before an RTU has been received. + */ +int ib_cm_establish(struct ib_cm_id *cm_id); + +/** + * ib_send_cm_rej - Sends a connection rejection message to the + * remote node. + * @cm_id: Connection identifier associated with the connection being + * rejected. + * @reason: Reason for the connection request rejection. + * @ari: Optional additional rejection information. + * @ari_length: Size of the additional rejection information, in bytes. 
+ * @private_data: Optional user-defined private data sent with the + * rejection message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_rej(struct ib_cm_id *cm_id, + enum ib_cm_rej_reason reason, + void *ari, + u8 ari_length, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_mra - Sends a message receipt acknowledgement to a connection + * message. + * @cm_id: Connection identifier associated with the connection message. + * @service_timeout: The maximum time required for the sender to reply to + * the connection message. + * @private_data: Optional user-defined private data sent with the + * message receipt acknowledgement. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_mra(struct ib_cm_id *cm_id, + u8 service_timeout, + const void *private_data, + u8 private_data_len); + +/** + * ib_send_cm_lap - Sends a load alternate path request. + * @cm_id: Connection identifier associated with the load alternate path + * message. + * @alternate_path: A path record that identifies the alternate path to + * load. + * @private_data: Optional user-defined private data sent with the + * load alternate path message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_lap(struct ib_cm_id *cm_id, + struct ib_sa_path_rec *alternate_path, + const void *private_data, + u8 private_data_len); + +/** + * ib_cm_init_qp_attr - Initializes the QP attributes for use in transitioning + * to a specified QP state. + * @cm_id: Communication identifier associated with the QP attributes to + * initialize. + * @qp_attr: On input, specifies the desired QP state. On output, the + * mandatory and desired optional attributes will be set in order to + * modify the QP to the specified state. + * @qp_attr_mask: The QP attribute mask that may be used to transition the + * QP to the specified state.
+ * + * Users must set the @qp_attr->qp_state to the desired QP state. This call + * will set all required attributes for the given transition, along with + * known optional attributes. Users may override the attributes returned from + * this call before calling ib_modify_qp. + */ +int ib_cm_init_qp_attr(struct ib_cm_id *cm_id, + struct ib_qp_attr *qp_attr, + int *qp_attr_mask); + +/** + * ib_send_cm_apr - Sends an alternate path response message in response to + * a load alternate path request. + * @cm_id: Connection identifier associated with the alternate path response. + * @status: Reply status sent with the alternate path response. + * @info: Optional additional information sent with the alternate path + * response. + * @info_length: Size of the additional information, in bytes. + * @private_data: Optional user-defined private data sent with the + * alternate path response message. + * @private_data_len: Size of the private data buffer, in bytes. + */ +int ib_send_cm_apr(struct ib_cm_id *cm_id, + enum ib_cm_apr_status status, + void *info, + u8 info_length, + const void *private_data, + u8 private_data_len); + +struct ib_cm_sidr_req_param { + struct ib_sa_path_rec *path; + __be64 service_id; + int timeout_ms; + const void *private_data; + u8 private_data_len; + u8 max_cm_retries; + u16 pkey; +}; + +/** + * ib_send_cm_sidr_req - Sends a service ID resolution request to the + * remote node. + * @cm_id: Communication identifier that will be associated with the + * service ID resolution request. + * @param: Service ID resolution request information. + */ +int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, + struct ib_cm_sidr_req_param *param); + +struct ib_cm_sidr_rep_param { + u32 qp_num; + u32 qkey; + enum ib_cm_sidr_status status; + const void *info; + u8 info_length; + const void *private_data; + u8 private_data_len; +}; + +/** + * ib_send_cm_sidr_rep - Sends a service ID resolution reply to the + * remote node.
+ * @cm_id: Communication identifier associated with the received service ID + * resolution request. + * @param: Service ID resolution reply information. + */ +int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id, + struct ib_cm_sidr_rep_param *param); + +#endif /* IB_CM_H */ diff --git a/include/rdma/ib_fmr_pool.h b/include/rdma/ib_fmr_pool.h new file mode 100644 index 00000000000..86b7e93f198 --- /dev/null +++ b/include/rdma/ib_fmr_pool.h @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_fmr_pool.h 2730 2005-06-28 16:43:03Z sean.hefty $ + */ + +#if !defined(IB_FMR_POOL_H) +#define IB_FMR_POOL_H + +#include + +struct ib_fmr_pool; + +/** + * struct ib_fmr_pool_param - Parameters for creating FMR pool + * @max_pages_per_fmr:Maximum number of pages per map request. + * @access:Access flags for FMRs in pool. + * @pool_size:Number of FMRs to allocate for pool. + * @dirty_watermark:Flush is triggered when @dirty_watermark dirty + * FMRs are present. + * @flush_function:Callback called when unmapped FMRs are flushed and + * more FMRs are possibly available for mapping + * @flush_arg:Context passed to user's flush function. + * @cache:If set, FMRs may be reused after unmapping for identical map + * requests. + */ +struct ib_fmr_pool_param { + int max_pages_per_fmr; + enum ib_access_flags access; + int pool_size; + int dirty_watermark; + void (*flush_function)(struct ib_fmr_pool *pool, + void * arg); + void *flush_arg; + unsigned cache:1; +}; + +struct ib_pool_fmr { + struct ib_fmr *fmr; + struct ib_fmr_pool *pool; + struct list_head list; + struct hlist_node cache_node; + int ref_count; + int remap_count; + u64 io_virtual_address; + int page_list_len; + u64 page_list[0]; +}; + +struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd *pd, + struct ib_fmr_pool_param *params); + +void ib_destroy_fmr_pool(struct ib_fmr_pool *pool); + +int ib_flush_fmr_pool(struct ib_fmr_pool *pool); + +struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle, + u64 *page_list, + int list_len, + u64 *io_virtual_address); + +int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr); + +#endif /* IB_FMR_POOL_H */ diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h new file mode 100644 index 00000000000..fc6b1c18ffc --- /dev/null +++ b/include/rdma/ib_mad.h @@ -0,0 +1,579 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. 
All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_mad.h 2775 2005-07-02 13:42:12Z halr $ + */ + +#if !defined( IB_MAD_H ) +#define IB_MAD_H + +#include + +#include + +/* Management base version */ +#define IB_MGMT_BASE_VERSION 1 + +/* Management classes */ +#define IB_MGMT_CLASS_SUBN_LID_ROUTED 0x01 +#define IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE 0x81 +#define IB_MGMT_CLASS_SUBN_ADM 0x03 +#define IB_MGMT_CLASS_PERF_MGMT 0x04 +#define IB_MGMT_CLASS_BM 0x05 +#define IB_MGMT_CLASS_DEVICE_MGMT 0x06 +#define IB_MGMT_CLASS_CM 0x07 +#define IB_MGMT_CLASS_SNMP 0x08 +#define IB_MGMT_CLASS_VENDOR_RANGE2_START 0x30 +#define IB_MGMT_CLASS_VENDOR_RANGE2_END 0x4F + +#define IB_OPENIB_OUI (0x001405) + +/* Management methods */ +#define IB_MGMT_METHOD_GET 0x01 +#define IB_MGMT_METHOD_SET 0x02 +#define IB_MGMT_METHOD_GET_RESP 0x81 +#define IB_MGMT_METHOD_SEND 0x03 +#define IB_MGMT_METHOD_TRAP 0x05 +#define IB_MGMT_METHOD_REPORT 0x06 +#define IB_MGMT_METHOD_REPORT_RESP 0x86 +#define IB_MGMT_METHOD_TRAP_REPRESS 0x07 + +#define IB_MGMT_METHOD_RESP 0x80 + +#define IB_MGMT_MAX_METHODS 128 + +/* RMPP information */ +#define IB_MGMT_RMPP_VERSION 1 + +#define IB_MGMT_RMPP_TYPE_DATA 1 +#define IB_MGMT_RMPP_TYPE_ACK 2 +#define IB_MGMT_RMPP_TYPE_STOP 3 +#define IB_MGMT_RMPP_TYPE_ABORT 4 + +#define IB_MGMT_RMPP_FLAG_ACTIVE 1 +#define IB_MGMT_RMPP_FLAG_FIRST (1<<1) +#define IB_MGMT_RMPP_FLAG_LAST (1<<2) + +#define IB_MGMT_RMPP_NO_RESPTIME 0x1F + +#define IB_MGMT_RMPP_STATUS_SUCCESS 0 +#define IB_MGMT_RMPP_STATUS_RESX 1 +#define IB_MGMT_RMPP_STATUS_ABORT_MIN 118 +#define IB_MGMT_RMPP_STATUS_T2L 118 +#define IB_MGMT_RMPP_STATUS_BAD_LEN 119 +#define IB_MGMT_RMPP_STATUS_BAD_SEG 120 +#define IB_MGMT_RMPP_STATUS_BADT 121 +#define IB_MGMT_RMPP_STATUS_W2S 122 +#define IB_MGMT_RMPP_STATUS_S2B 123 +#define IB_MGMT_RMPP_STATUS_BAD_STATUS 124 +#define IB_MGMT_RMPP_STATUS_UNV 125 +#define IB_MGMT_RMPP_STATUS_TMR 126 +#define IB_MGMT_RMPP_STATUS_UNSPEC 127 +#define IB_MGMT_RMPP_STATUS_ABORT_MAX 127 + +#define IB_QP0 0 +#define IB_QP1 
__constant_htonl(1) +#define IB_QP1_QKEY 0x80010000 +#define IB_QP_SET_QKEY 0x80000000 + +struct ib_mad_hdr { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + __be16 class_specific; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; +}; + +struct ib_rmpp_hdr { + u8 rmpp_version; + u8 rmpp_type; + u8 rmpp_rtime_flags; + u8 rmpp_status; + __be32 seg_num; + __be32 paylen_newwin; +}; + +typedef u64 __bitwise ib_sa_comp_mask; + +/* Component mask with only bit n set, converted to big-endian. The argument is parenthesized so that compound expressions expand correctly. */ +#define IB_SA_COMP_MASK(n) ((__force ib_sa_comp_mask) cpu_to_be64(1ull << (n))) + +/* + * ib_sa_hdr and ib_sa_mad structures must be packed because they have + * 64-bit fields that are only 32-bit aligned. 64-bit architectures will + * lay them out wrong otherwise. (And unfortunately they are sent on + * the wire so we can't change the layout) + */ +struct ib_sa_hdr { + __be64 sm_key; + __be16 attr_offset; + __be16 reserved; + ib_sa_comp_mask comp_mask; +} __attribute__ ((packed)); + +struct ib_mad { + struct ib_mad_hdr mad_hdr; + u8 data[232]; +}; + +struct ib_rmpp_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 data[220]; +}; + +struct ib_sa_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + struct ib_sa_hdr sa_hdr; + u8 data[200]; +} __attribute__ ((packed)); + +struct ib_vendor_mad { + struct ib_mad_hdr mad_hdr; + struct ib_rmpp_hdr rmpp_hdr; + u8 reserved; + u8 oui[3]; + u8 data[216]; +}; + +/** + * ib_mad_send_buf - MAD data buffer and work request for sends. + * @mad: References an allocated MAD data buffer. The size of the data + * buffer is specified in the @send_wr.length field. + * @mapping: DMA mapping information. + * @mad_agent: MAD agent that allocated the buffer. + * @context: User-controlled context fields. + * @send_wr: An initialized work request structure used when sending the MAD. + * The wr_id field of the work request is initialized to reference this + * data structure. + * @sge: A scatter-gather list referenced by the work request.
+ * + * Users are responsible for initializing the MAD buffer itself, with the + * exception of specifying the payload length field in any RMPP MAD. + */ +struct ib_mad_send_buf { + struct ib_mad *mad; + DECLARE_PCI_UNMAP_ADDR(mapping) + struct ib_mad_agent *mad_agent; + void *context[2]; + struct ib_send_wr send_wr; + struct ib_sge sge; +}; + +/** + * ib_get_rmpp_resptime - Returns the RMPP response time. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags >> 3; +} + +/** + * ib_get_rmpp_flags - Returns the RMPP flags. + * @rmpp_hdr: An RMPP header. + */ +static inline u8 ib_get_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr) +{ + return rmpp_hdr->rmpp_rtime_flags & 0x7; +} + +/** + * ib_set_rmpp_resptime - Sets the response time in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @rtime: The response time to set. + */ +static inline void ib_set_rmpp_resptime(struct ib_rmpp_hdr *rmpp_hdr, u8 rtime) +{ + rmpp_hdr->rmpp_rtime_flags = ib_get_rmpp_flags(rmpp_hdr) | (rtime << 3); +} + +/** + * ib_set_rmpp_flags - Sets the flags in an RMPP header. + * @rmpp_hdr: An RMPP header. + * @flags: The flags to set. + * + * Only the low three (flag) bits of rmpp_rtime_flags are replaced; the + * response time held in the upper five bits is preserved. + */ +static inline void ib_set_rmpp_flags(struct ib_rmpp_hdr *rmpp_hdr, u8 flags) +{ + rmpp_hdr->rmpp_rtime_flags = (rmpp_hdr->rmpp_rtime_flags & 0xF8) | + (flags & 0x7); +} + +struct ib_mad_agent; +struct ib_mad_send_wc; +struct ib_mad_recv_wc; + +/** + * ib_mad_send_handler - callback handler for a sent MAD. + * @mad_agent: MAD agent that sent the MAD. + * @mad_send_wc: Send work completion information on the sent MAD. + */ +typedef void (*ib_mad_send_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_send_wc *mad_send_wc); + +/** + * ib_mad_snoop_handler - Callback handler for snooping sent MADs. + * @mad_agent: MAD agent that snooped the MAD. + * @send_wr: Work request information on the sent MAD. + * @mad_send_wc: Work completion information on the sent MAD.
Valid + * only for snooping that occurs on a send completion. + * + * Clients snooping MADs should not modify data referenced by the @send_wr + * or @mad_send_wc. + */ +typedef void (*ib_mad_snoop_handler)(struct ib_mad_agent *mad_agent, + struct ib_send_wr *send_wr, + struct ib_mad_send_wc *mad_send_wc); + +/** + * ib_mad_recv_handler - callback handler for a received MAD. + * @mad_agent: MAD agent requesting the received MAD. + * @mad_recv_wc: Received work completion information on the received MAD. + * + * MADs received in response to a send request operation will be handed to + * the user after the send operation completes. All data buffers given + * to registered agents through this routine are owned by the receiving + * client, except for snooping agents. Clients snooping MADs should not + * modify the data referenced by @mad_recv_wc. + */ +typedef void (*ib_mad_recv_handler)(struct ib_mad_agent *mad_agent, + struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_mad_agent - Used to track MAD registration with the access layer. + * @device: Reference to device registration is on. + * @qp: Reference to QP used for sending and receiving MADs. + * @mr: Memory region for system memory usable for DMA. + * @recv_handler: Callback handler for a received MAD. + * @send_handler: Callback handler for a sent MAD. + * @snoop_handler: Callback handler for snooped sent MADs. + * @context: User-specified context associated with this registration. + * @hi_tid: Access layer assigned transaction ID for this client. + * Unsolicited MADs sent by this client will have the upper 32-bits + * of their TID set to this value. + * @port_num: Port number on which QP is registered + * @rmpp_version: If set, indicates the RMPP version used by this agent. 
+ */ +struct ib_mad_agent { + struct ib_device *device; + struct ib_qp *qp; + struct ib_mr *mr; + ib_mad_recv_handler recv_handler; + ib_mad_send_handler send_handler; + ib_mad_snoop_handler snoop_handler; + void *context; + u32 hi_tid; + u8 port_num; + u8 rmpp_version; +}; + +/** + * ib_mad_send_wc - MAD send completion information. + * @wr_id: Work request identifier associated with the send MAD request. + * @status: Completion status. + * @vendor_err: Optional vendor error information returned with a failed + * request. + */ +struct ib_mad_send_wc { + u64 wr_id; + enum ib_wc_status status; + u32 vendor_err; +}; + +/** + * ib_mad_recv_buf - received MAD buffer information. + * @list: Reference to next data buffer for a received RMPP MAD. + * @grh: References a data buffer containing the global route header. + * The data referenced by this buffer is only valid if the GRH is + * valid. + * @mad: References the start of the received MAD. + */ +struct ib_mad_recv_buf { + struct list_head list; + struct ib_grh *grh; + struct ib_mad *mad; +}; + +/** + * ib_mad_recv_wc - received MAD information. + * @wc: Completion information for the received data. + * @recv_buf: Specifies the location of the received data buffer(s). + * @rmpp_list: Specifies a list of RMPP reassembled received MAD buffers. + * @mad_len: The length of the received MAD, without duplicated headers. + * + * For a received response, the wr_id field of the wc is set to the wr_id + * for the corresponding send request. + */ +struct ib_mad_recv_wc { + struct ib_wc *wc; + struct ib_mad_recv_buf recv_buf; + struct list_head rmpp_list; + int mad_len; +}; + +/** + * ib_mad_reg_req - MAD registration request + * @mgmt_class: Indicates which management class of MADs should be received + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. + * @mgmt_class_version: Indicates which version of MADs for the given + * management class to receive.
+ * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @method_mask: The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + */ +struct ib_mad_reg_req { + u8 mgmt_class; + u8 mgmt_class_version; + u8 oui[3]; + DECLARE_BITMAP(method_mask, IB_MGMT_MAX_METHODS); +}; + +/** + * ib_register_mad_agent - Register to send/receive MADs. + * @device: The device to register with. + * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP to access. Must be either + * IB_QPT_SMI or IB_QPT_GSI. + * @mad_reg_req: Specifies which unsolicited MADs should be received + * by the caller. This parameter may be NULL if the caller only + * wishes to receive solicited responses. + * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + struct ib_mad_reg_req *mad_reg_req, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +enum ib_mad_snoop_flags { + /*IB_MAD_SNOOP_POSTED_SENDS = 1,*/ + /*IB_MAD_SNOOP_RMPP_SENDS = (1<<1),*/ + IB_MAD_SNOOP_SEND_COMPLETIONS = (1<<2), + /*IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS = (1<<3),*/ + IB_MAD_SNOOP_RECVS = (1<<4) + /*IB_MAD_SNOOP_RMPP_RECVS = (1<<5),*/ + /*IB_MAD_SNOOP_REDIRECTED_QPS = (1<<6)*/ +}; + +/** + * ib_register_mad_snoop - Register to snoop sent and received MADs. + * @device: The device to register with. 
+ * @port_num: The port on the specified device to use. + * @qp_type: Specifies which QP traffic to snoop. Must be either + * IB_QPT_SMI or IB_QPT_GSI. + * @mad_snoop_flags: Specifies information where snooping occurs. + * @snoop_handler: The callback routine invoked for a snooped send. + * @recv_handler: The callback routine invoked for a snooped receive. + * @context: User specified context associated with the registration. + */ +struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device, + u8 port_num, + enum ib_qp_type qp_type, + int mad_snoop_flags, + ib_mad_snoop_handler snoop_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_unregister_mad_agent - Unregisters a client from using MAD services. + * @mad_agent: Corresponding MAD registration request to deregister. + * + * After invoking this routine, MAD services are no longer usable by the + * client on the associated QP. + */ +int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent); + +/** + * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated + * with the registered client. + * @mad_agent: Specifies the associated registration to post the send to. + * @send_wr: Specifies the information needed to send the MAD(s). + * @bad_send_wr: Specifies the MAD on which an error was encountered. + * + * Sent MADs are not guaranteed to complete in the order that they were posted. + * + * If the MAD requires RMPP, the data buffer should contain a single copy + * of the common MAD, RMPP, and class specific headers, followed by the class + * defined data. If the class defined data would not divide evenly into + * RMPP segments, then space must be allocated at the end of the referenced + * buffer for any required padding. To indicate the amount of class defined + * data being transferred, the paylen_newwin field in the RMPP header should + * be set to the size of the class specific header plus the amount of class + * defined data being transferred.
The paylen_newwin field should be + * specified in network-byte order. + */ +int ib_post_send_mad(struct ib_mad_agent *mad_agent, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + +/** + * ib_coalesce_recv_mad - Coalesces received MAD data into a single buffer. + * @mad_recv_wc: Work completion information for a received MAD. + * @buf: User-provided data buffer to receive the coalesced buffers. The + * referenced buffer should be at least the size of the mad_len specified + * by @mad_recv_wc. + * + * This call copies a chain of received MAD segments into a single data buffer, + * removing duplicated headers. + */ +void ib_coalesce_recv_mad(struct ib_mad_recv_wc *mad_recv_wc, void *buf); + +/** + * ib_free_recv_mad - Returns data buffers used to receive a MAD. + * @mad_recv_wc: Work completion information for a received MAD. + * + * Clients receiving MADs through their ib_mad_recv_handler must call this + * routine to return the work completion buffers to the access layer. + */ +void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc); + +/** + * ib_cancel_mad - Cancels an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @wr_id: Indicates the work request identifier of the MAD to cancel. + * + * MADs will be returned to the user through the corresponding + * ib_mad_send_handler. + */ +void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id); + +/** + * ib_modify_mad - Modifies an outstanding send MAD operation. + * @mad_agent: Specifies the registration associated with sent MAD. + * @wr_id: Indicates the work request identifier of the MAD to modify. + * @timeout_ms: New timeout value for sent MAD. + * + * This call will reset the timeout value for a sent MAD to the specified + * value. + */ +int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms); + +/** + * ib_redirect_mad_qp - Registers a QP for MAD services. + * @qp: Reference to a QP that requires MAD services. 
+ * @rmpp_version: If set, indicates that the client will send + * and receive MADs that contain the RMPP header for the given version. + * If set to 0, indicates that RMPP is not used by this client. + * @send_handler: The completion callback routine invoked after a send + * request has completed. + * @recv_handler: The completion callback routine invoked for a received + * MAD. + * @context: User specified context associated with the registration. + * + * Use of this call allows clients to use MAD services, such as RMPP, + * on user-owned QPs. After calling this routine, users may send + * MADs on the specified QP by calling ib_post_send_mad(). + */ +struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp, + u8 rmpp_version, + ib_mad_send_handler send_handler, + ib_mad_recv_handler recv_handler, + void *context); + +/** + * ib_process_mad_wc - Processes a work completion associated with a + * MAD sent or received on a redirected QP. + * @mad_agent: Specifies the registered MAD service using the redirected QP. + * @wc: References a work completion associated with a sent or received + * MAD segment. + * + * This routine is used to complete or continue processing on a MAD request. + * If the work completion is associated with a send operation, calling + * this routine is required to continue an RMPP transfer or to wait for a + * corresponding response, if it is a request. If the work completion is + * associated with a receive operation, calling this routine is required to + * process an inbound or outbound RMPP transfer, or to match a response MAD + * with its corresponding request. + */ +int ib_process_mad_wc(struct ib_mad_agent *mad_agent, + struct ib_wc *wc); + +/** + * ib_create_send_mad - Allocate and initialize a data buffer and work request + * for sending a MAD. + * @mad_agent: Specifies the registered MAD service to associate with the MAD. + * @remote_qpn: Specifies the QPN of the receiving node.
+ * @pkey_index: Specifies which PKey the MAD will be sent using. This field + * is valid only if the remote_qpn is QP 1. + * @ah: References the address handle used to transfer to the remote node. + * @rmpp_active: Indicates if the send will enable RMPP. + * @hdr_len: Indicates the size of the data header of the MAD. This length + * should include the common MAD header, RMPP header, plus any class + * specific header. + * @data_len: Indicates the size of any user-transferred data. The call will + * automatically adjust the allocated buffer size to account for any + * additional padding that may be necessary. + * @gfp_mask: GFP mask used for the memory allocation. + * + * This is a helper routine that may be used to allocate a MAD. Users are + * not required to allocate outbound MADs using this call. The returned + * MAD send buffer will reference a data buffer usable for sending a MAD, along + * with an initialized work request structure. Users may modify the returned + * MAD data buffer or work request before posting the send. + * + * The returned data buffer will be cleared. Users are responsible for + * initializing the common MAD and any class specific headers. If @rmpp_active + * is set, the RMPP header will be initialized for sending. + */ +struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent, + u32 remote_qpn, u16 pkey_index, + struct ib_ah *ah, int rmpp_active, + int hdr_len, int data_len, + unsigned int __nocast gfp_mask); + +/** + * ib_free_send_mad - Returns data buffers used to send a MAD. + * @send_buf: Previously allocated send data buffer. + */ +void ib_free_send_mad(struct ib_mad_send_buf *send_buf); + +#endif /* IB_MAD_H */ diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h new file mode 100644 index 00000000000..f926020d633 --- /dev/null +++ b/include/rdma/ib_pack.h @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2004 Topspin Corporation. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_pack.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#ifndef IB_PACK_H +#define IB_PACK_H + +#include + +enum { + IB_LRH_BYTES = 8, + IB_GRH_BYTES = 40, + IB_BTH_BYTES = 12, + IB_DETH_BYTES = 8 +}; + +struct ib_field { + size_t struct_offset_bytes; + size_t struct_size_bytes; + int offset_words; + int offset_bits; + int size_bits; + char *field_name; +}; + +#define RESERVED \ + .field_name = "reserved" + +/* + * This macro cleans up the definitions of constants for BTH opcodes. + * It is used to define constants such as IB_OPCODE_UD_SEND_ONLY, + * which becomes IB_OPCODE_UD + IB_OPCODE_SEND_ONLY, and this gives + * the correct value. 
+ * + * In short, user code should use the constants defined using the + * macro rather than worrying about adding together other constants. +*/ +#define IB_OPCODE(transport, op) \ + IB_OPCODE_ ## transport ## _ ## op = \ + IB_OPCODE_ ## transport + IB_OPCODE_ ## op + +enum { + /* transport types -- just used to define real constants */ + IB_OPCODE_RC = 0x00, + IB_OPCODE_UC = 0x20, + IB_OPCODE_RD = 0x40, + IB_OPCODE_UD = 0x60, + + /* operations -- just used to define real constants */ + IB_OPCODE_SEND_FIRST = 0x00, + IB_OPCODE_SEND_MIDDLE = 0x01, + IB_OPCODE_SEND_LAST = 0x02, + IB_OPCODE_SEND_LAST_WITH_IMMEDIATE = 0x03, + IB_OPCODE_SEND_ONLY = 0x04, + IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE = 0x05, + IB_OPCODE_RDMA_WRITE_FIRST = 0x06, + IB_OPCODE_RDMA_WRITE_MIDDLE = 0x07, + IB_OPCODE_RDMA_WRITE_LAST = 0x08, + IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE = 0x09, + IB_OPCODE_RDMA_WRITE_ONLY = 0x0a, + IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE = 0x0b, + IB_OPCODE_RDMA_READ_REQUEST = 0x0c, + IB_OPCODE_RDMA_READ_RESPONSE_FIRST = 0x0d, + IB_OPCODE_RDMA_READ_RESPONSE_MIDDLE = 0x0e, + IB_OPCODE_RDMA_READ_RESPONSE_LAST = 0x0f, + IB_OPCODE_RDMA_READ_RESPONSE_ONLY = 0x10, + IB_OPCODE_ACKNOWLEDGE = 0x11, + IB_OPCODE_ATOMIC_ACKNOWLEDGE = 0x12, + IB_OPCODE_COMPARE_SWAP = 0x13, + IB_OPCODE_FETCH_ADD = 0x14, + + /* real constants follow -- see comment about above IB_OPCODE() + macro for more details */ + + /* RC */ + IB_OPCODE(RC, SEND_FIRST), + IB_OPCODE(RC, SEND_MIDDLE), + IB_OPCODE(RC, SEND_LAST), + IB_OPCODE(RC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, SEND_ONLY), + IB_OPCODE(RC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_FIRST), + IB_OPCODE(RC, RDMA_WRITE_MIDDLE), + IB_OPCODE(RC, RDMA_WRITE_LAST), + IB_OPCODE(RC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_WRITE_ONLY), + IB_OPCODE(RC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RC, RDMA_READ_REQUEST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RC, 
RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RC, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RC, ACKNOWLEDGE), + IB_OPCODE(RC, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RC, COMPARE_SWAP), + IB_OPCODE(RC, FETCH_ADD), + + /* UC */ + IB_OPCODE(UC, SEND_FIRST), + IB_OPCODE(UC, SEND_MIDDLE), + IB_OPCODE(UC, SEND_LAST), + IB_OPCODE(UC, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, SEND_ONLY), + IB_OPCODE(UC, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_FIRST), + IB_OPCODE(UC, RDMA_WRITE_MIDDLE), + IB_OPCODE(UC, RDMA_WRITE_LAST), + IB_OPCODE(UC, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(UC, RDMA_WRITE_ONLY), + IB_OPCODE(UC, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + + /* RD */ + IB_OPCODE(RD, SEND_FIRST), + IB_OPCODE(RD, SEND_MIDDLE), + IB_OPCODE(RD, SEND_LAST), + IB_OPCODE(RD, SEND_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, SEND_ONLY), + IB_OPCODE(RD, SEND_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_FIRST), + IB_OPCODE(RD, RDMA_WRITE_MIDDLE), + IB_OPCODE(RD, RDMA_WRITE_LAST), + IB_OPCODE(RD, RDMA_WRITE_LAST_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_WRITE_ONLY), + IB_OPCODE(RD, RDMA_WRITE_ONLY_WITH_IMMEDIATE), + IB_OPCODE(RD, RDMA_READ_REQUEST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_FIRST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_MIDDLE), + IB_OPCODE(RD, RDMA_READ_RESPONSE_LAST), + IB_OPCODE(RD, RDMA_READ_RESPONSE_ONLY), + IB_OPCODE(RD, ACKNOWLEDGE), + IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE), + IB_OPCODE(RD, COMPARE_SWAP), + IB_OPCODE(RD, FETCH_ADD), + + /* UD */ + IB_OPCODE(UD, SEND_ONLY), + IB_OPCODE(UD, SEND_ONLY_WITH_IMMEDIATE) +}; + +enum { + IB_LNH_RAW = 0, + IB_LNH_IP = 1, + IB_LNH_IBA_LOCAL = 2, + IB_LNH_IBA_GLOBAL = 3 +}; + +struct ib_unpacked_lrh { + u8 virtual_lane; + u8 link_version; + u8 service_level; + u8 link_next_header; + __be16 destination_lid; + __be16 packet_length; + __be16 source_lid; +}; + +struct ib_unpacked_grh { + u8 ip_version; + u8 traffic_class; + __be32 flow_label; + __be16 payload_length; + u8 next_header; + u8 hop_limit; + union ib_gid source_gid; + union ib_gid 
destination_gid; +}; + +struct ib_unpacked_bth { + u8 opcode; + u8 solicited_event; + u8 mig_req; + u8 pad_count; + u8 transport_header_version; + __be16 pkey; + __be32 destination_qpn; + u8 ack_req; + __be32 psn; +}; + +struct ib_unpacked_deth { + __be32 qkey; + __be32 source_qpn; +}; + +struct ib_ud_header { + struct ib_unpacked_lrh lrh; + int grh_present; + struct ib_unpacked_grh grh; + struct ib_unpacked_bth bth; + struct ib_unpacked_deth deth; + int immediate_present; + __be32 immediate_data; +}; + +void ib_pack(const struct ib_field *desc, + int desc_len, + void *structure, + void *buf); + +void ib_unpack(const struct ib_field *desc, + int desc_len, + void *buf, + void *structure); + +void ib_ud_header_init(int payload_bytes, + int grh_present, + struct ib_ud_header *header); + +int ib_ud_header_pack(struct ib_ud_header *header, + void *buf); + +int ib_ud_header_unpack(void *buf, + struct ib_ud_header *header); + +#endif /* IB_PACK_H */ diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h new file mode 100644 index 00000000000..c022edfc49d --- /dev/null +++ b/include/rdma/ib_sa.h @@ -0,0 +1,373 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_sa.h 2811 2005-07-06 18:11:43Z halr $ + */ + +#ifndef IB_SA_H +#define IB_SA_H + +#include + +#include +#include + +enum { + IB_SA_CLASS_VERSION = 2, /* IB spec version 1.1/1.2 */ + + IB_SA_METHOD_GET_TABLE = 0x12, + IB_SA_METHOD_GET_TABLE_RESP = 0x92, + IB_SA_METHOD_DELETE = 0x15 +}; + +enum ib_sa_selector { + IB_SA_GTE = 0, + IB_SA_LTE = 1, + IB_SA_EQ = 2, + /* + * The meaning of "best" depends on the attribute: for + * example, for MTU best will return the largest available + * MTU, while for packet life time, best will return the + * smallest available life time. 
+ */ + IB_SA_BEST = 3 +}; + +enum ib_sa_rate { + IB_SA_RATE_2_5_GBPS = 2, + IB_SA_RATE_5_GBPS = 5, + IB_SA_RATE_10_GBPS = 3, + IB_SA_RATE_20_GBPS = 6, + IB_SA_RATE_30_GBPS = 4, + IB_SA_RATE_40_GBPS = 7, + IB_SA_RATE_60_GBPS = 8, + IB_SA_RATE_80_GBPS = 9, + IB_SA_RATE_120_GBPS = 10 +}; + +static inline int ib_sa_rate_enum_to_int(enum ib_sa_rate rate) +{ + switch (rate) { + case IB_SA_RATE_2_5_GBPS: return 1; + case IB_SA_RATE_5_GBPS: return 2; + case IB_SA_RATE_10_GBPS: return 4; + case IB_SA_RATE_20_GBPS: return 8; + case IB_SA_RATE_30_GBPS: return 12; + case IB_SA_RATE_40_GBPS: return 16; + case IB_SA_RATE_60_GBPS: return 24; + case IB_SA_RATE_80_GBPS: return 32; + case IB_SA_RATE_120_GBPS: return 48; + default: return -1; + } +} + +/* + * Structures for SA records are named "struct ib_sa_xxx_rec." No + * attempt is made to pack structures to match the physical layout of + * SA records in SA MADs; all packing and unpacking is handled by the + * SA query code. + * + * For a record with structure ib_sa_xxx_rec, the naming convention + * for the component mask value for field yyy is IB_SA_XXX_REC_YYY (we + * never use different abbreviations or otherwise change the spelling + * of xxx/yyy between ib_sa_xxx_rec.yyy and IB_SA_XXX_REC_YYY). + * + * Reserved rows are indicated with comments to help maintainability. 
+ */ + +/* reserved: 0 */ +/* reserved: 1 */ +#define IB_SA_PATH_REC_DGID IB_SA_COMP_MASK( 2) +#define IB_SA_PATH_REC_SGID IB_SA_COMP_MASK( 3) +#define IB_SA_PATH_REC_DLID IB_SA_COMP_MASK( 4) +#define IB_SA_PATH_REC_SLID IB_SA_COMP_MASK( 5) +#define IB_SA_PATH_REC_RAW_TRAFFIC IB_SA_COMP_MASK( 6) +/* reserved: 7 */ +#define IB_SA_PATH_REC_FLOW_LABEL IB_SA_COMP_MASK( 8) +#define IB_SA_PATH_REC_HOP_LIMIT IB_SA_COMP_MASK( 9) +#define IB_SA_PATH_REC_TRAFFIC_CLASS IB_SA_COMP_MASK(10) +#define IB_SA_PATH_REC_REVERSIBLE IB_SA_COMP_MASK(11) +#define IB_SA_PATH_REC_NUMB_PATH IB_SA_COMP_MASK(12) +#define IB_SA_PATH_REC_PKEY IB_SA_COMP_MASK(13) +/* reserved: 14 */ +#define IB_SA_PATH_REC_SL IB_SA_COMP_MASK(15) +#define IB_SA_PATH_REC_MTU_SELECTOR IB_SA_COMP_MASK(16) +#define IB_SA_PATH_REC_MTU IB_SA_COMP_MASK(17) +#define IB_SA_PATH_REC_RATE_SELECTOR IB_SA_COMP_MASK(18) +#define IB_SA_PATH_REC_RATE IB_SA_COMP_MASK(19) +#define IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(20) +#define IB_SA_PATH_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(21) +#define IB_SA_PATH_REC_PREFERENCE IB_SA_COMP_MASK(22) + +struct ib_sa_path_rec { + /* reserved */ + /* reserved */ + union ib_gid dgid; + union ib_gid sgid; + __be16 dlid; + __be16 slid; + int raw_traffic; + /* reserved */ + __be32 flow_label; + u8 hop_limit; + u8 traffic_class; + int reversible; + u8 numb_path; + __be16 pkey; + /* reserved */ + u8 sl; + u8 mtu_selector; + u8 mtu; + u8 rate_selector; + u8 rate; + u8 packet_life_time_selector; + u8 packet_life_time; + u8 preference; +}; + +#define IB_SA_MCMEMBER_REC_MGID IB_SA_COMP_MASK( 0) +#define IB_SA_MCMEMBER_REC_PORT_GID IB_SA_COMP_MASK( 1) +#define IB_SA_MCMEMBER_REC_QKEY IB_SA_COMP_MASK( 2) +#define IB_SA_MCMEMBER_REC_MLID IB_SA_COMP_MASK( 3) +#define IB_SA_MCMEMBER_REC_MTU_SELECTOR IB_SA_COMP_MASK( 4) +#define IB_SA_MCMEMBER_REC_MTU IB_SA_COMP_MASK( 5) +#define IB_SA_MCMEMBER_REC_TRAFFIC_CLASS IB_SA_COMP_MASK( 6) +#define IB_SA_MCMEMBER_REC_PKEY IB_SA_COMP_MASK( 7) +#define 
IB_SA_MCMEMBER_REC_RATE_SELECTOR IB_SA_COMP_MASK( 8) +#define IB_SA_MCMEMBER_REC_RATE IB_SA_COMP_MASK( 9) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME_SELECTOR IB_SA_COMP_MASK(10) +#define IB_SA_MCMEMBER_REC_PACKET_LIFE_TIME IB_SA_COMP_MASK(11) +#define IB_SA_MCMEMBER_REC_SL IB_SA_COMP_MASK(12) +#define IB_SA_MCMEMBER_REC_FLOW_LABEL IB_SA_COMP_MASK(13) +#define IB_SA_MCMEMBER_REC_HOP_LIMIT IB_SA_COMP_MASK(14) +#define IB_SA_MCMEMBER_REC_SCOPE IB_SA_COMP_MASK(15) +#define IB_SA_MCMEMBER_REC_JOIN_STATE IB_SA_COMP_MASK(16) +#define IB_SA_MCMEMBER_REC_PROXY_JOIN IB_SA_COMP_MASK(17) + +struct ib_sa_mcmember_rec { + union ib_gid mgid; + union ib_gid port_gid; + __be32 qkey; + __be16 mlid; + u8 mtu_selector; + u8 mtu; + u8 traffic_class; + __be16 pkey; + u8 rate_selector; + u8 rate; + u8 packet_life_time_selector; + u8 packet_life_time; + u8 sl; + __be32 flow_label; + u8 hop_limit; + u8 scope; + u8 join_state; + int proxy_join; +}; + +/* Service Record Component Mask Sec 15.2.5.14 Ver 1.1 */ +#define IB_SA_SERVICE_REC_SERVICE_ID IB_SA_COMP_MASK( 0) +#define IB_SA_SERVICE_REC_SERVICE_GID IB_SA_COMP_MASK( 1) +#define IB_SA_SERVICE_REC_SERVICE_PKEY IB_SA_COMP_MASK( 2) +/* reserved: 3 */ +#define IB_SA_SERVICE_REC_SERVICE_LEASE IB_SA_COMP_MASK( 4) +#define IB_SA_SERVICE_REC_SERVICE_KEY IB_SA_COMP_MASK( 5) +#define IB_SA_SERVICE_REC_SERVICE_NAME IB_SA_COMP_MASK( 6) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_0 IB_SA_COMP_MASK( 7) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_1 IB_SA_COMP_MASK( 8) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_2 IB_SA_COMP_MASK( 9) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_3 IB_SA_COMP_MASK(10) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_4 IB_SA_COMP_MASK(11) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_5 IB_SA_COMP_MASK(12) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_6 IB_SA_COMP_MASK(13) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_7 IB_SA_COMP_MASK(14) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_8 IB_SA_COMP_MASK(15) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_9 
IB_SA_COMP_MASK(16) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_10 IB_SA_COMP_MASK(17) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_11 IB_SA_COMP_MASK(18) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_12 IB_SA_COMP_MASK(19) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_13 IB_SA_COMP_MASK(20) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_14 IB_SA_COMP_MASK(21) +#define IB_SA_SERVICE_REC_SERVICE_DATA8_15 IB_SA_COMP_MASK(22) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_0 IB_SA_COMP_MASK(23) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_1 IB_SA_COMP_MASK(24) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_2 IB_SA_COMP_MASK(25) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_3 IB_SA_COMP_MASK(26) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_4 IB_SA_COMP_MASK(27) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_5 IB_SA_COMP_MASK(28) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_6 IB_SA_COMP_MASK(29) +#define IB_SA_SERVICE_REC_SERVICE_DATA16_7 IB_SA_COMP_MASK(30) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_0 IB_SA_COMP_MASK(31) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_1 IB_SA_COMP_MASK(32) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_2 IB_SA_COMP_MASK(33) +#define IB_SA_SERVICE_REC_SERVICE_DATA32_3 IB_SA_COMP_MASK(34) +#define IB_SA_SERVICE_REC_SERVICE_DATA64_0 IB_SA_COMP_MASK(35) +#define IB_SA_SERVICE_REC_SERVICE_DATA64_1 IB_SA_COMP_MASK(36) + +#define IB_DEFAULT_SERVICE_LEASE 0xFFFFFFFF + +struct ib_sa_service_rec { + u64 id; + union ib_gid gid; + __be16 pkey; + /* reserved */ + u32 lease; + u8 key[16]; + u8 name[64]; + u8 data8[16]; + u16 data16[8]; + u32 data32[4]; + u64 data64[2]; +}; + +struct ib_sa_query; + +void ib_sa_cancel_query(int id, struct ib_sa_query *query); + +int ib_sa_path_rec_get(struct ib_device *device, u8 port_num, + struct ib_sa_path_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int __nocast gfp_mask, + void (*callback)(int status, + struct ib_sa_path_rec *resp, + void *context), + void *context, + struct ib_sa_query **query); + +int ib_sa_mcmember_rec_query(struct ib_device *device, 
u8 port_num, + u8 method, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int __nocast gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query); + +int ib_sa_service_rec_query(struct ib_device *device, u8 port_num, + u8 method, + struct ib_sa_service_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int __nocast gfp_mask, + void (*callback)(int status, + struct ib_sa_service_rec *resp, + void *context), + void *context, + struct ib_sa_query **sa_query); + +/** + * ib_sa_mcmember_rec_set - Start an MCMember set query + * @device: device to send query on + * @port_num: port number to send query on + * @rec: MCMember Record to send in query + * @comp_mask: component mask to send in query + * @timeout_ms: time to wait for response + * @gfp_mask: GFP mask to use for internal allocations + * @callback: function called when query completes, times out or is + * canceled + * @context: opaque user context passed to callback + * @query: query context, used to cancel query + * + * Send an MCMember Set query to the SA (e.g. to join a multicast + * group). The callback function will be called when the query + * completes (or fails); status is 0 for a successful response, -EINTR + * if the query is canceled, -ETIMEDOUT if the query timed out, or + * -EIO if an error occurred sending the query. The resp parameter of + * the callback is only valid if status is 0. + * + * If the return value of ib_sa_mcmember_rec_set() is negative, it is + * an error code. Otherwise it is a query ID that can be used to + * cancel the query. 
+ */ +static inline int +ib_sa_mcmember_rec_set(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int __nocast gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query) +{ + return ib_sa_mcmember_rec_query(device, port_num, + IB_MGMT_METHOD_SET, + rec, comp_mask, + timeout_ms, gfp_mask, callback, + context, query); +} + +/** + * ib_sa_mcmember_rec_delete - Start an MCMember delete query + * @device: device to send query on + * @port_num: port number to send query on + * @rec: MCMember Record to send in query + * @comp_mask: component mask to send in query + * @timeout_ms: time to wait for response + * @gfp_mask: GFP mask to use for internal allocations + * @callback: function called when query completes, times out or is + * canceled + * @context: opaque user context passed to callback + * @query: query context, used to cancel query + * + * Send an MCMember Delete query to the SA (e.g. to leave a multicast + * group). The callback function will be called when the query + * completes (or fails); status is 0 for a successful response, -EINTR + * if the query is canceled, -ETIMEDOUT if the query timed out, or + * -EIO if an error occurred sending the query. The resp parameter of + * the callback is only valid if status is 0. + * + * If the return value of ib_sa_mcmember_rec_delete() is negative, it + * is an error code. Otherwise it is a query ID that can be used to + * cancel the query. 
+ */ +static inline int +ib_sa_mcmember_rec_delete(struct ib_device *device, u8 port_num, + struct ib_sa_mcmember_rec *rec, + ib_sa_comp_mask comp_mask, + int timeout_ms, unsigned int __nocast gfp_mask, + void (*callback)(int status, + struct ib_sa_mcmember_rec *resp, + void *context), + void *context, + struct ib_sa_query **query) +{ + return ib_sa_mcmember_rec_query(device, port_num, + IB_SA_METHOD_DELETE, + rec, comp_mask, + timeout_ms, gfp_mask, callback, + context, query); +} + + +#endif /* IB_SA_H */ diff --git a/include/rdma/ib_smi.h b/include/rdma/ib_smi.h new file mode 100644 index 00000000000..87f60737f69 --- /dev/null +++ b/include/rdma/ib_smi.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. + * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_smi.h 1389 2004-12-27 22:56:47Z roland $ + */ + +#if !defined( IB_SMI_H ) +#define IB_SMI_H + +#include + +#define IB_SMP_DATA_SIZE 64 +#define IB_SMP_MAX_PATH_HOPS 64 + +struct ib_smp { + u8 base_version; + u8 mgmt_class; + u8 class_version; + u8 method; + __be16 status; + u8 hop_ptr; + u8 hop_cnt; + __be64 tid; + __be16 attr_id; + __be16 resv; + __be32 attr_mod; + __be64 mkey; + __be16 dr_slid; + __be16 dr_dlid; + u8 reserved[28]; + u8 data[IB_SMP_DATA_SIZE]; + u8 initial_path[IB_SMP_MAX_PATH_HOPS]; + u8 return_path[IB_SMP_MAX_PATH_HOPS]; +} __attribute__ ((packed)); + +#define IB_SMP_DIRECTION __constant_htons(0x8000) + +/* Subnet management attributes */ +#define IB_SMP_ATTR_NOTICE __constant_htons(0x0002) +#define IB_SMP_ATTR_NODE_DESC __constant_htons(0x0010) +#define IB_SMP_ATTR_NODE_INFO __constant_htons(0x0011) +#define IB_SMP_ATTR_SWITCH_INFO __constant_htons(0x0012) +#define IB_SMP_ATTR_GUID_INFO __constant_htons(0x0014) +#define IB_SMP_ATTR_PORT_INFO __constant_htons(0x0015) +#define IB_SMP_ATTR_PKEY_TABLE __constant_htons(0x0016) +#define IB_SMP_ATTR_SL_TO_VL_TABLE __constant_htons(0x0017) +#define IB_SMP_ATTR_VL_ARB_TABLE __constant_htons(0x0018) +#define IB_SMP_ATTR_LINEAR_FORWARD_TABLE __constant_htons(0x0019) +#define IB_SMP_ATTR_RANDOM_FORWARD_TABLE __constant_htons(0x001A) +#define IB_SMP_ATTR_MCAST_FORWARD_TABLE __constant_htons(0x001B) +#define IB_SMP_ATTR_SM_INFO __constant_htons(0x0020) +#define IB_SMP_ATTR_VENDOR_DIAG 
__constant_htons(0x0030) +#define IB_SMP_ATTR_LED_INFO __constant_htons(0x0031) +#define IB_SMP_ATTR_VENDOR_MASK __constant_htons(0xFF00) + +static inline u8 +ib_get_smp_direction(struct ib_smp *smp) +{ + return ((smp->status & IB_SMP_DIRECTION) == IB_SMP_DIRECTION); +} + +#endif /* IB_SMI_H */ diff --git a/include/rdma/ib_user_cm.h b/include/rdma/ib_user_cm.h new file mode 100644 index 00000000000..72182d16778 --- /dev/null +++ b/include/rdma/ib_user_cm.h @@ -0,0 +1,328 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_user_cm.h 2576 2005-06-09 17:00:30Z libor $ + */ + +#ifndef IB_USER_CM_H +#define IB_USER_CM_H + +#include + +#define IB_USER_CM_ABI_VERSION 1 + +enum { + IB_USER_CM_CMD_CREATE_ID, + IB_USER_CM_CMD_DESTROY_ID, + IB_USER_CM_CMD_ATTR_ID, + + IB_USER_CM_CMD_LISTEN, + IB_USER_CM_CMD_ESTABLISH, + + IB_USER_CM_CMD_SEND_REQ, + IB_USER_CM_CMD_SEND_REP, + IB_USER_CM_CMD_SEND_RTU, + IB_USER_CM_CMD_SEND_DREQ, + IB_USER_CM_CMD_SEND_DREP, + IB_USER_CM_CMD_SEND_REJ, + IB_USER_CM_CMD_SEND_MRA, + IB_USER_CM_CMD_SEND_LAP, + IB_USER_CM_CMD_SEND_APR, + IB_USER_CM_CMD_SEND_SIDR_REQ, + IB_USER_CM_CMD_SEND_SIDR_REP, + + IB_USER_CM_CMD_EVENT, +}; +/* + * command ABI structures. + */ +struct ib_ucm_cmd_hdr { + __u32 cmd; + __u16 in; + __u16 out; +}; + +struct ib_ucm_create_id { + __u64 response; +}; + +struct ib_ucm_create_id_resp { + __u32 id; +}; + +struct ib_ucm_destroy_id { + __u32 id; +}; + +struct ib_ucm_attr_id { + __u64 response; + __u32 id; +}; + +struct ib_ucm_attr_id_resp { + __be64 service_id; + __be64 service_mask; + __be32 local_id; + __be32 remote_id; +}; + +struct ib_ucm_listen { + __be64 service_id; + __be64 service_mask; + __u32 id; +}; + +struct ib_ucm_establish { + __u32 id; +}; + +struct ib_ucm_private_data { + __u64 data; + __u32 id; + __u8 len; + __u8 reserved[3]; +}; + +struct ib_ucm_path_rec { + __u8 dgid[16]; + __u8 sgid[16]; + __be16 dlid; + __be16 slid; + __u32 raw_traffic; + __be32 flow_label; + __u32 reversible; + __u32 mtu; + __be16 pkey; + __u8 hop_limit; + __u8 traffic_class; + __u8 numb_path; + __u8 sl; + __u8 mtu_selector; + __u8 rate_selector; + __u8 rate; + __u8 packet_life_time_selector; + __u8 packet_life_time; + __u8 preference; +}; + +struct ib_ucm_req { + __u32 id; + __u32 qpn; + __u32 qp_type; + __u32 psn; + __be64 sid; + __u64 data; + __u64 primary_path; + __u64 alternate_path; + __u8 len; + __u8 peer_to_peer; + __u8 responder_resources; + __u8 initiator_depth; + __u8 remote_cm_response_timeout; + __u8 flow_control; + __u8 
local_cm_response_timeout; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 max_cm_retries; + __u8 srq; + __u8 reserved[1]; +}; + +struct ib_ucm_rep { + __u64 data; + __u32 id; + __u32 qpn; + __u32 psn; + __u8 len; + __u8 responder_resources; + __u8 initiator_depth; + __u8 target_ack_delay; + __u8 failover_accepted; + __u8 flow_control; + __u8 rnr_retry_count; + __u8 srq; +}; + +struct ib_ucm_info { + __u32 id; + __u32 status; + __u64 info; + __u64 data; + __u8 info_len; + __u8 data_len; + __u8 reserved[2]; +}; + +struct ib_ucm_mra { + __u64 data; + __u32 id; + __u8 len; + __u8 timeout; + __u8 reserved[2]; +}; + +struct ib_ucm_lap { + __u64 path; + __u64 data; + __u32 id; + __u8 len; + __u8 reserved[3]; +}; + +struct ib_ucm_sidr_req { + __u32 id; + __u32 timeout; + __be64 sid; + __u64 data; + __u64 path; + __u16 pkey; + __u8 len; + __u8 max_cm_retries; +}; + +struct ib_ucm_sidr_rep { + __u32 id; + __u32 qpn; + __u32 qkey; + __u32 status; + __u64 info; + __u64 data; + __u8 info_len; + __u8 data_len; + __u8 reserved[2]; +}; +/* + * event notification ABI structures. 
+ */ +struct ib_ucm_event_get { + __u64 response; + __u64 data; + __u64 info; + __u8 data_len; + __u8 info_len; + __u8 reserved[2]; +}; + +struct ib_ucm_req_event_resp { + __u32 listen_id; + /* device */ + /* port */ + struct ib_ucm_path_rec primary_path; + struct ib_ucm_path_rec alternate_path; + __be64 remote_ca_guid; + __u32 remote_qkey; + __u32 remote_qpn; + __u32 qp_type; + __u32 starting_psn; + __u8 responder_resources; + __u8 initiator_depth; + __u8 local_cm_response_timeout; + __u8 flow_control; + __u8 remote_cm_response_timeout; + __u8 retry_count; + __u8 rnr_retry_count; + __u8 srq; +}; + +struct ib_ucm_rep_event_resp { + __be64 remote_ca_guid; + __u32 remote_qkey; + __u32 remote_qpn; + __u32 starting_psn; + __u8 responder_resources; + __u8 initiator_depth; + __u8 target_ack_delay; + __u8 failover_accepted; + __u8 flow_control; + __u8 rnr_retry_count; + __u8 srq; + __u8 reserved[1]; +}; + +struct ib_ucm_rej_event_resp { + __u32 reason; + /* ari in ib_ucm_event_get info field. */ +}; + +struct ib_ucm_mra_event_resp { + __u8 timeout; + __u8 reserved[3]; +}; + +struct ib_ucm_lap_event_resp { + struct ib_ucm_path_rec path; +}; + +struct ib_ucm_apr_event_resp { + __u32 status; + /* apr info in ib_ucm_event_get info field. */ +}; + +struct ib_ucm_sidr_req_event_resp { + __u32 listen_id; + /* device */ + /* port */ + __u16 pkey; + __u8 reserved[2]; +}; + +struct ib_ucm_sidr_rep_event_resp { + __u32 status; + __u32 qkey; + __u32 qpn; + /* info in ib_ucm_event_get info field. 
*/ +}; + +#define IB_UCM_PRES_DATA 0x01 +#define IB_UCM_PRES_INFO 0x02 +#define IB_UCM_PRES_PRIMARY 0x04 +#define IB_UCM_PRES_ALTERNATE 0x08 + +struct ib_ucm_event_resp { + __u32 id; + __u32 event; + __u32 present; + union { + struct ib_ucm_req_event_resp req_resp; + struct ib_ucm_rep_event_resp rep_resp; + struct ib_ucm_rej_event_resp rej_resp; + struct ib_ucm_mra_event_resp mra_resp; + struct ib_ucm_lap_event_resp lap_resp; + struct ib_ucm_apr_event_resp apr_resp; + + struct ib_ucm_sidr_req_event_resp sidr_req_resp; + struct ib_ucm_sidr_rep_event_resp sidr_rep_resp; + + __u32 send_status; + } u; +}; + +#endif /* IB_USER_CM_H */ diff --git a/include/rdma/ib_user_mad.h b/include/rdma/ib_user_mad.h new file mode 100644 index 00000000000..44537aa32e6 --- /dev/null +++ b/include/rdma/ib_user_mad.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Voltaire, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_mad.h 2814 2005-07-06 19:14:09Z halr $ + */ + +#ifndef IB_USER_MAD_H +#define IB_USER_MAD_H + +#include +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_MAD_ABI_VERSION 5 + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + */ + +/** + * ib_user_mad_hdr - MAD packet header + * @id - ID of agent MAD received with/to be sent with + * @status - 0 on successful receive, ETIMEDOUT if no response + * received (transaction ID in data[] will be set to TID of original + * request) (ignored on send) + * @timeout_ms - Milliseconds to wait for response (unset on receive) + * @retries - Number of automatic retries to attempt + * @qpn - Remote QP number received from/to be sent to + * @qkey - Remote Q_Key to be sent with (unset on receive) + * @lid - Remote lid received from/to be sent to + * @sl - Service level received with/to be sent with + * @path_bits - Local path bits received with/to be sent with + * @grh_present - If set, GRH was received/should be sent + * @gid_index - Local GID index to send with (unset on receive) + * @hop_limit - Hop limit in GRH + * @traffic_class - Traffic class in GRH + * @gid - Remote GID in GRH + * @flow_label - Flow label in GRH + */ +struct ib_user_mad_hdr { + __u32 id; + __u32 status; + 
__u32 timeout_ms; + __u32 retries; + __u32 length; + __be32 qpn; + __be32 qkey; + __be16 lid; + __u8 sl; + __u8 path_bits; + __u8 grh_present; + __u8 gid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 gid[16]; + __be32 flow_label; +}; + +/** + * ib_user_mad - MAD packet + * @hdr - MAD packet header + * @data - Contents of MAD + * + */ +struct ib_user_mad { + struct ib_user_mad_hdr hdr; + __u8 data[0]; +}; + +/** + * ib_user_mad_reg_req - MAD registration request + * @id - Set by the kernel; used to identify agent in future requests. + * @qpn - Queue pair number; must be 0 or 1. + * @method_mask - The caller will receive unsolicited MADs for any method + * where @method_mask = 1. + * @mgmt_class - Indicates which management class of MADs should be received + * by the caller. This field is only required if the user wishes to + * receive unsolicited MADs, otherwise it should be 0. + * @mgmt_class_version - Indicates which version of MADs for the given + * management class to receive. + * @oui: Indicates IEEE OUI when mgmt_class is a vendor class + * in the range from 0x30 to 0x4f. Otherwise not used. + * @rmpp_version: If set, indicates the RMPP version used. + * + */ +struct ib_user_mad_reg_req { + __u32 id; + __u32 method_mask[4]; + __u8 qpn; + __u8 mgmt_class; + __u8 mgmt_class_version; + __u8 oui[3]; + __u8 rmpp_version; +}; + +#define IB_IOCTL_MAGIC 0x1b + +#define IB_USER_MAD_REGISTER_AGENT _IOWR(IB_IOCTL_MAGIC, 1, \ + struct ib_user_mad_reg_req) + +#define IB_USER_MAD_UNREGISTER_AGENT _IOW(IB_IOCTL_MAGIC, 2, __u32) + +#endif /* IB_USER_MAD_H */ diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h new file mode 100644 index 00000000000..7ebb01c8f99 --- /dev/null +++ b/include/rdma/ib_user_verbs.h @@ -0,0 +1,422 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $ + */ + +#ifndef IB_USER_VERBS_H +#define IB_USER_VERBS_H + +#include + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. 
+ */ +#define IB_USER_VERBS_ABI_VERSION 1 + +enum { + IB_USER_VERBS_CMD_QUERY_PARAMS, + IB_USER_VERBS_CMD_GET_CONTEXT, + IB_USER_VERBS_CMD_QUERY_DEVICE, + IB_USER_VERBS_CMD_QUERY_PORT, + IB_USER_VERBS_CMD_QUERY_GID, + IB_USER_VERBS_CMD_QUERY_PKEY, + IB_USER_VERBS_CMD_ALLOC_PD, + IB_USER_VERBS_CMD_DEALLOC_PD, + IB_USER_VERBS_CMD_CREATE_AH, + IB_USER_VERBS_CMD_MODIFY_AH, + IB_USER_VERBS_CMD_QUERY_AH, + IB_USER_VERBS_CMD_DESTROY_AH, + IB_USER_VERBS_CMD_REG_MR, + IB_USER_VERBS_CMD_REG_SMR, + IB_USER_VERBS_CMD_REREG_MR, + IB_USER_VERBS_CMD_QUERY_MR, + IB_USER_VERBS_CMD_DEREG_MR, + IB_USER_VERBS_CMD_ALLOC_MW, + IB_USER_VERBS_CMD_BIND_MW, + IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_CQ, + IB_USER_VERBS_CMD_RESIZE_CQ, + IB_USER_VERBS_CMD_DESTROY_CQ, + IB_USER_VERBS_CMD_POLL_CQ, + IB_USER_VERBS_CMD_PEEK_CQ, + IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, + IB_USER_VERBS_CMD_CREATE_QP, + IB_USER_VERBS_CMD_QUERY_QP, + IB_USER_VERBS_CMD_MODIFY_QP, + IB_USER_VERBS_CMD_DESTROY_QP, + IB_USER_VERBS_CMD_POST_SEND, + IB_USER_VERBS_CMD_POST_RECV, + IB_USER_VERBS_CMD_ATTACH_MCAST, + IB_USER_VERBS_CMD_DETACH_MCAST, + IB_USER_VERBS_CMD_CREATE_SRQ, + IB_USER_VERBS_CMD_MODIFY_SRQ, + IB_USER_VERBS_CMD_QUERY_SRQ, + IB_USER_VERBS_CMD_DESTROY_SRQ, + IB_USER_VERBS_CMD_POST_SRQ_RECV +}; + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct ib_uverbs_async_event_desc { + __u64 element; + __u32 event_type; /* enum ib_event_type */ + __u32 reserved; +}; + +struct ib_uverbs_comp_event_desc { + __u64 cq_handle; +}; + +/* + * All commands from userspace should start with a __u32 command field + * followed by __u16 in_words and out_words fields (which give the + * length of the command block and response buffer if any in 32-bit + * words). 
The kernel driver will read these fields first and read + * the rest of the command struct based on these values. + */ + +struct ib_uverbs_cmd_hdr { + __u32 command; + __u16 in_words; + __u16 out_words; +}; + +/* + * No driver_data for "query params" command, since this is intended + * to be a core function with no possible device dependence. + */ +struct ib_uverbs_query_params { + __u64 response; +}; + +struct ib_uverbs_query_params_resp { + __u32 num_cq_events; +}; + +struct ib_uverbs_get_context { + __u64 response; + __u64 cq_fd_tab; + __u64 driver_data[0]; +}; + +struct ib_uverbs_get_context_resp { + __u32 async_fd; + __u32 reserved; +}; + +struct ib_uverbs_query_device { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_device_resp { + __u64 fw_ver; + __be64 node_guid; + __be64 sys_image_guid; + __u64 max_mr_size; + __u64 page_size_cap; + __u32 vendor_id; + __u32 vendor_part_id; + __u32 hw_ver; + __u32 max_qp; + __u32 max_qp_wr; + __u32 device_cap_flags; + __u32 max_sge; + __u32 max_sge_rd; + __u32 max_cq; + __u32 max_cqe; + __u32 max_mr; + __u32 max_pd; + __u32 max_qp_rd_atom; + __u32 max_ee_rd_atom; + __u32 max_res_rd_atom; + __u32 max_qp_init_rd_atom; + __u32 max_ee_init_rd_atom; + __u32 atomic_cap; + __u32 max_ee; + __u32 max_rdd; + __u32 max_mw; + __u32 max_raw_ipv6_qp; + __u32 max_raw_ethy_qp; + __u32 max_mcast_grp; + __u32 max_mcast_qp_attach; + __u32 max_total_mcast_qp_attach; + __u32 max_ah; + __u32 max_fmr; + __u32 max_map_per_fmr; + __u32 max_srq; + __u32 max_srq_wr; + __u32 max_srq_sge; + __u16 max_pkeys; + __u8 local_ca_ack_delay; + __u8 phys_port_cnt; + __u8 reserved[4]; +}; + +struct ib_uverbs_query_port { + __u64 response; + __u8 port_num; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_port_resp { + __u32 port_cap_flags; + __u32 max_msg_sz; + __u32 bad_pkey_cntr; + __u32 qkey_viol_cntr; + __u32 gid_tbl_len; + __u16 pkey_tbl_len; + __u16 lid; + __u16 sm_lid; + __u8 state; + __u8 max_mtu; + __u8 
active_mtu; + __u8 lmc; + __u8 max_vl_num; + __u8 sm_sl; + __u8 subnet_timeout; + __u8 init_type_reply; + __u8 active_width; + __u8 active_speed; + __u8 phys_state; + __u8 reserved[3]; +}; + +struct ib_uverbs_query_gid { + __u64 response; + __u8 port_num; + __u8 index; + __u8 reserved[6]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_gid_resp { + __u8 gid[16]; +}; + +struct ib_uverbs_query_pkey { + __u64 response; + __u8 port_num; + __u8 index; + __u8 reserved[6]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_pkey_resp { + __u16 pkey; + __u16 reserved; +}; + +struct ib_uverbs_alloc_pd { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_alloc_pd_resp { + __u32 pd_handle; +}; + +struct ib_uverbs_dealloc_pd { + __u32 pd_handle; +}; + +struct ib_uverbs_reg_mr { + __u64 response; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_reg_mr_resp { + __u32 mr_handle; + __u32 lkey; + __u32 rkey; +}; + +struct ib_uverbs_dereg_mr { + __u32 mr_handle; +}; + +struct ib_uverbs_create_cq { + __u64 response; + __u64 user_handle; + __u32 cqe; + __u32 event_handler; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_cq_resp { + __u32 cq_handle; + __u32 cqe; +}; + +struct ib_uverbs_destroy_cq { + __u32 cq_handle; +}; + +struct ib_uverbs_create_qp { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_qp_resp { + __u32 qp_handle; + __u32 qpn; +}; + +/* + * This struct needs to remain a multiple of 8 bytes to keep the + * alignment of the modify QP parameters. 
+ */ +struct ib_uverbs_qp_dest { + __u8 dgid[16]; + __u32 flow_label; + __u16 dlid; + __u16 reserved; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; +}; + +struct ib_uverbs_modify_qp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp_resp { +}; + +struct ib_uverbs_destroy_qp { + __u32 qp_handle; +}; + +struct ib_uverbs_attach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_detach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq { + __u64 response; + __u64 user_handle; + __u32 pd_handle; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_srq_resp { + __u32 srq_handle; +}; + +struct ib_uverbs_modify_srq { + __u32 srq_handle; + __u32 attr_mask; + __u32 max_wr; + __u32 max_sge; + __u32 srq_limit; + __u32 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_destroy_srq { + __u32 srq_handle; +}; + +#endif /* IB_USER_VERBS_H */ diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h new file mode 100644 index 00000000000..e16cf94870f --- /dev/null +++ b/include/rdma/ib_verbs.h @@ -0,0 +1,1461 @@ +/* + * Copyright (c) 2004 Mellanox Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2004 Infinicon Corporation. All rights reserved. + * Copyright (c) 2004 Intel Corporation. All rights reserved. + * Copyright (c) 2004 Topspin Corporation. All rights reserved. + * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: ib_verbs.h 1349 2004-12-16 21:09:43Z roland $ + */ + +#if !defined(IB_VERBS_H) +#define IB_VERBS_H + +#include +#include + +#include +#include +#include + +union ib_gid { + u8 raw[16]; + struct { + __be64 subnet_prefix; + __be64 interface_id; + } global; +}; + +enum ib_node_type { + IB_NODE_CA = 1, + IB_NODE_SWITCH, + IB_NODE_ROUTER +}; + +enum ib_device_cap_flags { + IB_DEVICE_RESIZE_MAX_WR = 1, + IB_DEVICE_BAD_PKEY_CNTR = (1<<1), + IB_DEVICE_BAD_QKEY_CNTR = (1<<2), + IB_DEVICE_RAW_MULTI = (1<<3), + IB_DEVICE_AUTO_PATH_MIG = (1<<4), + IB_DEVICE_CHANGE_PHY_PORT = (1<<5), + IB_DEVICE_UD_AV_PORT_ENFORCE = (1<<6), + IB_DEVICE_CURR_QP_STATE_MOD = (1<<7), + IB_DEVICE_SHUTDOWN_PORT = (1<<8), + IB_DEVICE_INIT_TYPE = (1<<9), + IB_DEVICE_PORT_ACTIVE_EVENT = (1<<10), + IB_DEVICE_SYS_IMAGE_GUID = (1<<11), + IB_DEVICE_RC_RNR_NAK_GEN = (1<<12), + IB_DEVICE_SRQ_RESIZE = (1<<13), + IB_DEVICE_N_NOTIFY_CQ = (1<<14), +}; + +enum ib_atomic_cap { + IB_ATOMIC_NONE, + IB_ATOMIC_HCA, + IB_ATOMIC_GLOB +}; + +struct ib_device_attr { + u64 fw_ver; + __be64 node_guid; + __be64 sys_image_guid; + u64 max_mr_size; + u64 page_size_cap; + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + int max_qp; + int max_qp_wr; + int device_cap_flags; + int max_sge; + int max_sge_rd; + int max_cq; + int max_cqe; + int max_mr; + int max_pd; + int max_qp_rd_atom; + int max_ee_rd_atom; + int max_res_rd_atom; + int max_qp_init_rd_atom; + int max_ee_init_rd_atom; + enum ib_atomic_cap atomic_cap; + int max_ee; + int max_rdd; + int max_mw; + int max_raw_ipv6_qp; + int max_raw_ethy_qp; + int max_mcast_grp; + int max_mcast_qp_attach; + int max_total_mcast_qp_attach; + int max_ah; + int max_fmr; + int max_map_per_fmr; + int max_srq; + int max_srq_wr; + int max_srq_sge; + u16 max_pkeys; + u8 local_ca_ack_delay; +}; + +enum ib_mtu { + IB_MTU_256 = 1, + IB_MTU_512 = 2, + IB_MTU_1024 = 3, + IB_MTU_2048 = 4, + IB_MTU_4096 = 5 +}; + +static inline int ib_mtu_enum_to_int(enum ib_mtu mtu) +{ + switch (mtu) { + 
case IB_MTU_256: return 256; + case IB_MTU_512: return 512; + case IB_MTU_1024: return 1024; + case IB_MTU_2048: return 2048; + case IB_MTU_4096: return 4096; + default: return -1; + } +} + +enum ib_port_state { + IB_PORT_NOP = 0, + IB_PORT_DOWN = 1, + IB_PORT_INIT = 2, + IB_PORT_ARMED = 3, + IB_PORT_ACTIVE = 4, + IB_PORT_ACTIVE_DEFER = 5 +}; + +enum ib_port_cap_flags { + IB_PORT_SM = 1 << 1, + IB_PORT_NOTICE_SUP = 1 << 2, + IB_PORT_TRAP_SUP = 1 << 3, + IB_PORT_OPT_IPD_SUP = 1 << 4, + IB_PORT_AUTO_MIGR_SUP = 1 << 5, + IB_PORT_SL_MAP_SUP = 1 << 6, + IB_PORT_MKEY_NVRAM = 1 << 7, + IB_PORT_PKEY_NVRAM = 1 << 8, + IB_PORT_LED_INFO_SUP = 1 << 9, + IB_PORT_SM_DISABLED = 1 << 10, + IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11, + IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12, + IB_PORT_CM_SUP = 1 << 16, + IB_PORT_SNMP_TUNNEL_SUP = 1 << 17, + IB_PORT_REINIT_SUP = 1 << 18, + IB_PORT_DEVICE_MGMT_SUP = 1 << 19, + IB_PORT_VENDOR_CLASS_SUP = 1 << 20, + IB_PORT_DR_NOTICE_SUP = 1 << 21, + IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22, + IB_PORT_BOOT_MGMT_SUP = 1 << 23, + IB_PORT_LINK_LATENCY_SUP = 1 << 24, + IB_PORT_CLIENT_REG_SUP = 1 << 25 +}; + +enum ib_port_width { + IB_WIDTH_1X = 1, + IB_WIDTH_4X = 2, + IB_WIDTH_8X = 4, + IB_WIDTH_12X = 8 +}; + +static inline int ib_width_enum_to_int(enum ib_port_width width) +{ + switch (width) { + case IB_WIDTH_1X: return 1; + case IB_WIDTH_4X: return 4; + case IB_WIDTH_8X: return 8; + case IB_WIDTH_12X: return 12; + default: return -1; + } +} + +struct ib_port_attr { + enum ib_port_state state; + enum ib_mtu max_mtu; + enum ib_mtu active_mtu; + int gid_tbl_len; + u32 port_cap_flags; + u32 max_msg_sz; + u32 bad_pkey_cntr; + u32 qkey_viol_cntr; + u16 pkey_tbl_len; + u16 lid; + u16 sm_lid; + u8 lmc; + u8 max_vl_num; + u8 sm_sl; + u8 subnet_timeout; + u8 init_type_reply; + u8 active_width; + u8 active_speed; + u8 phys_state; +}; + +enum ib_device_modify_flags { + IB_DEVICE_MODIFY_SYS_IMAGE_GUID = 1 +}; + +struct ib_device_modify { + u64 sys_image_guid; +}; + 
+enum ib_port_modify_flags { + IB_PORT_SHUTDOWN = 1, + IB_PORT_INIT_TYPE = (1<<2), + IB_PORT_RESET_QKEY_CNTR = (1<<3) +}; + +struct ib_port_modify { + u32 set_port_cap_mask; + u32 clr_port_cap_mask; + u8 init_type; +}; + +enum ib_event_type { + IB_EVENT_CQ_ERR, + IB_EVENT_QP_FATAL, + IB_EVENT_QP_REQ_ERR, + IB_EVENT_QP_ACCESS_ERR, + IB_EVENT_COMM_EST, + IB_EVENT_SQ_DRAINED, + IB_EVENT_PATH_MIG, + IB_EVENT_PATH_MIG_ERR, + IB_EVENT_DEVICE_FATAL, + IB_EVENT_PORT_ACTIVE, + IB_EVENT_PORT_ERR, + IB_EVENT_LID_CHANGE, + IB_EVENT_PKEY_CHANGE, + IB_EVENT_SM_CHANGE, + IB_EVENT_SRQ_ERR, + IB_EVENT_SRQ_LIMIT_REACHED, + IB_EVENT_QP_LAST_WQE_REACHED +}; + +struct ib_event { + struct ib_device *device; + union { + struct ib_cq *cq; + struct ib_qp *qp; + struct ib_srq *srq; + u8 port_num; + } element; + enum ib_event_type event; +}; + +struct ib_event_handler { + struct ib_device *device; + void (*handler)(struct ib_event_handler *, struct ib_event *); + struct list_head list; +}; + +#define INIT_IB_EVENT_HANDLER(_ptr, _device, _handler) \ + do { \ + (_ptr)->device = _device; \ + (_ptr)->handler = _handler; \ + INIT_LIST_HEAD(&(_ptr)->list); \ + } while (0) + +struct ib_global_route { + union ib_gid dgid; + u32 flow_label; + u8 sgid_index; + u8 hop_limit; + u8 traffic_class; +}; + +struct ib_grh { + __be32 version_tclass_flow; + __be16 paylen; + u8 next_hdr; + u8 hop_limit; + union ib_gid sgid; + union ib_gid dgid; +}; + +enum { + IB_MULTICAST_QPN = 0xffffff +}; + +#define IB_LID_PERMISSIVE __constant_htons(0xFFFF) + +enum ib_ah_flags { + IB_AH_GRH = 1 +}; + +struct ib_ah_attr { + struct ib_global_route grh; + u16 dlid; + u8 sl; + u8 src_path_bits; + u8 static_rate; + u8 ah_flags; + u8 port_num; +}; + +enum ib_wc_status { + IB_WC_SUCCESS, + IB_WC_LOC_LEN_ERR, + IB_WC_LOC_QP_OP_ERR, + IB_WC_LOC_EEC_OP_ERR, + IB_WC_LOC_PROT_ERR, + IB_WC_WR_FLUSH_ERR, + IB_WC_MW_BIND_ERR, + IB_WC_BAD_RESP_ERR, + IB_WC_LOC_ACCESS_ERR, + IB_WC_REM_INV_REQ_ERR, + IB_WC_REM_ACCESS_ERR, + IB_WC_REM_OP_ERR, 
+ IB_WC_RETRY_EXC_ERR, + IB_WC_RNR_RETRY_EXC_ERR, + IB_WC_LOC_RDD_VIOL_ERR, + IB_WC_REM_INV_RD_REQ_ERR, + IB_WC_REM_ABORT_ERR, + IB_WC_INV_EECN_ERR, + IB_WC_INV_EEC_STATE_ERR, + IB_WC_FATAL_ERR, + IB_WC_RESP_TIMEOUT_ERR, + IB_WC_GENERAL_ERR +}; + +enum ib_wc_opcode { + IB_WC_SEND, + IB_WC_RDMA_WRITE, + IB_WC_RDMA_READ, + IB_WC_COMP_SWAP, + IB_WC_FETCH_ADD, + IB_WC_BIND_MW, +/* + * Set value of IB_WC_RECV so consumers can test if a completion is a + * receive by testing (opcode & IB_WC_RECV). + */ + IB_WC_RECV = 1 << 7, + IB_WC_RECV_RDMA_WITH_IMM +}; + +enum ib_wc_flags { + IB_WC_GRH = 1, + IB_WC_WITH_IMM = (1<<1) +}; + +struct ib_wc { + u64 wr_id; + enum ib_wc_status status; + enum ib_wc_opcode opcode; + u32 vendor_err; + u32 byte_len; + __be32 imm_data; + u32 qp_num; + u32 src_qp; + int wc_flags; + u16 pkey_index; + u16 slid; + u8 sl; + u8 dlid_path_bits; + u8 port_num; /* valid only for DR SMPs on switches */ +}; + +enum ib_cq_notify { + IB_CQ_SOLICITED, + IB_CQ_NEXT_COMP +}; + +enum ib_srq_attr_mask { + IB_SRQ_MAX_WR = 1 << 0, + IB_SRQ_LIMIT = 1 << 1, +}; + +struct ib_srq_attr { + u32 max_wr; + u32 max_sge; + u32 srq_limit; +}; + +struct ib_srq_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + struct ib_srq_attr attr; +}; + +struct ib_qp_cap { + u32 max_send_wr; + u32 max_recv_wr; + u32 max_send_sge; + u32 max_recv_sge; + u32 max_inline_data; +}; + +enum ib_sig_type { + IB_SIGNAL_ALL_WR, + IB_SIGNAL_REQ_WR +}; + +enum ib_qp_type { + /* + * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries + * here (and in that order) since the MAD layer uses them as + * indices into a 2-entry table. 
+ */ + IB_QPT_SMI, + IB_QPT_GSI, + + IB_QPT_RC, + IB_QPT_UC, + IB_QPT_UD, + IB_QPT_RAW_IPV6, + IB_QPT_RAW_ETY +}; + +struct ib_qp_init_attr { + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_qp_cap cap; + enum ib_sig_type sq_sig_type; + enum ib_qp_type qp_type; + u8 port_num; /* special QP types only */ +}; + +enum ib_rnr_timeout { + IB_RNR_TIMER_655_36 = 0, + IB_RNR_TIMER_000_01 = 1, + IB_RNR_TIMER_000_02 = 2, + IB_RNR_TIMER_000_03 = 3, + IB_RNR_TIMER_000_04 = 4, + IB_RNR_TIMER_000_06 = 5, + IB_RNR_TIMER_000_08 = 6, + IB_RNR_TIMER_000_12 = 7, + IB_RNR_TIMER_000_16 = 8, + IB_RNR_TIMER_000_24 = 9, + IB_RNR_TIMER_000_32 = 10, + IB_RNR_TIMER_000_48 = 11, + IB_RNR_TIMER_000_64 = 12, + IB_RNR_TIMER_000_96 = 13, + IB_RNR_TIMER_001_28 = 14, + IB_RNR_TIMER_001_92 = 15, + IB_RNR_TIMER_002_56 = 16, + IB_RNR_TIMER_003_84 = 17, + IB_RNR_TIMER_005_12 = 18, + IB_RNR_TIMER_007_68 = 19, + IB_RNR_TIMER_010_24 = 20, + IB_RNR_TIMER_015_36 = 21, + IB_RNR_TIMER_020_48 = 22, + IB_RNR_TIMER_030_72 = 23, + IB_RNR_TIMER_040_96 = 24, + IB_RNR_TIMER_061_44 = 25, + IB_RNR_TIMER_081_92 = 26, + IB_RNR_TIMER_122_88 = 27, + IB_RNR_TIMER_163_84 = 28, + IB_RNR_TIMER_245_76 = 29, + IB_RNR_TIMER_327_68 = 30, + IB_RNR_TIMER_491_52 = 31 +}; + +enum ib_qp_attr_mask { + IB_QP_STATE = 1, + IB_QP_CUR_STATE = (1<<1), + IB_QP_EN_SQD_ASYNC_NOTIFY = (1<<2), + IB_QP_ACCESS_FLAGS = (1<<3), + IB_QP_PKEY_INDEX = (1<<4), + IB_QP_PORT = (1<<5), + IB_QP_QKEY = (1<<6), + IB_QP_AV = (1<<7), + IB_QP_PATH_MTU = (1<<8), + IB_QP_TIMEOUT = (1<<9), + IB_QP_RETRY_CNT = (1<<10), + IB_QP_RNR_RETRY = (1<<11), + IB_QP_RQ_PSN = (1<<12), + IB_QP_MAX_QP_RD_ATOMIC = (1<<13), + IB_QP_ALT_PATH = (1<<14), + IB_QP_MIN_RNR_TIMER = (1<<15), + IB_QP_SQ_PSN = (1<<16), + IB_QP_MAX_DEST_RD_ATOMIC = (1<<17), + IB_QP_PATH_MIG_STATE = (1<<18), + IB_QP_CAP = (1<<19), + IB_QP_DEST_QPN = (1<<20) +}; + +enum ib_qp_state { + IB_QPS_RESET, + 
IB_QPS_INIT, + IB_QPS_RTR, + IB_QPS_RTS, + IB_QPS_SQD, + IB_QPS_SQE, + IB_QPS_ERR +}; + +enum ib_mig_state { + IB_MIG_MIGRATED, + IB_MIG_REARM, + IB_MIG_ARMED +}; + +struct ib_qp_attr { + enum ib_qp_state qp_state; + enum ib_qp_state cur_qp_state; + enum ib_mtu path_mtu; + enum ib_mig_state path_mig_state; + u32 qkey; + u32 rq_psn; + u32 sq_psn; + u32 dest_qp_num; + int qp_access_flags; + struct ib_qp_cap cap; + struct ib_ah_attr ah_attr; + struct ib_ah_attr alt_ah_attr; + u16 pkey_index; + u16 alt_pkey_index; + u8 en_sqd_async_notify; + u8 sq_draining; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + u8 min_rnr_timer; + u8 port_num; + u8 timeout; + u8 retry_cnt; + u8 rnr_retry; + u8 alt_port_num; + u8 alt_timeout; +}; + +enum ib_wr_opcode { + IB_WR_RDMA_WRITE, + IB_WR_RDMA_WRITE_WITH_IMM, + IB_WR_SEND, + IB_WR_SEND_WITH_IMM, + IB_WR_RDMA_READ, + IB_WR_ATOMIC_CMP_AND_SWP, + IB_WR_ATOMIC_FETCH_AND_ADD +}; + +enum ib_send_flags { + IB_SEND_FENCE = 1, + IB_SEND_SIGNALED = (1<<1), + IB_SEND_SOLICITED = (1<<2), + IB_SEND_INLINE = (1<<3) +}; + +struct ib_sge { + u64 addr; + u32 length; + u32 lkey; +}; + +struct ib_send_wr { + struct ib_send_wr *next; + u64 wr_id; + struct ib_sge *sg_list; + int num_sge; + enum ib_wr_opcode opcode; + int send_flags; + __be32 imm_data; + union { + struct { + u64 remote_addr; + u32 rkey; + } rdma; + struct { + u64 remote_addr; + u64 compare_add; + u64 swap; + u32 rkey; + } atomic; + struct { + struct ib_ah *ah; + struct ib_mad_hdr *mad_hdr; + u32 remote_qpn; + u32 remote_qkey; + int timeout_ms; /* valid for MADs only */ + int retries; /* valid for MADs only */ + u16 pkey_index; /* valid for GSI only */ + u8 port_num; /* valid for DR SMPs on switch only */ + } ud; + } wr; +}; + +struct ib_recv_wr { + struct ib_recv_wr *next; + u64 wr_id; + struct ib_sge *sg_list; + int num_sge; +}; + +enum ib_access_flags { + IB_ACCESS_LOCAL_WRITE = 1, + IB_ACCESS_REMOTE_WRITE = (1<<1), + IB_ACCESS_REMOTE_READ = (1<<2), + IB_ACCESS_REMOTE_ATOMIC = (1<<3), + 
IB_ACCESS_MW_BIND = (1<<4) +}; + +struct ib_phys_buf { + u64 addr; + u64 size; +}; + +struct ib_mr_attr { + struct ib_pd *pd; + u64 device_virt_addr; + u64 size; + int mr_access_flags; + u32 lkey; + u32 rkey; +}; + +enum ib_mr_rereg_flags { + IB_MR_REREG_TRANS = 1, + IB_MR_REREG_PD = (1<<1), + IB_MR_REREG_ACCESS = (1<<2) +}; + +struct ib_mw_bind { + struct ib_mr *mr; + u64 wr_id; + u64 addr; + u32 length; + int send_flags; + int mw_access_flags; +}; + +struct ib_fmr_attr { + int max_pages; + int max_maps; + u8 page_size; +}; + +struct ib_ucontext { + struct ib_device *device; + struct list_head pd_list; + struct list_head mr_list; + struct list_head mw_list; + struct list_head cq_list; + struct list_head qp_list; + struct list_head srq_list; + struct list_head ah_list; + spinlock_t lock; +}; + +struct ib_uobject { + u64 user_handle; /* handle given to us by userspace */ + struct ib_ucontext *context; /* associated user context */ + struct list_head list; /* link to context's list */ + u32 id; /* index into kernel idr */ +}; + +struct ib_umem { + unsigned long user_base; + unsigned long virt_base; + size_t length; + int offset; + int page_size; + int writable; + struct list_head chunk_list; +}; + +struct ib_umem_chunk { + struct list_head list; + int nents; + int nmap; + struct scatterlist page_list[0]; +}; + +struct ib_udata { + void __user *inbuf; + void __user *outbuf; + size_t inlen; + size_t outlen; +}; + +#define IB_UMEM_MAX_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ + ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ + (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) + +struct ib_umem_object { + struct ib_uobject uobject; + struct ib_umem umem; +}; + +struct ib_pd { + struct ib_device *device; + struct ib_uobject *uobject; + atomic_t usecnt; /* count all resources */ +}; + +struct ib_ah { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; +}; + +typedef void (*ib_comp_handler)(struct 
ib_cq *cq, void *cq_context); + +struct ib_cq { + struct ib_device *device; + struct ib_uobject *uobject; + ib_comp_handler comp_handler; + void (*event_handler)(struct ib_event *, void *); + void * cq_context; + int cqe; + atomic_t usecnt; /* count number of work queues */ +}; + +struct ib_srq { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + void (*event_handler)(struct ib_event *, void *); + void *srq_context; + atomic_t usecnt; +}; + +struct ib_qp { + struct ib_device *device; + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + struct ib_srq *srq; + struct ib_uobject *uobject; + void (*event_handler)(struct ib_event *, void *); + void *qp_context; + u32 qp_num; + enum ib_qp_type qp_type; +}; + +struct ib_mr { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + u32 lkey; + u32 rkey; + atomic_t usecnt; /* count number of MWs */ +}; + +struct ib_mw { + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + u32 rkey; +}; + +struct ib_fmr { + struct ib_device *device; + struct ib_pd *pd; + struct list_head list; + u32 lkey; + u32 rkey; +}; + +struct ib_mad; +struct ib_grh; + +enum ib_process_mad_flags { + IB_MAD_IGNORE_MKEY = 1, + IB_MAD_IGNORE_BKEY = 2, + IB_MAD_IGNORE_ALL = IB_MAD_IGNORE_MKEY | IB_MAD_IGNORE_BKEY +}; + +enum ib_mad_result { + IB_MAD_RESULT_FAILURE = 0, /* (!SUCCESS is the important flag) */ + IB_MAD_RESULT_SUCCESS = 1 << 0, /* MAD was successfully processed */ + IB_MAD_RESULT_REPLY = 1 << 1, /* Reply packet needs to be sent */ + IB_MAD_RESULT_CONSUMED = 1 << 2 /* Packet consumed: stop processing */ +}; + +#define IB_DEVICE_NAME_MAX 64 + +struct ib_cache { + rwlock_t lock; + struct ib_event_handler event_handler; + struct ib_pkey_cache **pkey_cache; + struct ib_gid_cache **gid_cache; +}; + +struct ib_device { + struct device *dma_device; + + char name[IB_DEVICE_NAME_MAX]; + + struct list_head event_handler_list; + spinlock_t event_handler_lock; + 
+ struct list_head core_list; + struct list_head client_data_list; + spinlock_t client_data_lock; + + struct ib_cache cache; + + u32 flags; + + int (*query_device)(struct ib_device *device, + struct ib_device_attr *device_attr); + int (*query_port)(struct ib_device *device, + u8 port_num, + struct ib_port_attr *port_attr); + int (*query_gid)(struct ib_device *device, + u8 port_num, int index, + union ib_gid *gid); + int (*query_pkey)(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + int (*modify_device)(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + int (*modify_port)(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, + struct ib_udata *udata); + int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*mmap)(struct ib_ucontext *context, + struct vm_area_struct *vma); + struct ib_pd * (*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*dealloc_pd)(struct ib_pd *pd); + struct ib_ah * (*create_ah)(struct ib_pd *pd, + struct ib_ah_attr *ah_attr); + int (*modify_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*query_ah)(struct ib_ah *ah, + struct ib_ah_attr *ah_attr); + int (*destroy_ah)(struct ib_ah *ah); + struct ib_srq * (*create_srq)(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr, + struct ib_udata *udata); + int (*modify_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask); + int (*query_srq)(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + int (*destroy_srq)(struct ib_srq *srq); + int (*post_srq_recv)(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + struct ib_qp * (*create_qp)(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + int (*modify_qp)(struct ib_qp *qp, + struct ib_qp_attr 
*qp_attr, + int qp_attr_mask); + int (*query_qp)(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + int (*destroy_qp)(struct ib_qp *qp); + int (*post_send)(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr); + int (*post_recv)(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr); + struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata); + int (*destroy_cq)(struct ib_cq *cq); + int (*resize_cq)(struct ib_cq *cq, int *cqe); + int (*poll_cq)(struct ib_cq *cq, int num_entries, + struct ib_wc *wc); + int (*peek_cq)(struct ib_cq *cq, int wc_cnt); + int (*req_notify_cq)(struct ib_cq *cq, + enum ib_cq_notify cq_notify); + int (*req_ncomp_notif)(struct ib_cq *cq, + int wc_cnt); + struct ib_mr * (*get_dma_mr)(struct ib_pd *pd, + int mr_access_flags); + struct ib_mr * (*reg_phys_mr)(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, + struct ib_udata *udata); + int (*query_mr)(struct ib_mr *mr, + struct ib_mr_attr *mr_attr); + int (*dereg_mr)(struct ib_mr *mr); + int (*rereg_phys_mr)(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + struct ib_mw * (*alloc_mw)(struct ib_pd *pd); + int (*bind_mw)(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind); + int (*dealloc_mw)(struct ib_mw *mw); + struct ib_fmr * (*alloc_fmr)(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + int (*map_phys_fmr)(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova); + int (*unmap_fmr)(struct list_head *fmr_list); + int (*dealloc_fmr)(struct ib_fmr *fmr); + int (*attach_mcast)(struct ib_qp *qp, + 
union ib_gid *gid, + u16 lid); + int (*detach_mcast)(struct ib_qp *qp, + union ib_gid *gid, + u16 lid); + int (*process_mad)(struct ib_device *device, + int process_mad_flags, + u8 port_num, + struct ib_wc *in_wc, + struct ib_grh *in_grh, + struct ib_mad *in_mad, + struct ib_mad *out_mad); + + struct module *owner; + struct class_device class_dev; + struct kobject ports_parent; + struct list_head port_list; + + enum { + IB_DEV_UNINITIALIZED, + IB_DEV_REGISTERED, + IB_DEV_UNREGISTERED + } reg_state; + + u8 node_type; + u8 phys_port_cnt; +}; + +struct ib_client { + char *name; + void (*add) (struct ib_device *); + void (*remove)(struct ib_device *); + + struct list_head list; +}; + +struct ib_device *ib_alloc_device(size_t size); +void ib_dealloc_device(struct ib_device *device); + +int ib_register_device (struct ib_device *device); +void ib_unregister_device(struct ib_device *device); + +int ib_register_client (struct ib_client *client); +void ib_unregister_client(struct ib_client *client); + +void *ib_get_client_data(struct ib_device *device, struct ib_client *client); +void ib_set_client_data(struct ib_device *device, struct ib_client *client, + void *data); + +static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) +{ + return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; +} + +static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) +{ + return copy_to_user(udata->outbuf, src, len) ? 
-EFAULT : 0; +} + +int ib_register_event_handler (struct ib_event_handler *event_handler); +int ib_unregister_event_handler(struct ib_event_handler *event_handler); +void ib_dispatch_event(struct ib_event *event); + +int ib_query_device(struct ib_device *device, + struct ib_device_attr *device_attr); + +int ib_query_port(struct ib_device *device, + u8 port_num, struct ib_port_attr *port_attr); + +int ib_query_gid(struct ib_device *device, + u8 port_num, int index, union ib_gid *gid); + +int ib_query_pkey(struct ib_device *device, + u8 port_num, u16 index, u16 *pkey); + +int ib_modify_device(struct ib_device *device, + int device_modify_mask, + struct ib_device_modify *device_modify); + +int ib_modify_port(struct ib_device *device, + u8 port_num, int port_modify_mask, + struct ib_port_modify *port_modify); + +/** + * ib_alloc_pd - Allocates an unused protection domain. + * @device: The device on which to allocate the protection domain. + * + * A protection domain object provides an association between QPs, shared + * receive queues, address handles, memory regions, and memory windows. + */ +struct ib_pd *ib_alloc_pd(struct ib_device *device); + +/** + * ib_dealloc_pd - Deallocates a protection domain. + * @pd: The protection domain to deallocate. + */ +int ib_dealloc_pd(struct ib_pd *pd); + +/** + * ib_create_ah - Creates an address handle for the given address vector. + * @pd: The protection domain associated with the address handle. + * @ah_attr: The attributes of the address vector. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); + +/** + * ib_create_ah_from_wc - Creates an address handle associated with the + * sender of the specified work completion. + * @pd: The protection domain associated with the address handle. + * @wc: Work completion information associated with a received message. 
+ * @grh: References the received global route header. This parameter is + * ignored unless the work completion indicates that the GRH is valid. + * @port_num: The outbound port number to associate with the address. + * + * The address handle is used to reference a local or global destination + * in all UD QP post sends. + */ +struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, struct ib_wc *wc, + struct ib_grh *grh, u8 port_num); + +/** + * ib_modify_ah - Modifies the address vector associated with an address + * handle. + * @ah: The address handle to modify. + * @ah_attr: The new address vector attributes to associate with the + * address handle. + */ +int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ib_query_ah - Queries the address vector associated with an address + * handle. + * @ah: The address handle to query. + * @ah_attr: The address vector attributes associated with the address + * handle. + */ +int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr); + +/** + * ib_destroy_ah - Destroys an address handle. + * @ah: The address handle to destroy. + */ +int ib_destroy_ah(struct ib_ah *ah); + +/** + * ib_create_srq - Creates a SRQ associated with the specified protection + * domain. + * @pd: The protection domain associated with the SRQ. + * @srq_init_attr: A list of initial attributes required to create the SRQ. + * + * srq_attr->max_wr and srq_attr->max_sge are read to determine the + * requested size of the SRQ, and set to the actual values allocated + * on return. If ib_create_srq() succeeds, then max_wr and max_sge + * will always be at least as large as the requested values. + */ +struct ib_srq *ib_create_srq(struct ib_pd *pd, + struct ib_srq_init_attr *srq_init_attr); + +/** + * ib_modify_srq - Modifies the attributes for the specified SRQ. + * @srq: The SRQ to modify. + * @srq_attr: On input, specifies the SRQ attributes to modify. On output, + * the current values of selected SRQ attributes are returned.
+ * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ + * are being modified. + * + * The mask may contain IB_SRQ_MAX_WR to resize the SRQ and/or + * IB_SRQ_LIMIT to set the SRQ's limit and request notification when + * the number of receives queued drops below the limit. + */ +int ib_modify_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr, + enum ib_srq_attr_mask srq_attr_mask); + +/** + * ib_query_srq - Returns the attribute list and current values for the + * specified SRQ. + * @srq: The SRQ to query. + * @srq_attr: The attributes of the specified SRQ. + */ +int ib_query_srq(struct ib_srq *srq, + struct ib_srq_attr *srq_attr); + +/** + * ib_destroy_srq - Destroys the specified SRQ. + * @srq: The SRQ to destroy. + */ +int ib_destroy_srq(struct ib_srq *srq); + +/** + * ib_post_srq_recv - Posts a list of work requests to the specified SRQ. + * @srq: The SRQ to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_srq_recv(struct ib_srq *srq, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return srq->device->post_srq_recv(srq, recv_wr, bad_recv_wr); +} + +/** + * ib_create_qp - Creates a QP associated with the specified protection + * domain. + * @pd: The protection domain associated with the QP. + * @qp_init_attr: A list of initial attributes required to create the QP. + */ +struct ib_qp *ib_create_qp(struct ib_pd *pd, + struct ib_qp_init_attr *qp_init_attr); + +/** + * ib_modify_qp - Modifies the attributes for the specified QP and then + * transitions the QP to the given state. + * @qp: The QP to modify. + * @qp_attr: On input, specifies the QP attributes to modify. On output, + * the current values of selected QP attributes are returned. 
+ * @qp_attr_mask: A bit-mask used to specify which attributes of the QP + * are being modified. + */ +int ib_modify_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask); + +/** + * ib_query_qp - Returns the attribute list and current values for the + * specified QP. + * @qp: The QP to query. + * @qp_attr: The attributes of the specified QP. + * @qp_attr_mask: A bit-mask used to select specific attributes to query. + * @qp_init_attr: Additional attributes of the selected QP. + * + * The qp_attr_mask may be used to limit the query to gathering only the + * selected attributes. + */ +int ib_query_qp(struct ib_qp *qp, + struct ib_qp_attr *qp_attr, + int qp_attr_mask, + struct ib_qp_init_attr *qp_init_attr); + +/** + * ib_destroy_qp - Destroys the specified QP. + * @qp: The QP to destroy. + */ +int ib_destroy_qp(struct ib_qp *qp); + +/** + * ib_post_send - Posts a list of work requests to the send queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @send_wr: A list of work requests to post on the send queue. + * @bad_send_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_send(struct ib_qp *qp, + struct ib_send_wr *send_wr, + struct ib_send_wr **bad_send_wr) +{ + return qp->device->post_send(qp, send_wr, bad_send_wr); +} + +/** + * ib_post_recv - Posts a list of work requests to the receive queue of + * the specified QP. + * @qp: The QP to post the work request on. + * @recv_wr: A list of work requests to post on the receive queue. + * @bad_recv_wr: On an immediate failure, this parameter will reference + * the work request that failed to be posted on the QP. + */ +static inline int ib_post_recv(struct ib_qp *qp, + struct ib_recv_wr *recv_wr, + struct ib_recv_wr **bad_recv_wr) +{ + return qp->device->post_recv(qp, recv_wr, bad_recv_wr); +} + +/** + * ib_create_cq - Creates a CQ on the specified device. 
+ * @device: The device on which to create the CQ. + * @comp_handler: A user-specified callback that is invoked when a + * completion event occurs on the CQ. + * @event_handler: A user-specified callback that is invoked when an + * asynchronous event not associated with a completion occurs on the CQ. + * @cq_context: Context associated with the CQ returned to the user via + * the associated completion and event handlers. + * @cqe: The minimum size of the CQ. + * + * Users can examine the cq structure to determine the actual CQ size. + */ +struct ib_cq *ib_create_cq(struct ib_device *device, + ib_comp_handler comp_handler, + void (*event_handler)(struct ib_event *, void *), + void *cq_context, int cqe); + +/** + * ib_resize_cq - Modifies the capacity of the CQ. + * @cq: The CQ to resize. + * @cqe: The minimum size of the CQ. + * + * Users can examine the cq structure to determine the actual CQ size. + */ +int ib_resize_cq(struct ib_cq *cq, int cqe); + +/** + * ib_destroy_cq - Destroys the specified CQ. + * @cq: The CQ to destroy. + */ +int ib_destroy_cq(struct ib_cq *cq); + +/** + * ib_poll_cq - poll a CQ for completion(s) + * @cq:the CQ being polled + * @num_entries:maximum number of completions to return + * @wc:array of at least @num_entries &struct ib_wc where completions + * will be returned + * + * Poll a CQ for (possibly multiple) completions. If the return value + * is < 0, an error occurred. If the return value is >= 0, it is the + * number of completions returned. If the return value is + * non-negative and < num_entries, then the CQ was emptied. + */ +static inline int ib_poll_cq(struct ib_cq *cq, int num_entries, + struct ib_wc *wc) +{ + return cq->device->poll_cq(cq, num_entries, wc); +} + +/** + * ib_peek_cq - Returns the number of unreaped completions currently + * on the specified CQ. + * @cq: The CQ to peek. + * @wc_cnt: A minimum number of unreaped completions to check for. 
+ * + * If the number of unreaped completions is greater than or equal to wc_cnt, + * this function returns wc_cnt, otherwise, it returns the actual number of + * unreaped completions. + */ +int ib_peek_cq(struct ib_cq *cq, int wc_cnt); + +/** + * ib_req_notify_cq - Request completion notification on a CQ. + * @cq: The CQ to generate an event for. + * @cq_notify: If set to %IB_CQ_SOLICITED, completion notification will + * occur on the next solicited event. If set to %IB_CQ_NEXT_COMP, + * notification will occur on the next completion. + */ +static inline int ib_req_notify_cq(struct ib_cq *cq, + enum ib_cq_notify cq_notify) +{ + return cq->device->req_notify_cq(cq, cq_notify); +} + +/** + * ib_req_ncomp_notif - Request completion notification when there are + * at least the specified number of unreaped completions on the CQ. + * @cq: The CQ to generate an event for. + * @wc_cnt: The number of unreaped completions that should be on the + * CQ before an event is generated. + */ +static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) +{ + return cq->device->req_ncomp_notif ? + cq->device->req_ncomp_notif(cq, wc_cnt) : + -ENOSYS; +} + +/** + * ib_get_dma_mr - Returns a memory region for system memory that is + * usable for DMA. + * @pd: The protection domain associated with the memory region. + * @mr_access_flags: Specifies the memory access rights. + */ +struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); + +/** + * ib_reg_phys_mr - Prepares a virtually addressed memory region for use + * by an HCA. + * @pd: The protection domain assigned to the registered region. + * @phys_buf_array: Specifies a list of physical buffers to use in the + * memory region. + * @num_phys_buf: Specifies the size of the phys_buf_array. + * @mr_access_flags: Specifies the memory access rights. + * @iova_start: The offset of the region's starting I/O virtual address.
+ */ +struct ib_mr *ib_reg_phys_mr(struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + +/** + * ib_rereg_phys_mr - Modifies the attributes of an existing memory region. + * Conceptually, this call performs the functions deregister memory region + * followed by register physical memory region. Where possible, + * resources are reused instead of deallocated and reallocated. + * @mr: The memory region to modify. + * @mr_rereg_mask: A bit-mask used to indicate which of the following + * properties of the memory region are being modified. + * @pd: If %IB_MR_REREG_PD is set in mr_rereg_mask, this field specifies + * the new protection domain to associate with the memory region, + * otherwise, this parameter is ignored. + * @phys_buf_array: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies a list of physical buffers to use in the new + * translation, otherwise, this parameter is ignored. + * @num_phys_buf: If %IB_MR_REREG_TRANS is set in mr_rereg_mask, this + * field specifies the size of the phys_buf_array, otherwise, this + * parameter is ignored. + * @mr_access_flags: If %IB_MR_REREG_ACCESS is set in mr_rereg_mask, this + * field specifies the new memory access rights, otherwise, this + * parameter is ignored. + * @iova_start: The offset of the region's starting I/O virtual address. + */ +int ib_rereg_phys_mr(struct ib_mr *mr, + int mr_rereg_mask, + struct ib_pd *pd, + struct ib_phys_buf *phys_buf_array, + int num_phys_buf, + int mr_access_flags, + u64 *iova_start); + +/** + * ib_query_mr - Retrieves information about a specific memory region. + * @mr: The memory region to retrieve information about. + * @mr_attr: The attributes of the specified memory region. + */ +int ib_query_mr(struct ib_mr *mr, struct ib_mr_attr *mr_attr); + +/** + * ib_dereg_mr - Deregisters a memory region and removes it from the + * HCA translation table. + * @mr: The memory region to deregister.
+ */ +int ib_dereg_mr(struct ib_mr *mr); + +/** + * ib_alloc_mw - Allocates a memory window. + * @pd: The protection domain associated with the memory window. + */ +struct ib_mw *ib_alloc_mw(struct ib_pd *pd); + +/** + * ib_bind_mw - Posts a work request to the send queue of the specified + * QP, which binds the memory window to the given address range and + * remote access attributes. + * @qp: QP to post the bind work request on. + * @mw: The memory window to bind. + * @mw_bind: Specifies information about the memory window, including + * its address range, remote access rights, and associated memory region. + */ +static inline int ib_bind_mw(struct ib_qp *qp, + struct ib_mw *mw, + struct ib_mw_bind *mw_bind) +{ + /* XXX reference counting in corresponding MR? */ + return mw->device->bind_mw ? + mw->device->bind_mw(qp, mw, mw_bind) : + -ENOSYS; +} + +/** + * ib_dealloc_mw - Deallocates a memory window. + * @mw: The memory window to deallocate. + */ +int ib_dealloc_mw(struct ib_mw *mw); + +/** + * ib_alloc_fmr - Allocates an unmapped fast memory region. + * @pd: The protection domain associated with the unmapped region. + * @mr_access_flags: Specifies the memory access rights. + * @fmr_attr: Attributes of the unmapped region. + * + * A fast memory region must be mapped before it can be used as part of + * a work request. + */ +struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd, + int mr_access_flags, + struct ib_fmr_attr *fmr_attr); + +/** + * ib_map_phys_fmr - Maps a list of physical pages to a fast memory region. + * @fmr: The fast memory region to associate with the pages. + * @page_list: An array of physical pages to map to the fast memory region. + * @list_len: The number of pages in page_list. + * @iova: The I/O virtual address to use with the mapped region.
+ */ +static inline int ib_map_phys_fmr(struct ib_fmr *fmr, + u64 *page_list, int list_len, + u64 iova) +{ + return fmr->device->map_phys_fmr(fmr, page_list, list_len, iova); +} + +/** + * ib_unmap_fmr - Removes the mapping from a list of fast memory regions. + * @fmr_list: A linked list of fast memory regions to unmap. + */ +int ib_unmap_fmr(struct list_head *fmr_list); + +/** + * ib_dealloc_fmr - Deallocates a fast memory region. + * @fmr: The fast memory region to deallocate. + */ +int ib_dealloc_fmr(struct ib_fmr *fmr); + +/** + * ib_attach_mcast - Attaches the specified QP to a multicast group. + * @qp: QP to attach to the multicast group. The QP must be type + * IB_QPT_UD. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. + * + * In order to send and receive multicast packets, subnet + * administration must have created the multicast group and configured + * the fabric appropriately. The port associated with the specified + * QP must also be a member of the multicast group. + */ +int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +/** + * ib_detach_mcast - Detaches the specified QP from a multicast group. + * @qp: QP to detach from the multicast group. + * @gid: Multicast group GID. + * @lid: Multicast group LID in host byte order. 
+ */ +int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid); + +#endif /* IB_VERBS_H */ -- cgit v1.2.3 From d18d36b4edbb980c9de7fe00724c3ded5de1b7a7 Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sat, 27 Aug 2005 04:13:52 -0400 Subject: libata: fix a few alan-isms --- include/linux/libata.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index 33f3ab4eb82..7c09540c52b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -424,7 +424,6 @@ extern unsigned int ata_dev_classify(struct ata_taskfile *tf); extern void ata_dev_id_string(u16 *id, unsigned char *s, unsigned int ofs, unsigned int len); extern void ata_dev_config(struct ata_port *ap, unsigned int i); -extern void ata_dev_set_protocol(struct ata_device *dev); extern void ata_bmdma_setup (struct ata_queued_cmd *qc); extern void ata_bmdma_start (struct ata_queued_cmd *qc); extern void ata_bmdma_stop(struct ata_queued_cmd *qc); -- cgit v1.2.3 From af36d7f0df56de3e3e4bbfb15d0915097ecb8cab Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 28 Aug 2005 20:18:39 -0400 Subject: [libata] license change, other bits - changes license of all code from OSL+GPL to plain ole GPL - except for NVIDIA, who hasn't yet responded about sata_nv - copyright holders were already contacted privately - adds info in each driver about where hardware/protocol docs may be obtained - where I have made major contributions, updated copyright dates --- include/linux/ata.h | 43 ++++++++++++++++++++++++------------------- include/linux/libata.h | 41 ++++++++++++++++++++++------------------- 2 files changed, 46 insertions(+), 38 deletions(-) (limited to 'include') diff --git a/include/linux/ata.h b/include/linux/ata.h index ca5fcadf998..19c3e2853f1 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -1,24 +1,29 @@ /* - Copyright 2003-2004 Red Hat, Inc. All rights reserved. 
- Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * Copyright 2003-2004 Red Hat, Inc. All rights reserved. + * Copyright 2003-2004 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * + * Hardware documentation available from http://www.t13.org/ + * */ #ifndef __LINUX_ATA_H__ diff --git a/include/linux/libata.h b/include/linux/libata.h index 6cd9ba63563..51d2b20d34f 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -1,23 +1,26 @@ /* - Copyright 2003-2004 Red Hat, Inc. All rights reserved. - Copyright 2003-2004 Jeff Garzik - - The contents of this file are subject to the Open - Software License version 1.1 that can be found at - http://www.opensource.org/licenses/osl-1.1.txt and is included herein - by reference. - - Alternatively, the contents of this file may be used under the terms - of the GNU General Public License version 2 (the "GPL") as distributed - in the kernel source COPYING file, in which case the provisions of - the GPL are applicable instead of the above. If you wish to allow - the use of your version of this file only under the terms of the - GPL and not to allow others to use your version of this file under - the OSL, indicate your decision by deleting the provisions above and - replace them with the notice and other provisions required by the GPL. - If you do not delete the provisions above, a recipient may use your - version of this file under either the OSL or the GPL. - + * Copyright 2003-2005 Red Hat, Inc. All rights reserved. + * Copyright 2003-2005 Jeff Garzik + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + * libata documentation is available via 'make {ps|pdf}docs', + * as Documentation/DocBook/libata.* + * */ #ifndef __LINUX_LIBATA_H__ -- cgit v1.2.3 From e13934563db047043ccead26412f552375cea90c Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Wed, 24 Aug 2005 18:46:21 -0500 Subject: [PATCH] PHY Layer fixup This patch adds back the code that was taken out, thus re-enabling: * The PHY Layer to initialize without crashing * Drivers to actually connect to PHYs * The entire PHY Control Layer This patch is used by the gianfar driver, and other drivers which are in development. Signed-off-by: Andy Fleming Signed-off-by: Jeff Garzik --- include/linux/phy.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4f2b5effc16..72cb67b66e0 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -334,11 +334,26 @@ int phy_write(struct phy_device *phydev, u16 regnum, u16 val); struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +struct phy_device * phy_attach(struct net_device *dev, + const char *phy_id, u32 flags); +struct phy_device * phy_connect(struct net_device *dev, const char *phy_id, + void (*handler)(struct net_device *), u32 flags); +void phy_disconnect(struct phy_device *phydev); +void phy_detach(struct phy_device *phydev); +void phy_start(struct phy_device *phydev); +void phy_stop(struct phy_device *phydev); +int phy_start_aneg(struct phy_device *phydev); + +int mdiobus_register(struct mii_bus *bus); +void mdiobus_unregister(struct mii_bus *bus); +void phy_sanitize_settings(struct phy_device *phydev); +int 
phy_stop_interrupts(struct phy_device *phydev); static inline int phy_read_status(struct phy_device *phydev) { return phydev->drv->read_status(phydev); } +int genphy_config_advert(struct phy_device *phydev); int genphy_setup_forced(struct phy_device *phydev); int genphy_restart_aneg(struct phy_device *phydev); int genphy_config_aneg(struct phy_device *phydev); @@ -355,6 +370,8 @@ int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_mii_ioctl(struct phy_device *phydev, struct mii_ioctl_data *mii_data, int cmd); +int phy_start_interrupts(struct phy_device *phydev); +void phy_print_status(struct phy_device *phydev); extern struct bus_type mdio_bus_type; #endif /* __PHY_H */ -- cgit v1.2.3 From e28f7faf05159f1cfd564596f5e6178edba6bd49 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 5 Aug 2005 19:39:06 +1000 Subject: [PATCH] Four level pagetables for ppc64 Implement 4-level pagetables for ppc64 This patch implements full four-level page tables for ppc64, thereby extending the usable user address range to 44 bits (16T). The patch uses a full page for the tables at the bottom and top level, and a quarter page for the intermediate levels. It uses full 64-bit pointers at every level, thus also increasing the addressable range of physical memory. This patch also tweaks the VSID allocation to allow matching range for user addresses (this halves the number of available contexts) and adds some #if and BUILD_BUG sanity checks. 
Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- include/asm-ppc64/imalloc.h | 2 +- include/asm-ppc64/mmu.h | 7 ++-- include/asm-ppc64/page.h | 26 +++++++----- include/asm-ppc64/pgalloc.h | 93 ++++++++++++++++++++++++++++--------------- include/asm-ppc64/pgtable.h | 90 ++++++++++++++++++++++++----------------- include/asm-ppc64/processor.h | 4 +- 6 files changed, 136 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/imalloc.h b/include/asm-ppc64/imalloc.h index e46ff68a6e4..42adf7033a8 100644 --- a/include/asm-ppc64/imalloc.h +++ b/include/asm-ppc64/imalloc.h @@ -6,7 +6,7 @@ */ #define PHBS_IO_BASE VMALLOC_END #define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ -#define IMALLOC_END (VMALLOC_START + EADDR_MASK) +#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE) /* imalloc region types */ diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 70348a85131..959a4bfdcd6 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -259,8 +259,10 @@ extern void stabs_alloc(void); #define VSID_BITS 36 #define VSID_MODULUS ((1UL<context.htlb_segs) @@ -125,36 +126,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. 
*/ typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned int pmd; } pmd_t; -typedef struct { unsigned int pgd; } pgd_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) -#define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pte(x) ((pte_t) { (x) }) +#define __pmd(x) ((pmd_t) { (x) }) +#define __pud(x) ((pud_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) #else /* * .. while these make it easier on the compiler */ typedef unsigned long pte_t; -typedef unsigned int pmd_t; -typedef unsigned int pgd_t; +typedef unsigned long pmd_t; +typedef unsigned long pud_t; +typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) #define pmd_val(x) (x) +#define pud_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) #define __pmd(x) (x) +#define __pud(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) @@ -208,9 +215,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */ #define USER_REGION_ID (0UL) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) -#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE) -#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT) - #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) #ifdef CONFIG_DISCONTIGMEM diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h index 4fc4b739b38..26bc49c1108 100644 --- a/include/asm-ppc64/pgalloc.h +++ b/include/asm-ppc64/pgalloc.h @@ -6,7 +6,12 @@ #include #include -extern kmem_cache_t *zero_cache; +extern kmem_cache_t 
*pgtable_cache[]; + +#define PTE_CACHE_NUM 0 +#define PMD_CACHE_NUM 1 +#define PUD_CACHE_NUM 1 +#define PGD_CACHE_NUM 0 /* * This program is free software; you can redistribute it and/or @@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache; * 2 of the License, or (at your option) any later version. */ -static inline pgd_t * -pgd_alloc(struct mm_struct *mm) +static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL); + return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); } -static inline void -pgd_free(pgd_t *pgd) +static inline void pgd_free(pgd_t *pgd) { - kmem_cache_free(zero_cache, pgd); + kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); +} + +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(pud_t *pud) +{ + kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); } #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) -static inline pmd_t * -pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); } -static inline void -pmd_free(pmd_t *pmd) +static inline void pmd_free(pmd_t *pmd) { - kmem_cache_free(zero_cache, pmd); + kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); } #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) @@ -47,44 +62,58 @@ pmd_free(pmd_t *pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); } static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long 
address) { - pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); - if (pte) - return virt_to_page(pte); - return NULL; + return virt_to_page(pte_alloc_one_kernel(mm, address)); } static inline void pte_free_kernel(pte_t *pte) { - kmem_cache_free(zero_cache, pte); + kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte); } static inline void pte_free(struct page *ptepage) { - kmem_cache_free(zero_cache, page_address(ptepage)); + pte_free_kernel(page_address(ptepage)); } -struct pte_freelist_batch +#define PGF_CACHENUM_MASK 0xf + +typedef struct pgtable_free { + unsigned long val; +} pgtable_free_t; + +static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum, + unsigned long mask) { - struct rcu_head rcu; - unsigned int index; - struct page * pages[0]; -}; + BUG_ON(cachenum > PGF_CACHENUM_MASK); -#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \ - sizeof(struct page *)) + return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum}; +} -extern void pte_free_now(struct page *ptepage); -extern void pte_free_submit(struct pte_freelist_batch *batch); +static inline void pgtable_free(pgtable_free_t pgf) +{ + void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK); + int cachenum = pgf.val & PGF_CACHENUM_MASK; -DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); + kmem_cache_free(pgtable_cache[cachenum], p); +} -void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage); -#define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd)) +void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); + +#define __pte_free_tlb(tlb, ptepage) \ + pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ + PTE_CACHE_NUM, PTE_TABLE_SIZE-1)) +#define __pmd_free_tlb(tlb, pmd) \ + pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ + PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) +#define __pud_free_tlb(tlb, pmd) \ + pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ + PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) 
#define check_pgt_cache() do { } while (0) diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index 46cf61c2ff6..5ea952ad716 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -15,19 +15,24 @@ #include #endif /* __ASSEMBLY__ */ -#include - /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for * each entry by assuming that each entry is page aligned. */ #define PTE_INDEX_SIZE 9 -#define PMD_INDEX_SIZE 10 -#define PGD_INDEX_SIZE 10 +#define PMD_INDEX_SIZE 7 +#define PUD_INDEX_SIZE 7 +#define PGD_INDEX_SIZE 9 + +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* PMD_SHIFT determines what a second-level page table entry can map */ @@ -35,8 +40,13 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -45,15 +55,23 @@ /* * Size of EA range mapped by our pagetables. 
*/ -#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ - PGD_INDEX_SIZE + PAGE_SHIFT) -#define EADDR_MASK ((1UL << EADDR_SIZE) - 1) +#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ + PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) +#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE) + +#if TASK_SIZE_USER64 > PGTABLE_RANGE +#error TASK_SIZE_USER64 exceeds pagetable range +#endif + +#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) +#error TASK_SIZE_USER64 exceeds user VSID range +#endif /* * Define the address range of the vmalloc VM area. */ #define VMALLOC_START (0xD000000000000000ul) -#define VMALLOC_SIZE (0x10000000000UL) +#define VMALLOC_SIZE (0x80000000000UL) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* @@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; #ifndef __ASSEMBLY__ int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local); - -void hugetlb_mm_free_pgd(struct mm_struct *mm); #endif /* __ASSEMBLY__ */ #define HAVE_ARCH_UNMAPPED_AREA @@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm); #else #define hash_huge_page(mm,a,ea,vsid,local) -1 -#define hugetlb_mm_free_pgd(mm) do {} while (0) #endif @@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_set(pmdp, ptep) \ - (pmd_val(*(pmdp)) = __ba_to_bpn(ptep)) +#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) == 0) #define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) -#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd))) +#define pmd_page_kernel(pmd) (pmd_val(pmd)) #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) -#define pud_set(pudp, pmdp) 
(pud_val(*(pudp)) = (__ba_to_bpn(pmdp))) +#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) ((pud_val(pud)) == 0UL) -#define pud_present(pud) (pud_val(pud) != 0UL) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pud_page(pud) (__bpn_to_ba(pud_val(pud))) +#define pud_bad(pud) ((pud_val(pud)) == 0) +#define pud_present(pud) (pud_val(pud) != 0) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0) +#define pud_page(pud) (pud_val(pud)) + +#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (pgd_val(pgd) == 0) +#define pgd_present(pgd) (pgd_val(pgd) != 0) +#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) +#define pgd_page(pgd) (pgd_val(pgd)) /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. */ /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff) +#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -/* Find an entry in the second-level page table.. */ +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) + #define pmd_offset(pudp,addr) \ - ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) -/* Find an entry in the third-level page table.. 
*/ #define pte_offset_kernel(dir,addr) \ - ((pte_t *) pmd_page_kernel(*(dir)) \ - + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -458,23 +479,18 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) extern pgd_t swapper_pg_dir[]; extern void paging_init(void); -/* - * Because the huge pgtables are only 2 level, they can take - * at most around 4M, much less than one hugepage which the - * process is presumably entitled to use. So we don't bother - * freeing up the pagetables on unmap, and wait until - * destroy_context() to clean up the lot. 
- */ #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ - do { } while (0) + free_pgd_range(tlb, addr, end, floor, ceiling) /* * This gets called at the end of handling a page fault, when diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h index 352306cfb57..50b14c0ddb8 100644 --- a/include/asm-ppc64/processor.h +++ b/include/asm-ppc64/processor.h @@ -382,8 +382,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern struct task_struct *last_task_used_math; extern struct task_struct *last_task_used_altivec; -/* 64-bit user address space is 41-bits (2TBs user VM) */ -#define TASK_SIZE_USER64 (0x0000020000000000UL) +/* 64-bit user address space is 44-bits (16TB user VM) */ +#define TASK_SIZE_USER64 (0x0000100000000000UL) /* * 32-bit user address space is 4GB - 1 page -- cgit v1.2.3 From 34153fa3af45d84f3221d9b67ba2ab7e8a220d28 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 9 Aug 2005 10:36:34 +0200 Subject: [PATCH] flattened device tree changes This patch updates the format of the flattened device-tree passed between the boot trampoline and the kernel to support a more compact representation, for use by embedded systems mostly. 
Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- include/asm-ppc64/prom.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h index 04b1a84f7ca..dc5330b3950 100644 --- a/include/asm-ppc64/prom.h +++ b/include/asm-ppc64/prom.h @@ -22,13 +22,15 @@ #define RELOC(x) (*PTRRELOC(&(x))) /* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* 4: version, 4: total size */ -#define OF_DT_BEGIN_NODE 0x1 /* Start node: full name */ +#define OF_DT_HEADER 0xd00dfeed /* marker */ +#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ #define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, content */ +#define OF_DT_PROP 0x3 /* Property: name off, size, + * content */ +#define OF_DT_NOP 0x4 /* nop */ #define OF_DT_END 0x9 -#define OF_DT_VERSION 1 +#define OF_DT_VERSION 0x10 /* * This is what gets passed to the kernel by prom_init or kexec @@ -54,7 +56,9 @@ struct boot_param_header u32 version; /* format version */ u32 last_comp_version; /* last compatible version */ /* version 2 fields below */ - u32 boot_cpuid_phys; /* Which physical CPU id we're booting on */ + u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ + /* version 3 fields below */ + u32 dt_strings_size; /* size of the DT strings block */ }; -- cgit v1.2.3 From 3e494c80481653bbc810b4e67651097595ea0294 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:40:17 +1000 Subject: [PATCH] ppc64: split iSeries specific parts out of vio.c This patch splits the iSeries specific parts out of vio.c. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/vio.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 20cd98ee633..1e6d4c4b83a 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -56,6 +56,9 @@ const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); int vio_get_irq(struct vio_dev *dev); int vio_enable_interrupts(struct vio_dev *dev); int vio_disable_interrupts(struct vio_dev *dev); +extern struct vio_dev * __devinit vio_register_device_common( + struct vio_dev *viodev, char *name, char *type, + uint32_t unit_address, struct iommu_table *iommu_table); extern struct dma_mapping_ops vio_dma_ops; @@ -95,9 +98,13 @@ struct vio_dev { struct device dev; }; +extern struct vio_dev vio_bus_device; + static inline struct vio_dev *to_vio_dev(struct device *dev) { return container_of(dev, struct vio_dev, dev); } +extern int vio_bus_init(void); + #endif /* _ASM_VIO_H */ -- cgit v1.2.3 From 8c65b5c955b8598d9c63b4e97392377269873a54 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:42:49 +1000 Subject: [PATCH] ppc64: move iSeries vio iommu init Since the iSeries vio iommu tables cannot be used until after the vio bus has been initialised, move the initialisation of the tables to there. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/iommu.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h index 729de5cc21d..72dcf8116b0 100644 --- a/include/asm-ppc64/iommu.h +++ b/include/asm-ppc64/iommu.h @@ -104,9 +104,6 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn); #ifdef CONFIG_PPC_ISERIES -/* Initializes tables for bio buses */ -extern void __init iommu_vio_init(void); - struct iSeries_Device_Node; /* Creates table for an individual device node */ extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn); -- cgit v1.2.3 From 6312236fe82bbd3b0e1dee60b3eb3b270a2f6aeb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:45:27 +1000 Subject: [PATCH] ppc64: make the bus matching function platform specific This patch allows us to have a different bus matching function for each platform. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/vio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 1e6d4c4b83a..70644a23221 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -105,6 +105,7 @@ static inline struct vio_dev *to_vio_dev(struct device *dev) return container_of(dev, struct vio_dev, dev); } -extern int vio_bus_init(void); +extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, + const struct vio_dev *dev)); #endif /* _ASM_VIO_H */ -- cgit v1.2.3 From 19dbd0f6a74f7529d6d49dd50ad6b31adbe0598d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:50:26 +1000 Subject: [PATCH] ppc64: split pSeries specific parts out of vio.c This patch just splits out the pSeries specific parts of vio.c.
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/vio.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 70644a23221..a82e87c1c5f 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -106,6 +106,8 @@ static inline struct vio_dev *to_vio_dev(struct device *dev) } extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, - const struct vio_dev *dev)); + const struct vio_dev *dev), + void (*)(struct vio_dev *), + void (*)(struct device *)); #endif /* _ASM_VIO_H */ -- cgit v1.2.3 From 2e2446ea0758cd57dd065962d9544e3f4d44ea2b Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH] Remove NACA fixed address constraint Comments in head.S suggest that the iSeries naca has a fixed address, because tools expect to find it there. The only tool which appears to access the naca is addRamDisk, but both the in-kernel version and the version used in RHEL and SuSE in fact locate the NACA the same way as the hypervisor does, by following the pointer in the hvReleaseData structure. Since the requirement for a fixed address seems to be obsolete, this patch removes the naca from head.S and replaces it with a normal C initializer. For good measure, it removes an old version of addRamDisk.c which was sitting, unused, in the ppc32 tree. 
Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- include/asm-ppc64/naca.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/naca.h b/include/asm-ppc64/naca.h index bfb7caa32ea..d2afe644759 100644 --- a/include/asm-ppc64/naca.h +++ b/include/asm-ppc64/naca.h @@ -12,8 +12,6 @@ #include -#ifndef __ASSEMBLY__ - struct naca_struct { /* Kernel only data - undefined for user space */ void *xItVpdAreas; /* VPD Data 0x00 */ @@ -23,9 +21,4 @@ struct naca_struct { extern struct naca_struct naca; -#endif /* __ASSEMBLY__ */ - -#define NACA_PAGE 0x4 -#define NACA_PHYS_ADDR (NACA_PAGE< Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH] Change address of ppc64 initial segment table On ppc64 machines with segment tables, CPU0's segment table is at a fixed address, currently 0x9000. This patch moves it to the free space at 0x6000, just below the fwnmi data area. This saves 8k of space in vmlinux and the runtime kernel image. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- include/asm-ppc64/mmu.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 959a4bfdcd6..789c2693483 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -28,9 +28,12 @@ #define STE_VSID_SHIFT 12 /* Location of cpu0's segment table */ -#define STAB0_PAGE 0x9 +#define STAB0_PAGE 0x6 #define STAB0_PHYS_ADDR (STAB0_PAGE< Date: Wed, 3 Aug 2005 14:32:30 +1000 Subject: [PATCH] ppc64: remove firmware features from cpu_spec The firmware_features field of struct cpu_spec should really be a separate variable as the firmware features do not depend on the chip and the bitmask is constructed independently. By removing it, we save 112 bytes from the cpu_specs array and we access the bitmask directly instead of via the cur_cpu_spec pointer. 
Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/cputable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h index d67fa9e2607..d55698a60e7 100644 --- a/include/asm-ppc64/cputable.h +++ b/include/asm-ppc64/cputable.h @@ -56,11 +56,6 @@ struct cpu_spec { * BHT, SPD, etc... from head.S before branching to identify_machine */ cpu_setup_t cpu_setup; - - /* This is used to identify firmware features which are available - * to the kernel. - */ - unsigned long firmware_features; }; extern struct cpu_spec cpu_specs[]; @@ -72,6 +67,11 @@ static inline unsigned long cpu_has_feature(unsigned long feature) } +/* This is used to identify firmware features which are available + * to the kernel. + */ +extern unsigned long ppc64_firmware_features; + /* firmware feature bitmask values */ #define FIRMWARE_MAX_FEATURES 63 -- cgit v1.2.3 From 1ababe11480d59d75be806804c71fa55d203a5a6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:35:25 +1000 Subject: [PATCH] ppc64: create firmware_has_feature() Create the firmware_has_feature() inline and move the firmware feature stuff into its own header file. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/cputable.h | 47 ++----------------------------- include/asm-ppc64/firmware.h | 67 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 44 deletions(-) create mode 100644 include/asm-ppc64/firmware.h (limited to 'include') diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h index d55698a60e7..ae6cf383010 100644 --- a/include/asm-ppc64/cputable.h +++ b/include/asm-ppc64/cputable.h @@ -66,44 +66,6 @@ static inline unsigned long cpu_has_feature(unsigned long feature) return cur_cpu_spec->cpu_features & feature; } - -/* This is used to identify firmware features which are available - * to the kernel. 
- */ -extern unsigned long ppc64_firmware_features; - -/* firmware feature bitmask values */ -#define FIRMWARE_MAX_FEATURES 63 - -#define FW_FEATURE_PFT (1UL<<0) -#define FW_FEATURE_TCE (1UL<<1) -#define FW_FEATURE_SPRG0 (1UL<<2) -#define FW_FEATURE_DABR (1UL<<3) -#define FW_FEATURE_COPY (1UL<<4) -#define FW_FEATURE_ASR (1UL<<5) -#define FW_FEATURE_DEBUG (1UL<<6) -#define FW_FEATURE_TERM (1UL<<7) -#define FW_FEATURE_PERF (1UL<<8) -#define FW_FEATURE_DUMP (1UL<<9) -#define FW_FEATURE_INTERRUPT (1UL<<10) -#define FW_FEATURE_MIGRATE (1UL<<11) -#define FW_FEATURE_PERFMON (1UL<<12) -#define FW_FEATURE_CRQ (1UL<<13) -#define FW_FEATURE_VIO (1UL<<14) -#define FW_FEATURE_RDMA (1UL<<15) -#define FW_FEATURE_LLAN (1UL<<16) -#define FW_FEATURE_BULK (1UL<<17) -#define FW_FEATURE_XDABR (1UL<<18) -#define FW_FEATURE_MULTITCE (1UL<<19) -#define FW_FEATURE_SPLPAR (1UL<<20) - -typedef struct { - unsigned long val; - char * name; -} firmware_feature_t; - -extern firmware_feature_t firmware_features_table[]; - #endif /* __ASSEMBLY__ */ /* CPU kernel features */ @@ -140,10 +102,8 @@ extern firmware_feature_t firmware_features_table[]; #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) #define CPU_FTR_CTRL ASM_CONST(0x0000100000000000) -/* Platform firmware features */ -#define FW_FTR_ ASM_CONST(0x0000000000000001) - #ifndef __ASSEMBLY__ + #define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \ PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) @@ -156,10 +116,9 @@ extern firmware_feature_t firmware_features_table[]; #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE) #else #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE) -#endif +#endif /* CONFIG_PPC_ISERIES */ -#define COMMON_PPC64_FW (0) -#endif +#endif /* __ASSEMBLY */ #ifdef __ASSEMBLY__ diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h new file mode 100644 index 00000000000..5bb5bf46bb4 --- /dev/null +++ b/include/asm-ppc64/firmware.h @@ -0,0 +1,67 @@ +/* + * 
include/asm-ppc64/firmware.h + * + * Extracted from include/asm-ppc64/cputable.h + * + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __ASM_PPC_FIRMWARE_H +#define __ASM_PPC_FIRMWARE_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +/* firmware feature bitmask values */ +#define FIRMWARE_MAX_FEATURES 63 + +#define FW_FEATURE_PFT (1UL<<0) +#define FW_FEATURE_TCE (1UL<<1) +#define FW_FEATURE_SPRG0 (1UL<<2) +#define FW_FEATURE_DABR (1UL<<3) +#define FW_FEATURE_COPY (1UL<<4) +#define FW_FEATURE_ASR (1UL<<5) +#define FW_FEATURE_DEBUG (1UL<<6) +#define FW_FEATURE_TERM (1UL<<7) +#define FW_FEATURE_PERF (1UL<<8) +#define FW_FEATURE_DUMP (1UL<<9) +#define FW_FEATURE_INTERRUPT (1UL<<10) +#define FW_FEATURE_MIGRATE (1UL<<11) +#define FW_FEATURE_PERFMON (1UL<<12) +#define FW_FEATURE_CRQ (1UL<<13) +#define FW_FEATURE_VIO (1UL<<14) +#define FW_FEATURE_RDMA (1UL<<15) +#define FW_FEATURE_LLAN (1UL<<16) +#define FW_FEATURE_BULK (1UL<<17) +#define FW_FEATURE_XDABR (1UL<<18) +#define FW_FEATURE_MULTITCE (1UL<<19) +#define FW_FEATURE_SPLPAR (1UL<<20) + +/* This is used to identify firmware features which are available + * to the kernel. 
+ */ +extern unsigned long ppc64_firmware_features; + +static inline unsigned long firmware_has_feature(unsigned long feature) +{ + return ppc64_firmware_features & feature; +} + +typedef struct { + unsigned long val; + char * name; +} firmware_feature_t; + +extern firmware_feature_t firmware_features_table[]; + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* __ASM_PPC_FIRMWARE_H */ -- cgit v1.2.3 From 8d15a3e55f49678b0900dcf5c1cddb322a129325 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:40:16 +1000 Subject: [PATCH] ppc64: make firmware_has_feature() stronger Make firmware_has_feature() evaluate at compile time for the non pSeries case and tidy up code where possible. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/firmware.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h index 5bb5bf46bb4..e3725f3c8ea 100644 --- a/include/asm-ppc64/firmware.h +++ b/include/asm-ppc64/firmware.h @@ -45,6 +45,22 @@ #define FW_FEATURE_MULTITCE (1UL<<19) #define FW_FEATURE_SPLPAR (1UL<<20) +enum { + FW_FEATURE_PSERIES = FW_FEATURE_PFT | FW_FEATURE_TCE | + FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | + FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | + FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | + FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | + FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | + FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | + FW_FEATURE_SPLPAR, + FW_FEATURE_POSSIBLE = +#ifdef CONFIG_PPC_PSERIES + FW_FEATURE_PSERIES | +#endif + 0, +}; + /* This is used to identify firmware features which are available * to the kernel. 
*/ @@ -52,15 +68,17 @@ extern unsigned long ppc64_firmware_features; static inline unsigned long firmware_has_feature(unsigned long feature) { - return ppc64_firmware_features & feature; + return ppc64_firmware_features & feature & FW_FEATURE_POSSIBLE; } +#ifdef CONFIG_PPC_PSERIES typedef struct { unsigned long val; char * name; } firmware_feature_t; extern firmware_feature_t firmware_features_table[]; +#endif #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ -- cgit v1.2.3 From aed31351941aa990fb0865c186565a589c56d3fe Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:43:21 +1000 Subject: [PATCH] ppc64: introduce FW_FEATURE_ISERIES Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/firmware.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h index e3725f3c8ea..22bb85cf60a 100644 --- a/include/asm-ppc64/firmware.h +++ b/include/asm-ppc64/firmware.h @@ -44,9 +44,10 @@ #define FW_FEATURE_XDABR (1UL<<18) #define FW_FEATURE_MULTITCE (1UL<<19) #define FW_FEATURE_SPLPAR (1UL<<20) +#define FW_FEATURE_ISERIES (1UL<<21) enum { - FW_FEATURE_PSERIES = FW_FEATURE_PFT | FW_FEATURE_TCE | + FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE | FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | @@ -54,11 +55,25 @@ enum { FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR, + FW_FEATURE_PSERIES_ALWAYS = 0, + FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES, + FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES, FW_FEATURE_POSSIBLE = #ifdef CONFIG_PPC_PSERIES - FW_FEATURE_PSERIES | + FW_FEATURE_PSERIES_POSSIBLE | +#endif +#ifdef CONFIG_PPC_ISERIES + FW_FEATURE_ISERIES_POSSIBLE | #endif 0, + FW_FEATURE_ALWAYS = 
+#ifdef CONFIG_PPC_PSERIES + FW_FEATURE_PSERIES_ALWAYS & +#endif +#ifdef CONFIG_PPC_ISERIES + FW_FEATURE_ISERIES_ALWAYS & +#endif + FW_FEATURE_POSSIBLE, }; /* This is used to identify firmware features which are available @@ -68,7 +83,8 @@ extern unsigned long ppc64_firmware_features; static inline unsigned long firmware_has_feature(unsigned long feature) { - return ppc64_firmware_features & feature & FW_FEATURE_POSSIBLE; + return (FW_FEATURE_ALWAYS & feature) || + (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature); } #ifdef CONFIG_PPC_PSERIES -- cgit v1.2.3 From 38e85dc18036804ada8698951cfad4e6114fec1b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH] ppc64: Remove PTRRELOC() from msChunks code The msChunks code was written to work on pSeries, but now it's only used on iSeries. This means there's no need to do PTRRELOC anymore, so remove it all. A few places were getting "extern reloc_offset()" from abs_addr.h, move it into system.h instead. 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 36 ++++++++++-------------------------- include/asm-ppc64/system.h | 2 ++ 2 files changed, 12 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 6d4e8e78705..93dc63ed4f2 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -29,46 +29,30 @@ struct msChunks { extern struct msChunks msChunks; extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); -extern unsigned long reloc_offset(void); #ifdef CONFIG_MSCHUNKS -static inline unsigned long -chunk_to_addr(unsigned long chunk) +static inline unsigned long chunk_to_addr(unsigned long chunk) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return chunk << _msChunks->chunk_shift; + return chunk << msChunks.chunk_shift; } -static inline unsigned long -addr_to_chunk(unsigned long addr) +static inline unsigned long addr_to_chunk(unsigned long addr) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return addr >> _msChunks->chunk_shift; + return addr >> msChunks.chunk_shift; } -static inline unsigned long -chunk_offset(unsigned long addr) +static inline unsigned long chunk_offset(unsigned long addr) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return addr & _msChunks->chunk_mask; + return addr & msChunks.chunk_mask; } -static inline unsigned long -abs_chunk(unsigned long pchunk) +static inline unsigned long abs_chunk(unsigned long pchunk) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - if ( pchunk >= _msChunks->num_chunks ) { + if (pchunk >= msChunks.num_chunks) return pchunk; - } - return PTRRELOC(_msChunks->abs)[pchunk]; + + return msChunks.abs[pchunk]; } /* A macro so it can take pointers or 
unsigned long. */ diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 98d120ca8a9..4104a5dedba 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -302,5 +302,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) #define arch_align_stack(x) (x) +extern unsigned long reloc_offset(void); + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3 From 34c8f6961fc601294a38c5bd5ca12131b2e52674 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH] ppc64: msChunks cleanups Chunks are 256KB, so use constants for the size/shift/mask, rather than getting them from the msChunks struct. The iSeries debugger (??) might still need access to the values in the msChunks struct, so we keep them around for now, but set them from the constant values. Replace msChunks_entry typedef with regular u32. Simplify msChunks_alloc() to manipulate klimit directly, rather than via a parameter. Move msChunks_alloc() and msChunks into iSeries_setup.c, as that's where they're used. 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 93dc63ed4f2..2276567f133 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -17,34 +17,37 @@ #include #include -typedef u32 msChunks_entry; struct msChunks { unsigned long num_chunks; unsigned long chunk_size; unsigned long chunk_shift; unsigned long chunk_mask; - msChunks_entry *abs; + u32 *abs; }; extern struct msChunks msChunks; -extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); #ifdef CONFIG_MSCHUNKS +/* Chunks are 256 KB */ +#define MSCHUNKS_CHUNK_SHIFT (18) +#define MSCHUNKS_CHUNK_SIZE (1UL << MSCHUNKS_CHUNK_SHIFT) +#define MSCHUNKS_OFFSET_MASK (MSCHUNKS_CHUNK_SIZE - 1) + static inline unsigned long chunk_to_addr(unsigned long chunk) { - return chunk << msChunks.chunk_shift; + return chunk << MSCHUNKS_CHUNK_SHIFT; } static inline unsigned long addr_to_chunk(unsigned long addr) { - return addr >> msChunks.chunk_shift; + return addr >> MSCHUNKS_CHUNK_SHIFT; } static inline unsigned long chunk_offset(unsigned long addr) { - return addr & msChunks.chunk_mask; + return addr & MSCHUNKS_OFFSET_MASK; } static inline unsigned long abs_chunk(unsigned long pchunk) -- cgit v1.2.3 From 56e97b71bf55edb69dc8e9715553972ce50b1564 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH] ppc64: Rename msChunks structure Rename the msChunks struct to get rid of the StUdlY caps and make it a bit clearer what it's for. 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 2276567f133..05414a9bfdd 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -17,18 +17,17 @@ #include #include -struct msChunks { +#ifdef CONFIG_MSCHUNKS + +struct mschunks_map { unsigned long num_chunks; unsigned long chunk_size; unsigned long chunk_shift; unsigned long chunk_mask; - u32 *abs; + u32 *mapping; }; -extern struct msChunks msChunks; - - -#ifdef CONFIG_MSCHUNKS +extern struct mschunks_map mschunks_map; /* Chunks are 256 KB */ #define MSCHUNKS_CHUNK_SHIFT (18) @@ -52,10 +51,10 @@ static inline unsigned long chunk_offset(unsigned long addr) static inline unsigned long abs_chunk(unsigned long pchunk) { - if (pchunk >= msChunks.num_chunks) + if (pchunk >= mschunks_map.num_chunks) return pchunk; - return msChunks.abs[pchunk]; + return mschunks_map.mapping[pchunk]; } /* A macro so it can take pointers or unsigned long. */ -- cgit v1.2.3 From ce21795275ab469b97384faa36462350af17eca0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH] ppc64: Consolidate some macros The only caller of chunk_offset() and abs_chunk() is phys_to_abs(), so fold the former two into the latter. 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 05414a9bfdd..025527742fe 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -44,24 +44,17 @@ static inline unsigned long addr_to_chunk(unsigned long addr) return addr >> MSCHUNKS_CHUNK_SHIFT; } -static inline unsigned long chunk_offset(unsigned long addr) +static inline unsigned long phys_to_abs(unsigned long pa) { - return addr & MSCHUNKS_OFFSET_MASK; -} + unsigned long chunk; -static inline unsigned long abs_chunk(unsigned long pchunk) -{ - if (pchunk >= mschunks_map.num_chunks) - return pchunk; + chunk = addr_to_chunk(pa); - return mschunks_map.mapping[pchunk]; -} + if (chunk < mschunks_map.num_chunks) + chunk = mschunks_map.mapping[chunk]; -/* A macro so it can take pointers or unsigned long. */ -#define phys_to_abs(pa) \ - ({ unsigned long _pa = (unsigned long)(pa); \ - chunk_to_addr(abs_chunk(addr_to_chunk(_pa))) + chunk_offset(_pa); \ - }) + return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); +} static inline unsigned long physRpn_to_absRpn(unsigned long rpn) -- cgit v1.2.3 From aefd16b0c5a594b5feaba23954ad74061f45c8a5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:24 +1000 Subject: [PATCH] ppc64: Remove redundant uses of physRpn_to_absRpn physRpn_to_absRpn is a no-op on non-iSeries platforms, remove the two redundant calls. There's only one caller on iSeries so fold the logic in there so we can get rid of it completely. 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 025527742fe..ab4320c1cf5 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -56,14 +56,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -static inline unsigned long -physRpn_to_absRpn(unsigned long rpn) -{ - unsigned long pa = rpn << PAGE_SHIFT; - unsigned long aa = phys_to_abs(pa); - return (aa >> PAGE_SHIFT); -} - /* A macro so it can take pointers or unsigned long. */ #define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa)) -- cgit v1.2.3 From e88bcd1b29f63738b702e57d831758706162347e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:25 +1000 Subject: [PATCH] ppc64: Remove redundant abs_to_phys() macro abs_to_phys() is a macro that turns out to do nothing, and also has the unfortunate property that it's not the inverse of phys_to_abs() on iSeries. The following is for my benefit as much as everyone else. With CONFIG_MSCHUNKS enabled, the lmb code is changed such that it keeps a physbase variable for each lmb region. This is used to take the possibly discontiguous lmb regions and present them as a contiguous address space beginning from zero. In this context each lmb region's base address is its "absolute" base address, and its physbase is its "physical" address (from Linux's point of view). The abs_to_phys() macro does the mapping from "absolute" to "physical". Note: This is not related to the iSeries mapping of physical to absolute (ie. Hypervisor) addresses which is maintained with the msChunks structure. And the msChunks structure is not controlled via CONFIG_MSCHUNKS. Once upon a time you could compile for non-iSeries with CONFIG_MSCHUNKS enabled.
But these days CONFIG_MSCHUNKS depends on CONFIG_PPC_ISERIES, so for non-iSeries code abs_to_phys() is a no-op. On iSeries we always have one lmb region which spans from 0 to systemcfg->physicalMemorySize (arch/ppc64/kernel/iSeries_setup.c line 383). This region has a base (ie. absolute) address of 0, and a physbase address of 0 (as calculated in lmb_analyze() (arch/ppc64/kernel/lmb.c line 144)). On iSeries, abs_to_phys(aa) is defined as lmb_abs_to_phys(aa), which finds the lmb region containing aa (and there's only one, ie. 0), and then does: return lmb.memory.region[0].physbase + (aa - lmb.memory.region[0].base) physbase == base == 0, so you're left with "return aa". So remove abs_to_phys(), and lmb_abs_to_phys() which is the implementation of abs_to_phys() for iSeries. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index ab4320c1cf5..200db1c45f2 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -56,9 +56,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -/* A macro so it can take pointers or unsigned long. 
*/ -#define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa)) - #else /* !CONFIG_MSCHUNKS */ #define chunk_to_addr(chunk) ((unsigned long)(chunk)) @@ -68,12 +65,11 @@ static inline unsigned long phys_to_abs(unsigned long pa) #define phys_to_abs(pa) (pa) #define physRpn_to_absRpn(rpn) (rpn) -#define abs_to_phys(aa) (aa) #endif /* !CONFIG_MSCHUNKS */ /* Convenience macros */ #define virt_to_abs(va) phys_to_abs(__pa(va)) -#define abs_to_virt(aa) __va(abs_to_phys(aa)) +#define abs_to_virt(aa) __va(aa) #endif /* _ABS_ADDR_H */ -- cgit v1.2.3 From 180379dcefb39e8bd05d562b0685e9084dffcc0a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:26 +1000 Subject: [PATCH] ppc64: Remove physbase from the lmb_property struct We no longer need the lmb code to know about abs and phys addresses, so remove the physbase variable from the lmb_property struct. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/lmb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h index a6cbca21ac1..cb368bf0f26 100644 --- a/include/asm-ppc64/lmb.h +++ b/include/asm-ppc64/lmb.h @@ -22,7 +22,6 @@ struct lmb_property { unsigned long base; - unsigned long physbase; unsigned long size; }; -- cgit v1.2.3 From bef5686229810709091fb6e505071f4aa41e3760 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:26 +1000 Subject: [PATCH] ppc64: Remove CONFIG_MSCHUNKS We can now remove CONFIG_MSCHUNKS as it doesn't do anything interesting anymore. The only macro in abs_addr.h which is called by non-iSeries code is phys_to_abs(), so remove the other dummy implementations, and we add a firmware feature check to phys_to_abs(). 
Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 200db1c45f2..84c24d4cdb7 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -16,8 +16,7 @@ #include #include #include - -#ifdef CONFIG_MSCHUNKS +#include struct mschunks_map { unsigned long num_chunks; @@ -48,6 +47,10 @@ static inline unsigned long phys_to_abs(unsigned long pa) { unsigned long chunk; + /* This is a no-op on non-iSeries */ + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + return pa; + chunk = addr_to_chunk(pa); if (chunk < mschunks_map.num_chunks) @@ -56,18 +59,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -#else /* !CONFIG_MSCHUNKS */ - -#define chunk_to_addr(chunk) ((unsigned long)(chunk)) -#define addr_to_chunk(addr) (addr) -#define chunk_offset(addr) (0) -#define abs_chunk(pchunk) (pchunk) - -#define phys_to_abs(pa) (pa) -#define physRpn_to_absRpn(rpn) (rpn) - -#endif /* !CONFIG_MSCHUNKS */ - /* Convenience macros */ #define virt_to_abs(va) phys_to_abs(__pa(va)) #define abs_to_virt(aa) __va(aa) -- cgit v1.2.3 From b13cfd173f73c3f6f9a307b7b6e64d45fbd756b2 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 4 Aug 2005 19:26:42 +0200 Subject: [PATCH] ppc64: allow xmon=off If both CONFIG_XMON and CONFIG_XMON_DEFAULT are enabled in the .config, there is no way to disable xmon again. setup_system first calls xmon_init, and only later parse_early_param. So a new 'xmon=off' cmdline option will do the right thing.
Signed-off-by: Olaf Hering Signed-off-by: Paul Mackerras --- include/asm-ppc64/system.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 4104a5dedba..b9e1835351e 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -88,7 +88,7 @@ DEBUGGER_BOILERPLATE(debugger_dabr_match) DEBUGGER_BOILERPLATE(debugger_fault_handler) #ifdef CONFIG_XMON -extern void xmon_init(void); +extern void xmon_init(int enable); #endif #else -- cgit v1.2.3 From 180a33627d958d5d9d3602dde6ac74b315e136f0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 11:13:36 +1000 Subject: [PATCH] ppc64: Move ppc64_enable_pmcs() logic into a ppc_md function This patch moves power4_enable_pmcs() to arch/ppc64/kernel/pmc.c. I've tested it on P5 LPAR and P4. It does what it used to. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/machdep.h | 3 +++ include/asm-ppc64/pmc.h | 2 ++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h index f0ef0637594..ff2c9287d3b 100644 --- a/include/asm-ppc64/machdep.h +++ b/include/asm-ppc64/machdep.h @@ -140,6 +140,9 @@ struct machdep_calls { /* Idle loop for this platform, leave empty for default idle loop */ int (*idle_loop)(void); + + /* Function to enable pmcs for this platform, called once per cpu. 
*/ + void (*enable_pmcs)(void); }; extern int default_idle(void); diff --git a/include/asm-ppc64/pmc.h b/include/asm-ppc64/pmc.h index c924748c0be..d1d297dbccf 100644 --- a/include/asm-ppc64/pmc.h +++ b/include/asm-ppc64/pmc.h @@ -26,4 +26,6 @@ typedef void (*perf_irq_t)(struct pt_regs *); int reserve_pmc_hardware(perf_irq_t new_perf_irq); void release_pmc_hardware(void); +void power4_enable_pmcs(void); + #endif /* _PPC64_PMC_H */ -- cgit v1.2.3 From c594adad5653491813959277fb87a2fef54c4e05 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 11 Aug 2005 16:55:21 +1000 Subject: [PATCH] Dynamic hugepage addresses for ppc64 Paulus, I think this is now a reasonable candidate for the post-2.6.13 queue. Relax address restrictions for hugepages on ppc64 Presently, 64-bit applications on ppc64 may only use hugepages in the address region from 1-1.5T. Furthermore, if hugepages are enabled in the kernel config, they may only use hugepages and never normal pages in this area. This patch relaxes this restriction, allowing any address to be used with hugepages, but with a 1TB granularity. That is, if you map a hugepage anywhere in the region 1TB-2TB, that entire area will be reserved exclusively for hugepages for the remainder of the process's lifetime. This works analogously to hugepages in 32-bit applications, where hugepages can be mapped anywhere, but with 256MB (mmu segment) granularity. This patch applies on top of the four level pagetable patch (http://patchwork.ozlabs.org/linuxppc64/patch?id=1936).
Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- include/asm-ppc64/mmu.h | 2 +- include/asm-ppc64/page.h | 29 +++++++++++++++++------------ 2 files changed, 18 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 789c2693483..ad36bb28de2 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -307,7 +307,7 @@ typedef unsigned long mm_context_id_t; typedef struct { mm_context_id_t id; #ifdef CONFIG_HUGETLB_PAGE - u16 htlb_segs; /* bitmask */ + u16 low_htlb_areas, high_htlb_areas; #endif } mm_context_t; diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h index 7e7b18ea986..a79a08df62b 100644 --- a/include/asm-ppc64/page.h +++ b/include/asm-ppc64/page.h @@ -37,40 +37,45 @@ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -/* For 64-bit processes the hugepage range is 1T-1.5T */ -#define TASK_HPAGE_BASE ASM_CONST(0x0000010000000000) -#define TASK_HPAGE_END ASM_CONST(0x0000018000000000) +#define HTLB_AREA_SHIFT 40 +#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) +#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) #define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ - (1U << GET_ESID(addr))) & 0xffff) +#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ + - (1U << GET_HTLB_AREA(addr))) & 0xffff) #define ARCH_HAS_HUGEPAGE_ONLY_RANGE #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE #define ARCH_HAS_SETCLEAR_HUGE_PTE #define touches_hugepage_low_range(mm, addr, len) \ - (LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs) -#define touches_hugepage_high_range(addr, len) \ - (((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END)) + (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) +#define touches_hugepage_high_range(mm, addr, len) \ + (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) #define __within_hugepage_low_range(addr, len, segmask) \ ((LOW_ESID_MASK((addr), (len)) | (segmask)) == 
(segmask)) #define within_hugepage_low_range(addr, len) \ __within_hugepage_low_range((addr), (len), \ - current->mm->context.htlb_segs) -#define within_hugepage_high_range(addr, len) (((addr) >= TASK_HPAGE_BASE) \ - && ((addr)+(len) <= TASK_HPAGE_END) && ((addr)+(len) >= (addr))) + current->mm->context.low_htlb_areas) +#define __within_hugepage_high_range(addr, len, zonemask) \ + ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) +#define within_hugepage_high_range(addr, len) \ + __within_hugepage_high_range((addr), (len), \ + current->mm->context.high_htlb_areas) #define is_hugepage_only_range(mm, addr, len) \ - (touches_hugepage_high_range((addr), (len)) || \ + (touches_hugepage_high_range((mm), (addr), (len)) || \ touches_hugepage_low_range((mm), (addr), (len))) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #define in_hugepage_area(context, addr) \ (cpu_has_feature(CPU_FTR_16M_PAGE) && \ - ( (((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \ + ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ ( ((addr) < 0x100000000L) && \ - ((1 << GET_ESID(addr)) & (context).htlb_segs) ) ) ) + ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) #else /* !CONFIG_HUGETLB_PAGE */ -- cgit v1.2.3 From 9e2d3cd34a159948dc753a14573e16bffc04dba8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 28 Aug 2005 03:19:14 +0100 Subject: [PATCH] mod_devicetable.h fixes * ieee1394_device_id has kernel_ulong_t field after an odd number of __u32 ones. Since mod_devicetable.h is included both from kernel and from host build helper, we may be in trouble if we are building on 32bit host for 64bit target - userland sees unsigned long long, kernel sees unsigned long and while their sizes match, alignments might not. Fixed by forcing alignment. Fortunately, almost nobody else needs that - the rest of such fields is naturally aligned as it is. * of_device_id has void * in it. 
Host userland helpers need kernel_ulong_t instead, since their void * might have nothing to do with the kernel one. Fixed in the same way it's done for similar problems in pcmcia_device_id (ifdef __KERNEL__). * pcmcia_device_id has the same problem as ieee1394_device_id. Fixed the same way. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/mod_devicetable.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index dce53ac1625..97bbccdbcca 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -33,7 +33,8 @@ struct ieee1394_device_id { __u32 model_id; __u32 specifier_id; __u32 version; - kernel_ulong_t driver_data; + kernel_ulong_t driver_data + __attribute__((aligned(sizeof(kernel_ulong_t)))); }; @@ -182,7 +183,11 @@ struct of_device_id char name[32]; char type[32]; char compatible[128]; +#if __KERNEL__ void *data; +#else + kernel_ulong_t data; +#endif }; @@ -208,7 +213,8 @@ struct pcmcia_device_id { #ifdef __KERNEL__ const char * prod_id[4]; #else - kernel_ulong_t prod_id[4]; + kernel_ulong_t prod_id[4] + __attribute__((aligned(sizeof(kernel_ulong_t)))); #endif /* not matched against */ -- cgit v1.2.3 From 6c52a96e6cacb35403b85c3b42db0faf26f3ed85 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Aug 2005 12:45:11 -0700 Subject: [SPARC64]: Revamp Spitfire error trap handling. Current uncorrectable error handling was poor enough that the processor could just loop taking the same trap over and over again. Fix things up so that we at least get a log message and perhaps even some register state. In the process, much consolidation became possible, particularly with the correctable error handler. Prefix assembler and C function names with "spitfire" to indicate that these are for Ultra-I/II/IIi/IIe only. 
More work is needed to make these routines robust and featureful to the level of the Ultra-III error handlers. Signed-off-by: David S. Miller --- include/asm-sparc64/sfafsr.h | 82 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 include/asm-sparc64/sfafsr.h (limited to 'include') diff --git a/include/asm-sparc64/sfafsr.h b/include/asm-sparc64/sfafsr.h new file mode 100644 index 00000000000..2f792c20b53 --- /dev/null +++ b/include/asm-sparc64/sfafsr.h @@ -0,0 +1,82 @@ +#ifndef _SPARC64_SFAFSR_H +#define _SPARC64_SFAFSR_H + +#include + +/* Spitfire Asynchronous Fault Status register, ASI=0x4C VA<63:0>=0x0 */ + +#define SFAFSR_ME (_AC(1,UL) << SFAFSR_ME_SHIFT) +#define SFAFSR_ME_SHIFT 32 +#define SFAFSR_PRIV (_AC(1,UL) << SFAFSR_PRIV_SHIFT) +#define SFAFSR_PRIV_SHIFT 31 +#define SFAFSR_ISAP (_AC(1,UL) << SFAFSR_ISAP_SHIFT) +#define SFAFSR_ISAP_SHIFT 30 +#define SFAFSR_ETP (_AC(1,UL) << SFAFSR_ETP_SHIFT) +#define SFAFSR_ETP_SHIFT 29 +#define SFAFSR_IVUE (_AC(1,UL) << SFAFSR_IVUE_SHIFT) +#define SFAFSR_IVUE_SHIFT 28 +#define SFAFSR_TO (_AC(1,UL) << SFAFSR_TO_SHIFT) +#define SFAFSR_TO_SHIFT 27 +#define SFAFSR_BERR (_AC(1,UL) << SFAFSR_BERR_SHIFT) +#define SFAFSR_BERR_SHIFT 26 +#define SFAFSR_LDP (_AC(1,UL) << SFAFSR_LDP_SHIFT) +#define SFAFSR_LDP_SHIFT 25 +#define SFAFSR_CP (_AC(1,UL) << SFAFSR_CP_SHIFT) +#define SFAFSR_CP_SHIFT 24 +#define SFAFSR_WP (_AC(1,UL) << SFAFSR_WP_SHIFT) +#define SFAFSR_WP_SHIFT 23 +#define SFAFSR_EDP (_AC(1,UL) << SFAFSR_EDP_SHIFT) +#define SFAFSR_EDP_SHIFT 22 +#define SFAFSR_UE (_AC(1,UL) << SFAFSR_UE_SHIFT) +#define SFAFSR_UE_SHIFT 21 +#define SFAFSR_CE (_AC(1,UL) << SFAFSR_CE_SHIFT) +#define SFAFSR_CE_SHIFT 20 +#define SFAFSR_ETS (_AC(0xf,UL) << SFAFSR_ETS_SHIFT) +#define SFAFSR_ETS_SHIFT 16 +#define SFAFSR_PSYND (_AC(0xffff,UL) << SFAFSR_PSYND_SHIFT) +#define SFAFSR_PSYND_SHIFT 0 + +/* UDB Error Register, ASI=0x7f VA<63:0>=0x0(High),0x18(Low) for read + * ASI=0x77 VA<63:0>=0x0(High),0x18(Low) 
for write + */ + +#define UDBE_UE (_AC(1,UL) << 9) +#define UDBE_CE (_AC(1,UL) << 8) +#define UDBE_E_SYNDR (_AC(0xff,UL) << 0) + +/* The trap handlers for asynchronous errors encode the AFSR and + * other pieces of information into a 64-bit argument for C code + * encoded as follows: + * + * ----------------------------------------------- + * | UDB_H | UDB_L | TL>1 | TT | AFSR | + * ----------------------------------------------- + * 63 54 53 44 42 41 33 32 0 + * + * The AFAR is passed in unchanged. + */ +#define SFSTAT_UDBH_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT) +#define SFSTAT_UDBH_SHIFT 54 +#define SFSTAT_UDBL_MASK (_AC(0x3ff,UL) << SFSTAT_UDBH_SHIFT) +#define SFSTAT_UDBL_SHIFT 44 +#define SFSTAT_TL_GT_ONE (_AC(1,UL) << SFSTAT_TL_GT_ONE_SHIFT) +#define SFSTAT_TL_GT_ONE_SHIFT 42 +#define SFSTAT_TRAP_TYPE (_AC(0x1FF,UL) << SFSTAT_TRAP_TYPE_SHIFT) +#define SFSTAT_TRAP_TYPE_SHIFT 33 +#define SFSTAT_AFSR_MASK (_AC(0x1ffffffff,UL) << SFSTAT_AFSR_SHIFT) +#define SFSTAT_AFSR_SHIFT 0 + +/* ESTATE Error Enable Register, ASI=0x4b VA<63:0>=0x0 */ +#define ESTATE_ERR_CE 0x1 /* Correctable errors */ +#define ESTATE_ERR_NCE 0x2 /* TO, BERR, LDP, ETP, EDP, WP, UE, IVUE */ +#define ESTATE_ERR_ISAP 0x4 /* System address parity error */ +#define ESTATE_ERR_ALL (ESTATE_ERR_CE | \ + ESTATE_ERR_NCE | \ + ESTATE_ERR_ISAP) + +/* The various trap types that report using the above state. */ +#define TRAP_TYPE_IAE 0x09 /* Instruction Access Error */ +#define TRAP_TYPE_DAE 0x32 /* Data Access Error */ +#define TRAP_TYPE_CEE 0x63 /* Correctable ECC Error */ + +#endif /* _SPARC64_SFAFSR_H */ -- cgit v1.2.3 From 3d6364abcfdaedeb34418c2894f61251d48614f6 Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 29 Aug 2005 12:45:30 -0700 Subject: [SPARC64]: remove use of asm/segment.h Removed sparc64 architecture specific users of asm/segment.h and asm-sparc64/segment.h itself Signed-off-by: Kumar Gala Signed-off-by: David S. 
Miller --- include/asm-sparc64/processor.h | 1 - include/asm-sparc64/segment.h | 6 ------ 2 files changed, 7 deletions(-) delete mode 100644 include/asm-sparc64/segment.h (limited to 'include') diff --git a/include/asm-sparc64/processor.h b/include/asm-sparc64/processor.h index d0bee241356..3169f3e2237 100644 --- a/include/asm-sparc64/processor.h +++ b/include/asm-sparc64/processor.h @@ -18,7 +18,6 @@ #include #include #include -#include #include /* The sparc has no problems with write protection */ diff --git a/include/asm-sparc64/segment.h b/include/asm-sparc64/segment.h deleted file mode 100644 index b03e709fc94..00000000000 --- a/include/asm-sparc64/segment.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC64_SEGMENT_H -#define __SPARC64_SEGMENT_H - -/* Only here because we have some old header files that expect it.. */ - -#endif -- cgit v1.2.3 From ca7c8d2c1e2a2f2445cb5e00f45b93af57f22c1b Mon Sep 17 00:00:00 2001 From: Kumar Gala Date: Mon, 29 Aug 2005 12:45:44 -0700 Subject: [SPARC]: remove use of asm/segment.h Removed sparc architecture specific users of asm/segment.h and asm-sparc/segment.h itself Signed-off-by: Kumar Gala Signed-off-by: David S. Miller --- include/asm-sparc/processor.h | 1 - include/asm-sparc/segment.h | 6 ------ include/asm-sparc/system.h | 1 - 3 files changed, 8 deletions(-) delete mode 100644 include/asm-sparc/segment.h (limited to 'include') diff --git a/include/asm-sparc/processor.h b/include/asm-sparc/processor.h index 32c9699367c..5a7a1a8d29a 100644 --- a/include/asm-sparc/processor.h +++ b/include/asm-sparc/processor.h @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/include/asm-sparc/segment.h b/include/asm-sparc/segment.h deleted file mode 100644 index a1b7ffc9eec..00000000000 --- a/include/asm-sparc/segment.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __SPARC_SEGMENT_H -#define __SPARC_SEGMENT_H - -/* Only here because we have some old header files that expect it.. 
*/ - -#endif diff --git a/include/asm-sparc/system.h b/include/asm-sparc/system.h index 898562ebe94..3557781a4bf 100644 --- a/include/asm-sparc/system.h +++ b/include/asm-sparc/system.h @@ -9,7 +9,6 @@ #include /* NR_CPUS */ #include -#include #include #include #include -- cgit v1.2.3 From 442464a50077ff00454ff8d7628cbe1b8eacc034 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Aug 2005 12:46:07 -0700 Subject: [SPARC64]: Make debugging spinlocks usable again. When the spinlock routines were moved out of line into kernel/spinlock.c this made it so that the debugging spinlocks record lock acquisition program counts in the kernel/spinlock.c functions not in their callers. This makes the debugging info kind of useless. So record the correct caller's program counter and now this feature is useful once more. Signed-off-by: David S. Miller --- include/asm-sparc64/spinlock.h | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h index 9cb93a5c2b4..d265bf6570f 100644 --- a/include/asm-sparc64/spinlock.h +++ b/include/asm-sparc64/spinlock.h @@ -132,12 +132,15 @@ do { \ membar("#LoadLoad"); \ } while((__lock)->lock) -extern void _do_spin_lock (spinlock_t *lock, char *str); -extern void _do_spin_unlock (spinlock_t *lock); -extern int _do_spin_trylock (spinlock_t *lock); - -#define _raw_spin_trylock(lp) _do_spin_trylock(lp) -#define _raw_spin_lock(lock) _do_spin_lock(lock, "spin_lock") +extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller); +extern void _do_spin_unlock(spinlock_t *lock); +extern int _do_spin_trylock(spinlock_t *lock, unsigned long caller); + +#define _raw_spin_trylock(lp) \ + _do_spin_trylock(lp, (unsigned long) __builtin_return_address(0)) +#define _raw_spin_lock(lock) \ + _do_spin_lock(lock, "spin_lock", \ + (unsigned long) __builtin_return_address(0)) #define _raw_spin_unlock(lock) 
_do_spin_unlock(lock) #define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) @@ -279,37 +282,41 @@ typedef struct { #define RW_LOCK_UNLOCKED (rwlock_t) { 0, 0, 0xff, { } } #define rwlock_init(lp) do { *(lp) = RW_LOCK_UNLOCKED; } while(0) -extern void _do_read_lock(rwlock_t *rw, char *str); -extern void _do_read_unlock(rwlock_t *rw, char *str); -extern void _do_write_lock(rwlock_t *rw, char *str); -extern void _do_write_unlock(rwlock_t *rw); -extern int _do_write_trylock(rwlock_t *rw, char *str); +extern void _do_read_lock(rwlock_t *rw, char *str, unsigned long caller); +extern void _do_read_unlock(rwlock_t *rw, char *str, unsigned long caller); +extern void _do_write_lock(rwlock_t *rw, char *str, unsigned long caller); +extern void _do_write_unlock(rwlock_t *rw, unsigned long caller); +extern int _do_write_trylock(rwlock_t *rw, char *str, unsigned long caller); #define _raw_read_lock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - _do_read_lock(lock, "read_lock"); \ + _do_read_lock(lock, "read_lock", \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ } while(0) #define _raw_read_unlock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - _do_read_unlock(lock, "read_unlock"); \ + _do_read_unlock(lock, "read_unlock", \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ } while(0) #define _raw_write_lock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - _do_write_lock(lock, "write_lock"); \ + _do_write_lock(lock, "write_lock", \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ } while(0) #define _raw_write_unlock(lock) \ do { unsigned long flags; \ local_irq_save(flags); \ - _do_write_unlock(lock); \ + _do_write_unlock(lock, \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ } while(0) @@ -317,7 +324,8 @@ do { unsigned long flags; \ ({ unsigned long flags; \ int val; \ local_irq_save(flags); \ - val = 
_do_write_trylock(lock, "write_trylock"); \ + val = _do_write_trylock(lock, "write_trylock", \ + (unsigned long) __builtin_return_address(0)); \ local_irq_restore(flags); \ val; \ }) -- cgit v1.2.3 From 4f07118f656c179740cad35b827032e2e29b1210 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 29 Aug 2005 12:46:22 -0700 Subject: [SPARC64]: More fully work around Spitfire Errata 51. It appears that a memory barrier soon after a mispredicted branch, not just in the delay slot, can cause the hang condition of this cpu errata. So move them out-of-line, and explicitly put them into a "branch always, predict taken" delay slot which should fully kill this problem. Signed-off-by: David S. Miller --- include/asm-sparc64/atomic.h | 8 ++++---- include/asm-sparc64/bitops.h | 4 ++-- include/asm-sparc64/spinlock.h | 4 ++-- include/asm-sparc64/system.h | 17 ++++++++++------- 4 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h index d80f3379669..e175afcf2cd 100644 --- a/include/asm-sparc64/atomic.h +++ b/include/asm-sparc64/atomic.h @@ -72,10 +72,10 @@ extern int atomic64_sub_ret(int, atomic64_t *); /* Atomic operations are already serializing */ #ifdef CONFIG_SMP -#define smp_mb__before_atomic_dec() membar("#StoreLoad | #LoadLoad") -#define smp_mb__after_atomic_dec() membar("#StoreLoad | #StoreStore") -#define smp_mb__before_atomic_inc() membar("#StoreLoad | #LoadLoad") -#define smp_mb__after_atomic_inc() membar("#StoreLoad | #StoreStore") +#define smp_mb__before_atomic_dec() membar_storeload_loadload(); +#define smp_mb__after_atomic_dec() membar_storeload_storestore(); +#define smp_mb__before_atomic_inc() membar_storeload_loadload(); +#define smp_mb__after_atomic_inc() membar_storeload_storestore(); #else #define smp_mb__before_atomic_dec() barrier() #define smp_mb__after_atomic_dec() barrier() diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h index 
9c5e7197028..6388b8376c5 100644 --- a/include/asm-sparc64/bitops.h +++ b/include/asm-sparc64/bitops.h @@ -72,8 +72,8 @@ static inline int __test_and_change_bit(int nr, volatile unsigned long *addr) } #ifdef CONFIG_SMP -#define smp_mb__before_clear_bit() membar("#StoreLoad | #LoadLoad") -#define smp_mb__after_clear_bit() membar("#StoreLoad | #StoreStore") +#define smp_mb__before_clear_bit() membar_storeload_loadload() +#define smp_mb__after_clear_bit() membar_storeload_storestore() #else #define smp_mb__before_clear_bit() barrier() #define smp_mb__after_clear_bit() barrier() diff --git a/include/asm-sparc64/spinlock.h b/include/asm-sparc64/spinlock.h index d265bf6570f..a02c4370eb4 100644 --- a/include/asm-sparc64/spinlock.h +++ b/include/asm-sparc64/spinlock.h @@ -43,7 +43,7 @@ typedef struct { #define spin_is_locked(lp) ((lp)->lock != 0) #define spin_unlock_wait(lp) \ -do { membar("#LoadLoad"); \ +do { rmb(); \ } while((lp)->lock) static inline void _raw_spin_lock(spinlock_t *lock) @@ -129,7 +129,7 @@ typedef struct { #define spin_is_locked(__lock) ((__lock)->lock != 0) #define spin_unlock_wait(__lock) \ do { \ - membar("#LoadLoad"); \ + rmb(); \ } while((__lock)->lock) extern void _do_spin_lock(spinlock_t *lock, char *str, unsigned long caller); diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h index ee4bdfc6b88..5e94c05dc2f 100644 --- a/include/asm-sparc64/system.h +++ b/include/asm-sparc64/system.h @@ -28,6 +28,14 @@ enum sparc_cpu { #define ARCH_SUN4C_SUN4 0 #define ARCH_SUN4 0 +extern void mb(void); +extern void rmb(void); +extern void wmb(void); +extern void membar_storeload(void); +extern void membar_storeload_storestore(void); +extern void membar_storeload_loadload(void); +extern void membar_storestore_loadstore(void); + #endif #define setipl(__new_ipl) \ @@ -78,16 +86,11 @@ enum sparc_cpu { #define nop() __asm__ __volatile__ ("nop") -#define membar(type) __asm__ __volatile__ ("membar " type : : : "memory") -#define mb() \ - 
membar("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad") -#define rmb() membar("#LoadLoad") -#define wmb() membar("#StoreStore") #define read_barrier_depends() do { } while(0) #define set_mb(__var, __value) \ - do { __var = __value; membar("#StoreLoad | #StoreStore"); } while(0) + do { __var = __value; membar_storeload_storestore(); } while(0) #define set_wmb(__var, __value) \ - do { __var = __value; membar("#StoreStore"); } while(0) + do { __var = __value; wmb(); } while(0) #ifdef CONFIG_SMP #define smp_mb() mb() -- cgit v1.2.3 From bf3a46aa9b96f6eb3a49a568f72a2801c3e830c0 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:22:01 -0700 Subject: [NETFILTER]: convert nfmark and conntrack mark to 32bit As discussed at netconf'05, we convert nfmark and conntrack-mark to be 32bits even on 64bit architectures. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 2 +- include/linux/skbuff.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 08fe5f7d14a..4ed720f0c4c 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -171,7 +171,7 @@ struct ip_conntrack #endif /* CONFIG_IP_NF_NAT_NEEDED */ #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - unsigned long mark; + u_int32_t mark; #endif /* Traversed often, so hopefully in different cacheline to top */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 948527e42a6..2e40f4c9f7a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -259,7 +259,7 @@ struct sk_buff { void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER - unsigned long nfmark; + __u32 nfmark; __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; -- cgit v1.2.3 From 6869c4d8e066e21623c812c448a05f1ed931c9c6 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 
Aug 2005 19:24:19 -0700 Subject: [NETFILTER]: reduce netfilter sk_buff enlargement As discussed at netconf'05, we're trying to save every bit in sk_buff. The patch below makes sk_buff 8 bytes smaller. I did some basic testing on my notebook and it seems to work. The only real in-tree user of nfcache was IPVS, who only needs a single bit. Unfortunately I couldn't find some other free bit in sk_buff to stuff that bit into, so I introduced a separate field for them. Maybe the IPVS guys can resolve that to further save space. Initially I wanted to shrink pkt_type to three bits (PACKET_HOST and alike are only 6 values defined), but unfortunately the bluetooth code overloads pkt_type :( The conntrack-event-api (out-of-tree) uses nfcache, but Rusty just came up with a way to do it without any skb fields, so it's safe to remove it. - remove all never-implemented 'nfcache' code - don't have ipvs code abuse 'nfcache' field. currently gets their own compile-conditional skb->ipvs_property field. IPVS maintainers can decide to move this bit elsewhere, but nfcache needs to die. - remove skb->nfcache field to save 4 bytes - move skb->nfctinfo into three unused bits to save further 4 bytes Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 3 +++ include/linux/netfilter_decnet.h | 3 +++ include/linux/netfilter_ipv4.h | 3 +++ include/linux/netfilter_ipv6.h | 3 +++ include/linux/skbuff.h | 10 +++++----- 5 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 2e2045482cb..ec60856408f 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -21,10 +21,13 @@ #define NF_STOP 5 #define NF_MAX_VERDICT NF_STOP +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* Generic cache responses from hook functions. <= 0x2000 is used for protocol-flags. 
*/ #define NFC_UNKNOWN 0x4000 #define NFC_ALTERED 0x8000 +#endif #ifdef __KERNEL__ #include diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h index 3064eec9cb8..01897948415 100644 --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h @@ -9,6 +9,8 @@ #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_DN_SRC 0x0001 @@ -18,6 +20,7 @@ #define NFC_DN_IF_IN 0x0004 /* Output device. */ #define NFC_DN_IF_OUT 0x0008 +#endif /* ! __KERNEL__ */ /* DECnet Hooks */ /* After promisc drops, checksum checks. */ diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 3ebc36afae1..552815b8193 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -8,6 +8,8 @@ #include #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_IP_SRC 0x0001 @@ -35,6 +37,7 @@ #define NFC_IP_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP_PROTO_UNKNOWN 0x2000 +#endif /* ! __KERNEL__ */ /* IP Hooks */ /* After promisc drops, checksum checks. */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index bee7a5ec7c6..20c069a5e4a 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -10,6 +10,8 @@ #include +/* only for userspace compatibility */ +#ifndef __KERNEL__ /* IP Cache bits. */ /* Src IP address. */ #define NFC_IP6_SRC 0x0001 @@ -38,6 +40,7 @@ #define NFC_IP6_DST_PT 0x0400 /* Something else about the proto */ #define NFC_IP6_PROTO_UNKNOWN 0x2000 +#endif /* ! 
__KERNEL__ */ /* IP6 Hooks */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2e40f4c9f7a..4b929c3c1a9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -190,7 +190,6 @@ struct skb_shared_info { * @end: End pointer * @destructor: Destruct function * @nfmark: Can be used for communication between hooks - * @nfcache: Cache info * @nfct: Associated connection, if any * @nfctinfo: Relationship of this skb to the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c @@ -252,17 +251,18 @@ struct sk_buff { __u8 local_df:1, cloned:1, ip_summed:2, - nohdr:1; - /* 3 bits spare */ + nohdr:1, + nfctinfo:3; __u8 pkt_type; __be16 protocol; void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER __u32 nfmark; - __u32 nfcache; - __u32 nfctinfo; struct nf_conntrack *nfct; +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + __u8 ipvs_property:1; +#endif #ifdef CONFIG_BRIDGE_NETFILTER struct nf_bridge_info *nf_bridge; #endif -- cgit v1.2.3 From 8728b834b226ffcf2c94a58530090e292af2a7bf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 19:25:21 -0700 Subject: [NET]: Kill skb->list Remove the "list" member of struct sk_buff, as it is entirely redundant. All SKB list removal callers know which list the SKB is on, so storing this in sk_buff does nothing other than taking up some space. Two tricky bits were SCTP, which I took care of, and two ATM drivers which Francois Romieu fixed up. Signed-off-by: David S. 
Miller Signed-off-by: Francois Romieu --- include/linux/skbuff.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4b929c3c1a9..76c68851474 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -204,7 +204,6 @@ struct sk_buff { struct sk_buff *next; struct sk_buff *prev; - struct sk_buff_head *list; struct sock *sk; struct timeval stamp; struct net_device *dev; @@ -597,7 +596,6 @@ static inline void __skb_queue_head(struct sk_buff_head *list, { struct sk_buff *prev, *next; - newsk->list = list; list->qlen++; prev = (struct sk_buff *)list; next = prev->next; @@ -622,7 +620,6 @@ static inline void __skb_queue_tail(struct sk_buff_head *list, { struct sk_buff *prev, *next; - newsk->list = list; list->qlen++; next = (struct sk_buff *)list; prev = next->prev; @@ -655,7 +652,6 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) next->prev = prev; prev->next = next; result->next = result->prev = NULL; - result->list = NULL; } return result; } @@ -664,7 +660,7 @@ static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) /* * Insert a packet on a list. */ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk); +extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); static inline void __skb_insert(struct sk_buff *newsk, struct sk_buff *prev, struct sk_buff *next, struct sk_buff_head *list) @@ -672,24 +668,23 @@ static inline void __skb_insert(struct sk_buff *newsk, newsk->next = next; newsk->prev = prev; next->prev = prev->next = newsk; - newsk->list = list; list->qlen++; } /* * Place a packet after a given packet in a list. 
*/ -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk); -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) +extern void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list); +static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head *list) { - __skb_insert(newsk, old, old->next, old->list); + __skb_insert(newsk, old, old->next, list); } /* * remove sk_buff from list. _Must_ be called atomically, and with * the list known.. */ -extern void skb_unlink(struct sk_buff *skb); +extern void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list); static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) { struct sk_buff *next, *prev; @@ -698,7 +693,6 @@ static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) next = skb->next; prev = skb->prev; skb->next = skb->prev = NULL; - skb->list = NULL; next->prev = prev; prev->next = next; } -- cgit v1.2.3 From abc3bc58047efa72ee9c2e208cbeb73d261ad703 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:25:56 -0700 Subject: [NET]: Kill skb->tc_classid Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 3 --- include/net/act_api.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 76c68851474..f10a8b9628b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -196,7 +196,6 @@ struct skb_shared_info { * @private: Data which is private to the HIPPI implementation * @tc_index: Traffic control index * @tc_verd: traffic control verdict - * @tc_classid: traffic control classid */ struct sk_buff { @@ -275,9 +274,7 @@ struct sk_buff { __u32 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT __u32 tc_verd; /* traffic control verdict */ - __u32 tc_classid; /* traffic control classid */ #endif - #endif diff --git a/include/net/act_api.h b/include/net/act_api.h index ed00a995f57..b55eb7c7f03 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -63,7 +63,7 @@ struct tc_action_ops __u32 type; /* TBD to match kind */ __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; - int (*act)(struct sk_buff **, struct tc_action *); + int (*act)(struct sk_buff **, struct tc_action *, struct tcf_result *); int (*get_stats)(struct sk_buff *, struct tc_action *); int (*dump)(struct sk_buff *, struct tc_action *,int , int); int (*cleanup)(struct tc_action *, int bind); -- cgit v1.2.3 From ac3247baf8ecadf168642e3898b0212c29c79715 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:28:03 -0700 Subject: [NETFILTER]: connection tracking event notifiers This adds a notifier chain based event mechanism for ip_conntrack state changes. As opposed to the previous implementations in patch-o-matic, we do no longer need a field in the skb to achieve this. Thanks to the valuable input from Patrick McHardy and Rusty on the idea of a per_cpu implementation. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 144 +++++++++++++++++++++++ include/linux/netfilter_ipv4/ip_conntrack_core.h | 17 ++- 2 files changed, 157 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 4ed720f0c4c..ae1270c97b5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -65,6 +65,63 @@ enum ip_conntrack_status { /* Both together */ IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), + + /* Connection is dying (removed from lists), can not be unset. */ + IPS_DYING_BIT = 9, + IPS_DYING = (1 << IPS_DYING_BIT), +}; + +/* Connection tracking event bits */ +enum ip_conntrack_events +{ + /* New conntrack */ + IPCT_NEW_BIT = 0, + IPCT_NEW = (1 << IPCT_NEW_BIT), + + /* Expected connection */ + IPCT_RELATED_BIT = 1, + IPCT_RELATED = (1 << IPCT_RELATED_BIT), + + /* Destroyed conntrack */ + IPCT_DESTROY_BIT = 2, + IPCT_DESTROY = (1 << IPCT_DESTROY_BIT), + + /* Timer has been refreshed */ + IPCT_REFRESH_BIT = 3, + IPCT_REFRESH = (1 << IPCT_REFRESH_BIT), + + /* Status has changed */ + IPCT_STATUS_BIT = 4, + IPCT_STATUS = (1 << IPCT_STATUS_BIT), + + /* Update of protocol info */ + IPCT_PROTOINFO_BIT = 5, + IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT), + + /* Volatile protocol info */ + IPCT_PROTOINFO_VOLATILE_BIT = 6, + IPCT_PROTOINFO_VOLATILE = (1 << IPCT_PROTOINFO_VOLATILE_BIT), + + /* New helper for conntrack */ + IPCT_HELPER_BIT = 7, + IPCT_HELPER = (1 << IPCT_HELPER_BIT), + + /* Update of helper info */ + IPCT_HELPINFO_BIT = 8, + IPCT_HELPINFO = (1 << IPCT_HELPINFO_BIT), + + /* Volatile helper info */ + IPCT_HELPINFO_VOLATILE_BIT = 9, + IPCT_HELPINFO_VOLATILE = (1 << IPCT_HELPINFO_VOLATILE_BIT), + + /* NAT info */ + IPCT_NATINFO_BIT = 10, + IPCT_NATINFO = (1 << IPCT_NATINFO_BIT), +}; + +enum ip_conntrack_expect_events { + IPEXP_NEW_BIT = 0, + IPEXP_NEW = (1 << IPEXP_NEW_BIT), }; 
#ifdef __KERNEL__ @@ -280,6 +337,11 @@ static inline int is_confirmed(struct ip_conntrack *ct) return test_bit(IPS_CONFIRMED_BIT, &ct->status); } +static inline int is_dying(struct ip_conntrack *ct) +{ + return test_bit(IPS_DYING_BIT, &ct->status); +} + extern unsigned int ip_conntrack_htable_size; struct ip_conntrack_stat @@ -303,6 +365,88 @@ struct ip_conntrack_stat #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +#include + +struct ip_conntrack_ecache { + struct ip_conntrack *ct; + unsigned int events; +}; +DECLARE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); + +#define CONNTRACK_ECACHE(x) (__get_cpu_var(ip_conntrack_ecache).x) + +extern struct notifier_block *ip_conntrack_chain; +extern struct notifier_block *ip_conntrack_expect_chain; + +static inline int ip_conntrack_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&ip_conntrack_chain, nb); +} + +static inline int ip_conntrack_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ip_conntrack_chain, nb); +} + +static inline int +ip_conntrack_expect_register_notifier(struct notifier_block *nb) +{ + return notifier_chain_register(&ip_conntrack_expect_chain, nb); +} + +static inline int +ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) +{ + return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); +} + +static inline void +ip_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) +{ + struct ip_conntrack_ecache *ecache = + &__get_cpu_var(ip_conntrack_ecache); + + if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) { + if (net_ratelimit()) { + printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n"); + dump_stack(); + } + } + ecache->events |= event; +} + +extern void +ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct); +extern void ip_conntrack_event_cache_init(const struct sk_buff *skb); 
+ +static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) +{ + if (is_confirmed(ct) && !is_dying(ct)) + notifier_call_chain(&ip_conntrack_chain, event, ct); +} + +static inline void +ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) +{ + notifier_call_chain(&ip_conntrack_expect_chain, event, exp); +} +#else /* CONFIG_IP_NF_CONNTRACK_EVENTS */ +static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, + const struct sk_buff *skb) {} +static inline void ip_conntrack_event(enum ip_conntrack_events event, + struct ip_conntrack *ct) {} +static inline void ip_conntrack_deliver_cached_events_for( + struct ip_conntrack *ct) {} +static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {} +static inline void +ip_conntrack_expect_event(enum ip_conntrack_expect_events event, + struct ip_conntrack_expect *exp) {} +#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ + #ifdef CONFIG_IP_NF_NAT_NEEDED static inline int ip_nat_initialized(struct ip_conntrack *conntrack, enum ip_nat_manip_type manip) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index 694aec9b478..46eeea1e273 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -38,12 +38,21 @@ extern int __ip_conntrack_confirm(struct sk_buff **pskb); /* Confirm a connection: returns NF_DROP if packet must be dropped. 
*/ static inline int ip_conntrack_confirm(struct sk_buff **pskb) { - if ((*pskb)->nfct - && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct)) - return __ip_conntrack_confirm(pskb); - return NF_ACCEPT; + struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; + int ret = NF_ACCEPT; + + if (ct && !is_confirmed(ct)) + ret = __ip_conntrack_confirm(pskb); + ip_conntrack_deliver_cached_events_for(ct); + + return ret; } +#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS +struct ip_conntrack_ecache; +extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); +#endif + extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; extern rwlock_t ip_conntrack_lock; -- cgit v1.2.3 From f9e815b376dc19e6afc551cd755ac64e9e42d81f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:30:24 -0700 Subject: [NETFILTER]: Add nfnetlink layer. Introduce "nfnetlink" (netfilter netlink) layer. This layer is used as transport layer for all userspace communication of the new upcoming netfilter subsystems, such as ctnetlink, nfnetlink_queue and some day even the mythical pkttables ;) Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink.h | 145 ++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 include/linux/netfilter/nfnetlink.h (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h new file mode 100644 index 00000000000..8f1bfb8d650 --- /dev/null +++ b/include/linux/netfilter/nfnetlink.h @@ -0,0 +1,145 @@ +#ifndef _NFNETLINK_H +#define _NFNETLINK_H +#include + +/* nfnetlink groups: Up to 32 maximum */ +#define NF_NETLINK_CONNTRACK_NEW 0x00000001 +#define NF_NETLINK_CONNTRACK_UPDATE 0x00000002 +#define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 +#define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 +#define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 +#define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 + +/* Generic structure for encapsulation optional netfilter information. + * It is reminiscent of sockaddr, but with sa_family replaced + * with attribute type. + * ! This should someday be put somewhere generic as now rtnetlink and + * ! nfnetlink use the same attributes methods. - J. Schulist. 
+ */ + +struct nfattr +{ + u_int16_t nfa_len; + u_int16_t nfa_type; +} __attribute__ ((packed)); + +/* FIXME: Shamelessly copy and pasted from rtnetlink.h, it's time + * to put this in a generic file */ + +#define NFA_ALIGNTO 4 +#define NFA_ALIGN(len) (((len) + NFA_ALIGNTO - 1) & ~(NFA_ALIGNTO - 1)) +#define NFA_OK(nfa,len) ((len) > 0 && (nfa)->nfa_len >= sizeof(struct nfattr) \ + && (nfa)->nfa_len <= (len)) +#define NFA_NEXT(nfa,attrlen) ((attrlen) -= NFA_ALIGN((nfa)->nfa_len), \ + (struct nfattr *)(((char *)(nfa)) + NFA_ALIGN((nfa)->nfa_len))) +#define NFA_LENGTH(len) (NFA_ALIGN(sizeof(struct nfattr)) + (len)) +#define NFA_SPACE(len) NFA_ALIGN(NFA_LENGTH(len)) +#define NFA_DATA(nfa) ((void *)(((char *)(nfa)) + NFA_LENGTH(0))) +#define NFA_PAYLOAD(nfa) ((int)((nfa)->nfa_len) - NFA_LENGTH(0)) +#define NFA_NEST(skb, type) \ +({ struct nfattr *__start = (struct nfattr *) (skb)->tail; \ + NFA_PUT(skb, type, 0, NULL); \ + __start; }) +#define NFA_NEST_END(skb, start) \ +({ (start)->nfa_len = ((skb)->tail - (unsigned char *) (start)); \ + (skb)->len; }) +#define NFA_NEST_CANCEL(skb, start) \ +({ if (start) \ + skb_trim(skb, (unsigned char *) (start) - (skb)->data); \ + -1; }) + +/* General form of address family dependent message. + */ +struct nfgenmsg { + u_int8_t nfgen_family; /* AF_xxx */ + u_int8_t version; /* nfnetlink version */ + u_int16_t res_id; /* resource id */ +} __attribute__ ((packed)); + +#define NFNETLINK_V1 1 + +#define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \ + + NLMSG_ALIGN(sizeof(struct nfgenmsg)))) +#define NFM_PAYLOAD(n) NLMSG_PAYLOAD(n, sizeof(struct nfgenmsg)) + +/* netfilter netlink message types are split in two pieces: + * 8 bit subsystem, 8bit operation. 
+ */ + +#define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) +#define NFNL_MSG_TYPE(x) (x & 0x00ff) + +enum nfnl_subsys_id { + NFNL_SUBSYS_NONE = 0, + NFNL_SUBSYS_CTNETLINK, + NFNL_SUBSYS_CTNETLINK_EXP, + NFNL_SUBSYS_IPTNETLINK, + NFNL_SUBSYS_QUEUE, + NFNL_SUBSYS_ULOG, + NFNL_SUBSYS_COUNT, +}; + +#ifdef __KERNEL__ + +#include + +struct nfnl_callback +{ + kernel_cap_t cap_required; /* capabilities required for this msg */ + int (*call)(struct sock *nl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); +}; + +struct nfnetlink_subsystem +{ + const char *name; + __u8 subsys_id; /* nfnetlink subsystem ID */ + __u8 cb_count; /* number of callbacks */ + u_int32_t attr_count; /* number of nfattr's */ + struct nfnl_callback *cb; /* callback for individual types */ +}; + +extern void __nfa_fill(struct sk_buff *skb, int attrtype, + int attrlen, const void *data); +#define NFA_PUT(skb, attrtype, attrlen, data) \ +({ if (skb_tailroom(skb) < (int)NFA_SPACE(attrlen)) goto nfattr_failure; \ + __nfa_fill(skb, attrtype, attrlen, data); }) + +extern struct semaphore nfnl_sem; + +#define nfnl_shlock() down(&nfnl_sem) +#define nfnl_shlock_nowait() down_trylock(&nfnl_sem) + +#define nfnl_shunlock() do { up(&nfnl_sem); \ + if(nfnl && nfnl->sk_receive_queue.qlen) \ + nfnl->sk_data_ready(nfnl, 0); \ + } while(0) + +extern void nfnl_lock(void); +extern void nfnl_unlock(void); + +extern int nfnetlink_subsys_register(struct nfnetlink_subsystem *n); +extern int nfnetlink_subsys_unregister(struct nfnetlink_subsystem *n); + +extern int nfattr_parse(struct nfattr *tb[], int maxattr, + struct nfattr *nfa, int len); + +#define nfattr_parse_nested(tb, max, nfa) \ + nfattr_parse((tb), (max), NFA_DATA((nfa)), NFA_PAYLOAD((nfa))) + +#define nfattr_bad_size(tb, max, cta_min) \ +({ int __i, __res = 0; \ + for (__i=0; __i Date: Tue, 9 Aug 2005 19:30:51 -0700 Subject: [NET]: Introduce SO_{SND,RCV}BUFFORCE socket options Allows overriding of sysctl_{wmem,rmem}_max Signed-off-by: 
Patrick McHardy Signed-off-by: David S. Miller --- include/asm-alpha/socket.h | 2 ++ include/asm-arm/socket.h | 2 ++ include/asm-arm26/socket.h | 2 ++ include/asm-cris/socket.h | 2 ++ include/asm-frv/socket.h | 2 ++ include/asm-h8300/socket.h | 2 ++ include/asm-i386/socket.h | 2 ++ include/asm-ia64/socket.h | 2 ++ include/asm-m32r/socket.h | 2 ++ include/asm-m68k/socket.h | 2 ++ include/asm-mips/socket.h | 2 ++ include/asm-parisc/socket.h | 2 ++ include/asm-ppc/socket.h | 2 ++ include/asm-ppc64/socket.h | 2 ++ include/asm-s390/socket.h | 2 ++ include/asm-sh/socket.h | 2 ++ include/asm-sparc/socket.h | 2 ++ include/asm-sparc64/socket.h | 2 ++ include/asm-v850/socket.h | 2 ++ include/asm-x86_64/socket.h | 2 ++ include/asm-xtensa/socket.h | 2 ++ 21 files changed, 42 insertions(+) (limited to 'include') diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h index d00259d3dc7..b5193229132 100644 --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h @@ -25,6 +25,8 @@ #define SO_ERROR 0x1007 #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_RCVLOWAT 0x1010 #define SO_SNDLOWAT 0x1011 #define SO_RCVTIMEO 0x1012 diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h index 46d20585d95..3c51da6438c 100644 --- a/include/asm-arm/socket.h +++ b/include/asm-arm/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h index 46d20585d95..3c51da6438c 100644 --- a/include/asm-arm26/socket.h +++ b/include/asm-arm26/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git 
a/include/asm-cris/socket.h b/include/asm-cris/socket.h index f159b4f165f..8b1da3e58c5 100644 --- a/include/asm-cris/socket.h +++ b/include/asm-cris/socket.h @@ -16,6 +16,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-frv/socket.h b/include/asm-frv/socket.h index c3be17c7de4..7177f8b9817 100644 --- a/include/asm-frv/socket.h +++ b/include/asm-frv/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h index af33b8525dc..d98cf85bafc 100644 --- a/include/asm-h8300/socket.h +++ b/include/asm-h8300/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 07f6b38ad14..802ae76195b 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h index 21a9f10d6ba..a255006fb7b 100644 --- a/include/asm-ia64/socket.h +++ b/include/asm-ia64/socket.h @@ -23,6 +23,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-m32r/socket.h b/include/asm-m32r/socket.h index 159519d9904..8b6680f223c 100644 --- a/include/asm-m32r/socket.h +++ 
b/include/asm-m32r/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h index 8d0b9fc2d07..f578ca4b776 100644 --- a/include/asm-m68k/socket.h +++ b/include/asm-m68k/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h index 020b4db70ee..d478a86294e 100644 --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h @@ -37,6 +37,8 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_ERROR 0x1007 /* get error status and clear */ #define SO_SNDBUF 0x1001 /* Send buffer size. */ #define SO_RCVBUF 0x1002 /* Receive buffer. 
*/ +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_SNDLOWAT 0x1003 /* send low-water mark */ #define SO_RCVLOWAT 0x1004 /* receive low-water mark */ #define SO_SNDTIMEO 0x1005 /* send timeout */ diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h index 4a77996c186..1bf54dc53c1 100644 --- a/include/asm-parisc/socket.h +++ b/include/asm-parisc/socket.h @@ -16,6 +16,8 @@ /* To add :#define SO_REUSEPORT 0x0200 */ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_SNDLOWAT 0x1003 #define SO_RCVLOWAT 0x1004 #define SO_SNDTIMEO 0x1005 diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h index 4134376b0f6..296e1a3469d 100644 --- a/include/asm-ppc/socket.h +++ b/include/asm-ppc/socket.h @@ -20,6 +20,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h index 59e00dfc8b8..9e1af8eb2d9 100644 --- a/include/asm-ppc64/socket.h +++ b/include/asm-ppc64/socket.h @@ -21,6 +21,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h index 0e96eeca4e6..15a5298c874 100644 --- a/include/asm-s390/socket.h +++ b/include/asm-s390/socket.h @@ -22,6 +22,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h index dde696c3b4c..553904ff933 100644 --- a/include/asm-sh/socket.h +++ b/include/asm-sh/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 
#define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h index c1154e3ecfd..09575b608ad 100644 --- a/include/asm-sparc/socket.h +++ b/include/asm-sparc/socket.h @@ -29,6 +29,8 @@ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h index 865547a2390..59987dad335 100644 --- a/include/asm-sparc64/socket.h +++ b/include/asm-sparc64/socket.h @@ -29,6 +29,8 @@ #define SO_SNDBUF 0x1001 #define SO_RCVBUF 0x1002 +#define SO_SNDBUFFORCE 0x100a +#define SO_RCVBUFFORCE 0x100b #define SO_ERROR 0x1007 #define SO_TYPE 0x1008 diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h index 213b852af53..0240d366a0a 100644 --- a/include/asm-v850/socket.h +++ b/include/asm-v850/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h index d9a252ea821..f2cdbeae5d5 100644 --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h @@ -14,6 +14,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 diff --git a/include/asm-xtensa/socket.h b/include/asm-xtensa/socket.h index daccd05a14c..00f83f3a6d7 100644 --- a/include/asm-xtensa/socket.h +++ b/include/asm-xtensa/socket.h @@ -24,6 +24,8 @@ #define SO_BROADCAST 6 #define SO_SNDBUF 7 #define SO_RCVBUF 8 +#define SO_SNDBUFFORCE 32 +#define SO_RCVBUFFORCE 33 #define SO_KEEPALIVE 9 #define SO_OOBINLINE 10 #define SO_NO_CHECK 11 -- cgit 
v1.2.3 From 6f1cf16582160c4839f05007c978743911aa022b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 9 Aug 2005 19:31:17 -0700 Subject: [NET]: Remove HIPPI private from skbuff.h This removes the private element from skbuff, that is only used by HIPPI. Instead it uses skb->cb[] to hold the additional data that is needed in the output path from hard_header to device driver. PS: The only qdisc that might potentially corrupt this cb[] is if netem was used over HIPPI. I will take care of that by fixing netem to use skb->stamp. I don't expect many users of netem over HIPPI Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hippidevice.h | 5 +++++ include/linux/skbuff.h | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9debe6bbe5f..9bc3b688d2e 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -26,6 +26,11 @@ #include #ifdef __KERNEL__ + +struct hippi_cb { + __u32 ifield; +}; + extern unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f10a8b9628b..4aeadb10258 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -193,7 +193,6 @@ struct skb_shared_info { * @nfct: Associated connection, if any * @nfctinfo: Relationship of this skb to the connection * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c - * @private: Data which is private to the HIPPI implementation * @tc_index: Traffic control index * @tc_verd: traffic control verdict */ @@ -265,11 +264,6 @@ struct sk_buff { struct nf_bridge_info *nf_bridge; #endif #endif /* CONFIG_NETFILTER */ -#if defined(CONFIG_HIPPI) - union { - __u32 ifield; - } private; -#endif #ifdef CONFIG_NET_SCHED __u32 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT -- cgit v1.2.3 From 080774a243f56ce2195ace96fba3d18548ee48ce Mon Sep 
17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:32:58 -0700 Subject: [NETFILTER]: Add ctnetlink subsystem Add ctnetlink subsystem for userspace-access to ip_conntrack table. This allows reading and updating of existing entries, as well as creating new ones (and new expect's) via nfnetlink. Please note the 'strange' byte order: nfattr (tag+length) are in host byte order, while the payload is always guaranteed to be in network byte order. This allows a simple userspace process to encapsulate netlink messages into arch-independent udp packets by just processing/swapping the headers and not knowing anything about the actual payload. Signed-off-by: Harald Welte Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 3 +- include/linux/netfilter/nfnetlink_conntrack.h | 123 +++++++++++++++++++++ include/linux/netfilter_ipv4/ip_conntrack.h | 46 +++++++- include/linux/netfilter_ipv4/ip_conntrack_core.h | 5 + include/linux/netfilter_ipv4/ip_conntrack_helper.h | 2 + .../linux/netfilter_ipv4/ip_conntrack_protocol.h | 24 ++-- include/linux/netfilter_ipv4/ip_nat_protocol.h | 25 +++-- 7 files changed, 210 insertions(+), 18 deletions(-) create mode 100644 include/linux/netfilter/nfnetlink_conntrack.h (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 8f1bfb8d650..ace7a7be074 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -56,7 +56,7 @@ struct nfgenmsg { u_int16_t res_id; /* resource id */ } __attribute__ ((packed)); -#define NFNETLINK_V1 1 +#define NFNETLINK_V0 0 #define NFM_NFA(n) ((struct nfattr *)(((char *)(n)) \ + NLMSG_ALIGN(sizeof(struct nfgenmsg)))) @@ -81,6 +81,7 @@ enum nfnl_subsys_id { #ifdef __KERNEL__ +#include #include struct nfnl_callback diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h new file mode 100644 index 00000000000..fb528e0e3bd 
--- /dev/null +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -0,0 +1,123 @@ +#ifndef _IPCONNTRACK_NETLINK_H +#define _IPCONNTRACK_NETLINK_H +#include + +enum cntl_msg_types { + IPCTNL_MSG_CT_NEW, + IPCTNL_MSG_CT_GET, + IPCTNL_MSG_CT_DELETE, + IPCTNL_MSG_CT_GET_CTRZERO, + + IPCTNL_MSG_MAX +}; + +enum ctnl_exp_msg_types { + IPCTNL_MSG_EXP_NEW, + IPCTNL_MSG_EXP_GET, + IPCTNL_MSG_EXP_DELETE, + + IPCTNL_MSG_EXP_MAX +}; + + +enum ctattr_type { + CTA_UNSPEC, + CTA_TUPLE_ORIG, + CTA_TUPLE_REPLY, + CTA_STATUS, + CTA_PROTOINFO, + CTA_HELP, + CTA_NAT, + CTA_TIMEOUT, + CTA_MARK, + CTA_COUNTERS_ORIG, + CTA_COUNTERS_REPLY, + CTA_USE, + CTA_EXPECT, + CTA_ID, + __CTA_MAX +}; +#define CTA_MAX (__CTA_MAX - 1) + +enum ctattr_tuple { + CTA_TUPLE_UNSPEC, + CTA_TUPLE_IP, + CTA_TUPLE_PROTO, + __CTA_TUPLE_MAX +}; +#define CTA_TUPLE_MAX (__CTA_TUPLE_MAX - 1) + +enum ctattr_ip { + CTA_IP_UNSPEC, + CTA_IP_V4_SRC, + CTA_IP_V4_DST, + CTA_IP_V6_SRC, + CTA_IP_V6_DST, + __CTA_IP_MAX +}; +#define CTA_IP_MAX (__CTA_IP_MAX - 1) + +enum ctattr_l4proto { + CTA_PROTO_UNSPEC, + CTA_PROTO_NUM, + CTA_PROTO_SRC_PORT, + CTA_PROTO_DST_PORT, + CTA_PROTO_ICMP_ID, + CTA_PROTO_ICMP_TYPE, + CTA_PROTO_ICMP_CODE, + __CTA_PROTO_MAX +}; +#define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1) + +enum ctattr_protoinfo { + CTA_PROTOINFO_UNSPEC, + CTA_PROTOINFO_TCP_STATE, + __CTA_PROTOINFO_MAX +}; +#define CTA_PROTOINFO_MAX (__CTA_PROTOINFO_MAX - 1) + +enum ctattr_counters { + CTA_COUNTERS_UNSPEC, + CTA_COUNTERS_PACKETS, + CTA_COUNTERS_BYTES, + __CTA_COUNTERS_MAX +}; +#define CTA_COUNTERS_MAX (__CTA_COUNTERS_MAX - 1) + +enum ctattr_nat { + CTA_NAT_UNSPEC, + CTA_NAT_MINIP, + CTA_NAT_MAXIP, + CTA_NAT_PROTO, + __CTA_NAT_MAX +}; +#define CTA_NAT_MAX (__CTA_NAT_MAX - 1) + +enum ctattr_protonat { + CTA_PROTONAT_UNSPEC, + CTA_PROTONAT_PORT_MIN, + CTA_PROTONAT_PORT_MAX, + __CTA_PROTONAT_MAX +}; +#define CTA_PROTONAT_MAX (__CTA_PROTONAT_MAX - 1) + +enum ctattr_expect { + CTA_EXPECT_UNSPEC, + CTA_EXPECT_TUPLE, + CTA_EXPECT_MASK, + 
CTA_EXPECT_TIMEOUT, + CTA_EXPECT_ID, + __CTA_EXPECT_MAX +}; +#define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) + +enum ctattr_help { + CTA_HELP_UNSPEC, + CTA_HELP_NAME, + __CTA_HELP_MAX +}; +#define CTA_HELP_MAX (__CTA_HELP_MAX - 1) + +#define CTA_HELP_MAXNAMESIZE 32 + +#endif /* _IPCONNTRACK_NETLINK_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index ae1270c97b5..ff2c1c6001f 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -209,6 +209,9 @@ struct ip_conntrack /* Current number of expected connections */ unsigned int expecting; + /* Unique ID that identifies this conntrack*/ + unsigned int id; + /* Helper, if any. */ struct ip_conntrack_helper *helper; @@ -257,6 +260,9 @@ struct ip_conntrack_expect /* Usage count. */ atomic_t use; + /* Unique ID */ + unsigned int id; + #ifdef CONFIG_IP_NF_NAT_NEEDED /* This is the original per-proto part, used to map the * expected connection the way the recipient expects. */ @@ -296,7 +302,12 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) } /* decrement reference count on a conntrack */ -extern void ip_conntrack_put(struct ip_conntrack *ct); +static inline void +ip_conntrack_put(struct ip_conntrack *ct) +{ + IP_NF_ASSERT(ct); + nf_conntrack_put(&ct->ct_general); +} /* call to create an explicit dependency on ip_conntrack. 
*/ extern void need_ip_conntrack(void); @@ -331,6 +342,39 @@ extern void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *data), void *data); +extern struct ip_conntrack_helper * +__ip_conntrack_helper_find_byname(const char *); +extern struct ip_conntrack_helper * +ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple); +extern void ip_conntrack_helper_put(struct ip_conntrack_helper *helper); + +extern struct ip_conntrack_protocol * +__ip_conntrack_proto_find(u_int8_t protocol); +extern struct ip_conntrack_protocol * +ip_conntrack_proto_find_get(u_int8_t protocol); +extern void ip_conntrack_proto_put(struct ip_conntrack_protocol *proto); + +extern void ip_ct_remove_expectations(struct ip_conntrack *ct); + +extern struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *, + struct ip_conntrack_tuple *); + +extern void ip_conntrack_free(struct ip_conntrack *ct); + +extern void ip_conntrack_hash_insert(struct ip_conntrack *ct); + +extern struct ip_conntrack_expect * +__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple); + +extern struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); + +extern struct ip_conntrack_tuple_hash * +__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack); + +extern void ip_conntrack_flush(void); + /* It's confirmed if it is, or has been in the hash table. 
*/ static inline int is_confirmed(struct ip_conntrack *ct) { diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index 46eeea1e273..fbf6c3e4164 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -2,6 +2,9 @@ #define _IP_CONNTRACK_CORE_H #include +#define MAX_IP_CT_PROTO 256 +extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; + /* This header is used to share core functionality between the standalone connection tracking module, and the compatibility layer's use of connection tracking. */ @@ -53,6 +56,8 @@ struct ip_conntrack_ecache; extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); #endif +extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); + extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; extern rwlock_t ip_conntrack_lock; diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h index 3692daa93de..8d69279ccfe 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -24,6 +24,8 @@ struct ip_conntrack_helper int (*help)(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); + + int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); }; extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index e20b57c5e1b..b6b99be8632 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -2,6 +2,7 @@ #ifndef _IP_CONNTRACK_PROTOCOL_H #define _IP_CONNTRACK_PROTOCOL_H #include +#include struct seq_file; @@ -47,22 +48,22 @@ struct ip_conntrack_protocol int (*error)(struct 
sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum); + /* convert protoinfo to nfnetink attributes */ + int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa, + const struct ip_conntrack *ct); + + int (*tuple_to_nfattr)(struct sk_buff *skb, + const struct ip_conntrack_tuple *t); + int (*nfattr_to_tuple)(struct nfattr *tb[], + struct ip_conntrack_tuple *t); + /* Module (if any) which this is connected to. */ struct module *me; }; -#define MAX_IP_CT_PROTO 256 -extern struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; - /* Protocol registration. */ extern int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto); extern void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto); - -static inline struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol) -{ - return ip_ct_protos[protocol]; -} - /* Existing built-in protocols */ extern struct ip_conntrack_protocol ip_conntrack_protocol_tcp; extern struct ip_conntrack_protocol ip_conntrack_protocol_udp; @@ -73,6 +74,11 @@ extern int ip_conntrack_protocol_tcp_init(void); /* Log invalid packets */ extern unsigned int ip_ct_log_invalid; +extern int ip_ct_port_tuple_to_nfattr(struct sk_buff *, + const struct ip_conntrack_tuple *); +extern int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], + struct ip_conntrack_tuple *); + #ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS #define LOG_INVALID(proto) \ diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h index 129708c2238..ef63aa991a0 100644 --- a/include/linux/netfilter_ipv4/ip_nat_protocol.h +++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h @@ -4,6 +4,9 @@ #include #include +#include +#include + struct iphdr; struct ip_nat_range; @@ -15,6 +18,8 @@ struct ip_nat_protocol /* Protocol number. */ unsigned int protonum; + struct module *me; + /* Translate a packet to the target according to manip type. Return true if succeeded. 
*/ int (*manip_pkt)(struct sk_buff **pskb, @@ -43,19 +48,20 @@ struct ip_nat_protocol unsigned int (*print_range)(char *buffer, const struct ip_nat_range *range); -}; -#define MAX_IP_NAT_PROTO 256 -extern struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; + int (*range_to_nfattr)(struct sk_buff *skb, + const struct ip_nat_range *range); + + int (*nfattr_to_range)(struct nfattr *tb[], + struct ip_nat_range *range); +}; /* Protocol registration. */ extern int ip_nat_protocol_register(struct ip_nat_protocol *proto); extern void ip_nat_protocol_unregister(struct ip_nat_protocol *proto); -static inline struct ip_nat_protocol *ip_nat_find_proto(u_int8_t protocol) -{ - return ip_nat_protos[protocol]; -} +extern struct ip_nat_protocol *ip_nat_proto_find_get(u_int8_t protocol); +extern void ip_nat_proto_put(struct ip_nat_protocol *proto); /* Built-in protocols. */ extern struct ip_nat_protocol ip_nat_protocol_tcp; @@ -67,4 +73,9 @@ extern int init_protocols(void) __init; extern void cleanup_protocols(void); extern struct ip_nat_protocol *find_nat_proto(u_int16_t protonum); +extern int ip_nat_port_range_to_nfattr(struct sk_buff *skb, + const struct ip_nat_range *range); +extern int ip_nat_port_nfattr_to_range(struct nfattr *tb[], + struct ip_nat_range *range); + #endif /*_IP_NAT_PROTO_H*/ -- cgit v1.2.3 From 83e3609eba3818f6e18b8bf9442195169ac306b7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:33:31 -0700 Subject: [REQSK]: Move the syn_table destroy from tcp_listen_stop to reqsk_queue_destroy Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/request_sock.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 72fd6f5e86b..334717bf9ef 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -89,6 +89,7 @@ struct listen_sock { int qlen_young; int clock_hand; u32 hash_rnd; + u32 nr_table_entries; struct request_sock *syn_table[0]; }; @@ -129,11 +130,13 @@ static inline struct listen_sock *reqsk_queue_yank_listen_sk(struct request_sock return lopt; } -static inline void reqsk_queue_destroy(struct request_sock_queue *queue) +static inline void __reqsk_queue_destroy(struct request_sock_queue *queue) { kfree(reqsk_queue_yank_listen_sk(queue)); } +extern void reqsk_queue_destroy(struct request_sock_queue *queue); + static inline struct request_sock * reqsk_queue_yank_acceptq(struct request_sock_queue *queue) { -- cgit v1.2.3 From b6b99eb5409d75ae35390057cd28f3aedfbd4cf4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:33:51 -0700 Subject: [NET]: Reduce tc_index/tc_verd to u16 Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4aeadb10258..af4f02e9824 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -265,9 +265,9 @@ struct sk_buff { #endif #endif /* CONFIG_NETFILTER */ #ifdef CONFIG_NET_SCHED - __u32 tc_index; /* traffic control index */ + __u16 tc_index; /* traffic control index */ #ifdef CONFIG_NET_CLS_ACT - __u32 tc_verd; /* traffic control verdict */ + __u16 tc_verd; /* traffic control verdict */ #endif #endif -- cgit v1.2.3 From f2ccd8fa06c8e302116e71df372f5c1f83432e03 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Tue, 9 Aug 2005 19:34:12 -0700 Subject: [NET]: Kill skb->real_dev Bonding just wants the device before the skb_bond() decapsulation occurs, so simply pass that original device into packet_type->func() as an argument. It remains to be seen whether we can use this same exact thing to get rid of skb->input_dev as well. Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 - include/linux/netdevice.h | 10 ++++++---- include/linux/skbuff.h | 2 -- include/net/arp.h | 2 +- include/net/ax25.h | 2 +- include/net/datalink.h | 2 +- include/net/ip.h | 2 +- include/net/ipv6.h | 3 ++- include/net/llc.h | 8 +++++--- include/net/p8022.h | 3 ++- include/net/psnap.h | 2 +- include/net/x25.h | 2 +- 12 files changed, 21 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 62a9d89dfbe..17d0c0d40b0 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -155,7 +155,6 @@ static inline int __vlan_hwaccel_rx(struct sk_buff *skb, { struct net_device_stats *stats; - skb->real_dev = skb->dev; skb->dev = grp->vlan_devices[vlan_tag & VLAN_VID_MASK]; if (skb->dev == NULL) { dev_kfree_skb_any(skb); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3a0ed7f9e80..296cf93a65e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -497,10 +497,12 @@ static inline void *netdev_priv(struct net_device *dev) #define SET_NETDEV_DEV(net, pdev) ((net)->class_dev.dev = (pdev)) struct packet_type { - __be16 type; /* This is really htons(ether_type). */ - struct net_device *dev; /* NULL is wildcarded here */ - int (*func) (struct sk_buff *, struct net_device *, - struct packet_type *); + __be16 type; /* This is really htons(ether_type). 
*/ + struct net_device *dev; /* NULL is wildcarded here */ + int (*func) (struct sk_buff *, + struct net_device *, + struct packet_type *, + struct net_device *); void *af_packet_priv; struct list_head list; }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index af4f02e9824..60b32151f76 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -164,7 +164,6 @@ struct skb_shared_info { * @stamp: Time we arrived * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on - * @real_dev: The real device we are using * @h: Transport layer header * @nh: Network layer header * @mac: Link layer header @@ -206,7 +205,6 @@ struct sk_buff { struct timeval stamp; struct net_device *dev; struct net_device *input_dev; - struct net_device *real_dev; union { struct tcphdr *th; diff --git a/include/net/arp.h b/include/net/arp.h index a1f09fad6a5..a13e30c35f4 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -11,7 +11,7 @@ extern struct neigh_table arp_tbl; extern void arp_init(void); extern int arp_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int arp_find(unsigned char *haddr, struct sk_buff *skb); extern int arp_ioctl(unsigned int cmd, void __user *arg); extern void arp_send(int type, int ptype, u32 dest_ip, diff --git a/include/net/ax25.h b/include/net/ax25.h index 3696f988a9f..926eed54302 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -316,7 +316,7 @@ extern int ax25_protocol_is_registered(unsigned int); /* ax25_in.c */ extern int ax25_rx_iframe(ax25_cb *, struct sk_buff *); -extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *); +extern int ax25_kiss_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); /* ax25_ip.c */ extern int ax25_encapsulate(struct sk_buff *, struct net_device *, unsigned short, void *, void *, unsigned int); diff --git 
a/include/net/datalink.h b/include/net/datalink.h index 5797ba3d2eb..deb7ca75db4 100644 --- a/include/net/datalink.h +++ b/include/net/datalink.h @@ -9,7 +9,7 @@ struct datalink_proto { unsigned short header_length; int (*rcvfunc)(struct sk_buff *, struct net_device *, - struct packet_type *); + struct packet_type *, struct net_device *); int (*request)(struct datalink_proto *, struct sk_buff *, unsigned char *); struct list_head node; diff --git a/include/net/ip.h b/include/net/ip.h index 32360bbe143..2570b536c8f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -86,7 +86,7 @@ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, u32 saddr, u32 daddr, struct ip_options *opt); extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, struct net_device *orig_dev); extern int ip_local_deliver(struct sk_buff *skb); extern int ip_mr_input(struct sk_buff *skb); extern int ip_output(struct sk_buff *skb); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 69324465e8b..533fc074ed9 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -346,7 +346,8 @@ static inline int ipv6_addr_any(const struct in6_addr *a) extern int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); /* * upper-layer output functions diff --git a/include/net/llc.h b/include/net/llc.h index c9aed2a8b4e..71769a5aeef 100644 --- a/include/net/llc.h +++ b/include/net/llc.h @@ -46,7 +46,8 @@ struct llc_sap { unsigned char f_bit; int (*rcv_func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct packet_type *pt, + struct net_device *orig_dev); struct llc_addr laddr; struct list_head node; struct { @@ -64,7 +65,7 @@ extern rwlock_t llc_sap_list_lock; extern unsigned char llc_station_mac_sa[ETH_ALEN]; extern int llc_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt); + struct 
packet_type *pt, struct net_device *orig_dev); extern int llc_mac_hdr_init(struct sk_buff *skb, unsigned char *sa, unsigned char *da); @@ -78,7 +79,8 @@ extern void llc_set_station_handler(void (*handler)(struct sk_buff *skb)); extern struct llc_sap *llc_sap_open(unsigned char lsap, int (*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void llc_sap_close(struct llc_sap *sap); extern struct llc_sap *llc_sap_find(unsigned char sap_value); diff --git a/include/net/p8022.h b/include/net/p8022.h index 3c99a86c358..223f8fa9ffc 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -4,7 +4,8 @@ extern struct datalink_proto * register_8022_client(unsigned char type, int (*func)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt)); + struct packet_type *pt, + struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); #endif diff --git a/include/net/psnap.h b/include/net/psnap.h index 9c94e8f98b3..b2e01cc3fc8 100644 --- a/include/net/psnap.h +++ b/include/net/psnap.h @@ -1,7 +1,7 @@ #ifndef _NET_PSNAP_H #define _NET_PSNAP_H -extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *)); +extern struct datalink_proto *register_snap_client(unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *orig_dev)); extern void unregister_snap_client(struct datalink_proto *proto); #endif diff --git a/include/net/x25.h b/include/net/x25.h index 8b39b98876e..fee62ff8c19 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -175,7 +175,7 @@ extern void x25_kill_by_neigh(struct x25_neigh *); /* x25_dev.c */ extern void x25_send_frame(struct sk_buff *, struct x25_neigh *); -extern int x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *); +extern int 
x25_lapb_receive_frame(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); extern void x25_establish_link(struct x25_neigh *); extern void x25_terminate_link(struct x25_neigh *); -- cgit v1.2.3 From 0742fd53a3774781255bd1e471e7aa2e4a82d5f7 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 9 Aug 2005 19:35:47 -0700 Subject: [IPV4]: possible cleanups This patch contains the following possible cleanups: - make needlessly global code static - #if 0 the following unused global function: - xfrm4_state.c: xfrm4_state_fini - remove the following unneeded EXPORT_SYMBOL's: - ip_output.c: ip_finish_output - ip_output.c: sysctl_ip_default_ttl - fib_frontend.c: ip_dev_find - inetpeer.c: inet_peer_idlock - ip_options.c: ip_options_compile - ip_options.c: ip_options_undo - net/core/request_sock.c: sysctl_max_syn_backlog Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/net/ip.h | 2 -- include/net/route.h | 4 ---- include/net/xfrm.h | 1 - 3 files changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 2570b536c8f..c16fb6ac344 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -140,8 +140,6 @@ struct ip_reply_arg { void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); -extern int ip_finish_output(struct sk_buff *skb); - struct ipv4_config { int log_martians; diff --git a/include/net/route.h b/include/net/route.h index c3cd069a9ac..63c94558236 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -105,10 +105,6 @@ struct rt_cache_stat unsigned int out_hlist_search; }; -extern struct rt_cache_stat *rt_cache_stat; -#define RT_CACHE_STAT_INC(field) \ - (per_cpu_ptr(rt_cache_stat, raw_smp_processor_id())->field++) - extern struct ip_rt_acct *ip_rt_acct; struct in_device; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 868ef88ef97..a9d0d8c5dfb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -818,7 +818,6 
@@ extern void xfrm6_init(void); extern void xfrm6_fini(void); extern void xfrm_state_init(void); extern void xfrm4_state_init(void); -extern void xfrm4_state_fini(void); extern void xfrm6_state_init(void); extern void xfrm6_state_fini(void); -- cgit v1.2.3 From 86e65da9c1fc6fb421b9f796b597b3eced6b55ab Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 9 Aug 2005 19:36:29 -0700 Subject: [NET]: Remove explicit initializations of skb->input_dev Instead, set it in one place, namely the beginning of netif_receive_skb(). Based upon suggestions from Jamal Hadi Salim. Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 6 +++--- include/net/x25device.h | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 4abda6aec05..b902d24a325 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -352,10 +352,10 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv) static inline int tcf_match_indev(struct sk_buff *skb, char *indev) { - if (0 != indev[0]) { - if (NULL == skb->input_dev) + if (indev[0]) { + if (!skb->input_dev) return 0; - else if (0 != strcmp(indev, skb->input_dev->name)) + if (strcmp(indev, skb->input_dev->name)) return 0; } diff --git a/include/net/x25device.h b/include/net/x25device.h index d45ae883bd1..1a318374fae 100644 --- a/include/net/x25device.h +++ b/include/net/x25device.h @@ -8,7 +8,6 @@ static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->mac.raw = skb->data; - skb->input_dev = skb->dev = dev; skb->pkt_type = PACKET_HOST; return htons(ETH_P_X25); -- cgit v1.2.3 From 089af26c706d1473f641c909fee7c878d29c1f1a Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:37:23 -0700 Subject: [NETFILTER]: Rename skb_ip_make_writable() to skb_make_writable() There is nothing IPv4-specific in it. In fact, it was already used by IPv6, too... 
Upcoming nfnetlink_queue code will use it for any kind of packet. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 5 +++++ include/linux/netfilter_ipv4.h | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ec60856408f..54b97a1baba 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -193,6 +193,11 @@ extern void nf_ct_attach(struct sk_buff *, struct sk_buff *); /* FIXME: Before cache is ever used, this must be implemented for real. */ extern void nf_invalidate_cache(int pf); +/* Call this before modifying an existing packet: ensures it is + modifiable and linear to the point you care about (writable_len). + Returns true or false. */ +extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 552815b8193..fdc4a952734 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -80,11 +80,6 @@ enum nf_ip_hook_priorities { #ifdef __KERNEL__ extern int ip_route_me_harder(struct sk_buff **pskb); -/* Call this before modifying an existing IP packet: ensures it is - modifiable and linear to the point you care about (writable_len). - Returns true or false. */ -extern int skb_ip_make_writable(struct sk_buff **pskb, - unsigned int writable_len); #endif /*__KERNEL__*/ #endif /*__LINUX_IP_NETFILTER_H*/ -- cgit v1.2.3 From 4fdb3bb723db469717c6d38fda667d8b0fa86ebd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:40:55 -0700 Subject: [NETLINK]: Add proper module refcounting for kernel netlink sockets.
- Remove bogus code for compiling netlink as module - Add module refcounting support for modules implementing a netlink protocol - Add support for autoloading modules that implement a netlink protocol as soon as someone opens a socket for that protocol Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/net.h | 3 +++ include/linux/netlink.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 20cb226b226..39906619b9d 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -282,5 +282,8 @@ static struct proto_ops name##_ops = { \ #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) +#define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ + MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6552b71bfa7..1c50fea8995 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -117,7 +117,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len)); +extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, -- cgit v1.2.3 From 2cc7d5730957c4a3f3659d17d2ba5e06d5581c1f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:42:34 -0700 Subject: [NETFILTER]: Move reroute-after-queue code up to the nf_queue layer. The rerouting functionality is required by the core, therefore it has to be implemented by the core and not in individual queue handlers. 
Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 11 +++++++++++ include/linux/netfilter_ipv6.h | 3 +++ 2 files changed, 14 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 54b97a1baba..d163e20ca8d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -198,6 +198,17 @@ extern void nf_invalidate_cache(int pf); Returns true or false. */ extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); +struct nf_queue_rerouter { + void (*save)(const struct sk_buff *skb, struct nf_info *info); + int (*reroute)(struct sk_buff **skb, const struct nf_info *info); + int rer_size; +}; + +#define nf_info_reroute(x) ((void *)x + sizeof(struct nf_info)) + +extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); +extern int nf_unregister_queue_rerouter(int pf); + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 20c069a5e4a..5d204ee7a31 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,4 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); + #endif /*__LINUX_IP6_NETFILTER_H*/ -- cgit v1.2.3 From 0ab43f84995f2c2fcc5cc58a9accaa1095e1317f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:43:44 -0700 Subject: [NETFILTER]: Core changes required by upcoming nfnetlink_queue code - split netfilter verdict into 16bit verdict and 16bit queue number - add 'queuenum' argument to nf_queue_outfn_t and its users ip[6]_queue - move NFNL_SUBSYS_ definitions from enum to #define - introduce autoloading for nfnetlink subsystem modules - add MODULE_ALIAS_NFNL_SUBSYS macro - add nf_unregister_queue_handlers()
to unregister all handlers for a given nf_queue_outfn_t - add more verbose DEBUGP macro definition to nfnetlink.c - make nfnetlink_subsys_register fail if subsys already exists - add some more comments and debug statements to nfnetlink.c Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 14 +++++++++++++- include/linux/netfilter/nfnetlink.h | 20 +++++++++++--------- 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d163e20ca8d..711e05f33d6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -21,6 +21,16 @@ #define NF_STOP 5 #define NF_MAX_VERDICT NF_STOP +/* we overload the higher bits for encoding auxiliary data such as the queue + * number. Not nice, but better than additional function arguments. */ +#define NF_VERDICT_MASK 0x0000ffff +#define NF_VERDICT_BITS 16 + +#define NF_VERDICT_QMASK 0xffff0000 +#define NF_VERDICT_QBITS 16 + +#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK || NF_QUEUE) + /* only for userspace compatibility */ #ifndef __KERNEL__ /* Generic cache responses from hook functions. 
@@ -179,10 +189,12 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, /* Packet queuing */ typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, void *data); + struct nf_info *info, + unsigned int queuenum, void *data); extern int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data); extern int nf_unregister_queue_handler(int pf); +extern void nf_unregister_queue_handlers(nf_queue_outfn_t outfn); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index ace7a7be074..561f9df2880 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -69,15 +69,14 @@ struct nfgenmsg { #define NFNL_SUBSYS_ID(x) ((x & 0xff00) >> 8) #define NFNL_MSG_TYPE(x) (x & 0x00ff) -enum nfnl_subsys_id { - NFNL_SUBSYS_NONE = 0, - NFNL_SUBSYS_CTNETLINK, - NFNL_SUBSYS_CTNETLINK_EXP, - NFNL_SUBSYS_IPTNETLINK, - NFNL_SUBSYS_QUEUE, - NFNL_SUBSYS_ULOG, - NFNL_SUBSYS_COUNT, -}; +/* No enum here, otherwise __stringify() trick of MODULE_ALIAS_NFNL_SUBSYS() + * won't work anymore */ +#define NFNL_SUBSYS_NONE 0 +#define NFNL_SUBSYS_CTNETLINK 1 +#define NFNL_SUBSYS_CTNETLINK_EXP 2 +#define NFNL_SUBSYS_QUEUE 3 +#define NFNL_SUBSYS_ULOG 4 +#define NFNL_SUBSYS_COUNT 5 #ifdef __KERNEL__ @@ -142,5 +141,8 @@ extern int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo); extern int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags); +#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ + MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) + #endif /* __KERNEL__ */ #endif /* _NFNETLINK_H */ -- cgit v1.2.3 From 7af4cc3fa158ff1dda6e7451c7e6afa6b0bb85cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:44:15 -0700 Subject: [NETFILTER]: Add "nfnetlink_queue" netfilter queue handler over nfnetlink - Add new nfnetlink_queue module - Add new ipt_NFQUEUE 
and ip6t_NFQUEUE modules to access queue numbers 1-65535 - Mark ip_queue and ip6_queue Kconfig options as OBSOLETE - Update feature-removal-schedule to remove ip[6]_queue in December Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_queue.h | 85 ++++++++++++++++++++++++++++++ include/linux/netfilter_ipv4/ipt_NFQUEUE.h | 16 ++++++ 2 files changed, 101 insertions(+) create mode 100644 include/linux/netfilter/nfnetlink_queue.h create mode 100644 include/linux/netfilter_ipv4/ipt_NFQUEUE.h (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h new file mode 100644 index 00000000000..edb463a87eb --- /dev/null +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -0,0 +1,85 @@ +#ifndef _NFNETLINK_QUEUE_H +#define _NFNETLINK_QUEUE_H + +#include + +enum nfqnl_msg_types { + NFQNL_MSG_PACKET, /* packet from kernel to userspace */ + NFQNL_MSG_VERDICT, /* verdict from userspace to kernel */ + NFQNL_MSG_CONFIG, /* connect to a particular queue */ + + NFQNL_MSG_MAX +}; + +struct nfqnl_msg_packet_hdr { + u_int32_t packet_id; /* unique ID of packet in queue */ + u_int16_t hw_protocol; /* hw protocol (network order) */ + u_int8_t hook; /* netfilter hook */ +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_hw { + u_int16_t hw_addrlen; + u_int16_t _pad; + u_int8_t hw_addr[8]; +} __attribute__ ((packed)); + +struct nfqnl_msg_packet_timestamp { + u_int64_t sec; + u_int64_t usec; +} __attribute__ ((packed)); + +enum nfqnl_attr_type { + NFQA_UNSPEC, + NFQA_PACKET_HDR, + NFQA_VERDICT_HDR, /* nfqnl_msg_verdict_hrd */ + NFQA_MARK, /* u_int32_t nfmark */ + NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ + NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFQA_HWADDR, /* nfqnl_msg_packet_hw */ + NFQA_PAYLOAD, /* opaque data payload */ + + __NFQA_MAX +}; +#define NFQA_MAX (__NFQA_MAX - 1) + +struct nfqnl_msg_verdict_hdr { + u_int32_t 
verdict; + u_int32_t id; +} __attribute__ ((packed)); + + +enum nfqnl_msg_config_cmds { + NFQNL_CFG_CMD_NONE, + NFQNL_CFG_CMD_BIND, + NFQNL_CFG_CMD_UNBIND, + NFQNL_CFG_CMD_PF_BIND, + NFQNL_CFG_CMD_PF_UNBIND, +}; + +struct nfqnl_msg_config_cmd { + u_int8_t command; /* nfqnl_msg_config_cmds */ + u_int8_t _pad; + u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ +} __attribute__ ((packed)); + +enum nfqnl_config_mode { + NFQNL_COPY_NONE, + NFQNL_COPY_META, + NFQNL_COPY_PACKET, +}; + +struct nfqnl_msg_config_params { + u_int32_t copy_range; + u_int8_t copy_mode; /* enum nfqnl_config_mode */ +} __attribute__ ((packed)); + + +enum nfqnl_attr_config { + NFQA_CFG_UNSPEC, + NFQA_CFG_CMD, /* nfqnl_msg_config_cmd */ + NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ + __NFQA_CFG_MAX +}; + +#endif /* _NFNETLINK_QUEUE_H */ diff --git a/include/linux/netfilter_ipv4/ipt_NFQUEUE.h b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h new file mode 100644 index 00000000000..b5b2943b0c6 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_NFQUEUE.h @@ -0,0 +1,16 @@ +/* iptables module for using NFQUEUE mechanism + * + * (C) 2005 Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * +*/ +#ifndef _IPT_NFQ_TARGET_H +#define _IPT_NFQ_TARGET_H + +/* target info */ +struct ipt_NFQ_info { + u_int16_t queuenum; +}; + +#endif /* _IPT_DSCP_TARGET_H */ -- cgit v1.2.3 From 0bd1b59b15e4057101c89d4db15a3683c0d897f7 Mon Sep 17 00:00:00 2001 From: Andrew McDonald Date: Tue, 9 Aug 2005 19:44:42 -0700 Subject: [IPV6]: Check interface bindings on IPv6 raw socket reception Take account of whether a socket is bound to a particular device when selecting an IPv6 raw socket to receive a packet. Also perform this check when receiving IPv6 packets with router alert options. Signed-off-by: Andrew McDonald Signed-off-by: David S. 
Miller --- include/net/rawv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 23fd9a6a221..887009aa1f8 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -10,7 +10,8 @@ extern rwlock_t raw_v6_lock; extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, - struct in6_addr *loc_addr, struct in6_addr *rmt_addr); + struct in6_addr *loc_addr, struct in6_addr *rmt_addr, + int dif); extern int rawv6_rcv(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From d13964f4490157b8a290903362bfbc54f750a6bc Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 19:45:02 -0700 Subject: [IPV4/6]: Check if packet was actually delivered to a raw socket to decide whether to send an ICMP unreachable Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/raw.h | 2 +- include/net/rawv6.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/raw.h b/include/net/raw.h index 1c411c45587..1c4bc3e6809 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -37,6 +37,6 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, unsigned long raddr, unsigned long laddr, int dif); -extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); #endif /* _RAW_H */ diff --git a/include/net/rawv6.h b/include/net/rawv6.h index 887009aa1f8..14476a71725 100644 --- a/include/net/rawv6.h +++ b/include/net/rawv6.h @@ -7,7 +7,7 @@ extern struct hlist_head raw_v6_htable[RAWV6_HTABLE_SIZE]; extern rwlock_t raw_v6_lock; -extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); +extern int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr); extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num, struct in6_addr 
*loc_addr, struct in6_addr *rmt_addr, -- cgit v1.2.3 From e6848976b721eeb5551cd94673faafeef78d9f35 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:45:38 -0700 Subject: [NET]: Cleanup INET_REFCNT_DEBUG code Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_common.h | 1 - include/net/ipv6.h | 1 - include/net/sock.h | 32 +++++++++++++++++++++++++++++++- include/net/tcp.h | 2 +- 4 files changed, 32 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index fbc1f4d140d..1fbd94d8a31 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -29,7 +29,6 @@ extern unsigned int inet_poll(struct file * file, struct socket *sock, struct p extern int inet_listen(struct socket *sock, int backlog); extern void inet_sock_destruct(struct sock *sk); -extern atomic_t inet_sock_nr; extern int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 533fc074ed9..c5a02ddc594 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -145,7 +145,6 @@ DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); #define UDP6_INC_STATS(field) SNMP_INC_STATS(udp_stats_in6, field) #define UDP6_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_stats_in6, field) #define UDP6_INC_STATS_USER(field) SNMP_INC_STATS_USER(udp_stats_in6, field) -extern atomic_t inet6_sock_nr; int snmp6_register_dev(struct inet6_dev *idev); int snmp6_unregister_dev(struct inet6_dev *idev); diff --git a/include/net/sock.h b/include/net/sock.h index e9b1dbab90d..11b81551041 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -491,6 +491,9 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; +/* Here is the right place to enable sock refcounting debugging */ +#define SOCK_REFCNT_DEBUG + /* Networking protocol blocks we attach to sockets. 
* socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto @@ -561,7 +564,9 @@ struct proto { char name[32]; struct list_head node; - +#ifdef SOCK_REFCNT_DEBUG + atomic_t socks; +#endif struct { int inuse; u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; @@ -571,6 +576,31 @@ struct proto { extern int proto_register(struct proto *prot, int alloc_slab); extern void proto_unregister(struct proto *prot); +#ifdef SOCK_REFCNT_DEBUG +static inline void sk_refcnt_debug_inc(struct sock *sk) +{ + atomic_inc(&sk->sk_prot->socks); +} + +static inline void sk_refcnt_debug_dec(struct sock *sk) +{ + atomic_dec(&sk->sk_prot->socks); + printk(KERN_DEBUG "%s socket %p released, %d are still alive\n", + sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks)); +} + +static inline void sk_refcnt_debug_release(const struct sock *sk) +{ + if (atomic_read(&sk->sk_refcnt) != 1) + printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n", + sk->sk_prot->name, sk, atomic_read(&sk->sk_refcnt)); +} +#else /* SOCK_REFCNT_DEBUG */ +#define sk_refcnt_debug_inc(sk) do { } while (0) +#define sk_refcnt_debug_dec(sk) do { } while (0) +#define sk_refcnt_debug_release(sk) do { } while (0) +#endif /* SOCK_REFCNT_DEBUG */ + /* Called with local bh disabled */ static __inline__ void sock_prot_inc_use(struct proto *prot) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 5010f0c5a56..31984733777 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -306,7 +306,7 @@ extern kmem_cache_t *tcp_timewait_cachep; static inline void tcp_tw_put(struct tcp_tw_bucket *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { -#ifdef INET_REFCNT_DEBUG +#ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "tw_bucket %p released\n", tw); #endif kmem_cache_free(tcp_timewait_cachep, tw); -- cgit v1.2.3 From 614c6cb4f225a7da9f13e5dd0fac3b531078eb9f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:47:37 -0700 Subject: [SOCK]: Rename 
__tcp_v4_rehash to __sk_prot_rehash This operation was already generic and DCCP will use it. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/sock.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 11b81551041..f91ee82522f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -612,6 +612,15 @@ static __inline__ void sock_prot_dec_use(struct proto *prot) prot->stats[smp_processor_id()].inuse--; } +/* With per-bucket locks this operation is not-atomic, so that + * this version is not worse. + */ +static inline void __sk_prot_rehash(struct sock *sk) +{ + sk->sk_prot->unhash(sk); + sk->sk_prot->hash(sk); +} + /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) -- cgit v1.2.3 From 6cbb0df788b90777a7ed0f9d8261260353f48076 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:49:02 -0700 Subject: [SOCK]: Introduce sk_setup_caps From tcp_v4_setup_caps, that always is preceded by a call to __sk_dst_set, so coalesce this sequence into sk_setup_caps, removing one call to a TCP function in the IP layer. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/sock.h | 10 ++++++++++ include/net/tcp.h | 9 --------- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index f91ee82522f..69d869e41c3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1025,6 +1025,16 @@ sk_dst_check(struct sock *sk, u32 cookie) return dst; } +static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) +{ + __sk_dst_set(sk, dst); + sk->sk_route_caps = dst->dev->features; + if (sk->sk_route_caps & NETIF_F_TSO) { + if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) + sk->sk_route_caps &= ~NETIF_F_TSO; + } +} + static inline void sk_charge_skb(struct sock *sk, struct sk_buff *skb) { sk->sk_wmem_queued += skb->truesize; diff --git a/include/net/tcp.h b/include/net/tcp.h index 31984733777..d95661a3aee 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1658,15 +1658,6 @@ static inline int tcp_paws_check(const struct tcp_options_received *rx_opt, int return 1; } -static inline void tcp_v4_setup_caps(struct sock *sk, struct dst_entry *dst) -{ - sk->sk_route_caps = dst->dev->features; - if (sk->sk_route_caps & NETIF_F_TSO) { - if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) - sk->sk_route_caps &= ~NETIF_F_TSO; - } -} - #define TCP_CHECK_TIMER(sk) do { } while (0) static inline int tcp_use_frto(const struct sock *sk) -- cgit v1.2.3 From 32519f11d38ea8f4f60896763bacec7db1760f9c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:50:02 -0700 Subject: [INET]: Introduce inet_sk_rebuild_header From tcp_v4_rebuild_header, that already was pretty generic, I only needed to use sk->sk_protocol instead of the hardcoded IPPROTO_TCP and establish the requirement that INET transport layer protocols that want to use this function map TCP_SYN_SENT to its equivalent state. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/ip.h | 2 ++ include/net/tcp.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ip.h b/include/linux/ip.h index 31e7cedd9f8..33e8a19a1a0 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -196,6 +196,8 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #endif #endif +extern int inet_sk_rebuild_header(struct sock *sk); + struct iphdr { #if defined(__LITTLE_ENDIAN_BITFIELD) __u8 ihl:4, diff --git a/include/net/tcp.h b/include/net/tcp.h index d95661a3aee..0c769adb046 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -799,8 +799,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_rebuild_header(struct sock *sk); - extern int tcp_v4_build_header(struct sock *sk, struct sk_buff *skb); -- cgit v1.2.3 From 838ab6364956d9bdcefe84712de1621cf20a40b3 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:50:45 -0700 Subject: [NETFILTER]: Add refcounting and /proc/net/netfilter interface to nfnetlink_queue Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink_queue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index edb463a87eb..e142b0ff7c0 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -81,5 +81,6 @@ enum nfqnl_attr_config { NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ __NFQA_CFG_MAX }; +#define NFQA_CFG_MAX (__NFQA_CFG_MAX-1) #endif /* _NFNETLINK_QUEUE_H */ -- cgit v1.2.3 From 608c8e4f7b6e61cc783283e9dff8a465a5ad59bb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:58:27 -0700 Subject: [NETFILTER]: Extend netfilter logging API This patch is in preparation to nfnetlink_log: - loggers now have to register struct nf_logger instead of nf_logfn - nf_log_unregister() replaced by nf_log_unregister_pf() and nf_log_unregister_logger() - add comment to ip[6]t_LOG.h to assure nobody redefines flags - add /proc/net/netfilter/nf_log to tell user which logger is currently registered for which address family - if user has configured logging, but no logging backend (logger) is available, always spit a message to syslog, not just the first time. - split ip[6]t_LOG.c into two parts: Backend: Always try to register as logger for the respective address family Frontend: Always log via nf_log_packet() API - modify all users of nf_log_packet() to accommodate additional argument Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter.h | 48 ++++++++++++++++++++++++++++++--- include/linux/netfilter_ipv4/ipt_LOG.h | 1 + include/linux/netfilter_ipv6/ip6t_LOG.h | 1 + 3 files changed, 47 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 711e05f33d6..815583af06c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -114,15 +114,51 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg); extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; -typedef void nf_logfn(unsigned int hooknum, +/* those NF_LOG_* defines and struct nf_loginfo are legacy definitions that will + * disappear once iptables is replaced with pkttables. Please DO NOT use them + * for any new code! */ +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_MASK 0x0f + +#define NF_LOG_TYPE_LOG 0x01 +#define NF_LOG_TYPE_ULOG 0x02 + +struct nf_loginfo { + u_int8_t type; + union { + struct { + u_int32_t copy_len; + u_int16_t group; + u_int16_t qthreshold; + } ulog; + struct { + u_int8_t level; + u_int8_t logflags; + } log; + } u; +}; + +typedef void nf_logfn(unsigned int pf, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const struct nf_loginfo *li, const char *prefix); +struct nf_logger { + struct module *me; + nf_logfn *logfn; + char *name; +}; + /* Function to register/unregister log function. 
*/ -int nf_log_register(int pf, nf_logfn *logfn); -void nf_log_unregister(int pf, nf_logfn *logfn); +int nf_log_register(int pf, struct nf_logger *logger); +void nf_log_unregister_pf(int pf); +void nf_log_unregister_logger(struct nf_logger *logger); /* Calls the registered backend logging function */ void nf_log_packet(int pf, @@ -130,6 +166,7 @@ void nf_log_packet(int pf, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + struct nf_loginfo *li, const char *fmt, ...); /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -221,6 +258,11 @@ struct nf_queue_rerouter { extern int nf_register_queue_rerouter(int pf, struct nf_queue_rerouter *rer); extern int nf_unregister_queue_rerouter(int pf); +#ifdef CONFIG_PROC_FS +#include <linux/proc_fs.h> +extern struct proc_dir_entry *proc_net_netfilter; +#endif + #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} diff --git a/include/linux/netfilter_ipv4/ipt_LOG.h b/include/linux/netfilter_ipv4/ipt_LOG.h index d25f782e57d..22d16177319 100644 --- a/include/linux/netfilter_ipv4/ipt_LOG.h +++ b/include/linux/netfilter_ipv4/ipt_LOG.h @@ -1,6 +1,7 @@ #ifndef _IPT_LOG_H #define _IPT_LOG_H +/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) */ #define IPT_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IPT_LOG_TCPOPT 0x02 /* Log TCP options */ #define IPT_LOG_IPOPT 0x04 /* Log IP options */ diff --git a/include/linux/netfilter_ipv6/ip6t_LOG.h b/include/linux/netfilter_ipv6/ip6t_LOG.h index 42996a43bb3..9008ff5c40a 100644 --- a/include/linux/netfilter_ipv6/ip6t_LOG.h +++ b/include/linux/netfilter_ipv6/ip6t_LOG.h @@ -1,6 +1,7 @@ #ifndef _IP6T_LOG_H #define _IP6T_LOG_H +/* make sure not to change this without changing netfilter.h:NF_LOG_* (!) 
*/ #define IP6T_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ #define IP6T_LOG_TCPOPT 0x02 /* Log TCP options */ #define IP6T_LOG_IPOPT 0x04 /* Log IP options */ -- cgit v1.2.3 From 0597f2680d666a3bcf101ac0c771ba7e50016bbd Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 19:58:39 -0700 Subject: [NETFILTER]: Add new "nfnetlink_log" userspace packet logging facility This is a generic (layer3 independent) version of what ipt_ULOG is already doing for IPv4 today. ipt_ULOG, ebt_ulog and finally also ip[6]t_LOG will be deprecated by this mechanism in the long term. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 85 +++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 include/linux/netfilter/nfnetlink_log.h (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h new file mode 100644 index 00000000000..420ff4625cb --- /dev/null +++ b/include/linux/netfilter/nfnetlink_log.h @@ -0,0 +1,85 @@ +#ifndef _NFNETLINK_LOG_H +#define _NFNETLINK_LOG_H + +/* This file describes the netlink messages (i.e. 'protocol packets'), + * and not any kind of function definitions. It is shared between kernel and + * userspace. 
Don't put kernel specific stuff in here */ + +#include + +enum nfulnl_msg_types { + NFULNL_MSG_PACKET, /* packet from kernel to userspace */ + NFULNL_MSG_CONFIG, /* connect to a particular queue */ + + NFULNL_MSG_MAX +}; + +struct nfulnl_msg_packet_hdr { + u_int16_t hw_protocol; /* hw protocol (network order) */ + u_int8_t hook; /* netfilter hook */ + u_int8_t _pad; +} __attribute__ ((packed)); + +struct nfulnl_msg_packet_hw { + u_int16_t hw_addrlen; + u_int16_t _pad; + u_int8_t hw_addr[8]; +} __attribute__ ((packed)); + +struct nfulnl_msg_packet_timestamp { + u_int64_t sec; + u_int64_t usec; +} __attribute__ ((packed)); + +#define NFULNL_PREFIXLEN 30 /* just like old log target */ + +enum nfulnl_attr_type { + NFULA_UNSPEC, + NFULA_PACKET_HDR, + NFULA_MARK, /* u_int32_t nfmark */ + NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */ + NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFULA_HWADDR, /* nfulnl_msg_packet_hw */ + NFULA_PAYLOAD, /* opaque data payload */ + NFULA_PREFIX, /* string prefix */ + NFULA_UID, /* user id of socket */ + + __NFULA_MAX +}; +#define NFULA_MAX (__NFULA_MAX - 1) + +enum nfulnl_msg_config_cmds { + NFULNL_CFG_CMD_NONE, + NFULNL_CFG_CMD_BIND, + NFULNL_CFG_CMD_UNBIND, + NFULNL_CFG_CMD_PF_BIND, + NFULNL_CFG_CMD_PF_UNBIND, +}; + +struct nfulnl_msg_config_cmd { + u_int8_t command; /* nfulnl_msg_config_cmds */ +} __attribute__ ((packed)); + +struct nfulnl_msg_config_mode { + u_int32_t copy_range; + u_int8_t copy_mode; + u_int8_t _pad; +} __attribute__ ((packed)); + +enum nfulnl_attr_config { + NFULA_CFG_UNSPEC, + NFULA_CFG_CMD, /* nfulnl_msg_config_cmd */ + NFULA_CFG_MODE, /* nfulnl_msg_config_mode */ + NFULA_CFG_NLBUFSIZ, /* u_int32_t buffer size */ + NFULA_CFG_TIMEOUT, /* u_int32_t in 1/100 s */ + NFULA_CFG_QTHRESH, /* u_int32_t */ + __NFULA_CFG_MAX +}; +#define NFULA_CFG_MAX (__NFULA_CFG_MAX -1) + +#define NFULNL_COPY_NONE 0x00 +#define NFULNL_COPY_META 0x01 +#define NFULNL_COPY_PACKET 0x02 + 
+#endif /* _NFNETLINK_LOG_H */ -- cgit v1.2.3 From 304a16180fb6d2b153b45f6fbbcec1fa814496e5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:59:20 -0700 Subject: [INET]: Move the TCP ehash functions to include/net/inet_hashtables.h To be shared with DCCP (and others), this is the start of a series of patches that will expose the already generic TCP hash table routines. The few changes noticed when calling gcc -S before/after on a pentium4 were of this type: movl 40(%esp), %edx cmpl %esi, 472(%edx) je .L168 - pushl $291 + pushl $272 pushl $.LC0 pushl $.LC1 pushl $.LC2 [acme@toy net-2.6.14]$ size net/ipv4/tcp_ipv4.before.o net/ipv4/tcp_ipv4.after.o text data bss dec hex filename 17804 516 140 18460 481c net/ipv4/tcp_ipv4.before.o 17804 516 140 18460 481c net/ipv4/tcp_ipv4.after.o Holler if some weird architecture has issues with things like this 8) Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 include/net/inet_hashtables.h (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h new file mode 100644 index 00000000000..c4c9e39f450 --- /dev/null +++ b/include/net/inet_hashtables.h @@ -0,0 +1,40 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + +#ifndef _INET_HASHTABLES_H +#define _INET_HASHTABLES_H + +#include + +static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, + const __u32 faddr, const __u16 fport, + const int ehash_size) +{ + int h = (laddr ^ lport) ^ (faddr ^ fport); + h ^= h >> 16; + h ^= h >> 8; + return h & (ehash_size - 1); +} + +static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) +{ + const struct inet_sock *inet = inet_sk(sk); + const __u32 laddr = inet->rcv_saddr; + const __u16 lport = inet->num; + const __u32 faddr = inet->daddr; + const __u16 fport = inet->dport; + + return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); +} + +#endif /* _INET_HASHTABLES_H */ -- cgit v1.2.3 From 0f7ff9274e72fd254fbd1ab117bbc1db6e7cdb34 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 19:59:44 -0700 Subject: [INET]: Just rename the TCP hashtable functions/structs to inet_ This is to break down the complexity of the series of patches, making it very clear that this one just does: 1. renames tcp_ prefixed hashtable functions and data structures that were already mostly generic to inet_ to share it with DCCP and other INET transport protocols. 2. Removes not used functions (__tb_head & tb_head) 3. Removes some leftover prototypes in the headers (tcp_bucket_unlock & tcp_v4_build_header) Next changesets will move tcp_sk(sk)->bind_hash to inet_sock so that we can make functions such as tcp_inherit_port, __tcp_inherit_port, tcp_v4_get_port, __tcp_put_port, generic and get others like tcp_destroy_sock closer to generic (tcp_orphan_count will go to sk->sk_prot to allow this). Eventually most of these functions will be used passing the transport protocol inet_hashinfo structure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 2 +- include/net/tcp.h | 93 ++++++++++++++++++++++++----------------------------- 2 files changed, 43 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e4fd82e4210..ec580a560e8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,7 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - struct tcp_bind_bucket *bind_hash; + struct inet_bind_bucket *bind_hash; /* Delayed ACK control data */ struct { __u8 pending; /* ACK is pending */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0c769adb046..6c9f6f7cab5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -44,13 +44,13 @@ * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. */ -struct tcp_ehash_bucket { +struct inet_ehash_bucket { rwlock_t lock; struct hlist_head chain; } __attribute__((__aligned__(8))); /* This is for listening sockets, thus all sockets which possess wildcards. */ -#define TCP_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ +#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ /* There are a few simple rules, which allow for local port reuse by * an application. In essence: @@ -83,31 +83,22 @@ struct tcp_ehash_bucket { * users logged onto your box, isn't it nice to know that new data * ports are created in O(1) time? I thought so. 
;-) -DaveM */ -struct tcp_bind_bucket { +struct inet_bind_bucket { unsigned short port; signed short fastreuse; struct hlist_node node; struct hlist_head owners; }; -#define tb_for_each(tb, node, head) hlist_for_each_entry(tb, node, head, node) +#define inet_bind_bucket_for_each(tb, node, head) \ + hlist_for_each_entry(tb, node, head, node) -struct tcp_bind_hashbucket { +struct inet_bind_hashbucket { spinlock_t lock; struct hlist_head chain; }; -static inline struct tcp_bind_bucket *__tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_entry(head->chain.first, struct tcp_bind_bucket, node); -} - -static inline struct tcp_bind_bucket *tb_head(struct tcp_bind_hashbucket *head) -{ - return hlist_empty(&head->chain) ? NULL : __tb_head(head); -} - -extern struct tcp_hashinfo { +struct inet_hashinfo { /* This is for sockets with full identity only. Sockets here will * always be without wildcards and will have the following invariant: * @@ -116,21 +107,21 @@ extern struct tcp_hashinfo { * First half of the table is for sockets not in TIME_WAIT, second half * is for TIME_WAIT sockets only. */ - struct tcp_ehash_bucket *__tcp_ehash; + struct inet_ehash_bucket *ehash; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. */ - struct tcp_bind_hashbucket *__tcp_bhash; + struct inet_bind_hashbucket *bhash; - int __tcp_bhash_size; - int __tcp_ehash_size; + int bhash_size; + int ehash_size; /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. Hash function here * is just local port number. */ - struct hlist_head __tcp_listening_hash[TCP_LHTABLE_SIZE]; + struct hlist_head listening_hash[INET_LHTABLE_SIZE]; /* All the above members are written once at bootup and * never written again _or_ are predominantly read-access. @@ -138,36 +129,39 @@ extern struct tcp_hashinfo { * Now align to a new cache line as all the following members * are often dirty. 
*/ - rwlock_t __tcp_lhash_lock ____cacheline_aligned; - atomic_t __tcp_lhash_users; - wait_queue_head_t __tcp_lhash_wait; - spinlock_t __tcp_portalloc_lock; -} tcp_hashinfo; - -#define tcp_ehash (tcp_hashinfo.__tcp_ehash) -#define tcp_bhash (tcp_hashinfo.__tcp_bhash) -#define tcp_ehash_size (tcp_hashinfo.__tcp_ehash_size) -#define tcp_bhash_size (tcp_hashinfo.__tcp_bhash_size) -#define tcp_listening_hash (tcp_hashinfo.__tcp_listening_hash) -#define tcp_lhash_lock (tcp_hashinfo.__tcp_lhash_lock) -#define tcp_lhash_users (tcp_hashinfo.__tcp_lhash_users) -#define tcp_lhash_wait (tcp_hashinfo.__tcp_lhash_wait) -#define tcp_portalloc_lock (tcp_hashinfo.__tcp_portalloc_lock) + rwlock_t lhash_lock ____cacheline_aligned; + atomic_t lhash_users; + wait_queue_head_t lhash_wait; + spinlock_t portalloc_lock; +}; + +extern struct inet_hashinfo tcp_hashinfo; +#define tcp_ehash (tcp_hashinfo.ehash) +#define tcp_bhash (tcp_hashinfo.bhash) +#define tcp_ehash_size (tcp_hashinfo.ehash_size) +#define tcp_bhash_size (tcp_hashinfo.bhash_size) +#define tcp_listening_hash (tcp_hashinfo.listening_hash) +#define tcp_lhash_lock (tcp_hashinfo.lhash_lock) +#define tcp_lhash_users (tcp_hashinfo.lhash_users) +#define tcp_lhash_wait (tcp_hashinfo.lhash_wait) +#define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) extern kmem_cache_t *tcp_bucket_cachep; -extern struct tcp_bind_bucket *tcp_bucket_create(struct tcp_bind_hashbucket *head, - unsigned short snum); -extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); -extern void tcp_bucket_unlock(struct sock *sk); +extern struct inet_bind_bucket * + inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum); +extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, + struct inet_bind_bucket *tb); extern int tcp_port_rover; /* These are AF independent. 
*/ -static __inline__ int tcp_bhashfn(__u16 lport) +static inline int inet_bhashfn(const __u16 lport, const int bhash_size) { - return (lport & (tcp_bhash_size - 1)); + return lport & (bhash_size - 1); } -extern void tcp_bind_hash(struct sock *sk, struct tcp_bind_bucket *tb, +extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, unsigned short snum); #if (BITS_PER_LONG == 64) @@ -212,7 +206,7 @@ struct tcp_tw_bucket { __u32 tw_ts_recent; long tw_ts_recent_stamp; unsigned long tw_ttd; - struct tcp_bind_bucket *tw_tb; + struct inet_bind_bucket *tw_tb; struct hlist_node tw_death_node; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) struct in6_addr tw_v6_daddr; @@ -366,14 +360,14 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) /* These can have wildcards, don't try too hard. */ -static __inline__ int tcp_lhashfn(unsigned short num) +static inline int inet_lhashfn(const unsigned short num) { - return num & (TCP_LHTABLE_SIZE - 1); + return num & (INET_LHTABLE_SIZE - 1); } -static __inline__ int tcp_sk_listen_hashfn(struct sock *sk) +static inline int inet_sk_listen_hashfn(const struct sock *sk) { - return tcp_lhashfn(inet_sk(sk)->num); + return inet_lhashfn(inet_sk(sk)->num); } #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -799,9 +793,6 @@ extern void tcp_parse_options(struct sk_buff *skb, * TCP v4 functions exported for the inet6 API */ -extern int tcp_v4_build_header(struct sock *sk, - struct sk_buff *skb); - extern void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len, struct sk_buff *skb); -- cgit v1.2.3 From 77d8bf9c6208eb535f05718168ffcc476be0ca8c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:00:51 -0700 Subject: [INET]: Move the TCP hashtable functions/structs to inet_hashtables.[ch] Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_common.h | 5 ++ include/net/inet_hashtables.h | 122 ++++++++++++++++++++++++++++++++++++++++++ include/net/tcp.h | 120 +---------------------------------------- 3 files changed, 129 insertions(+), 118 deletions(-) (limited to 'include') diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 1fbd94d8a31..f943306ce5f 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -8,6 +8,11 @@ extern struct proto_ops inet_dgram_ops; * INET4 prototypes used by INET6 */ +struct msghdr; +struct sock; +struct sockaddr; +struct socket; + extern void inet_remove_sock(struct sock *sk1); extern void inet_put_sock(unsigned short num, struct sock *sk); diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index c4c9e39f450..3a6c11ca421 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,8 +14,107 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include +#include +#include +#include #include +/* This is for all connections with a full identity, no wildcards. + * New scheme, half the table is for TIME_WAIT, the other half is + * for the rest. I'll experiment with dynamic table growth later. + */ +struct inet_ehash_bucket { + rwlock_t lock; + struct hlist_head chain; +} __attribute__((__aligned__(8))); + +/* There are a few simple rules, which allow for local port reuse by + * an application. In essence: + * + * 1) Sockets bound to different interfaces may share a local port. + * Failing that, goto test 2. + * 2) If all sockets have sk->sk_reuse set, and none of them are in + * TCP_LISTEN state, the port may be shared. + * Failing that, goto test 3. + * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local + * address, and none of them are the same, the port may be + * shared. + * Failing this, the port cannot be shared. + * + * The interesting point, is test #2. This is what an FTP server does + * all day. 
To optimize this case we use a specific flag bit defined + * below. As we add sockets to a bind bucket list, we perform a + * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) + * As long as all sockets added to a bind bucket pass this test, + * the flag bit will be set. + * The resulting situation is that tcp_v[46]_verify_bind() can just check + * for this flag bit, if it is set and the socket trying to bind has + * sk->sk_reuse set, we don't even have to walk the owners list at all, + * we return that it is ok to bind this socket to the requested local port. + * + * Sounds like a lot of work, but it is worth it. In a more naive + * implementation (ie. current FreeBSD etc.) the entire list of ports + * must be walked for each data port opened by an ftp server. Needless + * to say, this does not scale at all. With a couple thousand FTP + * users logged onto your box, isn't it nice to know that new data + * ports are created in O(1) time? I thought so. ;-) -DaveM + */ +struct inet_bind_bucket { + unsigned short port; + signed short fastreuse; + struct hlist_node node; + struct hlist_head owners; +}; + +#define inet_bind_bucket_for_each(tb, node, head) \ + hlist_for_each_entry(tb, node, head, node) + +struct inet_bind_hashbucket { + spinlock_t lock; + struct hlist_head chain; +}; + +/* This is for listening sockets, thus all sockets which possess wildcards. */ +#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ + +struct inet_hashinfo { + /* This is for sockets with full identity only. Sockets here will + * always be without wildcards and will have the following invariant: + * + * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE + * + * First half of the table is for sockets not in TIME_WAIT, second half + * is for TIME_WAIT sockets only. + */ + struct inet_ehash_bucket *ehash; + + /* Ok, let's try this, I give up, we do need a local binding + * TCP hash as well as the others for fast bind/connect. 
+ */ + struct inet_bind_hashbucket *bhash; + + int bhash_size; + int ehash_size; + + /* All sockets in TCP_LISTEN state will be in here. This is the only + * table where wildcard'd TCP sockets can exist. Hash function here + * is just local port number. + */ + struct hlist_head listening_hash[INET_LHTABLE_SIZE]; + + /* All the above members are written once at bootup and + * never written again _or_ are predominantly read-access. + * + * Now align to a new cache line as all the following members + * are often dirty. + */ + rwlock_t lhash_lock ____cacheline_aligned; + atomic_t lhash_users; + wait_queue_head_t lhash_wait; + spinlock_t portalloc_lock; +}; + static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, const __u32 faddr, const __u16 fport, const int ehash_size) @@ -37,4 +136,27 @@ static inline int inet_sk_ehashfn(const struct sock *sk, const int ehash_size) return inet_ehashfn(laddr, lport, faddr, fport, ehash_size); } +extern struct inet_bind_bucket * + inet_bind_bucket_create(kmem_cache_t *cachep, + struct inet_bind_hashbucket *head, + const unsigned short snum); +extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, + struct inet_bind_bucket *tb); + +static inline int inet_bhashfn(const __u16 lport, const int bhash_size) +{ + return lport & (bhash_size - 1); +} + +/* These can have wildcards, don't try too hard. */ +static inline int inet_lhashfn(const unsigned short num) +{ + return num & (INET_LHTABLE_SIZE - 1); +} + +static inline int inet_sk_listen_hashfn(const struct sock *sk) +{ + return inet_lhashfn(inet_sk(sk)->num); +} + #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6c9f6f7cab5..ff5d30ac2b0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -40,101 +41,6 @@ #endif #include -/* This is for all connections with a full identity, no wildcards. 
- * New scheme, half the table is for TIME_WAIT, the other half is - * for the rest. I'll experiment with dynamic table growth later. - */ -struct inet_ehash_bucket { - rwlock_t lock; - struct hlist_head chain; -} __attribute__((__aligned__(8))); - -/* This is for listening sockets, thus all sockets which possess wildcards. */ -#define INET_LHTABLE_SIZE 32 /* Yes, really, this is all you need. */ - -/* There are a few simple rules, which allow for local port reuse by - * an application. In essence: - * - * 1) Sockets bound to different interfaces may share a local port. - * Failing that, goto test 2. - * 2) If all sockets have sk->sk_reuse set, and none of them are in - * TCP_LISTEN state, the port may be shared. - * Failing that, goto test 3. - * 3) If all sockets are bound to a specific inet_sk(sk)->rcv_saddr local - * address, and none of them are the same, the port may be - * shared. - * Failing this, the port cannot be shared. - * - * The interesting point, is test #2. This is what an FTP server does - * all day. To optimize this case we use a specific flag bit defined - * below. As we add sockets to a bind bucket list, we perform a - * check of: (newsk->sk_reuse && (newsk->sk_state != TCP_LISTEN)) - * As long as all sockets added to a bind bucket pass this test, - * the flag bit will be set. - * The resulting situation is that tcp_v[46]_verify_bind() can just check - * for this flag bit, if it is set and the socket trying to bind has - * sk->sk_reuse set, we don't even have to walk the owners list at all, - * we return that it is ok to bind this socket to the requested local port. - * - * Sounds like a lot of work, but it is worth it. In a more naive - * implementation (ie. current FreeBSD etc.) the entire list of ports - * must be walked for each data port opened by an ftp server. Needless - * to say, this does not scale at all. With a couple thousand FTP - * users logged onto your box, isn't it nice to know that new data - * ports are created in O(1) time? 
I thought so. ;-) -DaveM - */ -struct inet_bind_bucket { - unsigned short port; - signed short fastreuse; - struct hlist_node node; - struct hlist_head owners; -}; - -#define inet_bind_bucket_for_each(tb, node, head) \ - hlist_for_each_entry(tb, node, head, node) - -struct inet_bind_hashbucket { - spinlock_t lock; - struct hlist_head chain; -}; - -struct inet_hashinfo { - /* This is for sockets with full identity only. Sockets here will - * always be without wildcards and will have the following invariant: - * - * TCP_ESTABLISHED <= sk->sk_state < TCP_CLOSE - * - * First half of the table is for sockets not in TIME_WAIT, second half - * is for TIME_WAIT sockets only. - */ - struct inet_ehash_bucket *ehash; - - /* Ok, let's try this, I give up, we do need a local binding - * TCP hash as well as the others for fast bind/connect. - */ - struct inet_bind_hashbucket *bhash; - - int bhash_size; - int ehash_size; - - /* All sockets in TCP_LISTEN state will be in here. This is the only - * table where wildcard'd TCP sockets can exist. Hash function here - * is just local port number. - */ - struct hlist_head listening_hash[INET_LHTABLE_SIZE]; - - /* All the above members are written once at bootup and - * never written again _or_ are predominantly read-access. - * - * Now align to a new cache line as all the following members - * are often dirty. 
- */ - rwlock_t lhash_lock ____cacheline_aligned; - atomic_t lhash_users; - wait_queue_head_t lhash_wait; - spinlock_t portalloc_lock; -}; - extern struct inet_hashinfo tcp_hashinfo; #define tcp_ehash (tcp_hashinfo.ehash) #define tcp_bhash (tcp_hashinfo.bhash) @@ -147,19 +53,8 @@ extern struct inet_hashinfo tcp_hashinfo; #define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) extern kmem_cache_t *tcp_bucket_cachep; -extern struct inet_bind_bucket * - inet_bind_bucket_create(kmem_cache_t *cachep, - struct inet_bind_hashbucket *head, - const unsigned short snum); -extern void inet_bind_bucket_destroy(kmem_cache_t *cachep, - struct inet_bind_bucket *tb); -extern int tcp_port_rover; -/* These are AF independent. */ -static inline int inet_bhashfn(const __u16 lport, const int bhash_size) -{ - return lport & (bhash_size - 1); -} +extern int tcp_port_rover; extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, unsigned short snum); @@ -359,17 +254,6 @@ extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -/* These can have wildcards, don't try too hard. 
*/ -static inline int inet_lhashfn(const unsigned short num) -{ - return num & (INET_LHTABLE_SIZE - 1); -} - -static inline int inet_sk_listen_hashfn(const struct sock *sk) -{ - return inet_lhashfn(inet_sk(sk)->num); -} - #define MAX_TCP_HEADER (128 + MAX_HEADER) /* -- cgit v1.2.3 From a55ebcc4c4532107ad9eee1c9bb698ab5f12c00f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:01:14 -0700 Subject: [INET]: Move bind_hash from tcp_sk to inet_sk This should really be in a inet_connection_sock, but I'm leaving it for a later optimization, when some more fields common to INET transport protocols now in tcp_sk or inet_sk will be chunked out into inet_connection_sock, for now its better to concentrate on getting the changes in the core merged to leave the DCCP tree with only DCCP specific code. Next changesets will take advantage of this move to generalise things like tcp_bind_hash, tcp_put_port, tcp_inherit_port, making the later receive a inet_hashinfo parameter, and even __tcp_tw_hashdance, etc in the future, when tcp_tw_bucket gets transformed into the struct timewait_sock hierarchy. tcp_destroy_sock also is eligible as soon as tcp_orphan_count gets moved to sk_prot. A cascade of incremental changes will ultimately make the tcp_lookup functions be fully generic. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/ip.h | 2 ++ include/linux/tcp.h | 1 - include/net/tcp.h | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/ip.h b/include/linux/ip.h index 33e8a19a1a0..2c54bbd3da7 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -128,6 +128,7 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } +struct inet_bind_bucket; struct ipv6_pinfo; struct inet_sock { @@ -157,6 +158,7 @@ struct inet_sock { int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ + struct inet_bind_bucket *bind_hash; /* * Following members are used to retain the infomation to build * an ip header on each ip fragmentation while the socket is corked. diff --git a/include/linux/tcp.h b/include/linux/tcp.h index ec580a560e8..e70ab19652d 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,7 +258,6 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - struct inet_bind_bucket *bind_hash; /* Delayed ACK control data */ struct { __u8 pending; /* ACK is pending */ diff --git a/include/net/tcp.h b/include/net/tcp.h index ff5d30ac2b0..6c6c879e7e8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1266,7 +1266,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (tcp_sk(sk)->bind_hash && + if (inet_sk(sk)->bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) tcp_put_port(sk); /* fall through */ -- cgit v1.2.3 From a86888b925299330053d20e0eba03ac4d2648c4b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 20:02:13 -0700 Subject: [NETFILTER]: Fix multiple problems with the conntrack event cache 
refcnt underflow: the reference count is decremented when a conntrack entry is removed from the hash but it is not incremented when entering new entries. missing protection of process context against softirq context: all cache operations need to locally disable softirqs to avoid races. Additionally the event cache can't be initialized when a packet enteres the conntrack code but needs to be initialized whenever we cache an event and the stored conntrack entry doesn't match the current one. incorrect flushing of the event cache in ip_ct_iterate_cleanup: without real locking we can't flush the cache for different CPUs without incurring races. The cache for different CPUs can only be flushed when no packets are going through the code. ip_ct_iterate_cleanup doesn't need to drop all references, so flushing is moved to the cleanup path. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_conntrack.h | 29 +++++++++++------------- include/linux/netfilter_ipv4/ip_conntrack_core.h | 14 ++++-------- 2 files changed, 18 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index ff2c1c6001f..088742befe4 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -411,6 +411,7 @@ struct ip_conntrack_stat #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS #include +#include struct ip_conntrack_ecache { struct ip_conntrack *ct; @@ -445,26 +446,24 @@ ip_conntrack_expect_unregister_notifier(struct notifier_block *nb) return notifier_chain_unregister(&ip_conntrack_expect_chain, nb); } +extern void ip_ct_deliver_cached_events(const struct ip_conntrack *ct); +extern void __ip_ct_event_cache_init(struct ip_conntrack *ct); + static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, const struct sk_buff *skb) { - struct ip_conntrack_ecache *ecache = - &__get_cpu_var(ip_conntrack_ecache); 
- - if (unlikely((struct ip_conntrack *) skb->nfct != ecache->ct)) { - if (net_ratelimit()) { - printk(KERN_ERR "ctevent: skb->ct != ecache->ct !!!\n"); - dump_stack(); - } - } + struct ip_conntrack *ct = (struct ip_conntrack *)skb->nfct; + struct ip_conntrack_ecache *ecache; + + local_bh_disable(); + ecache = &__get_cpu_var(ip_conntrack_ecache); + if (ct != ecache->ct) + __ip_ct_event_cache_init(ct); ecache->events |= event; + local_bh_enable(); } -extern void -ip_conntrack_deliver_cached_events_for(const struct ip_conntrack *ct); -extern void ip_conntrack_event_cache_init(const struct sk_buff *skb); - static inline void ip_conntrack_event(enum ip_conntrack_events event, struct ip_conntrack *ct) { @@ -483,9 +482,7 @@ static inline void ip_conntrack_event_cache(enum ip_conntrack_events event, const struct sk_buff *skb) {} static inline void ip_conntrack_event(enum ip_conntrack_events event, struct ip_conntrack *ct) {} -static inline void ip_conntrack_deliver_cached_events_for( - struct ip_conntrack *ct) {} -static inline void ip_conntrack_event_cache_init(const struct sk_buff *skb) {} +static inline void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) {} static inline void ip_conntrack_expect_event(enum ip_conntrack_expect_events event, struct ip_conntrack_expect *exp) {} diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index fbf6c3e4164..dc4d2a0575d 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -44,18 +44,14 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb) struct ip_conntrack *ct = (struct ip_conntrack *)(*pskb)->nfct; int ret = NF_ACCEPT; - if (ct && !is_confirmed(ct)) - ret = __ip_conntrack_confirm(pskb); - ip_conntrack_deliver_cached_events_for(ct); - + if (ct) { + if (!is_confirmed(ct)) + ret = __ip_conntrack_confirm(pskb); + ip_ct_deliver_cached_events(ct); + } return ret; } -#ifdef 
CONFIG_IP_NF_CONNTRACK_EVENTS -struct ip_conntrack_ecache; -extern void __ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ec); -#endif - extern void __ip_ct_expect_unlink_destroy(struct ip_conntrack_expect *exp); extern struct list_head *ip_conntrack_hash; -- cgit v1.2.3 From 927ccbcc28dceee29dad876982768cca29738564 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:03:40 -0700 Subject: [NETFILTER]: attribute count is an attribute of message type, not subsytem Prior to this patch, every nfnetlink subsystem had to specify it's attribute count. However, in reality the attribute count depends on the message type within the subsystem, not the subsystem itself. This patch moves 'attr_count' from 'struct nfnetlink_subsys' into nfnl_callback to fix this. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 561f9df2880..b0feb237407 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -85,9 +85,10 @@ struct nfgenmsg { struct nfnl_callback { - kernel_cap_t cap_required; /* capabilities required for this msg */ int (*call)(struct sock *nl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); + kernel_cap_t cap_required; /* capabilities required for this msg */ + u_int16_t attr_count; /* number of nfattr's */ }; struct nfnetlink_subsystem @@ -95,7 +96,6 @@ struct nfnetlink_subsystem const char *name; __u8 subsys_id; /* nfnetlink subsystem ID */ __u8 cb_count; /* number of callbacks */ - u_int32_t attr_count; /* number of nfattr's */ struct nfnl_callback *cb; /* callback for individual types */ }; -- cgit v1.2.3 From 1444fc559b01aa5d4fedf4ee4f306a9e9cd56f95 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:04:07 -0700 Subject: [NETFILTER]: don't use 
nested attributes for conntrack_expect We used to use nested nfattr structures for ip_conntrack_expect. This is bogus, since ip_conntrack and ip_conntrack_expect are communicated in different netlink message types. both should be encoded at the top level attributes, no extra nesting required. This patch addresses the issue. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_conntrack.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index fb528e0e3bd..5c55751c78e 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -33,7 +33,6 @@ enum ctattr_type { CTA_COUNTERS_ORIG, CTA_COUNTERS_REPLY, CTA_USE, - CTA_EXPECT, CTA_ID, __CTA_MAX }; @@ -103,10 +102,12 @@ enum ctattr_protonat { enum ctattr_expect { CTA_EXPECT_UNSPEC, + CTA_EXPECT_MASTER, CTA_EXPECT_TUPLE, CTA_EXPECT_MASK, CTA_EXPECT_TIMEOUT, CTA_EXPECT_ID, + CTA_EXPECT_HELP_NAME, __CTA_EXPECT_MAX }; #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) -- cgit v1.2.3 From 2d8c4ce51903636ce0f60addc8134aa50ab8fa76 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:07:13 -0700 Subject: [INET]: Generalise tcp_bind_hash & tcp_inherit_port This required moving tcp_bucket_cachep to inet_hashinfo. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_hashtables.h | 32 ++++++++++++++++++++++++++++++++ include/net/tcp.h | 11 ++--------- 2 files changed, 34 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 3a6c11ca421..da9705525f1 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,12 +14,15 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include #include #include #include #include #include +#include + /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. @@ -113,6 +116,7 @@ struct inet_hashinfo { atomic_t lhash_users; wait_queue_head_t lhash_wait; spinlock_t portalloc_lock; + kmem_cache_t *bind_bucket_cachep; }; static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, @@ -148,6 +152,9 @@ static inline int inet_bhashfn(const __u16 lport, const int bhash_size) return lport & (bhash_size - 1); } +extern void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, + const unsigned short snum); + /* These can have wildcards, don't try too hard. */ static inline int inet_lhashfn(const unsigned short num) { @@ -159,4 +166,29 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk) return inet_lhashfn(inet_sk(sk)->num); } +/* Caller must disable local BH processing. 
*/ +static inline void __inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) +{ + const int bhash = inet_bhashfn(inet_sk(child)->num, table->bhash_size); + struct inet_bind_hashbucket *head = &table->bhash[bhash]; + struct inet_bind_bucket *tb; + + spin_lock(&head->lock); + tb = inet_sk(sk)->bind_hash; + sk_add_bind_node(child, &tb->owners); + inet_sk(child)->bind_hash = tb; + spin_unlock(&head->lock); +} + +static inline void inet_inherit_port(struct inet_hashinfo *table, + struct sock *sk, struct sock *child) +{ + local_bh_disable(); + __inet_inherit_port(table, sk, child); + local_bh_enable(); +} + +extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); + #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 6c6c879e7e8..9eb8ff7c911 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -51,14 +51,10 @@ extern struct inet_hashinfo tcp_hashinfo; #define tcp_lhash_users (tcp_hashinfo.lhash_users) #define tcp_lhash_wait (tcp_hashinfo.lhash_wait) #define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) - -extern kmem_cache_t *tcp_bucket_cachep; +#define tcp_bucket_cachep (tcp_hashinfo.bind_bucket_cachep) extern int tcp_port_rover; -extern void tcp_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, - unsigned short snum); - #if (BITS_PER_LONG == 64) #define TCP_ADDRCMP_ALIGN_BYTES 8 #else @@ -549,9 +545,6 @@ DECLARE_SNMP_STAT(struct tcp_mib, tcp_statistics); #define TCP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(tcp_statistics, field, val) #define TCP_ADD_STATS_USER(field, val) SNMP_ADD_STATS_USER(tcp_statistics, field, val) -extern void tcp_put_port(struct sock *sk); -extern void tcp_inherit_port(struct sock *sk, struct sock *child); - extern void tcp_v4_err(struct sk_buff *skb, u32); extern void tcp_shutdown (struct sock *sk, int how); @@ -1268,7 +1261,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) sk->sk_prot->unhash(sk); if (inet_sk(sk)->bind_hash 
&& !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) - tcp_put_port(sk); + inet_put_port(&tcp_hashinfo, sk); /* fall through */ default: if (oldstate==TCP_ESTABLISHED) -- cgit v1.2.3 From 6e04e02165a7209a71db553b7bc48d68421e5ebf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:07:35 -0700 Subject: [INET]: Move tcp_port_rover to inet_hashinfo Also expose all of the tcp_hashinfo members, i.e. killing those tcp_ehash, etc macros, this will more clearly expose already generic functions and some that need just a bit of work to become generic, as we'll see in the upcoming changesets. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 1 + include/net/sock.h | 2 +- include/net/tcp.h | 26 +++++++------------------- 3 files changed, 9 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index da9705525f1..da07411b36d 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -117,6 +117,7 @@ struct inet_hashinfo { wait_queue_head_t lhash_wait; spinlock_t portalloc_lock; kmem_cache_t *bind_bucket_cachep; + int port_rover; }; static inline int inet_ehashfn(const __u32 laddr, const __u16 lport, diff --git a/include/net/sock.h b/include/net/sock.h index 69d869e41c3..391d00b5b7b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -136,7 +136,7 @@ struct sock_common { * @sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets * @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO) * @sk_lingertime: %SO_LINGER l_linger setting - * @sk_hashent: hash entry in several tables (e.g. tcp_ehash) + * @sk_hashent: hash entry in several tables (e.g. 
inet_hashinfo.ehash) * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used diff --git a/include/net/tcp.h b/include/net/tcp.h index 9eb8ff7c911..99e47695d4b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -41,19 +41,7 @@ #endif #include -extern struct inet_hashinfo tcp_hashinfo; -#define tcp_ehash (tcp_hashinfo.ehash) -#define tcp_bhash (tcp_hashinfo.bhash) -#define tcp_ehash_size (tcp_hashinfo.ehash_size) -#define tcp_bhash_size (tcp_hashinfo.bhash_size) -#define tcp_listening_hash (tcp_hashinfo.listening_hash) -#define tcp_lhash_lock (tcp_hashinfo.lhash_lock) -#define tcp_lhash_users (tcp_hashinfo.lhash_users) -#define tcp_lhash_wait (tcp_hashinfo.lhash_wait) -#define tcp_portalloc_lock (tcp_hashinfo.portalloc_lock) -#define tcp_bucket_cachep (tcp_hashinfo.bind_bucket_cachep) - -extern int tcp_port_rover; +extern struct inet_hashinfo tcp_hashinfo; #if (BITS_PER_LONG == 64) #define TCP_ADDRCMP_ALIGN_BYTES 8 @@ -1463,21 +1451,21 @@ extern void tcp_listen_wlock(void); /* - We may sleep inside this lock. * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&tcp_lhash_lock). + * use plain read_(un)lock(&inet_hashinfo.lhash_lock). 
*/ static inline void tcp_listen_lock(void) { /* read_lock synchronizes to candidates to writers */ - read_lock(&tcp_lhash_lock); - atomic_inc(&tcp_lhash_users); - read_unlock(&tcp_lhash_lock); + read_lock(&tcp_hashinfo.lhash_lock); + atomic_inc(&tcp_hashinfo.lhash_users); + read_unlock(&tcp_hashinfo.lhash_lock); } static inline void tcp_listen_unlock(void) { - if (atomic_dec_and_test(&tcp_lhash_users)) - wake_up(&tcp_lhash_wait); + if (atomic_dec_and_test(&tcp_hashinfo.lhash_users)) + wake_up(&tcp_hashinfo.lhash_wait); } static inline int keepalive_intvl_when(const struct tcp_sock *tp) -- cgit v1.2.3 From f3f05f7046e7c85b04af390d95a82a27160dd5d0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:09 -0700 Subject: [INET]: Generalise the tcp_listen_ lock routines Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 48 +++++++++++++++++++++++++++++++++++++++++++ include/net/tcp.h | 21 ------------------- 2 files changed, 48 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index da07411b36d..f5d65121f7b 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -19,10 +19,14 @@ #include #include #include +#include /* only for TCP_LISTEN, damn :-( */ #include +#include #include +#include + /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is * for the rest. I'll experiment with dynamic table growth later. @@ -192,4 +196,48 @@ static inline void inet_inherit_port(struct inet_hashinfo *table, extern void inet_put_port(struct inet_hashinfo *table, struct sock *sk); +extern void inet_listen_wlock(struct inet_hashinfo *hashinfo); + +/* + * - We may sleep inside this lock. + * - If sleeping is not required (or called from BH), + * use plain read_(un)lock(&inet_hashinfo.lhash_lock). 
+ */ +static inline void inet_listen_lock(struct inet_hashinfo *hashinfo) +{ + /* read_lock synchronizes to candidates to writers */ + read_lock(&hashinfo->lhash_lock); + atomic_inc(&hashinfo->lhash_users); + read_unlock(&hashinfo->lhash_lock); +} + +static inline void inet_listen_unlock(struct inet_hashinfo *hashinfo) +{ + if (atomic_dec_and_test(&hashinfo->lhash_users)) + wake_up(&hashinfo->lhash_wait); +} + +static inline void __inet_hash(struct inet_hashinfo *hashinfo, + struct sock *sk, const int listen_possible) +{ + struct hlist_head *list; + rwlock_t *lock; + + BUG_TRAP(sk_unhashed(sk)); + if (listen_possible && sk->sk_state == TCP_LISTEN) { + list = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; + lock = &hashinfo->lhash_lock; + inet_listen_wlock(hashinfo); + } else { + sk->sk_hashent = inet_sk_ehashfn(sk, hashinfo->ehash_size); + list = &hashinfo->ehash[sk->sk_hashent].chain; + lock = &hashinfo->ehash[sk->sk_hashent].lock; + write_lock(lock); + } + __sk_add_node(sk, list); + sock_prot_inc_use(sk->sk_prot); + write_unlock(lock); + if (listen_possible && sk->sk_state == TCP_LISTEN) + wake_up(&hashinfo->lhash_wait); +} #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 99e47695d4b..bc110cc7022 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1447,27 +1447,6 @@ static __inline__ void tcp_openreq_init(struct request_sock *req, extern void tcp_enter_memory_pressure(void); -extern void tcp_listen_wlock(void); - -/* - We may sleep inside this lock. - * - If sleeping is not required (or called from BH), - * use plain read_(un)lock(&inet_hashinfo.lhash_lock). 
- */ - -static inline void tcp_listen_lock(void) -{ - /* read_lock synchronizes to candidates to writers */ - read_lock(&tcp_hashinfo.lhash_lock); - atomic_inc(&tcp_hashinfo.lhash_users); - read_unlock(&tcp_hashinfo.lhash_lock); -} - -static inline void tcp_listen_unlock(void) -{ - if (atomic_dec_and_test(&tcp_hashinfo.lhash_users)) - wake_up(&tcp_hashinfo.lhash_wait); -} - static inline int keepalive_intvl_when(const struct tcp_sock *tp) { return tp->keepalive_intvl ? : sysctl_tcp_keepalive_intvl; -- cgit v1.2.3 From c752f0739f09b803aed191c4765a3b6650a08653 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:28 -0700 Subject: [TCP]: Move the tcp sock states to net/tcp_states.h Lots of places just needs the states, not even linux/tcp.h, where this enum was, needs it. This speeds up development of the refactorings as less sources are rebuilt when things get moved from net/tcp.h. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 18 ------------------ include/net/dn.h | 1 + include/net/inet_hashtables.h | 2 +- include/net/ip6_route.h | 1 - include/net/ip_vs.h | 1 - include/net/sctp/constants.h | 2 +- include/net/tcp.h | 2 ++ include/net/tcp_states.h | 34 ++++++++++++++++++++++++++++++++++ 8 files changed, 39 insertions(+), 22 deletions(-) create mode 100644 include/net/tcp_states.h (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e70ab19652d..b88fe05fdcb 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -55,24 +55,6 @@ struct tcphdr { __u16 urg_ptr; }; - -enum { - TCP_ESTABLISHED = 1, - TCP_SYN_SENT, - TCP_SYN_RECV, - TCP_FIN_WAIT1, - TCP_FIN_WAIT2, - TCP_TIME_WAIT, - TCP_CLOSE, - TCP_CLOSE_WAIT, - TCP_LAST_ACK, - TCP_LISTEN, - TCP_CLOSING, /* now a valid state */ - - TCP_MAX_STATES /* Leave at the end! 
*/ -}; - -#define TCP_STATE_MASK 0xF #define TCP_ACTION_FIN (1 << 7) enum { diff --git a/include/net/dn.h b/include/net/dn.h index 5551c46db39..c1dbbd22279 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -3,6 +3,7 @@ #include #include +#include #include typedef unsigned short dn_address; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f5d65121f7b..c816708fa55 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -19,11 +19,11 @@ #include #include #include -#include /* only for TCP_LISTEN, damn :-( */ #include #include #include +#include #include diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index f920706d526..1f2e428ca36 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52da5d26617..7a3c43711a1 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -255,7 +255,6 @@ struct ip_vs_daemon_user { #include /* for struct atomic_t */ #include /* for struct neighbour */ #include /* for struct dst_entry */ -#include #include #include diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 5999e5684bb..c51541ee024 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -47,10 +47,10 @@ #ifndef __sctp_constants_h__ #define __sctp_constants_h__ -#include /* For TCP states used in sctp_sock_state_t */ #include #include /* For ipv6hdr. */ #include +#include /* For TCP states used in sctp_sock_state_t */ /* Value used for stream negotiation. 
*/ enum { SCTP_MAX_STREAM = 0xffff }; diff --git a/include/net/tcp.h b/include/net/tcp.h index bc110cc7022..9d026d81d8c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -36,6 +36,8 @@ #include #include #include +#include + #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) #include #endif diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h new file mode 100644 index 00000000000..b9d4176b2d1 --- /dev/null +++ b/include/net/tcp_states.h @@ -0,0 +1,34 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for the TCP protocol sk_state field. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _LINUX_TCP_STATES_H +#define _LINUX_TCP_STATES_H + +enum { + TCP_ESTABLISHED = 1, + TCP_SYN_SENT, + TCP_SYN_RECV, + TCP_FIN_WAIT1, + TCP_FIN_WAIT2, + TCP_TIME_WAIT, + TCP_CLOSE, + TCP_CLOSE_WAIT, + TCP_LAST_ACK, + TCP_LISTEN, + TCP_CLOSING, /* Now a valid state */ + + TCP_MAX_STATES /* Leave at the end! */ +}; + +#define TCP_STATE_MASK 0xF + +#endif /* _LINUX_TCP_STATES_H */ -- cgit v1.2.3 From 81849d106b1fb97f8e2d311c0c4d36347def55b8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:08:50 -0700 Subject: [INET]: Generalise tcp_v4_hash & tcp_unhash It really just makes the existing code be a helper function that tcp_v4_hash and tcp_unhash uses, specifying the right inet_hashinfo, tcp_hashinfo. One thing I'll investigate at some point is to have the inet_hashinfo pointer in sk_prot, so that we get all the hashtable information from the sk pointer, this can lead to some extra indirections that may well hurt performance/code size, we'll see. 
Ultimate idea would be that sk_prot would provide _all_ the information about a protocol implementation. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index c816708fa55..6731df2cea6 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -240,4 +240,38 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, if (listen_possible && sk->sk_state == TCP_LISTEN) wake_up(&hashinfo->lhash_wait); } + +static inline void inet_hash(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + if (sk->sk_state != TCP_CLOSE) { + local_bh_disable(); + __inet_hash(hashinfo, sk, 1); + local_bh_enable(); + } +} + +static inline void inet_unhash(struct inet_hashinfo *hashinfo, struct sock *sk) +{ + rwlock_t *lock; + + if (sk_unhashed(sk)) + goto out; + + if (sk->sk_state == TCP_LISTEN) { + local_bh_disable(); + inet_listen_wlock(hashinfo); + lock = &hashinfo->lhash_lock; + } else { + struct inet_ehash_bucket *head = &hashinfo->ehash[sk->sk_hashent]; + lock = &head->lock; + write_lock_bh(&head->lock); + } + + if (__sk_del_node_init(sk)) + sock_prot_dec_use(sk->sk_prot); + write_unlock_bh(lock); +out: + if (sk->sk_state == TCP_LISTEN) + wake_up(&hashinfo->lhash_wait); +} #endif /* _INET_HASHTABLES_H */ -- cgit v1.2.3 From 33b62231908c58ae04185e4f1063d1e35a7c8576 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:06 -0700 Subject: [INET]: Generalise tcp_v4_lookup_listener [acme@toy net-2.6.14]$ grep built-in /tmp/before /tmp/after /tmp/before: 282560 13122 9312 304994 4a762 net/ipv4/built-in.o /tmp/after: 282560 13122 9312 304994 4a762 net/ipv4/built-in.o Will be used in DCCP, not exporting it right now not to get in Adrian Bunk's exported-but-not-used-on-modules radar 8) Signed-off-by: Arnaldo 
Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 6731df2cea6..1c4fa0065a8 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -16,8 +16,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -274,4 +276,38 @@ out: if (sk->sk_state == TCP_LISTEN) wake_up(&hashinfo->lhash_wait); } + +extern struct sock *__inet_lookup_listener(const struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif); + +/* Optimize the common listener case. */ +static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, + const unsigned short hnum, + const int dif) +{ + struct sock *sk = NULL; + struct hlist_head *head; + + read_lock(&hashinfo->lhash_lock); + head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; + sk = __inet_lookup_listener(head, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&hashinfo->lhash_lock); + return sk; +} #endif /* _INET_HASHTABLES_H */ -- cgit v1.2.3 From 8feaf0c0a5488b3d898a9c207eb6678f44ba3f26 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:30 -0700 Subject: [INET]: Generalise tcp_tw_bucket, aka TIME_WAIT sockets This paves the way to generalise the rest of the sock ID lookup routines and saves some bytes in TCPv4 TIME_WAIT sockets on distro kernels (where IPv6 is always built as a module): [root@qemu ~]# grep tw_sock /proc/slabinfo tw_sock_TCPv6 0 0 128 31 
1 tw_sock_TCP 0 0 96 41 1 [root@qemu ~]# Now if a protocol wants to use the TIME_WAIT generic infrastructure it only has to set the sk_prot->twsk_obj_size field with the size of its inet_timewait_sock derived sock and proto_register will create sk_prot->twsk_slab, for now its only for INET sockets, but we can introduce timewait_sock later if some non INET transport protocolo wants to use this stuff. Next changesets will take advantage of this new infrastructure to generalise even more TCP code. [acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size /tmp/before.size: 188646 11764 5068 205478 322a6 net/ipv4/built-in.o /tmp/after.size: 188144 11764 5068 204976 320b0 net/ipv4/built-in.o [acme@toy net-2.6.14]$ Tested with both IPv4 & IPv6 (::1 (localhost) & ::ffff:172.20.0.1 (qemu host)). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 52 +++++++++- include/linux/tcp.h | 15 +++ include/net/inet_hashtables.h | 41 ++++++++ include/net/inet_timewait_sock.h | 142 +++++++++++++++++++++++++++ include/net/sock.h | 17 ++-- include/net/tcp.h | 202 +-------------------------------------- 6 files changed, 262 insertions(+), 207 deletions(-) create mode 100644 include/net/inet_timewait_sock.h (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 6fcd6a0ade2..98fa32316e4 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -308,6 +308,41 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) #define ipv6_only_sock(sk) ((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk)) + +#include + +struct tcp6_timewait_sock { + struct tcp_timewait_sock tw_v6_sk; + struct in6_addr tw_v6_daddr; + struct in6_addr tw_v6_rcv_saddr; +}; + +static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk) +{ + return (struct tcp6_timewait_sock *)sk; +} + +static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock 
*sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + &inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr; +} + +static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) +{ + return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; +} + +static inline int tcp_twsk_ipv6only(const struct sock *sk) +{ + return inet_twsk(sk)->tw_ipv6only; +} + +static inline int tcp_v6_ipv6only(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? + ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); +} #else #define __ipv6_only_sock(sk) 0 #define ipv6_only_sock(sk) 0 @@ -322,8 +357,19 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) return NULL; } -#endif +#define __tcp_v6_rcv_saddr(__sk) NULL +#define tcp_v6_rcv_saddr(__sk) NULL +#define tcp_twsk_ipv6only(__sk) 0 +#define tcp_v6_ipv6only(__sk) 0 +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ -#endif +#define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ + (((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((__sk)->sk_family == AF_INET6) && \ + ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ + ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif +#endif /* __KERNEL__ */ + +#endif /* _IPV6_H */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b88fe05fdcb..5d295b1b3de 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -179,6 +179,7 @@ struct tcp_info #include #include #include +#include /* This defines a selective acknowledgement block. 
*/ struct tcp_sack_block { @@ -387,6 +388,20 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) return (struct tcp_sock *)sk; } +struct tcp_timewait_sock { + struct inet_timewait_sock tw_sk; + __u32 tw_rcv_nxt; + __u32 tw_snd_nxt; + __u32 tw_rcv_wnd; + __u32 tw_ts_recent; + long tw_ts_recent_stamp; +}; + +static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) +{ + return (struct tcp_timewait_sock *)sk; +} + static inline void *tcp_ca(const struct tcp_sock *tp) { return (void *) tp->ca_priv; diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 1c4fa0065a8..c38c637e073 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -14,6 +14,8 @@ #ifndef _INET_HASHTABLES_H #define _INET_HASHTABLES_H +#include + #include #include #include @@ -310,4 +312,43 @@ sherry_cache: read_unlock(&hashinfo->lhash_lock); return sk; } + +/* Socket demux engine toys. */ +#ifdef __BIG_ENDIAN +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__sport) << 16) | (__u32)(__dport)) +#else /* __LITTLE_ENDIAN */ +#define INET_COMBINED_PORTS(__sport, __dport) \ + (((__u32)(__dport) << 16) | (__u32)(__sport)) +#endif + +#if (BITS_PER_LONG == 64) +#ifdef __BIG_ENDIAN +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); +#else /* __LITTLE_ENDIAN */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ + const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); +#endif /* __BIG_ENDIAN */ +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ + (((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__u32 
*)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#else /* 32-bit arch */ +#define INET_ADDR_COOKIE(__name, __saddr, __daddr) +#define INET_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_sk(__sk)->daddr == (__saddr)) && \ + (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#define INET_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif) \ + ((inet_twsk(__sk)->tw_daddr == (__saddr)) && \ + (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ + ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +#endif /* 64-bit arch */ #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h new file mode 100644 index 00000000000..ce117048f2f --- /dev/null +++ b/include/net/inet_timewait_sock.h @@ -0,0 +1,142 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. + * + * Definitions for a generic INET TIMEWAIT sock + * + * From code originally in net/tcp.h + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_TIMEWAIT_SOCK_ +#define _INET_TIMEWAIT_SOCK_ + +#include + +#include +#include + +#include +#include + +#include + +#if (BITS_PER_LONG == 64) +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 +#else +#define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 4 +#endif + +struct inet_bind_bucket; + +/* + * This is a TIME_WAIT sock. 
It works around the memory consumption + * problems of sockets in such a state on heavily loaded servers, but + * without violating the protocol specification. + */ +struct inet_timewait_sock { + /* + * Now struct sock also uses sock_common, so please just + * don't add nothing before this first member (__tw_common) --acme + */ + struct sock_common __tw_common; +#define tw_family __tw_common.skc_family +#define tw_state __tw_common.skc_state +#define tw_reuse __tw_common.skc_reuse +#define tw_bound_dev_if __tw_common.skc_bound_dev_if +#define tw_node __tw_common.skc_node +#define tw_bind_node __tw_common.skc_bind_node +#define tw_refcnt __tw_common.skc_refcnt +#define tw_prot __tw_common.skc_prot + volatile unsigned char tw_substate; + /* 3 bits hole, try to pack */ + unsigned char tw_rcv_wscale; + /* Socket demultiplex comparisons on incoming packets. */ + /* these five are in inet_sock */ + __u16 tw_sport; + __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); + __u32 tw_rcv_saddr; + __u16 tw_dport; + __u16 tw_num; + /* And these are ours. 
*/ + __u8 tw_ipv6only:1; + /* 31 bits hole, try to pack */ + int tw_hashent; + int tw_timeout; + unsigned long tw_ttd; + struct inet_bind_bucket *tw_tb; + struct hlist_node tw_death_node; +}; + +static inline void inet_twsk_add_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_node, list); +} + +static inline void inet_twsk_add_bind_node(struct inet_timewait_sock *tw, + struct hlist_head *list) +{ + hlist_add_head(&tw->tw_bind_node, list); +} + +static inline int inet_twsk_dead_hashed(const struct inet_timewait_sock *tw) +{ + return tw->tw_death_node.pprev != NULL; +} + +static inline void inet_twsk_dead_node_init(struct inet_timewait_sock *tw) +{ + tw->tw_death_node.pprev = NULL; +} + +static inline void __inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + __hlist_del(&tw->tw_death_node); + inet_twsk_dead_node_init(tw); +} + +static inline int inet_twsk_del_dead_node(struct inet_timewait_sock *tw) +{ + if (inet_twsk_dead_hashed(tw)) { + __inet_twsk_del_dead_node(tw); + return 1; + } + return 0; +} + +#define inet_twsk_for_each(tw, node, head) \ + hlist_for_each_entry(tw, node, head, tw_node) + +#define inet_twsk_for_each_inmate(tw, node, jail) \ + hlist_for_each_entry(tw, node, jail, tw_death_node) + +#define inet_twsk_for_each_inmate_safe(tw, node, safe, jail) \ + hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) + +static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) +{ + return (struct inet_timewait_sock *)sk; +} + +static inline u32 inet_rcv_saddr(const struct sock *sk) +{ + return likely(sk->sk_state != TCP_TIME_WAIT) ? 
+ inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; +} + +static inline void inet_twsk_put(struct inet_timewait_sock *tw) +{ + if (atomic_dec_and_test(&tw->tw_refcnt)) { +#ifdef SOCK_REFCNT_DEBUG + printk(KERN_DEBUG "%s timewait_sock %p released\n", + tw->tw_prot->name, tw); +#endif + kmem_cache_free(tw->tw_prot->twsk_slab, tw); + } +} +#endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/sock.h b/include/net/sock.h index 391d00b5b7b..c902c57bf2b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -88,6 +88,7 @@ do { spin_lock_init(&((__sk)->sk_lock.slock)); \ } while(0) struct sock; +struct proto; /** * struct sock_common - minimal network layer representation of sockets @@ -98,10 +99,11 @@ struct sock; * @skc_node: main hash linkage for various protocol lookup tables * @skc_bind_node: bind hash linkage for various protocol lookup tables * @skc_refcnt: reference count + * @skc_prot: protocol handlers inside a network family * * This is the minimal network layer representation of sockets, the header - * for struct sock and struct tcp_tw_bucket. - */ + * for struct sock and struct inet_timewait_sock. 
+ */ struct sock_common { unsigned short skc_family; volatile unsigned char skc_state; @@ -110,11 +112,12 @@ struct sock_common { struct hlist_node skc_node; struct hlist_node skc_bind_node; atomic_t skc_refcnt; + struct proto *skc_prot; }; /** * struct sock - network layer representation of sockets - * @__sk_common: shared layout with tcp_tw_bucket + * @__sk_common: shared layout with inet_timewait_sock * @sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN * @sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings * @sk_lock: synchronizer @@ -140,7 +143,6 @@ struct sock_common { * @sk_backlog: always used with the per-socket spinlock held * @sk_callback_lock: used with the callbacks in the end of this struct * @sk_error_queue: rarely used - * @sk_prot: protocol handlers inside a network family * @sk_prot_creator: sk_prot of original sock creator (see ipv6_setsockopt, IPV6_ADDRFORM for instance) * @sk_err: last error * @sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out' @@ -173,7 +175,7 @@ struct sock_common { */ struct sock { /* - * Now struct tcp_tw_bucket also uses sock_common, so please just + * Now struct inet_timewait_sock also uses sock_common, so please just * don't add nothing before this first member (__sk_common) --acme */ struct sock_common __sk_common; @@ -184,6 +186,7 @@ struct sock { #define sk_node __sk_common.skc_node #define sk_bind_node __sk_common.skc_bind_node #define sk_refcnt __sk_common.skc_refcnt +#define sk_prot __sk_common.skc_prot unsigned char sk_shutdown : 2, sk_no_check : 2, sk_userlocks : 4; @@ -218,7 +221,6 @@ struct sock { struct sk_buff *tail; } sk_backlog; struct sk_buff_head sk_error_queue; - struct proto *sk_prot; struct proto *sk_prot_creator; rwlock_t sk_callback_lock; int sk_err, @@ -557,6 +559,9 @@ struct proto { kmem_cache_t *slab; unsigned int obj_size; + kmem_cache_t *twsk_slab; + unsigned int twsk_obj_size; + struct request_sock_ops *rsk_prot; struct module *owner; 
diff --git a/include/net/tcp.h b/include/net/tcp.h index 9d026d81d8c..cf8e664176a 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -38,207 +38,14 @@ #include #include -#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) -#include -#endif #include extern struct inet_hashinfo tcp_hashinfo; -#if (BITS_PER_LONG == 64) -#define TCP_ADDRCMP_ALIGN_BYTES 8 -#else -#define TCP_ADDRCMP_ALIGN_BYTES 4 -#endif - -/* This is a TIME_WAIT bucket. It works around the memory consumption - * problems of sockets in such a state on heavily loaded servers, but - * without violating the protocol specification. - */ -struct tcp_tw_bucket { - /* - * Now struct sock also uses sock_common, so please just - * don't add nothing before this first member (__tw_common) --acme - */ - struct sock_common __tw_common; -#define tw_family __tw_common.skc_family -#define tw_state __tw_common.skc_state -#define tw_reuse __tw_common.skc_reuse -#define tw_bound_dev_if __tw_common.skc_bound_dev_if -#define tw_node __tw_common.skc_node -#define tw_bind_node __tw_common.skc_bind_node -#define tw_refcnt __tw_common.skc_refcnt - volatile unsigned char tw_substate; - unsigned char tw_rcv_wscale; - __u16 tw_sport; - /* Socket demultiplex comparisons on incoming packets. */ - /* these five are in inet_sock */ - __u32 tw_daddr - __attribute__((aligned(TCP_ADDRCMP_ALIGN_BYTES))); - __u32 tw_rcv_saddr; - __u16 tw_dport; - __u16 tw_num; - /* And these are ours. 
*/ - int tw_hashent; - int tw_timeout; - __u32 tw_rcv_nxt; - __u32 tw_snd_nxt; - __u32 tw_rcv_wnd; - __u32 tw_ts_recent; - long tw_ts_recent_stamp; - unsigned long tw_ttd; - struct inet_bind_bucket *tw_tb; - struct hlist_node tw_death_node; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct in6_addr tw_v6_daddr; - struct in6_addr tw_v6_rcv_saddr; - int tw_v6_ipv6only; -#endif -}; - -static __inline__ void tw_add_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_node, list); -} - -static __inline__ void tw_add_bind_node(struct tcp_tw_bucket *tw, - struct hlist_head *list) -{ - hlist_add_head(&tw->tw_bind_node, list); -} - -static inline int tw_dead_hashed(struct tcp_tw_bucket *tw) -{ - return tw->tw_death_node.pprev != NULL; -} - -static __inline__ void tw_dead_node_init(struct tcp_tw_bucket *tw) -{ - tw->tw_death_node.pprev = NULL; -} - -static __inline__ void __tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - __hlist_del(&tw->tw_death_node); - tw_dead_node_init(tw); -} - -static __inline__ int tw_del_dead_node(struct tcp_tw_bucket *tw) -{ - if (tw_dead_hashed(tw)) { - __tw_del_dead_node(tw); - return 1; - } - return 0; -} - -#define tw_for_each(tw, node, head) \ - hlist_for_each_entry(tw, node, head, tw_node) - -#define tw_for_each_inmate(tw, node, jail) \ - hlist_for_each_entry(tw, node, jail, tw_death_node) - -#define tw_for_each_inmate_safe(tw, node, safe, jail) \ - hlist_for_each_entry_safe(tw, node, safe, jail, tw_death_node) - -#define tcptw_sk(__sk) ((struct tcp_tw_bucket *)(__sk)) - -static inline u32 tcp_v4_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - inet_sk(sk)->rcv_saddr : tcptw_sk(sk)->tw_rcv_saddr; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? 
- &inet6_sk(sk)->rcv_saddr : &tcptw_sk(sk)->tw_v6_rcv_saddr; -} - -static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) -{ - return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; -} - -#define tcptw_sk_ipv6only(__sk) (tcptw_sk(__sk)->tw_v6_ipv6only) - -static inline int tcp_v6_ipv6only(const struct sock *sk) -{ - return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : tcptw_sk_ipv6only(sk); -} -#else -# define __tcp_v6_rcv_saddr(__sk) NULL -# define tcp_v6_rcv_saddr(__sk) NULL -# define tcptw_sk_ipv6only(__sk) 0 -# define tcp_v6_ipv6only(__sk) 0 -#endif - -extern kmem_cache_t *tcp_timewait_cachep; - -static inline void tcp_tw_put(struct tcp_tw_bucket *tw) -{ - if (atomic_dec_and_test(&tw->tw_refcnt)) { -#ifdef SOCK_REFCNT_DEBUG - printk(KERN_DEBUG "tw_bucket %p released\n", tw); -#endif - kmem_cache_free(tcp_timewait_cachep, tw); - } -} - extern atomic_t tcp_orphan_count; extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_deschedule(struct tcp_tw_bucket *tw); - - -/* Socket demux engine toys. 
*/ -#ifdef __BIG_ENDIAN -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__sport)<<16) | (__u32)(__dport)) -#else /* __LITTLE_ENDIAN */ -#define TCP_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__dport)<<16) | (__u32)(__sport)) -#endif - -#if (BITS_PER_LONG == 64) -#ifdef __BIG_ENDIAN -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__saddr))<<32)|((__u64)(__daddr)); -#else /* __LITTLE_ENDIAN */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) \ - __u64 __name = (((__u64)(__daddr))<<32)|((__u64)(__saddr)); -#endif /* __BIG_ENDIAN */ -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(inet_sk(__sk)->daddr)))== (__cookie)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - (((*((__u64 *)&(tcptw_sk(__sk)->tw_daddr))) == (__cookie)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#else /* 32-bit arch */ -#define TCP_V4_ADDR_COOKIE(__name, __saddr, __daddr) -#define TCP_IPV4_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((inet_sk(__sk)->daddr == (__saddr)) && \ - (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#define TCP_IPV4_TW_MATCH(__sk, __cookie, __saddr, __daddr, __ports, __dif)\ - ((tcptw_sk(__sk)->tw_daddr == (__saddr)) && \ - (tcptw_sk(__sk)->tw_rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(tcptw_sk(__sk)->tw_dport))) == (__ports)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) -#endif /* 64-bit arch */ - -#define TCP_IPV6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ - (((*((__u32 *)&(inet_sk(__sk)->dport)))== (__ports)) && \ - 
((__sk)->sk_family == AF_INET6) && \ - ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ - ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ - (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) +extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -543,7 +350,7 @@ extern int tcp_v4_rcv(struct sk_buff *skb); extern int tcp_v4_remember_stamp(struct sock *sk); -extern int tcp_v4_tw_remember_stamp(struct tcp_tw_bucket *tw); +extern int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size); @@ -616,10 +423,9 @@ enum tcp_tw_status }; -extern enum tcp_tw_status tcp_timewait_state_process(struct tcp_tw_bucket *tw, +extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, - struct tcphdr *th, - unsigned len); + const struct tcphdr *th); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock *req, -- cgit v1.2.3 From e48c414ee61f4ac8d5cff2973e66a7cbc8a93aa5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:46 -0700 Subject: [INET]: Generalise the TCP sock ID lookup routines And also some TIME_WAIT functions. [acme@toy net-2.6.14]$ grep built-in /tmp/before.size /tmp/after.size /tmp/before.size: 282955 13122 9312 305389 4a8ed net/ipv4/built-in.o /tmp/after.size: 281566 13122 9312 304000 4a380 net/ipv4/built-in.o [acme@toy net-2.6.14]$ I kept them still inlined, will uninline at some point to see what would be the performance difference. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_hashtables.h | 77 +++++++++++++++++++++++++++++++++++++--- include/net/inet_timewait_sock.h | 9 +++++ include/net/sock.h | 12 +++---- 3 files changed, 87 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index c38c637e073..b5c0d64ea74 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -30,6 +30,7 @@ #include #include +#include /* This is for all connections with a full identity, no wildcards. * New scheme, half the table is for TIME_WAIT, the other half is @@ -285,13 +286,13 @@ extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const int dif); /* Optimize the common listener case. */ -static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, - const unsigned short hnum, - const int dif) +static inline struct sock * + inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, + const unsigned short hnum, const int dif) { struct sock *sk = NULL; - struct hlist_head *head; + const struct hlist_head *head; read_lock(&hashinfo->lhash_lock); head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; @@ -351,4 +352,70 @@ sherry_cache: ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #endif /* 64-bit arch */ + +/* + * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need + * not check it for lookups anymore, thanks Alexey. -DaveM + * + * Local BH must be disabled here. 
+ */ +static inline struct sock * + __inet_lookup_established(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 hnum, + const int dif) +{ + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + struct sock *sk; + const struct hlist_node *node; + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + const int hash = inet_ehashfn(daddr, hnum, saddr, sport, hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { + if (INET_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; /* You sunk my battleship! */ + } + + /* Must check for a TIME_WAIT'er before going to listener hash. */ + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + if (INET_TW_MATCH(sk, acookie, saddr, daddr, ports, dif)) + goto hit; + } + sk = NULL; +out: + read_unlock(&head->lock); + return sk; +hit: + sock_hold(sk); + goto out; +} + +static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 hnum, + const int dif) +{ + struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, + hnum, dif); + return sk ? 
: inet_lookup_listener(hashinfo, daddr, hnum, dif); +} + +static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 dport, + const int dif) +{ + struct sock *sk; + + local_bh_disable(); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + local_bh_enable(); + + return sk; +} #endif /* _INET_HASHTABLES_H */ diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index ce117048f2f..020f28058fd 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -17,6 +17,7 @@ #include +#include #include #include @@ -32,6 +33,7 @@ #endif struct inet_bind_bucket; +struct inet_hashinfo; /* * This is a TIME_WAIT sock. It works around the memory consumption @@ -139,4 +141,11 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) kmem_cache_free(tw->tw_prot->twsk_slab, tw); } } + +extern void __inet_twsk_kill(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo); + +extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, + struct sock *sk, + struct inet_hashinfo *hashinfo); #endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/sock.h b/include/net/sock.h index c902c57bf2b..bdae0a5eadf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -255,28 +255,28 @@ struct sock { /* * Hashed lists helper routines */ -static inline struct sock *__sk_head(struct hlist_head *head) +static inline struct sock *__sk_head(const struct hlist_head *head) { return hlist_entry(head->first, struct sock, sk_node); } -static inline struct sock *sk_head(struct hlist_head *head) +static inline struct sock *sk_head(const struct hlist_head *head) { return hlist_empty(head) ? NULL : __sk_head(head); } -static inline struct sock *sk_next(struct sock *sk) +static inline struct sock *sk_next(const struct sock *sk) { return sk->sk_node.next ? 
hlist_entry(sk->sk_node.next, struct sock, sk_node) : NULL; } -static inline int sk_unhashed(struct sock *sk) +static inline int sk_unhashed(const struct sock *sk) { return hlist_unhashed(&sk->sk_node); } -static inline int sk_hashed(struct sock *sk) +static inline int sk_hashed(const struct sock *sk) { return sk->sk_node.pprev != NULL; } @@ -494,7 +494,7 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; /* Here is the right place to enable sock refcounting debugging */ -#define SOCK_REFCNT_DEBUG +//#define SOCK_REFCNT_DEBUG /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface -- cgit v1.2.3 From c676270bcd25015b978722ec0352c330dcc87883 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:09:59 -0700 Subject: [INET_TWSK]: Introduce inet_twsk_alloc With the parts of tcp_time_wait that are not TCP specific, tcp_time_wait uses it and so will dccp_time_wait. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 020f28058fd..e00861b1669 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -142,6 +142,9 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) } } +extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, + const int state); + extern void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_hashinfo *hashinfo); -- cgit v1.2.3 From 87d11ceb9deb7a3f13fdee6e89d9bb6be7d27a71 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:10:12 -0700 Subject: [SOCK]: Introduce sk_clone Out of tcp_create_openreq_child, will be used in dccp_create_openreq_child, and is a nice sock function anyway. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/sock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index bdae0a5eadf..828dc082fcb 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -737,6 +737,8 @@ extern struct sock *sk_alloc(int family, unsigned int __nocast priority, struct proto *prot, int zero_it); extern void sk_free(struct sock *sk); +extern struct sock *sk_clone(const struct sock *sk, + const unsigned int __nocast priority); extern struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, -- cgit v1.2.3 From 463c84b97f24010a67cd871746d6a7e4c925a5f9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:10:42 -0700 Subject: [NET]: Introduce inet_connection_sock This creates struct inet_connection_sock, moving members out of struct tcp_sock that are shareable with other INET connection oriented protocols, such as DCCP, that in my private tree already uses most of these members. The functions that operate on these members were renamed, using an inet_csk_ prefix while not being moved yet to a new file, so as to ease the review of these changes. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/ip.h | 2 - include/linux/ipv6.h | 8 +- include/linux/tcp.h | 39 ++----- include/net/inet_connection_sock.h | 86 ++++++++++++++ include/net/inet_hashtables.h | 6 +- include/net/request_sock.h | 6 +- include/net/sock.h | 3 - include/net/tcp.h | 222 ++++++++++++++++++------------------- include/net/tcp_ecn.h | 2 +- 9 files changed, 215 insertions(+), 159 deletions(-) create mode 100644 include/net/inet_connection_sock.h (limited to 'include') diff --git a/include/linux/ip.h b/include/linux/ip.h index 2c54bbd3da7..33e8a19a1a0 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -128,7 +128,6 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk) return (struct inet_request_sock *)sk; } -struct inet_bind_bucket; struct ipv6_pinfo; struct inet_sock { @@ -158,7 +157,6 @@ struct inet_sock { int mc_index; /* Multicast device index */ __u32 mc_addr; struct ip_mc_socklist *mc_list; /* Group array */ - struct inet_bind_bucket *bind_hash; /* * Following members are used to retain the infomation to build * an ip header on each ip fragmentation while the socket is corked. diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 98fa32316e4..88591913c94 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,15 +333,15 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; } -static inline int tcp_twsk_ipv6only(const struct sock *sk) +static inline int inet_twsk_ipv6only(const struct sock *sk) { return inet_twsk(sk)->tw_ipv6only; } -static inline int tcp_v6_ipv6only(const struct sock *sk) +static inline int inet_v6_ipv6only(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? 
- ipv6_only_sock(sk) : tcp_twsk_ipv6only(sk); + ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); } #else #define __ipv6_only_sock(sk) 0 @@ -360,7 +360,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define __tcp_v6_rcv_saddr(__sk) NULL #define tcp_v6_rcv_saddr(__sk) NULL #define tcp_twsk_ipv6only(__sk) 0 -#define tcp_v6_ipv6only(__sk) 0 +#define inet_v6_ipv6only(__sk) 0 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ #define INET6_MATCH(__sk, __saddr, __daddr, __ports, __dif) \ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5d295b1b3de..800930fac38 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -177,8 +177,8 @@ struct tcp_info #include #include -#include #include +#include #include /* This defines a selective acknowledgement block. */ @@ -219,8 +219,8 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req) } struct tcp_sock { - /* inet_sock has to be the first member of tcp_sock */ - struct inet_sock inet; + /* inet_connection_sock has to be the first member of tcp_sock */ + struct inet_connection_sock inet_conn; int tcp_header_len; /* Bytes of tcp header to send */ /* @@ -241,18 +241,6 @@ struct tcp_sock { __u32 snd_sml; /* Last byte of the most recently transmitted small packet */ __u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ __u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ - /* Delayed ACK control data */ - struct { - __u8 pending; /* ACK is pending */ - __u8 quick; /* Scheduled number of quick acks */ - __u8 pingpong; /* The session is interactive */ - __u8 blocked; /* Delayed ACK was blocked by socket lock*/ - __u32 ato; /* Predicted tick of soft clock */ - unsigned long timeout; /* Currently scheduled timeout */ - __u32 lrcvtime; /* timestamp of last received data packet*/ - __u16 last_seg_size; /* Size of last incoming segment */ - __u16 rcv_mss; /* MSS used for delayed ACK decisions */ - } ack; /* Data for direct 
copy to user */ struct { @@ -271,8 +259,8 @@ struct tcp_sock { __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ __u8 ca_state; /* State of fast-retransmit machine */ - __u8 retransmits; /* Number of unrecovered RTO timeouts. */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ @@ -281,7 +269,7 @@ struct tcp_sock { __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 unused; + __u8 nonagle; /* Disable Nagle algorithm? */ __u8 defer_accept; /* User waits for some data after accept() */ /* RTT measurement */ @@ -290,19 +278,13 @@ struct tcp_sock { __u32 mdev_max; /* maximal mdev for the last rtt period */ __u32 rttvar; /* smoothed mdev_max */ __u32 rtt_seq; /* sequence number to update rttvar */ - __u32 rto; /* retransmit timeout */ __u32 packets_out; /* Packets which are "in flight" */ __u32 left_out; /* Packets which leaved network */ __u32 retrans_out; /* Retransmitted packets out */ - __u8 backoff; /* backoff */ /* * Options received (usually on last packet, some only on SYN packets). */ - __u8 nonagle; /* Disable Nagle algorithm? */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - - __u8 probes_out; /* unanswered 0 window probes */ struct tcp_options_received rx_opt; /* @@ -315,11 +297,6 @@ struct tcp_sock { __u32 snd_cwnd_used; __u32 snd_cwnd_stamp; - /* Two commonly used timers in both sender and receiver paths. 
*/ - unsigned long timeout; - struct timer_list retransmit_timer; /* Resend (no ack) */ - struct timer_list delack_timer; /* Ack delay */ - struct sk_buff_head out_of_order_queue; /* Out of order segments go here */ struct tcp_func *af_specific; /* Operations which are AF_INET{4,6} specific */ @@ -334,7 +311,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 syn_retries; /* num of allowed syn retries */ + __u8 probes_out; /* unanswered 0 window probes */ __u8 ecn_flags; /* ECN status bits. */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ @@ -349,14 +326,12 @@ struct tcp_sock { int undo_retrans; /* number of undoable retransmissions. */ __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ - __u8 pending; /* Scheduled timer event */ __u8 urg_mode; /* In urgent mode */ + /* ONE BYTE HOLE, TRY TO PACK! */ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ - struct request_sock_queue accept_queue; /* FIFO of established children */ - unsigned int keepalive_time; /* time before keep alive takes place */ unsigned int keepalive_intvl; /* time interval between keep alive probes */ int linger2; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h new file mode 100644 index 00000000000..ef609396e41 --- /dev/null +++ b/include/net/inet_connection_sock.h @@ -0,0 +1,86 @@ +/* + * NET Generic infrastructure for INET connection oriented protocols. 
+ * + * Definitions for inet_connection_sock + * + * Authors: Many people, see the TCP sources + * + * From code originally in TCP + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef _INET_CONNECTION_SOCK_H +#define _INET_CONNECTION_SOCK_H + +#include +#include +#include + +struct inet_bind_bucket; +struct inet_hashinfo; + +/** inet_connection_sock - INET connection oriented sock + * + * @icsk_accept_queue: FIFO of established children + * @icsk_bind_hash: Bind node + * @icsk_timeout: Timeout + * @icsk_retransmit_timer: Resend (no ack) + * @icsk_rto: Retransmit timeout + * @icsk_retransmits: Number of unrecovered [RTO] timeouts + * @icsk_pending: Scheduled timer event + * @icsk_backoff: Backoff + * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_ack: Delayed ACK control data + */ +struct inet_connection_sock { + /* inet_sock has to be the first member! 
*/ + struct inet_sock icsk_inet; + struct request_sock_queue icsk_accept_queue; + struct inet_bind_bucket *icsk_bind_hash; + unsigned long icsk_timeout; + struct timer_list icsk_retransmit_timer; + struct timer_list icsk_delack_timer; + __u32 icsk_rto; + __u8 icsk_retransmits; + __u8 icsk_pending; + __u8 icsk_backoff; + __u8 icsk_syn_retries; + struct { + __u8 pending; /* ACK is pending */ + __u8 quick; /* Scheduled number of quick acks */ + __u8 pingpong; /* The session is interactive */ + __u8 blocked; /* Delayed ACK was blocked by socket lock */ + __u32 ato; /* Predicted tick of soft clock */ + unsigned long timeout; /* Currently scheduled timeout */ + __u32 lrcvtime; /* timestamp of last received data packet */ + __u16 last_seg_size; /* Size of last incoming segment */ + __u16 rcv_mss; /* MSS used for delayed ACK decisions */ + } icsk_ack; +}; + +static inline struct inet_connection_sock *inet_csk(const struct sock *sk) +{ + return (struct inet_connection_sock *)sk; +} + +extern void inet_csk_init_xmit_timers(struct sock *sk, + void (*retransmit_handler)(unsigned long), + void (*delack_handler)(unsigned long), + void (*keepalive_handler)(unsigned long)); +extern void inet_csk_clear_xmit_timers(struct sock *sk); + +extern struct request_sock *inet_csk_search_req(const struct sock *sk, + struct request_sock ***prevp, + const __u16 rport, + const __u32 raddr, + const __u32 laddr); +extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, + struct sock *sk, unsigned short snum); + +extern struct dst_entry* inet_csk_route_req(struct sock *sk, + const struct request_sock *req); + +#endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b5c0d64ea74..f0c21c07f89 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include +#include #include #include @@ -185,9 +185,9 @@ static 
inline void __inet_inherit_port(struct inet_hashinfo *table, struct inet_bind_bucket *tb; spin_lock(&head->lock); - tb = inet_sk(sk)->bind_hash; + tb = inet_csk(sk)->icsk_bind_hash; sk_add_bind_node(child, &tb->owners); - inet_sk(child)->bind_hash = tb; + inet_csk(child)->icsk_bind_hash = tb; spin_unlock(&head->lock); } diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 334717bf9ef..b7c7eecbe64 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -224,17 +224,17 @@ static inline int reqsk_queue_added(struct request_sock_queue *queue) return prev_qlen; } -static inline int reqsk_queue_len(struct request_sock_queue *queue) +static inline int reqsk_queue_len(const struct request_sock_queue *queue) { return queue->listen_opt != NULL ? queue->listen_opt->qlen : 0; } -static inline int reqsk_queue_len_young(struct request_sock_queue *queue) +static inline int reqsk_queue_len_young(const struct request_sock_queue *queue) { return queue->listen_opt->qlen_young; } -static inline int reqsk_queue_is_full(struct request_sock_queue *queue) +static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) { return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; } diff --git a/include/net/sock.h b/include/net/sock.h index 828dc082fcb..48cc337a656 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -493,9 +493,6 @@ extern int sk_wait_data(struct sock *sk, long *timeo); struct request_sock_ops; -/* Here is the right place to enable sock refcounting debugging */ -//#define SOCK_REFCNT_DEBUG - /* Networking protocol blocks we attach to sockets. 
* socket layer -> transport layer interface * transport -> network interface is defined by struct inet_proto diff --git a/include/net/tcp.h b/include/net/tcp.h index cf8e664176a..a943c79c88b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,10 +19,11 @@ #define _TCP_H #define TCP_DEBUG 1 +#define INET_CSK_DEBUG 1 #define FASTRETRANS_DEBUG 1 /* Cancel timers, when they are not required. */ -#undef TCP_CLEAR_TIMERS +#undef INET_CSK_CLEAR_TIMERS #include #include @@ -205,10 +206,10 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -#define TCP_TIME_RETRANS 1 /* Retransmit timer */ -#define TCP_TIME_DACK 2 /* Delayed ack timer */ -#define TCP_TIME_PROBE0 3 /* Zero window probe timer */ -#define TCP_TIME_KEEPOPEN 4 /* Keepalive timer */ +#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ +#define ICSK_TIME_DACK 2 /* Delayed ack timer */ +#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ @@ -257,9 +258,9 @@ extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define TCP_INET_FAMILY(fam) ((fam) == AF_INET) +#define AF_INET_FAMILY(fam) ((fam) == AF_INET) #else -#define TCP_INET_FAMILY(fam) 1 +#define AF_INET_FAMILY(fam) 1 #endif /* @@ -372,41 +373,42 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); -enum tcp_ack_state_t -{ - TCP_ACK_SCHED = 1, - TCP_ACK_TIMER = 2, - TCP_ACK_PUSHED= 4 +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4 }; -static inline void tcp_schedule_ack(struct tcp_sock *tp) +static inline void inet_csk_schedule_ack(struct sock *sk) { - tp->ack.pending |= TCP_ACK_SCHED; + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; } -static inline int 
tcp_ack_scheduled(struct tcp_sock *tp) +static inline int inet_csk_ack_scheduled(const struct sock *sk) { - return tp->ack.pending&TCP_ACK_SCHED; + return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; } -static __inline__ void tcp_dec_quickack_mode(struct tcp_sock *tp, unsigned int pkts) +static inline void tcp_dec_quickack_mode(struct sock *sk, + const unsigned int pkts) { - if (tp->ack.quick) { - if (pkts >= tp->ack.quick) { - tp->ack.quick = 0; + struct inet_connection_sock *icsk = inet_csk(sk); + if (icsk->icsk_ack.quick) { + if (pkts >= icsk->icsk_ack.quick) { + icsk->icsk_ack.quick = 0; /* Leaving quickack mode we deflate ATO. */ - tp->ack.ato = TCP_ATO_MIN; + icsk->icsk_ack.ato = TCP_ATO_MIN; } else - tp->ack.quick -= pkts; + icsk->icsk_ack.quick -= pkts; } } -extern void tcp_enter_quickack_mode(struct tcp_sock *tp); +extern void tcp_enter_quickack_mode(struct sock *sk); -static __inline__ void tcp_delack_init(struct tcp_sock *tp) +static inline void inet_csk_delack_init(struct sock *sk) { - memset(&tp->ack, 0, sizeof(tp->ack)); + memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); } static inline void tcp_clear_options(struct tcp_options_received *rx_opt) @@ -440,7 +442,7 @@ extern void tcp_update_metrics(struct sock *sk); extern void tcp_close(struct sock *sk, long timeout); -extern struct sock * tcp_accept(struct sock *sk, int flags, int *err); +extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err); extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, @@ -534,15 +536,18 @@ extern void tcp_cwnd_application_limited(struct sock *sk); /* tcp_timer.c */ extern void tcp_init_xmit_timers(struct sock *); -extern void tcp_clear_xmit_timers(struct sock *); +static inline void tcp_clear_xmit_timers(struct sock *sk) +{ + inet_csk_clear_xmit_timers(sk); +} -extern void tcp_delete_keepalive_timer(struct sock *); -extern void 
tcp_reset_keepalive_timer(struct sock *, unsigned long); +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef TCP_DEBUG -extern const char tcp_timer_bug_msg[]; +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; #endif /* tcp_diag.c */ @@ -554,70 +559,58 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -static inline void tcp_clear_xmit_timer(struct sock *sk, int what) +static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { - struct tcp_sock *tp = tcp_sk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->retransmit_timer); + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); #endif - break; - case TCP_TIME_DACK: - tp->ack.blocked = 0; - tp->ack.pending = 0; - -#ifdef TCP_CLEAR_TIMERS - sk_stop_timer(sk, &tp->delack_timer); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_delack_timer); #endif - break; - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } #endif - return; - }; - } /* * Reset the retransmission timer */ -static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long when) +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, + unsigned long when) { - struct tcp_sock *tp = tcp_sk(sk); + 
struct inet_connection_sock *icsk = inet_csk(sk); if (when > TCP_RTO_MAX) { -#ifdef TCP_DEBUG - printk(KERN_DEBUG "reset_xmit_timer sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); +#ifdef INET_CSK_DEBUG + pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + sk, what, when, current_text_addr()); #endif when = TCP_RTO_MAX; } - switch (what) { - case TCP_TIME_RETRANS: - case TCP_TIME_PROBE0: - tp->pending = what; - tp->timeout = jiffies+when; - sk_reset_timer(sk, &tp->retransmit_timer, tp->timeout); - break; - - case TCP_TIME_DACK: - tp->ack.pending |= TCP_ACK_TIMER; - tp->ack.timeout = jiffies+when; - sk_reset_timer(sk, &tp->delack_timer, tp->ack.timeout); - break; - - default: -#ifdef TCP_DEBUG - printk(tcp_timer_bug_msg); + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = what; + icsk->icsk_timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } #endif - return; - }; } /* Initialize RCV_MSS value. 
@@ -637,7 +630,7 @@ static inline void tcp_initialize_rcv_mss(struct sock *sk) hint = min(hint, TCP_MIN_RCVMSS); hint = max(hint, TCP_MIN_MSS); - tp->ack.rcv_mss = hint; + inet_csk(sk)->icsk_ack.rcv_mss = hint; } static __inline__ void __tcp_fast_path_on(struct tcp_sock *tp, u32 snd_wnd) @@ -772,7 +765,7 @@ static inline void tcp_packets_out_inc(struct sock *sk, tp->packets_out += tcp_skb_pcount(skb); if (!orig) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); } static inline void tcp_packets_out_dec(struct tcp_sock *tp, @@ -939,8 +932,9 @@ static __inline__ void tcp_minshall_update(struct tcp_sock *tp, int mss, static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { - if (!tp->packets_out && !tp->pending) - tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); + const struct inet_connection_sock *icsk = inet_csk(sk); + if (!tp->packets_out && !icsk->icsk_pending) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); } static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1021,8 +1015,9 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) tp->ucopy.memory = 0; } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { wake_up_interruptible(sk->sk_sleep); - if (!tcp_ack_scheduled(tp)) - tcp_reset_xmit_timer(sk, TCP_TIME_DACK, (3*TCP_RTO_MIN)/4); + if (!inet_csk_ack_scheduled(sk)) + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + (3 * TCP_RTO_MIN) / 4); } return 1; } @@ -1055,7 +1050,7 @@ static __inline__ void tcp_set_state(struct sock *sk, int state) TCP_INC_STATS(TCP_MIB_ESTABRESETS); sk->sk_prot->unhash(sk); - if (inet_sk(sk)->bind_hash && + if (inet_csk(sk)->icsk_bind_hash && !(sk->sk_userlocks & SOCK_BINDPORT_LOCK)) inet_put_port(&tcp_hashinfo, sk); /* fall through */ @@ -1186,51 +1181,55 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void 
tcp_acceptq_queue(struct sock *sk, struct request_sock *req, - struct sock *child) +static inline void inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) { - reqsk_queue_add(&tcp_sk(sk)->accept_queue, req, sk, child); + reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); } -static inline void -tcp_synq_removed(struct sock *sk, struct request_sock *req) +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, + struct request_sock *req) { - if (reqsk_queue_removed(&tcp_sk(sk)->accept_queue, req) == 0) - tcp_delete_keepalive_timer(sk); + if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) + inet_csk_delete_keepalive_timer(sk); } -static inline void tcp_synq_added(struct sock *sk) +static inline void inet_csk_reqsk_queue_added(struct sock *sk, + const unsigned long timeout) { - if (reqsk_queue_added(&tcp_sk(sk)->accept_queue) == 0) - tcp_reset_keepalive_timer(sk, TCP_TIMEOUT_INIT); + if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) + inet_csk_reset_keepalive_timer(sk, timeout); } -static inline int tcp_synq_len(struct sock *sk) +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) { - return reqsk_queue_len(&tcp_sk(sk)->accept_queue); + return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); } -static inline int tcp_synq_young(struct sock *sk) +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) { - return reqsk_queue_len_young(&tcp_sk(sk)->accept_queue); + return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); } -static inline int tcp_synq_is_full(struct sock *sk) +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) { - return reqsk_queue_is_full(&tcp_sk(sk)->accept_queue); + return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); } -static inline void tcp_synq_unlink(struct tcp_sock *tp, struct request_sock *req, - struct request_sock **prev) +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, + 
struct request_sock *req, + struct request_sock **prev) { - reqsk_queue_unlink(&tp->accept_queue, req, prev); + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); } -static inline void tcp_synq_drop(struct sock *sk, struct request_sock *req, - struct request_sock **prev) +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) { - tcp_synq_unlink(tcp_sk(sk), req, prev); - tcp_synq_removed(sk, req); + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); reqsk_free(req); } @@ -1265,12 +1264,13 @@ static inline int keepalive_time_when(const struct tcp_sock *tp) return tp->keepalive_time ? : sysctl_tcp_keepalive_time; } -static inline int tcp_fin_time(const struct tcp_sock *tp) +static inline int tcp_fin_time(const struct sock *sk) { - int fin_timeout = tp->linger2 ? : sysctl_tcp_fin_timeout; + int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout; + const int rto = inet_csk(sk)->icsk_rto; - if (fin_timeout < (tp->rto<<2) - (tp->rto>>1)) - fin_timeout = (tp->rto<<2) - (tp->rto>>1); + if (fin_timeout < (rto << 2) - (rto >> 1)) + fin_timeout = (rto << 2) - (rto >> 1); return fin_timeout; } diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h index 64980ee8c92..c6b84397448 100644 --- a/include/net/tcp_ecn.h +++ b/include/net/tcp_ecn.h @@ -88,7 +88,7 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb) * it is surely retransmit. It is not in ECN RFC, * but Linux follows this rule. 
*/ else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) - tcp_enter_quickack_mode(tp); + tcp_enter_quickack_mode((struct sock *)tp); } } -- cgit v1.2.3 From 3f421baa4720b708022f8bcc52a61e5cd6f10bf8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:08 -0700 Subject: [NET]: Just move the inet_connection_sock function from tcp sources Completing the previous changeset, this also generalises tcp_v4_synq_add, renaming it to inet_csk_reqsk_queue_hash_add, already being used in the DCCP tree, which I plan to merge RSN. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 7 +- include/net/inet_connection_sock.h | 152 +++++++++++++++++++++++++++++++++++ include/net/tcp.h | 160 ++----------------------------------- 3 files changed, 161 insertions(+), 158 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 88591913c94..777339b6846 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -333,15 +333,10 @@ static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk) return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL; } -static inline int inet_twsk_ipv6only(const struct sock *sk) -{ - return inet_twsk(sk)->tw_ipv6only; -} - static inline int inet_v6_ipv6only(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? - ipv6_only_sock(sk) : inet_twsk_ipv6only(sk); + ipv6_only_sock(sk) : inet_twsk(sk)->tw_ipv6only; } #else #define __ipv6_only_sock(sk) 0 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index ef609396e41..97e002001c1 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -16,9 +16,15 @@ #define _INET_CONNECTION_SOCK_H #include +#include #include #include +#define INET_CSK_DEBUG 1 + +/* Cancel timers, when they are not required.
*/ +#undef INET_CSK_CLEAR_TIMERS + struct inet_bind_bucket; struct inet_hashinfo; @@ -61,17 +67,107 @@ struct inet_connection_sock { } icsk_ack; }; +#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ +#define ICSK_TIME_DACK 2 /* Delayed ack timer */ +#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ +#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ + static inline struct inet_connection_sock *inet_csk(const struct sock *sk) { return (struct inet_connection_sock *)sk; } +enum inet_csk_ack_state_t { + ICSK_ACK_SCHED = 1, + ICSK_ACK_TIMER = 2, + ICSK_ACK_PUSHED = 4 +}; + extern void inet_csk_init_xmit_timers(struct sock *sk, void (*retransmit_handler)(unsigned long), void (*delack_handler)(unsigned long), void (*keepalive_handler)(unsigned long)); extern void inet_csk_clear_xmit_timers(struct sock *sk); +static inline void inet_csk_schedule_ack(struct sock *sk) +{ + inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; +} + +static inline int inet_csk_ack_scheduled(const struct sock *sk) +{ + return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; +} + +static inline void inet_csk_delack_init(struct sock *sk) +{ + memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); +} + +extern void inet_csk_delete_keepalive_timer(struct sock *sk); +extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); + +#ifdef INET_CSK_DEBUG +extern const char inet_csk_timer_bug_msg[]; +#endif + +static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_retransmit_timer); +#endif + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; +#ifdef INET_CSK_CLEAR_TIMERS + sk_stop_timer(sk, &icsk->icsk_delack_timer); +#endif + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); 
+ } +#endif +} + +/* + * Reset the retransmission timer + */ +static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, + unsigned long when, + const unsigned long max_when) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + if (when > max_when) { +#ifdef INET_CSK_DEBUG + pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", + sk, what, when, current_text_addr()); +#endif + when = max_when; + } + + if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { + icsk->icsk_pending = what; + icsk->icsk_timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); + } else if (what == ICSK_TIME_DACK) { + icsk->icsk_ack.pending |= ICSK_ACK_TIMER; + icsk->icsk_ack.timeout = jiffies + when; + sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } +#ifdef INET_CSK_DEBUG + else { + pr_debug(inet_csk_timer_bug_msg); + } +#endif +} + +extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); + extern struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, const __u16 rport, @@ -83,4 +179,60 @@ extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, extern struct dst_entry* inet_csk_route_req(struct sock *sk, const struct request_sock *req); +static inline void inet_csk_reqsk_queue_add(struct sock *sk, + struct request_sock *req, + struct sock *child) +{ + reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); +} + +extern void inet_csk_reqsk_queue_hash_add(struct sock *sk, + struct request_sock *req, + const unsigned timeout); + +static inline void inet_csk_reqsk_queue_removed(struct sock *sk, + struct request_sock *req) +{ + if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) + inet_csk_delete_keepalive_timer(sk); +} + +static inline void inet_csk_reqsk_queue_added(struct sock *sk, + const unsigned long timeout) +{ + if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) + 
inet_csk_reset_keepalive_timer(sk, timeout); +} + +static inline int inet_csk_reqsk_queue_len(const struct sock *sk) +{ + return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_young(const struct sock *sk) +{ + return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); +} + +static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) +{ + return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); +} + +static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) +{ + reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); +} + +static inline void inet_csk_reqsk_queue_drop(struct sock *sk, + struct request_sock *req, + struct request_sock **prev) +{ + inet_csk_reqsk_queue_unlink(sk, req, prev); + inet_csk_reqsk_queue_removed(sk, req); + reqsk_free(req); +} + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index a943c79c88b..dd9a5a288f8 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -19,18 +19,16 @@ #define _TCP_H #define TCP_DEBUG 1 -#define INET_CSK_DEBUG 1 #define FASTRETRANS_DEBUG 1 -/* Cancel timers, when they are not required. 
*/ -#undef INET_CSK_CLEAR_TIMERS - #include #include #include #include #include #include + +#include #include #include #include @@ -206,11 +204,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCPOLEN_SACK_BASE_ALIGNED 4 #define TCPOLEN_SACK_PERBLOCK 8 -#define ICSK_TIME_RETRANS 1 /* Retransmit timer */ -#define ICSK_TIME_DACK 2 /* Delayed ack timer */ -#define ICSK_TIME_PROBE0 3 /* Zero window probe timer */ -#define ICSK_TIME_KEEPOPEN 4 /* Keepalive timer */ - /* Flags in tp->nonagle */ #define TCP_NAGLE_OFF 1 /* Nagle's algo is disabled */ #define TCP_NAGLE_CORK 2 /* Socket is corked */ @@ -257,12 +250,6 @@ extern atomic_t tcp_memory_allocated; extern atomic_t tcp_sockets_allocated; extern int tcp_memory_pressure; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#define AF_INET_FAMILY(fam) ((fam) == AF_INET) -#else -#define AF_INET_FAMILY(fam) 1 -#endif - /* * Pointers to address related TCP functions * (i.e. things that depend on the address family) @@ -373,22 +360,6 @@ extern int tcp_rcv_established(struct sock *sk, extern void tcp_rcv_space_adjust(struct sock *sk); -enum inet_csk_ack_state_t { - ICSK_ACK_SCHED = 1, - ICSK_ACK_TIMER = 2, - ICSK_ACK_PUSHED = 4 -}; - -static inline void inet_csk_schedule_ack(struct sock *sk) -{ - inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_SCHED; -} - -static inline int inet_csk_ack_scheduled(const struct sock *sk) -{ - return inet_csk(sk)->icsk_ack.pending & ICSK_ACK_SCHED; -} - static inline void tcp_dec_quickack_mode(struct sock *sk, const unsigned int pkts) { @@ -406,11 +377,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, extern void tcp_enter_quickack_mode(struct sock *sk); -static inline void inet_csk_delack_init(struct sock *sk) -{ - memset(&inet_csk(sk)->icsk_ack, 0, sizeof(inet_csk(sk)->icsk_ack)); -} - static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = rx_opt->wscale_ok = rx_opt->snd_wscale = 0; @@ -442,7 
+408,6 @@ extern void tcp_update_metrics(struct sock *sk); extern void tcp_close(struct sock *sk, long timeout); -extern struct sock * inet_csk_accept(struct sock *sk, int flags, int *err); extern unsigned int tcp_poll(struct file * file, struct socket *sock, struct poll_table_struct *wait); extern int tcp_getsockopt(struct sock *sk, int level, @@ -541,15 +506,9 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) inet_csk_clear_xmit_timers(sk); } -extern void inet_csk_delete_keepalive_timer(struct sock *sk); -extern void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -#ifdef INET_CSK_DEBUG -extern const char inet_csk_timer_bug_msg[]; -#endif - /* tcp_diag.c */ extern void tcp_get_info(struct sock *, struct tcp_info *); @@ -559,60 +518,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); -static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { - icsk->icsk_pending = 0; -#ifdef INET_CSK_CLEAR_TIMERS - sk_stop_timer(sk, &icsk->icsk_retransmit_timer); -#endif - } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.blocked = icsk->icsk_ack.pending = 0; -#ifdef INET_CSK_CLEAR_TIMERS - sk_stop_timer(sk, &icsk->icsk_delack_timer); -#endif - } -#ifdef INET_CSK_DEBUG - else { - pr_debug(inet_csk_timer_bug_msg); - } -#endif -} - -/* - * Reset the retransmission timer - */ -static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, - unsigned long when) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - if (when > TCP_RTO_MAX) { -#ifdef INET_CSK_DEBUG - pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", - sk, what, when, 
current_text_addr()); -#endif - when = TCP_RTO_MAX; - } - - if (what == ICSK_TIME_RETRANS || what == ICSK_TIME_PROBE0) { - icsk->icsk_pending = what; - icsk->icsk_timeout = jiffies + when; - sk_reset_timer(sk, &icsk->icsk_retransmit_timer, icsk->icsk_timeout); - } else if (what == ICSK_TIME_DACK) { - icsk->icsk_ack.pending |= ICSK_ACK_TIMER; - icsk->icsk_ack.timeout = jiffies + when; - sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); - } -#ifdef INET_CSK_DEBUG - else { - pr_debug(inet_csk_timer_bug_msg); - } -#endif -} - /* Initialize RCV_MSS value. * RCV_MSS is an our guess about MSS used by the peer. * We haven't any direct information about the MSS. @@ -765,7 +670,8 @@ static inline void tcp_packets_out_inc(struct sock *sk, tp->packets_out += tcp_skb_pcount(skb); if (!orig) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + inet_csk(sk)->icsk_rto, TCP_RTO_MAX); } static inline void tcp_packets_out_dec(struct tcp_sock *tp, @@ -934,7 +840,8 @@ static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *t { const struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out && !icsk->icsk_pending) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, icsk->icsk_rto); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_PROBE0, + icsk->icsk_rto, TCP_RTO_MAX); } static __inline__ void tcp_push_pending_frames(struct sock *sk, @@ -1017,7 +924,8 @@ static __inline__ int tcp_prequeue(struct sock *sk, struct sk_buff *skb) wake_up_interruptible(sk->sk_sleep); if (!inet_csk_ack_scheduled(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * TCP_RTO_MIN) / 4); + (3 * TCP_RTO_MIN) / 4, + TCP_RTO_MAX); } return 1; } @@ -1181,58 +1089,6 @@ static inline int tcp_full_space(const struct sock *sk) return tcp_win_from_space(sk->sk_rcvbuf); } -static inline void inet_csk_reqsk_queue_add(struct sock *sk, - struct request_sock *req, - struct sock *child) -{ - 
reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child); -} - -static inline void inet_csk_reqsk_queue_removed(struct sock *sk, - struct request_sock *req) -{ - if (reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req) == 0) - inet_csk_delete_keepalive_timer(sk); -} - -static inline void inet_csk_reqsk_queue_added(struct sock *sk, - const unsigned long timeout) -{ - if (reqsk_queue_added(&inet_csk(sk)->icsk_accept_queue) == 0) - inet_csk_reset_keepalive_timer(sk, timeout); -} - -static inline int inet_csk_reqsk_queue_len(const struct sock *sk) -{ - return reqsk_queue_len(&inet_csk(sk)->icsk_accept_queue); -} - -static inline int inet_csk_reqsk_queue_young(const struct sock *sk) -{ - return reqsk_queue_len_young(&inet_csk(sk)->icsk_accept_queue); -} - -static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) -{ - return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); -} - -static inline void inet_csk_reqsk_queue_unlink(struct sock *sk, - struct request_sock *req, - struct request_sock **prev) -{ - reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req, prev); -} - -static inline void inet_csk_reqsk_queue_drop(struct sock *sk, - struct request_sock *req, - struct request_sock **prev) -{ - inet_csk_reqsk_queue_unlink(sk, req, prev); - inet_csk_reqsk_queue_removed(sk, req); - reqsk_free(req); -} - static __inline__ void tcp_openreq_init(struct request_sock *req, struct tcp_options_received *rx_opt, struct sk_buff *skb) -- cgit v1.2.3 From 9f1d2604c71498579609b1532fedc5a89276bb00 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:24 -0700 Subject: [ICSK]: Introduce inet_csk_clone Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 97e002001c1..a50f4a4b7b4 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -77,6 +77,10 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) return (struct inet_connection_sock *)sk; } +extern struct sock *inet_csk_clone(struct sock *sk, + const struct request_sock *req, + const unsigned int __nocast priority); + enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, ICSK_ACK_TIMER = 2, -- cgit v1.2.3 From 0a5578cf8e5e045aaa68643c17ce885426697c6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:41 -0700 Subject: [ICSK]: Generalise tcp_listen_{start,stop} This also moved inet_iif from tcp to inet_hashtables.h, as it is needed by the inet_lookup callers, perhaps this needs a bit of polishing, but for now seems fine. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/inet_hashtables.h | 6 ++++++ include/net/sock.h | 1 + include/net/tcp.h | 4 ++-- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index f0c21c07f89..646b6ea7fe2 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -280,6 +281,11 @@ out: wake_up(&hashinfo->lhash_wait); } +static inline int inet_iif(const struct sk_buff *skb) +{ + return ((struct rtable *)skb->dst)->rt_iif; +} + extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, const unsigned short hnum, diff --git a/include/net/sock.h b/include/net/sock.h index 48cc337a656..8678313a22b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -558,6 +558,7 @@ struct proto { kmem_cache_t *twsk_slab; unsigned int twsk_obj_size; + atomic_t *orphan_count; struct request_sock_ops *rsk_prot; diff --git a/include/net/tcp.h b/include/net/tcp.h index dd9a5a288f8..68f1ec1c583 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -860,7 +860,7 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) tp->snd_wl1 = seq; } -extern void tcp_destroy_sock(struct sock *sk); +extern void inet_csk_destroy_sock(struct sock *sk); /* @@ -987,7 +987,7 @@ static __inline__ void tcp_done(struct sock *sk) if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); else - tcp_destroy_sock(sk); + inet_csk_destroy_sock(sk); } static __inline__ void tcp_sack_reset(struct tcp_options_received *rx_opt) -- cgit v1.2.3 From 295f7324ff8d9ea58b4d3ec93b1aaa1d80e048a9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:11:56 -0700 Subject: [ICSK]: Introduce reqsk_queue_prune from code in tcp_synack_timer With this we're very close to getting all of the current TCP refactorings in my dccp-2.6 tree merged, next changeset will export some functions needed 
by the current DCCP code and then dccp-2.6.git will be born! Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- include/net/inet_connection_sock.h | 2 ++ include/net/request_sock.h | 7 +++++++ include/net/tcp.h | 3 ++- 4 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 800930fac38..62009684074 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -270,7 +270,7 @@ struct tcp_sock { __u8 frto_counter; /* Number of new acks after RTO */ __u8 nonagle; /* Disable Nagle algorithm? */ - __u8 defer_accept; /* User waits for some data after accept() */ + /* ONE BYTE HOLE, TRY TO PACK */ /* RTT measurement */ __u32 srtt; /* smoothed round trip time << 3 */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index a50f4a4b7b4..692825fc813 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -239,4 +239,6 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk, reqsk_free(req); } +extern void inet_csk_listen_stop(struct sock *sk); + #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index b7c7eecbe64..447d287a38f 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -97,6 +97,7 @@ struct listen_sock { * * @rskq_accept_head - FIFO head of established children * @rskq_accept_tail - FIFO tail of established children + * @rskq_defer_accept - User waits for some data after accept() * @syn_wait_lock - serializer * * %syn_wait_lock is necessary only to avoid proc interface having to grab the main @@ -112,6 +113,8 @@ struct request_sock_queue { struct request_sock *rskq_accept_head; struct request_sock *rskq_accept_tail; rwlock_t syn_wait_lock; + u8 rskq_defer_accept; + /* 3 bytes hole, try to pack */ struct listen_sock *listen_opt; }; @@ -255,4 +258,8 @@ static inline void 
reqsk_queue_hash_req(struct request_sock_queue *queue, write_unlock(&queue->syn_wait_lock); } +extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent, + const unsigned long interval, const unsigned long timeout, + const unsigned long max_rto, int max_retries); + #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 68f1ec1c583..2423f059b62 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -423,7 +423,8 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, size_t len, int nonblock, int flags, int *addr_len); -extern int tcp_listen_start(struct sock *sk); +extern int inet_csk_listen_start(struct sock *sk, + const int nr_table_entries); extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, -- cgit v1.2.3 From c4365c9235f80128c3c3d5993074173941b1c1f0 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:12:30 -0700 Subject: [RANDOM]: Introduce secure_dccp_sequence_number Code contributed by Stephen Hemminger. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/random.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/random.h b/include/linux/random.h index cc670344991..7b2adb3322d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -59,6 +59,8 @@ extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr, __u16 sport, __u16 dport); extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr, __u16 sport, __u16 dport); +extern u64 secure_dccp_sequence_number(__u32 saddr, __u32 daddr, + __u16 sport, __u16 dport); #ifndef MODULE extern struct file_operations random_fops, urandom_fops; -- cgit v1.2.3 From 7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:14:34 -0700 Subject: [DCCP]: Initial implementation Development to this point was done on a subversion repository at: http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/ This repository will be kept at this site for the foreseeable future, so that interested parties can see the history of this code, attributions, etc. If I ever decide to take this offline I'll provide the full history at some other suitable place. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 432 +++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/in.h | 1 + include/linux/net.h | 1 + include/linux/socket.h | 1 + 4 files changed, 435 insertions(+) create mode 100644 include/linux/dccp.h (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h new file mode 100644 index 00000000000..e3b4bf7346b --- /dev/null +++ b/include/linux/dccp.h @@ -0,0 +1,432 @@ +#ifndef _LINUX_DCCP_H +#define _LINUX_DCCP_H + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* FIXME: this is utterly wrong */ +struct sockaddr_dccp { + struct sockaddr_in in; + unsigned int service; +}; + +enum dccp_state { + DCCP_OPEN = TCP_ESTABLISHED, + DCCP_REQUESTING = TCP_SYN_SENT, + DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: + This mapping is horrible, but TCP has + no matching state for DCCP_PARTOPEN, + as TCP_SYN_RECV is already used by + DCCP_RESPOND, why don't stop using TCP + mapping of states? OK, now we don't use + sk_stream_sendmsg anymore, so doesn't + seem to exist any reason for us to + do the TCP mapping here */ + DCCP_LISTEN = TCP_LISTEN, + DCCP_RESPOND = TCP_SYN_RECV, + DCCP_CLOSING = TCP_CLOSING, + DCCP_TIME_WAIT = TCP_TIME_WAIT, + DCCP_CLOSED = TCP_CLOSE, + DCCP_MAX_STATES = TCP_MAX_STATES, +}; + +#define DCCP_STATE_MASK 0xf +#define DCCP_ACTION_FIN (1<<7) + +enum { + DCCPF_OPEN = TCPF_ESTABLISHED, + DCCPF_REQUESTING = TCPF_SYN_SENT, + DCCPF_PARTOPEN = TCPF_FIN_WAIT1, + DCCPF_LISTEN = TCPF_LISTEN, + DCCPF_RESPOND = TCPF_SYN_RECV, + DCCPF_CLOSING = TCPF_CLOSING, + DCCPF_TIME_WAIT = TCPF_TIME_WAIT, + DCCPF_CLOSED = TCPF_CLOSE, +}; + +/** + * struct dccp_hdr - generic part of DCCP packet header + * + * @dccph_sport - Relevant port on the endpoint that sent this packet + * @dccph_dport - Relevant port on the other endpoint + * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words + * @dccph_ccval - Used by the HC-Sender CCID + * @dccph_cscov - Parts 
of the packet that are covered by the Checksum field + * @dccph_checksum - Internet checksum, depends on dccph_cscov + * @dccph_x - 0 = 24 bit sequence number, 1 = 48 + * @dccph_type - packet type, see DCCP_PKT_ prefixed macros + * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x + */ +struct dccp_hdr { + __u16 dccph_sport, + dccph_dport; + __u8 dccph_doff; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 dccph_cscov:4, + dccph_ccval:4; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u8 dccph_ccval:4, + dccph_cscov:4; +#else +#error "Adjust your defines" +#endif + __u16 dccph_checksum; +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 dccph_x:1, + dccph_type:4, + dccph_reserved:3, + dccph_seq:24; +#elif defined(__BIG_ENDIAN_BITFIELD) + __u32 dccph_reserved:3, + dccph_type:4, + dccph_x:1, + dccph_seq:24; +#else +#error "Adjust your defines" +#endif +}; + +static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) +{ + return (struct dccp_hdr *)skb->h.raw; +} + +/** + * struct dccp_hdr_ext - the low bits of a 48 bit seq packet + * + * @dccph_seq_low - low 24 bits of a 48 bit seq packet + */ +struct dccp_hdr_ext { + __u32 dccph_seq_low; +}; + +static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); +} + +static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); +} + +static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq << 8); +#elif defined(__BIG_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq); +#else +#error "Adjust your defines" +#endif + + if (dh->dccph_x != 0) + seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); + + return seq_nr; +} + +/** + * struct dccp_hdr_request - Conection initiation request header + * + * @dccph_req_service - Service to which the client app wants to connect + * @dccph_req_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_request { + __u32 dccph_req_service; +}; + +static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) +{ + return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +/** + * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets + * + * @dccph_resp_ack_nr_high - 48 bit ack number high order bits, contains GSR + * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR + */ +struct dccp_hdr_ack_bits { + __u32 dccph_reserved1:8, + dccph_ack_nr_high:24; + __u32 dccph_ack_nr_low; +}; + +static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#elif defined(__BIG_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#else +#error "Adjust your defines" +#endif +} + +/** + * struct dccp_hdr_response - Conection initiation response header + * + * @dccph_resp_ack_nr_high - 48 bit 
ack number high order bits, contains GSR + * @dccph_resp_ack_nr_low - 48 bit ack number low order bits, contains GSR + * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request + * @dccph_resp_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_response { + struct dccp_hdr_ack_bits dccph_resp_ack; + __u32 dccph_resp_service; +}; + +static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) +{ + return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +/** + * struct dccp_hdr_reset - Unconditionally shut down a connection + * + * @dccph_reset_service - Echoes the Service Code on a received DCCP-Request + * @dccph_reset_options - list of options (must be a multiple of 32 bits + */ +struct dccp_hdr_reset { + struct dccp_hdr_ack_bits dccph_reset_ack; + __u8 dccph_reset_code, + dccph_reset_data[3]; +}; + +static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) +{ + return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +enum dccp_pkt_type { + DCCP_PKT_REQUEST = 0, + DCCP_PKT_RESPONSE, + DCCP_PKT_DATA, + DCCP_PKT_ACK, + DCCP_PKT_DATAACK, + DCCP_PKT_CLOSEREQ, + DCCP_PKT_CLOSE, + DCCP_PKT_RESET, + DCCP_PKT_SYNC, + DCCP_PKT_SYNCACK, + DCCP_PKT_INVALID, +}; + +#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID + +static inline unsigned int dccp_packet_hdr_len(const __u8 type) +{ + if (type == DCCP_PKT_DATA) + return 0; + if (type == DCCP_PKT_DATAACK || + type == DCCP_PKT_ACK || + type == DCCP_PKT_SYNC || + type == DCCP_PKT_SYNCACK || + type == DCCP_PKT_CLOSE || + type == DCCP_PKT_CLOSEREQ) + return sizeof(struct dccp_hdr_ack_bits); + if (type == DCCP_PKT_REQUEST) + return sizeof(struct dccp_hdr_request); + if (type == DCCP_PKT_RESPONSE) + return sizeof(struct dccp_hdr_response); + return sizeof(struct dccp_hdr_reset); +} + +static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) +{ + return dccp_basic_hdr_len(skb) + + 
dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); +} + +enum dccp_reset_codes { + DCCP_RESET_CODE_UNSPECIFIED = 0, + DCCP_RESET_CODE_CLOSED, + DCCP_RESET_CODE_ABORTED, + DCCP_RESET_CODE_NO_CONNECTION, + DCCP_RESET_CODE_PACKET_ERROR, + DCCP_RESET_CODE_OPTION_ERROR, + DCCP_RESET_CODE_MANDATORY_ERROR, + DCCP_RESET_CODE_CONNECTION_REFUSED, + DCCP_RESET_CODE_BAD_SERVICE_CODE, + DCCP_RESET_CODE_TOO_BUSY, + DCCP_RESET_CODE_BAD_INIT_COOKIE, + DCCP_RESET_CODE_AGGRESSION_PENALTY, +}; + +/* DCCP options */ +enum { + DCCPO_PADDING = 0, + DCCPO_MANDATORY = 1, + DCCPO_MIN_RESERVED = 3, + DCCPO_MAX_RESERVED = 31, + DCCPO_NDP_COUNT = 37, + DCCPO_ACK_VECTOR_0 = 38, + DCCPO_ACK_VECTOR_1 = 39, + DCCPO_TIMESTAMP = 41, + DCCPO_TIMESTAMP_ECHO = 42, + DCCPO_ELAPSED_TIME = 43, + DCCPO_MAX = 45, + DCCPO_MIN_CCID_SPECIFIC = 128, + DCCPO_MAX_CCID_SPECIFIC = 255, +}; + +/* DCCP features */ +enum { + DCCPF_RESERVED = 0, + DCCPF_SEQUENCE_WINDOW = 3, + DCCPF_SEND_ACK_VECTOR = 6, + DCCPF_SEND_NDP_COUNT = 7, + /* 10-127 reserved */ + DCCPF_MIN_CCID_SPECIFIC = 128, + DCCPF_MAX_CCID_SPECIFIC = 255, +}; + +/* initial values for each feature */ +#define DCCPF_INITIAL_SEQUENCE_WINDOW 100 +/* FIXME: for now we're using CCID 3 (TFRC) */ +#define DCCPF_INITIAL_CCID 3 +#define DCCPF_INITIAL_SEND_ACK_VECTOR 0 +/* FIXME: for now we're default to 1 but it should really be 0 */ +#define DCCPF_INITIAL_SEND_NDP_COUNT 1 + +#define DCCP_NDP_LIMIT 0xFFFFFF + +/** + * struct dccp_options - option values for a DCCP connection + * @dccpo_sequence_window - Sequence Window Feature (section 7.5.2) + * @dccpo_ccid - Congestion Control Id (CCID) (section 10) + * @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5) + * @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2) + */ +struct dccp_options { + __u64 dccpo_sequence_window; + __u8 dccpo_ccid; + __u8 dccpo_send_ack_vector; + __u8 dccpo_send_ndp_count; +}; + +extern void __dccp_options_init(struct dccp_options *dccpo); +extern void dccp_options_init(struct 
dccp_options *dccpo); +extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb); + +struct dccp_request_sock { + struct inet_request_sock dreq_inet_rsk; + __u64 dreq_iss; + __u64 dreq_isr; + __u32 dreq_service; +}; + +static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) +{ + return (struct dccp_request_sock *)req; +} + +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACK_VECTOR_LEN 253 + +struct dccp_options_received { + u32 dccpor_ndp:24, + dccpor_ack_vector_len:8; + u32 dccpor_ack_vector_idx:10; + /* 22 bits hole, try to pack */ + u32 dccpor_timestamp; + u32 dccpor_timestamp_echo; + u32 dccpor_elapsed_time; +}; + +struct ccid; + +enum dccp_role { + DCCP_ROLE_UNDEFINED, + DCCP_ROLE_LISTEN, + DCCP_ROLE_CLIENT, + DCCP_ROLE_SERVER, +}; + +/** + * struct dccp_sock - DCCP socket state + * + * @dccps_swl - sequence number window low + * @dccps_swh - sequence number window high + * @dccps_awl - acknowledgement number window low + * @dccps_awh - acknowledgement number window high + * @dccps_iss - initial sequence number sent + * @dccps_isr - initial sequence number received + * @dccps_osr - first OPEN sequence number received + * @dccps_gss - greatest sequence number sent + * @dccps_gsr - greatest valid sequence number received + * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss + * @dccps_timestamp_time - time of latest TIMESTAMP option + * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option + * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) + * @dccps_pmtu_cookie - Last pmtu seen by socket + * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it + * @dccps_role - Role of this sock, one of %dccp_role + * @dccps_ndp_count - number of Non Data Packets since last data packet + * @dccps_hc_rx_ackpkts - receiver half connection acked packets + */ +struct dccp_sock { + /* 
inet_connection_sock has to be the first member of dccp_sock */ + struct inet_connection_sock dccps_inet_connection; + __u64 dccps_swl; + __u64 dccps_swh; + __u64 dccps_awl; + __u64 dccps_awh; + __u64 dccps_iss; + __u64 dccps_isr; + __u64 dccps_osr; + __u64 dccps_gss; + __u64 dccps_gsr; + __u64 dccps_gar; + unsigned long dccps_service; + unsigned long dccps_timestamp_time; + __u32 dccps_timestamp_echo; + __u32 dccps_avg_packet_size; + unsigned long dccps_ndp_count; + __u16 dccps_ext_header_len; + __u32 dccps_pmtu_cookie; + __u32 dccps_mss_cache; + struct dccp_options dccps_options; + struct dccp_ackpkts *dccps_hc_rx_ackpkts; + void *dccps_hc_rx_ccid_private; + void *dccps_hc_tx_ccid_private; + struct ccid *dccps_hc_rx_ccid; + struct ccid *dccps_hc_tx_ccid; + struct dccp_options_received dccps_options_received; + enum dccp_role dccps_role:2; +}; + +static inline struct dccp_sock *dccp_sk(const struct sock *sk) +{ + return (struct dccp_sock *)sk; +} + +static inline const char *dccp_role(const struct sock *sk) +{ + switch (dccp_sk(sk)->dccps_role) { + case DCCP_ROLE_UNDEFINED: return "undefined"; + case DCCP_ROLE_LISTEN: return "listen"; + case DCCP_ROLE_SERVER: return "server"; + case DCCP_ROLE_CLIENT: return "client"; + } + return NULL; +} + +#endif /* _LINUX_DCCP_H */ diff --git a/include/linux/in.h b/include/linux/in.h index fb88c66d748..ba355384016 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -32,6 +32,7 @@ enum { IPPROTO_PUP = 12, /* PUP protocol */ IPPROTO_UDP = 17, /* User Datagram Protocol */ IPPROTO_IDP = 22, /* XNS IDP protocol */ + IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */ IPPROTO_RSVP = 46, /* RSVP protocol */ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */ diff --git a/include/linux/net.h b/include/linux/net.h index 39906619b9d..5f8b632ff65 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -84,6 +84,7 @@ enum sock_type { SOCK_RAW = 3, SOCK_RDM = 4, SOCK_SEQPACKET = 5, + SOCK_DCCP = 6, SOCK_PACKET 
= 10, }; diff --git a/include/linux/socket.h b/include/linux/socket.h index a5c7d96e4d2..ddf22559f48 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -271,6 +271,7 @@ struct ucred { #define SOL_IRDA 266 #define SOL_NETBEUI 267 #define SOL_LLC 268 +#define SOL_DCCP 269 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:15:09 -0700 Subject: [ICSK]: Move generalised functions from tcp to inet_connection_sock This also improves reqsk_queue_prune and renames it to inet_csk_reqsk_queue_prune, as it deals with both inet_connection_sock and inet_request_sock objects, not just with request_sock ones thus belonging to inet_request_sock. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 7 +++++++ include/net/request_sock.h | 4 ---- include/net/tcp.h | 6 ------ 3 files changed, 7 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 692825fc813..bec19d5cff2 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -239,6 +239,13 @@ static inline void inet_csk_reqsk_queue_drop(struct sock *sk, reqsk_free(req); } +extern void inet_csk_reqsk_queue_prune(struct sock *parent, + const unsigned long interval, + const unsigned long timeout, + const unsigned long max_rto); + +extern void inet_csk_destroy_sock(struct sock *sk); +extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); extern void inet_csk_listen_stop(struct sock *sk); #endif /* _INET_CONNECTION_SOCK_H */ diff --git a/include/net/request_sock.h b/include/net/request_sock.h index 447d287a38f..b52cc52ffe3 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -258,8 +258,4 @@ static inline void reqsk_queue_hash_req(struct request_sock_queue *queue, 
write_unlock(&queue->syn_wait_lock); } -extern void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent, - const unsigned long interval, const unsigned long timeout, - const unsigned long max_rto, int max_retries); - #endif /* _REQUEST_SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 2423f059b62..077db859ae0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -423,9 +423,6 @@ extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, size_t len, int nonblock, int flags, int *addr_len); -extern int inet_csk_listen_start(struct sock *sk, - const int nr_table_entries); - extern void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab); @@ -861,9 +858,6 @@ static __inline__ void tcp_update_wl(struct tcp_sock *tp, u32 ack, u32 seq) tp->snd_wl1 = seq; } -extern void inet_csk_destroy_sock(struct sock *sk); - - /* * Calculate(/check) TCP checksum */ -- cgit v1.2.3 From 74459dc7bacda04d14626d239c8f5c4dac22560d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:15:51 -0700 Subject: [LIST]: Introduce list_for_each_entry_safe_continue Used in the dccp CCID3 code, that is going to be submitted RSN. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/list.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index aab2db21b01..597094e0fdb 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -418,6 +418,19 @@ static inline void list_splice_init(struct list_head *list, &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) +/** + * list_for_each_entry_safe_continue - iterate over list of given type + * continuing after existing point safe against removal of list entry + * @pos: the type * to use as a loop counter. + * @n: another type * to use as temporary storage + * @head: the head for your list. 
+ * @member: the name of the list_struct within the struct. + */ +#define list_for_each_entry_safe_continue(pos, n, head, member) \ + for (pos = n, n = list_entry(n->member.next, typeof(*n), member); \ + &pos->member != (head); \ + pos = n, n = list_entry(n->member.next, typeof(*n), member)) + /** * list_for_each_rcu - iterate over an rcu-protected list * @pos: the &struct list_head to use as a loop counter. -- cgit v1.2.3 From fbcd923c3e0c8ec9e4ed64f5a4e5766807b32729 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:22:10 -0700 Subject: [NETFILTER]: add correct bridging support to nfnetlink_{queue,log} This patch adds support for passing the real 'physical' device ifindex down to userspace via nfnetlink_log and nfnetlink_queue. This feature basically obsoletes net/bridge/netfilter/ebt_ulog.c, and it is likely ebt_ulog.c will die with one of the next couple of patches. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink_log.h | 2 ++ include/linux/netfilter/nfnetlink_queue.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index 420ff4625cb..a61836a083e 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -40,6 +40,8 @@ enum nfulnl_attr_type { NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */ NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */ NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ + NFULA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ NFULA_HWADDR, /* nfulnl_msg_packet_hw */ NFULA_PAYLOAD, /* opaque data payload */ NFULA_PREFIX, /* string prefix */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index e142b0ff7c0..2d8d2b2cfca 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -36,6 +36,8 @@ enum 
nfqnl_attr_type { NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ + NFQA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ NFQA_HWADDR, /* nfqnl_msg_packet_hw */ NFQA_PAYLOAD, /* opaque data payload */ -- cgit v1.2.3 From bbd86b9fc469b7e91dc7444e6abb8930811d79cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:23:11 -0700 Subject: [NETFILTER]: add /proc/net/netfilter interface to nf_queue This patch adds a /proc/net/netfilter/nf_queue file, similar to the recently-added /proc/net/netfilter/nf_log. It indicates which queue handler is registered to which protocol family. This is useful since there are now multiple queue handlers in the tree (ip[6]_queue, nfnetlink_queue). Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 815583af06c..bf430fcbe36 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -225,13 +225,16 @@ int nf_getsockopt(struct sock *sk, int pf, int optval, char __user *opt, int *len); /* Packet queuing */ -typedef int (*nf_queue_outfn_t)(struct sk_buff *skb, - struct nf_info *info, - unsigned int queuenum, void *data); +struct nf_queue_handler { + int (*outfn)(struct sk_buff *skb, struct nf_info *info, + unsigned int queuenum, void *data); + void *data; + char *name; +}; extern int nf_register_queue_handler(int pf, - nf_queue_outfn_t outfn, void *data); + struct nf_queue_handler *qh); extern int nf_unregister_queue_handler(int pf); -extern void nf_unregister_queue_handlers(nf_queue_outfn_t outfn); +extern void nf_unregister_queue_handlers(struct nf_queue_handler *qh); extern void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict); -- cgit v1.2.3 From 
8a61fadb3908454ccfa538aaa75eb1d22def5700 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:23:53 -0700 Subject: [NETFILTER]: check nf_log function call arguments Check whether pf is too large in order to prevent array overflow. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index bf430fcbe36..ac3c61411d4 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -157,7 +157,7 @@ struct nf_logger { /* Function to register/unregister log function. */ int nf_log_register(int pf, struct nf_logger *logger); -void nf_log_unregister_pf(int pf); +int nf_log_unregister_pf(int pf); void nf_log_unregister_logger(struct nf_logger *logger); /* Calls the registered backend logging function */ -- cgit v1.2.3 From 5a47a470e602eecb168ddd3b78841b84ceddd319 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:26:03 -0700 Subject: [DCCP]: make <linux/dccp.h> include-able from userspace The protocol header files in <linux/> are usually structured in a way to be included by userspace code. The top section consists of general protocol structure definitions, typedefs, enums - followed by an #ifdef __KERNEL__ section. Currently <linux/dccp.h> doesn't follow that convention and can therefore not be used from userspace. However, for example iptables' libipt_dccp.c actually needs various definitions from there. Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 238 ++++++++++++++++++++++++++------------------------- 1 file changed, 121 insertions(+), 117 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index e3b4bf7346b..add4908b8e5 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -1,16 +1,8 @@ #ifndef _LINUX_DCCP_H #define _LINUX_DCCP_H -#include -#include #include -#include -#include - -#include -#include -#include -#include +#include /* FIXME: this is utterly wrong */ struct sockaddr_dccp { @@ -18,40 +10,6 @@ struct sockaddr_dccp { unsigned int service; }; -enum dccp_state { - DCCP_OPEN = TCP_ESTABLISHED, - DCCP_REQUESTING = TCP_SYN_SENT, - DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: - This mapping is horrible, but TCP has - no matching state for DCCP_PARTOPEN, - as TCP_SYN_RECV is already used by - DCCP_RESPOND, why don't stop using TCP - mapping of states? OK, now we don't use - sk_stream_sendmsg anymore, so doesn't - seem to exist any reason for us to - do the TCP mapping here */ - DCCP_LISTEN = TCP_LISTEN, - DCCP_RESPOND = TCP_SYN_RECV, - DCCP_CLOSING = TCP_CLOSING, - DCCP_TIME_WAIT = TCP_TIME_WAIT, - DCCP_CLOSED = TCP_CLOSE, - DCCP_MAX_STATES = TCP_MAX_STATES, -}; - -#define DCCP_STATE_MASK 0xf -#define DCCP_ACTION_FIN (1<<7) - -enum { - DCCPF_OPEN = TCPF_ESTABLISHED, - DCCPF_REQUESTING = TCPF_SYN_SENT, - DCCPF_PARTOPEN = TCPF_FIN_WAIT1, - DCCPF_LISTEN = TCPF_LISTEN, - DCCPF_RESPOND = TCPF_SYN_RECV, - DCCPF_CLOSING = TCPF_CLOSING, - DCCPF_TIME_WAIT = TCPF_TIME_WAIT, - DCCPF_CLOSED = TCPF_CLOSE, -}; - /** * struct dccp_hdr - generic part of DCCP packet header * @@ -94,11 +52,6 @@ struct dccp_hdr { #endif }; -static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) -{ - return (struct dccp_hdr *)skb->h.raw; -} - /** * struct dccp_hdr_ext - the low bits of a 48 bit seq packet * @@ -108,34 +61,6 @@ struct dccp_hdr_ext { __u32 dccph_seq_low; }; -static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) -{ - 
return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); -} - -static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); - return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); -} - -static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr *dh = dccp_hdr(skb); -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u64 seq_nr = ntohl(dh->dccph_seq << 8); -#elif defined(__BIG_ENDIAN_BITFIELD) - __u64 seq_nr = ntohl(dh->dccph_seq); -#else -#error "Adjust your defines" -#endif - - if (dh->dccph_x != 0) - seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); - - return seq_nr; -} - /** * struct dccp_hdr_request - Conection initiation request header * @@ -145,12 +70,6 @@ static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) struct dccp_hdr_request { __u32 dccph_req_service; }; - -static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) -{ - return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - /** * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets * @@ -162,24 +81,6 @@ struct dccp_hdr_ack_bits { dccph_ack_nr_high:24; __u32 dccph_ack_nr_low; }; - -static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) -{ - return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - -static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) -{ - const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); -#if defined(__LITTLE_ENDIAN_BITFIELD) - return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); -#elif defined(__BIG_ENDIAN_BITFIELD) - return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); -#else -#error "Adjust your defines" -#endif -} - /** * struct dccp_hdr_response - Conection initiation response header * @@ -193,11 +94,6 @@ struct dccp_hdr_response { __u32 
dccph_resp_service; }; -static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) -{ - return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - /** * struct dccp_hdr_reset - Unconditionally shut down a connection * @@ -210,11 +106,6 @@ struct dccp_hdr_reset { dccph_reset_data[3]; }; -static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) -{ - return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); -} - enum dccp_pkt_type { DCCP_PKT_REQUEST = 0, DCCP_PKT_RESPONSE, @@ -248,13 +139,6 @@ static inline unsigned int dccp_packet_hdr_len(const __u8 type) return sizeof(struct dccp_hdr_response); return sizeof(struct dccp_hdr_reset); } - -static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) -{ - return dccp_basic_hdr_len(skb) + - dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); -} - enum dccp_reset_codes { DCCP_RESET_CODE_UNSPECIFIED = 0, DCCP_RESET_CODE_CLOSED, @@ -298,6 +182,124 @@ enum { DCCPF_MAX_CCID_SPECIFIC = 255, }; +#ifdef __KERNEL__ + +#include +#include +#include +#include + +#include +#include +#include +#include + +enum dccp_state { + DCCP_OPEN = TCP_ESTABLISHED, + DCCP_REQUESTING = TCP_SYN_SENT, + DCCP_PARTOPEN = TCP_FIN_WAIT1, /* FIXME: + This mapping is horrible, but TCP has + no matching state for DCCP_PARTOPEN, + as TCP_SYN_RECV is already used by + DCCP_RESPOND, why don't stop using TCP + mapping of states? 
OK, now we don't use + sk_stream_sendmsg anymore, so doesn't + seem to exist any reason for us to + do the TCP mapping here */ + DCCP_LISTEN = TCP_LISTEN, + DCCP_RESPOND = TCP_SYN_RECV, + DCCP_CLOSING = TCP_CLOSING, + DCCP_TIME_WAIT = TCP_TIME_WAIT, + DCCP_CLOSED = TCP_CLOSE, + DCCP_MAX_STATES = TCP_MAX_STATES, +}; + +#define DCCP_STATE_MASK 0xf +#define DCCP_ACTION_FIN (1<<7) + +enum { + DCCPF_OPEN = TCPF_ESTABLISHED, + DCCPF_REQUESTING = TCPF_SYN_SENT, + DCCPF_PARTOPEN = TCPF_FIN_WAIT1, + DCCPF_LISTEN = TCPF_LISTEN, + DCCPF_RESPOND = TCPF_SYN_RECV, + DCCPF_CLOSING = TCPF_CLOSING, + DCCPF_TIME_WAIT = TCPF_TIME_WAIT, + DCCPF_CLOSED = TCPF_CLOSE, +}; + +static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) +{ + return (struct dccp_hdr *)skb->h.raw; +} + +static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); +} + +static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); + return sizeof(*dh) + (dh->dccph_x ? 
sizeof(struct dccp_hdr_ext) : 0); +} + +static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr *dh = dccp_hdr(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq << 8); +#elif defined(__BIG_ENDIAN_BITFIELD) + __u64 seq_nr = ntohl(dh->dccph_seq); +#else +#error "Adjust your defines" +#endif + + if (dh->dccph_x != 0) + seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low); + + return seq_nr; +} + +static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) +{ + return (struct dccp_hdr_request *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) +{ + return (struct dccp_hdr_ack_bits *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) +{ + const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); +#if defined(__LITTLE_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#elif defined(__BIG_ENDIAN_BITFIELD) + return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low); +#else +#error "Adjust your defines" +#endif +} + +static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) +{ + return (struct dccp_hdr_response *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) +{ + return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); +} + +static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) +{ + return dccp_basic_hdr_len(skb) + + dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); +} + + /* initial values for each feature */ #define DCCPF_INITIAL_SEQUENCE_WINDOW 100 /* FIXME: for now we're using CCID 3 (TFRC) */ @@ -429,4 +431,6 @@ static inline const char *dccp_role(const struct sock *sk) return NULL; } +#endif /* __KERNEL__ */ + #endif /* 
_LINUX_DCCP_H */ -- cgit v1.2.3 From e2e268665f6c01686b477a6b0cc5a70bab689d54 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:26:28 -0700 Subject: [DCCP]: Fix struct sockaddr_dccp definition Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index add4908b8e5..fd1412ddb3f 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -4,10 +4,14 @@ #include #include -/* FIXME: this is utterly wrong */ +/* Structure describing an Internet (DCCP) socket address. */ struct sockaddr_dccp { - struct sockaddr_in in; - unsigned int service; + __u16 sdccp_family; /* Address family */ + __u16 sdccp_port; /* Port number */ + __u32 sdccp_addr; /* Internet address */ + __u32 sdccp_service; /* Service */ + /* Pad to size of `struct sockaddr': 16 bytes . */ + __u32 sdccp_pad; }; /** -- cgit v1.2.3 From 1d3de414eb20d937d82c5219fd13ee4cedc499cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Tue, 9 Aug 2005 20:26:55 -0700 Subject: [NETFILTER]: New iptables DCCP protocol header match Using this new iptables DCCP protocol header match, it is possible to create simplistic stateless packet filtering rules for DCCP. It permits matching of port numbers, packet type and options. Signed-off-by: Harald Welte Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 16 +++++++++++++--- include/linux/netfilter_ipv4/ipt_dccp.h | 23 +++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 include/linux/netfilter_ipv4/ipt_dccp.h (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index fd1412ddb3f..431d58923ba 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -242,10 +242,15 @@ static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb) return (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr)); } +static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) +{ + return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); +} + static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); - return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); + return __dccp_basic_hdr_len(dh); } static inline __u64 dccp_hdr_seq(const struct sk_buff *skb) @@ -297,10 +302,15 @@ static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) return (struct dccp_hdr_reset *)(skb->h.raw + dccp_basic_hdr_len(skb)); } +static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) +{ + return __dccp_basic_hdr_len(dh) + + dccp_packet_hdr_len(dh->dccph_type); +} + static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) { - return dccp_basic_hdr_len(skb) + - dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type); + return __dccp_hdr_len(dccp_hdr(skb)); } diff --git a/include/linux/netfilter_ipv4/ipt_dccp.h b/include/linux/netfilter_ipv4/ipt_dccp.h new file mode 100644 index 00000000000..3cb3a522e62 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_dccp.h @@ -0,0 +1,23 @@ +#ifndef _IPT_DCCP_H_ +#define _IPT_DCCP_H_ + +#define IPT_DCCP_SRC_PORTS 0x01 +#define IPT_DCCP_DEST_PORTS 0x02 +#define IPT_DCCP_TYPE 0x04 +#define IPT_DCCP_OPTION 0x08 + +#define IPT_DCCP_VALID_FLAGS 0x0f + +struct ipt_dccp_info { + 
u_int16_t dpts[2]; /* Min, Max */ + u_int16_t spts[2]; /* Min, Max */ + + u_int16_t flags; + u_int16_t invflags; + + u_int16_t typemask; + u_int8_t option; +}; + +#endif /* _IPT_DCCP_H_ */ + -- cgit v1.2.3 From 45bb4bf08b9c16122af84d3f26a018c8022b24e5 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Aug 2005 20:27:49 -0700 Subject: [Bluetooth]: Workaround for inquiry results with RSSI and page scan mode This patch implements a workaround for buggy Bluetooth 1.2 devices from Silicon Wave. Their inquiry results with RSSI contain the page scan mode field. This field was removed in the final Bluetooth 1.2 specification. Signed-off-by: Marcel Holtmann Signed-off-by: David S. Miller --- include/net/bluetooth/hci.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 6f0706f4af6..cd075f19798 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -453,6 +453,15 @@ struct inquiry_info_with_rssi { __u16 clock_offset; __s8 rssi; } __attribute__ ((packed)); +struct inquiry_info_with_rssi_and_pscan_mode { + bdaddr_t bdaddr; + __u8 pscan_rep_mode; + __u8 pscan_period_mode; + __u8 pscan_mode; + __u8 dev_class[3]; + __u16 clock_offset; + __s8 rssi; +} __attribute__ ((packed)); #define HCI_EV_CONN_COMPLETE 0x03 struct hci_ev_conn_complete { -- cgit v1.2.3 From 85a1e930bf628700e8e9c166b1f5c1c26d3651cc Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Aug 2005 20:28:02 -0700 Subject: [Bluetooth]: Track page scan repetition mode changes The HCI page scan repetition mode change event contains the actual page scan repetition mode for the remote device. It is the same value that is received from an inquiry response and it can be used to make further reconnections faster. Signed-off-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- include/net/bluetooth/hci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index cd075f19798..371e7d3f2e6 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -593,6 +593,12 @@ struct hci_ev_clock_offset { __u16 clock_offset; } __attribute__ ((packed)); +#define HCI_EV_PSCAN_REP_MODE 0x20 +struct hci_ev_pscan_rep_mode { + bdaddr_t bdaddr; + __u8 pscan_rep_mode; +} __attribute__ ((packed)); + /* Internal events generated by Bluetooth stack */ #define HCI_EV_STACK_INTERNAL 0xFD struct hci_ev_stack_internal { -- cgit v1.2.3 From 3a5e903c09aed19ca4a1bb26d87b8d6461a93818 Mon Sep 17 00:00:00 2001 From: "J. Suter" Date: Tue, 9 Aug 2005 20:28:46 -0700 Subject: [Bluetooth]: Implement RFCOMM remote port negotiation This patch implements the remote port negotiation (RPN) of the RFCOMM protocol for Bluetooth. Signed-off-by: J. Suter Signed-off-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- include/net/bluetooth/rfcomm.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 13669bad00b..3768823d5fd 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -80,9 +80,9 @@ #define RFCOMM_RPN_STOP_15 1 #define RFCOMM_RPN_PARITY_NONE 0x0 -#define RFCOMM_RPN_PARITY_ODD 0x4 -#define RFCOMM_RPN_PARITY_EVEN 0x5 -#define RFCOMM_RPN_PARITY_MARK 0x6 +#define RFCOMM_RPN_PARITY_ODD 0x1 +#define RFCOMM_RPN_PARITY_EVEN 0x3 +#define RFCOMM_RPN_PARITY_MARK 0x5 #define RFCOMM_RPN_PARITY_SPACE 0x7 #define RFCOMM_RPN_FLOW_NONE 0x00 @@ -223,6 +223,12 @@ struct rfcomm_dlc { #define RFCOMM_CFC_DISABLED 0 #define RFCOMM_CFC_ENABLED RFCOMM_MAX_CREDITS +/* ---- RFCOMM SEND RPN ---- */ +int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, + u8 bit_rate, u8 data_bits, u8 stop_bits, + u8 parity, u8 flow_ctrl_settings, + u8 xon_char, u8 xoff_char, u16 param_mask); + /* ---- RFCOMM DLCs (channels) ---- */ struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); void rfcomm_dlc_free(struct rfcomm_dlc *d); -- cgit v1.2.3 From 2eb25a6c34504254760e67172f7518d6bfdd7676 Mon Sep 17 00:00:00 2001 From: Victor Fusco Date: Tue, 9 Aug 2005 20:29:11 -0700 Subject: [Bluetooth]: Fix sparse warnings (__nocast type) This patch fixes the sparse warnings "implicit cast to nocast type" for the priority or gfp_mask parameters of the memory allocations. Signed-off-by: Victor Fusco Signed-off-by: Domen Puncer Signed-off-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- include/net/bluetooth/bluetooth.h | 2 +- include/net/bluetooth/rfcomm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 06b24f63702..5309b6fa669 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -135,7 +135,7 @@ struct bt_skb_cb { }; #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) -static inline struct sk_buff *bt_skb_alloc(unsigned int len, int how) +static inline struct sk_buff *bt_skb_alloc(unsigned int len, unsigned int __nocast how) { struct sk_buff *skb; diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 3768823d5fd..ffea9d54071 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -230,7 +230,7 @@ int rfcomm_send_rpn(struct rfcomm_session *s, int cr, u8 dlci, u8 xon_char, u8 xoff_char, u16 param_mask); /* ---- RFCOMM DLCs (channels) ---- */ -struct rfcomm_dlc *rfcomm_dlc_alloc(int prio); +struct rfcomm_dlc *rfcomm_dlc_alloc(unsigned int __nocast prio); void rfcomm_dlc_free(struct rfcomm_dlc *d); int rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, u8 channel); int rfcomm_dlc_close(struct rfcomm_dlc *d, int reason); -- cgit v1.2.3 From 0d48d93947dd9ea21c5cdc76a8581b06a4a39281 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 9 Aug 2005 20:30:28 -0700 Subject: [Bluetooth]: Move packet type into the SKB control buffer This patch moves the usage of packet type into the SKB control buffer. After this patch it is now possible to shrink the sk_buff structure and redefine its pkt_type. Signed-off-by: Marcel Holtmann Signed-off-by: David S. 
Miller --- include/net/bluetooth/bluetooth.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 5309b6fa669..6dfa4a61ffd 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -131,7 +131,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock); /* Skb helpers */ struct bt_skb_cb { - int incoming; + __u8 pkt_type; + __u8 incoming; }; #define bt_cb(skb) ((struct bt_skb_cb *)(skb->cb)) -- cgit v1.2.3 From 295ff7edb8f72b77d524759266f7524deae379b3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:44:40 -0700 Subject: [TIMEWAIT]: Introduce inet_timewait_death_row That groups all of the tables and variables associated to the TCP timewait scheduling/recycling/killing code, that now can be isolated from the TCP specific code and used by other transport protocols, such as DCCP. Next changeset will move this code to net/ipv4/inet_timewait_sock.c Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 57 +++++++++++++++++++++++++++++++++++++++- include/net/tcp.h | 36 ++++--------------------- 2 files changed, 61 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index e00861b1669..a7e8052e2fb 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -19,13 +19,69 @@ #include #include +#include #include +#include #include #include #include +struct inet_hashinfo; + +#define INET_TWDR_RECYCLE_SLOTS_LOG 5 +#define INET_TWDR_RECYCLE_SLOTS (1 << INET_TWDR_RECYCLE_SLOTS_LOG) + +/* + * If time > 4sec, it is "slow" path, no recycling is required, + * so that we select tick to get range about 4 seconds. 
+ */ +#if HZ <= 16 || HZ > 4096 +# error Unsupported: HZ <= 16 or HZ > 4096 +#elif HZ <= 32 +# define INET_TWDR_RECYCLE_TICK (5 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 64 +# define INET_TWDR_RECYCLE_TICK (6 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 128 +# define INET_TWDR_RECYCLE_TICK (7 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 256 +# define INET_TWDR_RECYCLE_TICK (8 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 512 +# define INET_TWDR_RECYCLE_TICK (9 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 1024 +# define INET_TWDR_RECYCLE_TICK (10 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#elif HZ <= 2048 +# define INET_TWDR_RECYCLE_TICK (11 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#else +# define INET_TWDR_RECYCLE_TICK (12 + 2 - INET_TWDR_RECYCLE_SLOTS_LOG) +#endif + +/* TIME_WAIT reaping mechanism. */ +#define INET_TWDR_TWKILL_SLOTS 8 /* Please keep this a power of 2. */ + +#define INET_TWDR_TWKILL_QUOTA 100 + +struct inet_timewait_death_row { + /* Short-time timewait calendar */ + int twcal_hand; + int twcal_jiffie; + struct timer_list twcal_timer; + struct hlist_head twcal_row[INET_TWDR_RECYCLE_SLOTS]; + + spinlock_t death_lock; + int tw_count; + int period; + u32 thread_slots; + struct work_struct twkill_work; + struct timer_list tw_timer; + int slot; + struct hlist_head cells[INET_TWDR_TWKILL_SLOTS]; + struct inet_hashinfo *hashinfo; + int sysctl_tw_recycle; + int sysctl_max_tw_buckets; +}; + #if (BITS_PER_LONG == 64) #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 #else @@ -33,7 +89,6 @@ #endif struct inet_bind_bucket; -struct inet_hashinfo; /* * This is a TIME_WAIT sock. 
It works around the memory consumption diff --git a/include/net/tcp.h b/include/net/tcp.h index 077db859ae0..4c4cd4fb1ed 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -42,9 +43,9 @@ extern struct inet_hashinfo tcp_hashinfo; extern atomic_t tcp_orphan_count; -extern int tcp_tw_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); +extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr); #define MAX_TCP_HEADER (128 + MAX_HEADER) @@ -148,33 +149,6 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); * timestamps. It must be less than * minimal timewait lifetime. */ - -#define TCP_TW_RECYCLE_SLOTS_LOG 5 -#define TCP_TW_RECYCLE_SLOTS (1< 4sec, it is "slow" path, no recycling is required, - so that we select tick to get range about 4 seconds. - */ - -#if HZ <= 16 || HZ > 4096 -# error Unsupported: HZ <= 16 or HZ > 4096 -#elif HZ <= 32 -# define TCP_TW_RECYCLE_TICK (5+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 64 -# define TCP_TW_RECYCLE_TICK (6+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 128 -# define TCP_TW_RECYCLE_TICK (7+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 256 -# define TCP_TW_RECYCLE_TICK (8+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 512 -# define TCP_TW_RECYCLE_TICK (9+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 1024 -# define TCP_TW_RECYCLE_TICK (10+2-TCP_TW_RECYCLE_SLOTS_LOG) -#elif HZ <= 2048 -# define TCP_TW_RECYCLE_TICK (11+2-TCP_TW_RECYCLE_SLOTS_LOG) -#else -# define TCP_TW_RECYCLE_TICK (12+2-TCP_TW_RECYCLE_SLOTS_LOG) -#endif /* * TCP option */ @@ -209,12 +183,13 @@ extern void tcp_tw_deschedule(struct inet_timewait_sock *tw); #define TCP_NAGLE_CORK 2 /* Socket is corked */ #define TCP_NAGLE_PUSH 4 /* Cork is overriden for already queued data */ +extern struct inet_timewait_death_row tcp_death_row; + /* sysctl variables for tcp */ extern int 
sysctl_tcp_timestamps; extern int sysctl_tcp_window_scaling; extern int sysctl_tcp_sack; extern int sysctl_tcp_fin_timeout; -extern int sysctl_tcp_tw_recycle; extern int sysctl_tcp_keepalive_time; extern int sysctl_tcp_keepalive_probes; extern int sysctl_tcp_keepalive_intvl; @@ -229,7 +204,6 @@ extern int sysctl_tcp_stdurg; extern int sysctl_tcp_rfc1337; extern int sysctl_tcp_abort_on_overflow; extern int sysctl_tcp_max_orphans; -extern int sysctl_tcp_max_tw_buckets; extern int sysctl_tcp_fack; extern int sysctl_tcp_reordering; extern int sysctl_tcp_ecn; -- cgit v1.2.3 From 696ab2d3bffc746fb8cf3712f066d42b9886aeed Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:45:03 -0700 Subject: [TIMEWAIT]: Move inet_timewait_death_row routines to net/ipv4/inet_timewait_sock.c Also export the ones that will be used in the next changeset, when DCCP uses this infrastructure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 10 ++++++++++ include/net/tcp.h | 2 -- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index a7e8052e2fb..3b070352e86 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -82,6 +82,10 @@ struct inet_timewait_death_row { int sysctl_max_tw_buckets; }; +extern void inet_twdr_hangman(unsigned long data); +extern void inet_twdr_twkill_work(void *data); +extern void inet_twdr_twcal_tick(unsigned long data); + #if (BITS_PER_LONG == 64) #define INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES 8 #else @@ -206,4 +210,10 @@ extern void __inet_twsk_kill(struct inet_timewait_sock *tw, extern void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, struct inet_hashinfo *hashinfo); + +extern void inet_twsk_schedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr, + const int timeo, const int timewait_len); +extern void 
inet_twsk_deschedule(struct inet_timewait_sock *tw, + struct inet_timewait_death_row *twdr); #endif /* _INET_TIMEWAIT_SOCK_ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 4c4cd4fb1ed..d489ac548e4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -44,8 +44,6 @@ extern struct inet_hashinfo tcp_hashinfo; extern atomic_t tcp_orphan_count; extern void tcp_time_wait(struct sock *sk, int state, int timeo); -extern void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr); #define MAX_TCP_HEADER (128 + MAX_HEADER) -- cgit v1.2.3 From 64cf1e5d8b5f88d56509260e08fa0d8314277350 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Aug 2005 20:45:21 -0700 Subject: [DCCP]: Finish the TIMEWAIT minisock support Using most of the infrastructure TCP uses, with a dccp_death_row, etc. As per my current interpretation of the draft what we have with this changeset seems to be all we need (or very close to it 8)). Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 431d58923ba..3dccdd5108b 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -194,6 +194,7 @@ enum { #include #include +#include #include #include #include @@ -354,6 +355,8 @@ static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) return (struct dccp_request_sock *)req; } +extern struct inet_timewait_death_row dccp_death_row; + /* Read about the ECN nonce to see why it is 253 */ #define DCCP_MAX_ACK_VECTOR_LEN 253 -- cgit v1.2.3 From 64ce207306debd7157f47282be94770407bec01c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 9 Aug 2005 20:50:53 -0700 Subject: [NET]: Make NETDEBUG pure printk wrappers Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/net/sock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 8678313a22b..065df67b642 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1316,11 +1316,11 @@ extern int sock_get_timestamp(struct sock *, struct timeval __user *); */ #if 0 -#define NETDEBUG(x) do { } while (0) -#define LIMIT_NETDEBUG(x) do {} while(0) +#define NETDEBUG(fmt, args...) do { } while (0) +#define LIMIT_NETDEBUG(fmt, args...) do { } while(0) #else -#define NETDEBUG(x) do { x; } while (0) -#define LIMIT_NETDEBUG(x) do { if (net_ratelimit()) { x; } } while(0) +#define NETDEBUG(fmt, args...) printk(fmt,##args) +#define LIMIT_NETDEBUG(fmt, args...) do { if (net_ratelimit()) printk(fmt,##args); } while(0) #endif /* -- cgit v1.2.3 From 6687e988d9aeaccad6774e6a8304f681f3ec0a03 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 04:03:31 -0300 Subject: [ICSK]: Move TCP congestion avoidance members to icsk This changeset basically moves tcp_sk()->{ca_ops,ca_state,etc} to inet_csk(), minimal renaming/moving done in this changeset to ease review. Most of it is just changes of struct tcp_sock * to struct sock * parameters. With this we move to a state closer to two interesting goals: 1. Generalisation of net/ipv4/tcp_diag.c, becoming inet_diag.c, being used for any INET transport protocol that has struct inet_hashinfo and are derived from struct inet_connection_sock. Keeps the userspace API, that will just not display DCCP sockets, while newer versions of tools can support DCCP. 2. INET generic transport pluggable Congestion Avoidance infrastructure, using the current TCP CA infrastructure with DCCP. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 21 ++--------- include/net/inet_connection_sock.h | 15 ++++++++ include/net/tcp.h | 74 +++++++++++++++++++++----------------- 3 files changed, 60 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 62009684074..ac4ca44c75c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -258,19 +258,15 @@ struct tcp_sock { __u32 mss_cache; /* Cached effective mss, not including SACKS */ __u16 xmit_size_goal; /* Goal for segmenting output packets */ __u16 ext_header_len; /* Network protocol overhead (IP/IPv6 options) */ - __u8 ca_state; /* State of fast-retransmit machine */ - __u8 keepalive_probes; /* num of allowed keep alive probes */ - __u16 advmss; /* Advertised MSS */ __u32 window_clamp; /* Maximal window to advertise */ __u32 rcv_ssthresh; /* Current window clamp */ __u32 frto_highmark; /* snd_nxt when RTO occurred */ __u8 reordering; /* Packet reordering metric. */ __u8 frto_counter; /* Number of new acks after RTO */ - __u8 nonagle; /* Disable Nagle algorithm? */ - /* ONE BYTE HOLE, TRY TO PACK */ + __u8 keepalive_probes; /* num of allowed keep alive probes */ /* RTT measurement */ __u32 srtt; /* smoothed round trip time << 3 */ @@ -311,8 +307,7 @@ struct tcp_sock { struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ struct tcp_sack_block selective_acks[4]; /* The SACKS themselves*/ - __u8 probes_out; /* unanswered 0 window probes */ - __u8 ecn_flags; /* ECN status bits. */ + __u16 advmss; /* Advertised MSS */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ __u32 lost_out; /* Lost packets */ __u32 sacked_out; /* SACK'd packets */ @@ -327,7 +322,7 @@ struct tcp_sock { __u32 urg_seq; /* Seq of received urgent pointer */ __u16 urg_data; /* Saved octet of OOB data and control flags */ __u8 urg_mode; /* In urgent mode */ - /* ONE BYTE HOLE, TRY TO PACK! */ + __u8 ecn_flags; /* ECN status bits. 
*/ __u32 snd_up; /* Urgent pointer */ __u32 total_retrans; /* Total retransmits for entire connection */ @@ -351,11 +346,6 @@ struct tcp_sock { __u32 seq; __u32 time; } rcvq_space; - - /* Pluggable TCP congestion control hook */ - struct tcp_congestion_ops *ca_ops; - u32 ca_priv[16]; -#define TCP_CA_PRIV_SIZE (16*sizeof(u32)) }; static inline struct tcp_sock *tcp_sk(const struct sock *sk) @@ -377,11 +367,6 @@ static inline struct tcp_timewait_sock *tcp_twsk(const struct sock *sk) return (struct tcp_timewait_sock *)sk; } -static inline void *tcp_ca(const struct tcp_sock *tp) -{ - return (void *) tp->ca_priv; -} - #endif #endif /* _LINUX_TCP_H */ diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index bec19d5cff2..4d7e708c07d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -27,6 +27,7 @@ struct inet_bind_bucket; struct inet_hashinfo; +struct tcp_congestion_ops; /** inet_connection_sock - INET connection oriented sock * @@ -35,10 +36,13 @@ struct inet_hashinfo; * @icsk_timeout: Timeout * @icsk_retransmit_timer: Resend (no ack) * @icsk_rto: Retransmit timeout + * @icsk_ca_ops Pluggable congestion control hook + * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts * @icsk_pending: Scheduled timer event * @icsk_backoff: Backoff * @icsk_syn_retries: Number of allowed SYN (or equivalent) retries + * @icsk_probes_out: unanswered 0 window probes * @icsk_ack: Delayed ACK control data */ struct inet_connection_sock { @@ -50,10 +54,14 @@ struct inet_connection_sock { struct timer_list icsk_retransmit_timer; struct timer_list icsk_delack_timer; __u32 icsk_rto; + struct tcp_congestion_ops *icsk_ca_ops; + __u8 icsk_ca_state; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; __u8 icsk_syn_retries; + __u8 icsk_probes_out; + /* 2 BYTES HOLE, TRY TO PACK! 
*/ struct { __u8 pending; /* ACK is pending */ __u8 quick; /* Scheduled number of quick acks */ @@ -65,6 +73,8 @@ struct inet_connection_sock { __u16 last_seg_size; /* Size of last incoming segment */ __u16 rcv_mss; /* MSS used for delayed ACK decisions */ } icsk_ack; + u32 icsk_ca_priv[16]; +#define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ @@ -77,6 +87,11 @@ static inline struct inet_connection_sock *inet_csk(const struct sock *sk) return (struct inet_connection_sock *)sk; } +static inline void *inet_csk_ca(const struct sock *sk) +{ + return (void *)inet_csk(sk)->icsk_ca_priv; +} + extern struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, const unsigned int __nocast priority); diff --git a/include/net/tcp.h b/include/net/tcp.h index d489ac548e4..0b3f7294c5c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -669,29 +669,29 @@ struct tcp_congestion_ops { struct list_head list; /* initialize private data (optional) */ - void (*init)(struct tcp_sock *tp); + void (*init)(struct sock *sk); /* cleanup private data (optional) */ - void (*release)(struct tcp_sock *tp); + void (*release)(struct sock *sk); /* return slow start threshold (required) */ - u32 (*ssthresh)(struct tcp_sock *tp); + u32 (*ssthresh)(struct sock *sk); /* lower bound for congestion window (optional) */ - u32 (*min_cwnd)(struct tcp_sock *tp); + u32 (*min_cwnd)(struct sock *sk); /* do new cwnd calculation (required) */ - void (*cong_avoid)(struct tcp_sock *tp, u32 ack, + void (*cong_avoid)(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int good_ack); /* round trip time sample per acked packet (optional) */ - void (*rtt_sample)(struct tcp_sock *tp, u32 usrtt); + void (*rtt_sample)(struct sock *sk, u32 usrtt); /* call before changing ca_state (optional) */ - void (*set_state)(struct tcp_sock *tp, u8 new_state); + void (*set_state)(struct sock *sk, u8 new_state); /* call when cwnd event occurs (optional) */ - void 
(*cwnd_event)(struct tcp_sock *tp, enum tcp_ca_event ev); + void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev); /* new value of cwnd after loss (optional) */ - u32 (*undo_cwnd)(struct tcp_sock *tp); + u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ - void (*pkts_acked)(struct tcp_sock *tp, u32 num_acked); + void (*pkts_acked)(struct sock *sk, u32 num_acked); /* get info for tcp_diag (optional) */ - void (*get_info)(struct tcp_sock *tp, u32 ext, struct sk_buff *skb); + void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; struct module *owner; @@ -700,30 +700,34 @@ struct tcp_congestion_ops { extern int tcp_register_congestion_control(struct tcp_congestion_ops *type); extern void tcp_unregister_congestion_control(struct tcp_congestion_ops *type); -extern void tcp_init_congestion_control(struct tcp_sock *tp); -extern void tcp_cleanup_congestion_control(struct tcp_sock *tp); +extern void tcp_init_congestion_control(struct sock *sk); +extern void tcp_cleanup_congestion_control(struct sock *sk); extern int tcp_set_default_congestion_control(const char *name); extern void tcp_get_default_congestion_control(char *name); -extern int tcp_set_congestion_control(struct tcp_sock *tp, const char *name); +extern int tcp_set_congestion_control(struct sock *sk, const char *name); extern struct tcp_congestion_ops tcp_init_congestion_ops; -extern u32 tcp_reno_ssthresh(struct tcp_sock *tp); -extern void tcp_reno_cong_avoid(struct tcp_sock *tp, u32 ack, +extern u32 tcp_reno_ssthresh(struct sock *sk); +extern void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 rtt, u32 in_flight, int flag); -extern u32 tcp_reno_min_cwnd(struct tcp_sock *tp); +extern u32 tcp_reno_min_cwnd(struct sock *sk); extern struct tcp_congestion_ops tcp_reno; -static inline void tcp_set_ca_state(struct tcp_sock *tp, u8 ca_state) +static inline void tcp_set_ca_state(struct sock *sk, const u8 ca_state) { - if (tp->ca_ops->set_state) 
- tp->ca_ops->set_state(tp, ca_state); - tp->ca_state = ca_state; + struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->set_state) + icsk->icsk_ca_ops->set_state(sk, ca_state); + icsk->icsk_ca_state = ca_state; } -static inline void tcp_ca_event(struct tcp_sock *tp, enum tcp_ca_event event) +static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) { - if (tp->ca_ops->cwnd_event) - tp->ca_ops->cwnd_event(tp, event); + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cwnd_event) + icsk->icsk_ca_ops->cwnd_event(sk, event); } /* This determines how many packets are "in the network" to the best @@ -749,9 +753,10 @@ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) * The exception is rate halving phase, when cwnd is decreasing towards * ssthresh. */ -static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) +static inline __u32 tcp_current_ssthresh(const struct sock *sk) { - if ((1<<tp->ca_state)&(TCPF_CA_CWR|TCPF_CA_Recovery)) + const struct tcp_sock *tp = tcp_sk(sk); + if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) return tp->snd_ssthresh; else return max(tp->snd_ssthresh, @@ -768,10 +773,13 @@ static inline void tcp_sync_left_out(struct tcp_sock *tp) } /* Set slow start threshold and cwnd not falling to slow start */ -static inline void __tcp_enter_cwr(struct tcp_sock *tp) +static inline void __tcp_enter_cwr(struct sock *sk) { + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + tp->undo_marker = 0; - tp->snd_ssthresh = tp->ca_ops->ssthresh(tp); + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1U); tp->snd_cwnd_cnt = 0; @@ -780,12 +788,14 @@ static inline void __tcp_enter_cwr(struct tcp_sock *tp) TCP_ECN_queue_cwr(tp); } -static inline void tcp_enter_cwr(struct tcp_sock *tp) +static inline void tcp_enter_cwr(struct sock *sk)
{ + struct tcp_sock *tp = tcp_sk(sk); + tp->prior_ssthresh = 0; - if (tp->ca_state < TCP_CA_CWR) { - __tcp_enter_cwr(tp); - tcp_set_ca_state(tp, TCP_CA_CWR); + if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { + __tcp_enter_cwr(sk); + tcp_set_ca_state(sk, TCP_CA_CWR); } } -- cgit v1.2.3 From 540722ffc3a0d7e11d97a13e1ce6f3bc23b061c1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 05:54:28 -0300 Subject: [TCPDIAG]: Implement cheapest way of supporting DCCPDIAG_GETSOCK With ugly ifdefs, etc, but this actually: 1. keeps the existing ABI, i.e. no need to recompile the iproute2 utilities if not interested in DCCP. 2. Provides all the tcp_diag functionality in DCCP, with just a small patch that makes iproute2 support DCCP. Of course I'll get this cleaned-up in time, but for now I think its OK to be this way to quickly get this functionality. iproute2-ss050808 patch at: http://vger.kernel.org/~acme/iproute2-ss050808.dccp.patch Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/tcp_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 7a599674394..190494ebcfb 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -3,6 +3,7 @@ /* Just some random number */ #define TCPDIAG_GETSOCK 18 +#define DCCPDIAG_GETSOCK 19 /* Socket identity */ struct tcpdiag_sockid -- cgit v1.2.3 From 8c60f3fab55712f23f2bd557ceedfbb00c649f37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Aug 2005 12:59:38 -0300 Subject: [CCID3]: Separate most of the packet history code This also changes the list_for_each_entry_safe_continue behaviour to match its kerneldoc comment, that is, to start after the pos passed. Also adds several helper functions from previously open coded fragments, making the code more clear. 
Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/list.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 597094e0fdb..0f2435f92db 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -427,7 +427,8 @@ static inline void list_splice_init(struct list_head *list, * @member: the name of the list_struct within the struct. */ #define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = n, n = list_entry(n->member.next, typeof(*n), member); \ + for (pos = list_entry(pos->member.next, typeof(*pos), member), \ + n = list_entry(pos->member.next, typeof(*pos), member); \ &pos->member != (head); \ pos = n, n = list_entry(n->member.next, typeof(*n), member)) -- cgit v1.2.3 From 5917ed961def82a4dba9198d11a75f79d115a8cb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Thu, 11 Aug 2005 15:31:15 -0700 Subject: [NETFILTER]: Fix NF_QUEUE_NR() macro I obviously wanted to use bitwise-or, not logical or. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ac3c61411d4..189ba67ba60 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -29,7 +29,7 @@ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK || NF_QUEUE) +#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK | NF_QUEUE) /* only for userspace compatibility */ #ifndef __KERNEL__ -- cgit v1.2.3 From 0a242efc4fb859b2da506cdf8f3366231602e4ff Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Thu, 11 Aug 2005 15:32:53 -0700 Subject: [NET]: Deinline netif_carrier_{on,off}(). 
# grep -r 'netif_carrier_o[nf]' linux-2.6.12 | wc -l 246 # size vmlinux.org vmlinux.carrier text data bss dec hex filename 4339634 1054414 259296 5653344 564360 vmlinux.org 4337710 1054414 259296 5651420 563bdc vmlinux.carrier And this ain't an allyesconfig kernel! Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 296cf93a65e..d8e52edfd52 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -699,19 +699,9 @@ static inline int netif_carrier_ok(const struct net_device *dev) extern void __netdev_watchdog_up(struct net_device *dev); -static inline void netif_carrier_on(struct net_device *dev) -{ - if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) - linkwatch_fire_event(dev); - if (netif_running(dev)) - __netdev_watchdog_up(dev); -} +extern void netif_carrier_on(struct net_device *dev); -static inline void netif_carrier_off(struct net_device *dev) -{ - if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) - linkwatch_fire_event(dev); -} +extern void netif_carrier_off(struct net_device *dev); /* Hot-plugging. */ static inline int netif_device_present(struct net_device *dev) -- cgit v1.2.3 From b766b305d3f2d8be173e5d9853534ea1afdbabba Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Fri, 12 Aug 2005 11:36:44 -0700 Subject: [NETFILTER]: Fix gcc-3.4.x warning about implicit operator precedence Fix gcc-3.4.x warning about implicit operator precedence in NF_QUEUE_NR() Signed-off-by: Harald Welte Signed-off-by: David S.
Miller --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 189ba67ba60..be365e70ee9 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -29,7 +29,7 @@ #define NF_VERDICT_QMASK 0xffff0000 #define NF_VERDICT_QBITS 16 -#define NF_QUEUE_NR(x) ((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK | NF_QUEUE) +#define NF_QUEUE_NR(x) (((x << NF_VERDICT_QBITS) & NF_VERDICT_QMASK) | NF_QUEUE) /* only for userspace compatibility */ #ifndef __KERNEL__ -- cgit v1.2.3 From 505cbfc577f3fa778005e2800b869eca25727d5f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:19:38 -0300 Subject: [IPV6]: Generalise the tcp_v6_lookup routines In the same way as was done with the v4 counterparts, this will be moved to inet6_hashtables.c. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/ipv6.h | 5 +++++ include/net/inet6_hashtables.h | 26 ++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 include/net/inet6_hashtables.h (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 777339b6846..3c7dbc6a0a7 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -193,6 +193,11 @@ struct inet6_skb_parm { #define IP6CB(skb) ((struct inet6_skb_parm*)((skb)->cb)) +static inline int inet6_iif(const struct sk_buff *skb) +{ + return IP6CB(skb)->iif; +} + struct tcp6_request_sock { struct tcp_request_sock req; struct in6_addr loc_addr; diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h new file mode 100644 index 00000000000..297c2b16967 --- /dev/null +++ b/include/net/inet6_hashtables.h @@ -0,0 +1,26 @@ +/* + * INET An implementation of the TCP/IP protocol suite for the LINUX + * operating system. INET is implemented using the BSD Socket + * interface as the means of communication with the user level. 
+ * + * Authors: Lotsa people, from code originally in tcp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _INET6_HASHTABLES_H +#define _INET6_HASHTABLES_H + +#include + +struct in6_addr; +struct inet_hashinfo; + +extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, const u16 sport, + const struct in6_addr *daddr, const u16 dport, + const int dif); +#endif /* _INET6_HASHTABLES_H */ -- cgit v1.2.3 From 5324a040ccc708998e61ea93e669b81312f0ae11 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:26:18 -0300 Subject: [INET6_HASHTABLES]: Move inet6_lookup functions to net/ipv6/inet6_hashtables.c Doing this we allow tcp_diag to support IPV6 even if tcp_diag is compiled statically and IPV6 is compiled as a module, removing the previous restriction while not building any IPV6 code if it is not selected. Now to work on the tcpdiag_register infrastructure and then to rename the whole thing to inetdiag, reflecting its by then completely generic nature. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 106 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 297c2b16967..03df3b15796 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -14,13 +14,117 @@ #ifndef _INET6_HASHTABLES_H #define _INET6_HASHTABLES_H +#include + +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) +#include +#include #include -struct in6_addr; +#include + struct inet_hashinfo; +/* I have no idea if this is a good hash for v6 or not. 
-DaveM */ +static inline int inet6_ehashfn(const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const u16 fport, + const int ehash_size) +{ + int hashent = (lport ^ fport); + + hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]); + hashent ^= hashent >> 16; + hashent ^= hashent >> 8; + return (hashent & (ehash_size - 1)); +} + +static inline int inet6_sk_ehashfn(const struct sock *sk, const int ehash_size) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *np = inet6_sk(sk); + const struct in6_addr *laddr = &np->rcv_saddr; + const struct in6_addr *faddr = &np->daddr; + const __u16 lport = inet->num; + const __u16 fport = inet->dport; + return inet6_ehashfn(laddr, lport, faddr, fport, ehash_size); +} + +/* + * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so + * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM + * + * The sockhash lock must be held as a reader here. + */ +static inline struct sock * + __inet6_lookup_established(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk; + const struct hlist_node *node; + const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + /* Optimize here for direct hit, only listening connections can + * have wildcards anyways. + */ + const int hash = inet6_ehashfn(daddr, hnum, saddr, sport, + hashinfo->ehash_size); + struct inet_ehash_bucket *head = &hashinfo->ehash[hash]; + + read_lock(&head->lock); + sk_for_each(sk, node, &head->chain) { + /* For IPV6 do the cheaper port and family tests first. */ + if (INET6_MATCH(sk, saddr, daddr, ports, dif)) + goto hit; /* You sunk my battleship! */ + } + /* Must check for a TIME_WAIT'er before going to listener hash. 
*/ + sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { + const struct inet_timewait_sock *tw = inet_twsk(sk); + + if(*((__u32 *)&(tw->tw_dport)) == ports && + sk->sk_family == PF_INET6) { + const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + + if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) && + ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) && + (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif)) + goto hit; + } + } + read_unlock(&head->lock); + return NULL; + +hit: + sock_hold(sk); + read_unlock(&head->lock); + return sk; +} + +extern struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo, + const struct in6_addr *daddr, + const unsigned short hnum, + const int dif); + +static inline struct sock *__inet6_lookup(struct inet_hashinfo *hashinfo, + const struct in6_addr *saddr, + const u16 sport, + const struct in6_addr *daddr, + const u16 hnum, + const int dif) +{ + struct sock *sk = __inet6_lookup_established(hashinfo, saddr, sport, + daddr, hnum, dif); + if (sk) + return sk; + + return inet6_lookup_listener(hashinfo, daddr, hnum, dif); +} + extern struct sock *inet6_lookup(struct inet_hashinfo *hashinfo, const struct in6_addr *saddr, const u16 sport, const struct in6_addr *daddr, const u16 dport, const int dif); +#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */ #endif /* _INET6_HASHTABLES_H */ -- cgit v1.2.3 From 4f5736c4c7cf6f9bd8db82b712cfdd51c87e06b9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 09:27:49 -0300 Subject: [TCPDIAG]: Introduce inet_diag_{register,unregister} Next changeset will rename tcp_diag to inet_diag and move the tcp_diag code out of it and into a new tcp_diag.c, similar to the net/dccp/diag.c introduced in this changeset, completing the transition to a generic inet_diag infrastructure. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/tcp_diag.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 190494ebcfb..910c34ba19c 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -5,6 +5,8 @@ #define TCPDIAG_GETSOCK 18 #define DCCPDIAG_GETSOCK 19 +#define INET_DIAG_GETSOCK_MAX 24 + /* Socket identity */ struct tcpdiag_sockid { @@ -125,4 +127,21 @@ struct tcpvegas_info { __u32 tcpv_minrtt; }; +#ifdef __KERNEL__ +struct sock; +struct inet_hashinfo; + +struct inet_diag_handler { + struct inet_hashinfo *idiag_hashinfo; + void (*idiag_get_info)(struct sock *sk, + struct tcpdiagmsg *r, + void *info); + __u16 idiag_info_size; + __u16 idiag_type; +}; + +extern int inet_diag_register(const struct inet_diag_handler *handler); +extern void inet_diag_unregister(const struct inet_diag_handler *handler); +#endif /* __KERNEL__ */ + #endif /* _TCP_DIAG_H_ */ -- cgit v1.2.3 From 73c1f4a033675f168df7e98bbeeafca3c644b8a6 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:51:49 -0300 Subject: [TCPDIAG]: Just rename everything to inet_diag Next changeset will rename tcp_diag.[ch] to inet_diag.[ch]. I'm taking this longer route so as to ease review, making clear the changes made all along the way. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S.
Miller --- include/linux/netlink.h | 2 +- include/linux/tcp_diag.h | 135 ++++++++++++++++++++++------------------------- include/net/tcp.h | 2 +- 3 files changed, 65 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 1c50fea8995..d5e09bcd80f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -8,7 +8,7 @@ #define NETLINK_W1 1 /* 1-wire subsystem */ #define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */ #define NETLINK_FIREWALL 3 /* Firewalling hook */ -#define NETLINK_TCPDIAG 4 /* TCP socket monitoring */ +#define NETLINK_INET_DIAG 4 /* INET socket monitoring */ #define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */ #define NETLINK_XFRM 6 /* ipsec */ #define NETLINK_SELINUX 7 /* SELinux event notifications */ diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h index 910c34ba19c..a4606e5810e 100644 --- a/include/linux/tcp_diag.h +++ b/include/linux/tcp_diag.h @@ -1,5 +1,5 @@ -#ifndef _TCP_DIAG_H_ -#define _TCP_DIAG_H_ 1 +#ifndef _INET_DIAG_H_ +#define _INET_DIAG_H_ 1 /* Just some random number */ #define TCPDIAG_GETSOCK 18 @@ -8,39 +8,36 @@ #define INET_DIAG_GETSOCK_MAX 24 /* Socket identity */ -struct tcpdiag_sockid -{ - __u16 tcpdiag_sport; - __u16 tcpdiag_dport; - __u32 tcpdiag_src[4]; - __u32 tcpdiag_dst[4]; - __u32 tcpdiag_if; - __u32 tcpdiag_cookie[2]; -#define TCPDIAG_NOCOOKIE (~0U) +struct inet_diag_sockid { + __u16 idiag_sport; + __u16 idiag_dport; + __u32 idiag_src[4]; + __u32 idiag_dst[4]; + __u32 idiag_if; + __u32 idiag_cookie[2]; +#define INET_DIAG_NOCOOKIE (~0U) }; /* Request structure */ -struct tcpdiagreq -{ - __u8 tcpdiag_family; /* Family of addresses. */ - __u8 tcpdiag_src_len; - __u8 tcpdiag_dst_len; - __u8 tcpdiag_ext; /* Query extended information */ +struct inet_diag_req { + __u8 idiag_family; /* Family of addresses. 
*/ + __u8 idiag_src_len; + __u8 idiag_dst_len; + __u8 idiag_ext; /* Query extended information */ - struct tcpdiag_sockid id; + struct inet_diag_sockid id; - __u32 tcpdiag_states; /* States to dump */ - __u32 tcpdiag_dbs; /* Tables to dump (NI) */ + __u32 idiag_states; /* States to dump */ + __u32 idiag_dbs; /* Tables to dump (NI) */ }; -enum -{ - TCPDIAG_REQ_NONE, - TCPDIAG_REQ_BYTECODE, +enum { + INET_DIAG_REQ_NONE, + INET_DIAG_REQ_BYTECODE, }; -#define TCPDIAG_REQ_MAX TCPDIAG_REQ_BYTECODE +#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE /* Bytecode is sequence of 4 byte commands followed by variable arguments. * All the commands identified by "code" are conditional jumps forward: @@ -48,28 +45,25 @@ enum * length of the command and its arguments. */ -struct tcpdiag_bc_op -{ +struct inet_diag_bc_op { unsigned char code; unsigned char yes; unsigned short no; }; -enum -{ - TCPDIAG_BC_NOP, - TCPDIAG_BC_JMP, - TCPDIAG_BC_S_GE, - TCPDIAG_BC_S_LE, - TCPDIAG_BC_D_GE, - TCPDIAG_BC_D_LE, - TCPDIAG_BC_AUTO, - TCPDIAG_BC_S_COND, - TCPDIAG_BC_D_COND, +enum { + INET_DIAG_BC_NOP, + INET_DIAG_BC_JMP, + INET_DIAG_BC_S_GE, + INET_DIAG_BC_S_LE, + INET_DIAG_BC_D_GE, + INET_DIAG_BC_D_LE, + INET_DIAG_BC_AUTO, + INET_DIAG_BC_S_COND, + INET_DIAG_BC_D_COND, }; -struct tcpdiag_hostcond -{ +struct inet_diag_hostcond { __u8 family; __u8 prefix_len; int port; @@ -78,47 +72,44 @@ struct tcpdiag_hostcond /* Base info structure. It contains socket identity (addrs/ports/cookie) * and, alas, the information shown by netstat. 
*/ -struct tcpdiagmsg -{ - __u8 tcpdiag_family; - __u8 tcpdiag_state; - __u8 tcpdiag_timer; - __u8 tcpdiag_retrans; - - struct tcpdiag_sockid id; - - __u32 tcpdiag_expires; - __u32 tcpdiag_rqueue; - __u32 tcpdiag_wqueue; - __u32 tcpdiag_uid; - __u32 tcpdiag_inode; +struct inet_diag_msg { + __u8 idiag_family; + __u8 idiag_state; + __u8 idiag_timer; + __u8 idiag_retrans; + + struct inet_diag_sockid id; + + __u32 idiag_expires; + __u32 idiag_rqueue; + __u32 idiag_wqueue; + __u32 idiag_uid; + __u32 idiag_inode; }; /* Extensions */ -enum -{ - TCPDIAG_NONE, - TCPDIAG_MEMINFO, - TCPDIAG_INFO, - TCPDIAG_VEGASINFO, - TCPDIAG_CONG, +enum { + INET_DIAG_NONE, + INET_DIAG_MEMINFO, + INET_DIAG_INFO, + INET_DIAG_VEGASINFO, + INET_DIAG_CONG, }; -#define TCPDIAG_MAX TCPDIAG_CONG +#define INET_DIAG_MAX INET_DIAG_CONG -/* TCPDIAG_MEM */ +/* INET_DIAG_MEM */ -struct tcpdiag_meminfo -{ - __u32 tcpdiag_rmem; - __u32 tcpdiag_wmem; - __u32 tcpdiag_fmem; - __u32 tcpdiag_tmem; +struct inet_diag_meminfo { + __u32 idiag_rmem; + __u32 idiag_wmem; + __u32 idiag_fmem; + __u32 idiag_tmem; }; -/* TCPDIAG_VEGASINFO */ +/* INET_DIAG_VEGASINFO */ struct tcpvegas_info { __u32 tcpv_enabled; @@ -134,7 +125,7 @@ struct inet_hashinfo; struct inet_diag_handler { struct inet_hashinfo *idiag_hashinfo; void (*idiag_get_info)(struct sock *sk, - struct tcpdiagmsg *r, + struct inet_diag_msg *r, void *info); __u16 idiag_info_size; __u16 idiag_type; @@ -144,4 +135,4 @@ extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); #endif /* __KERNEL__ */ -#endif /* _TCP_DIAG_H_ */ +#endif /* _INET_DIAG_H_ */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 0b3f7294c5c..fef122782b4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -690,7 +690,7 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, u32 num_acked); - /* get 
info for tcp_diag (optional) */ + /* get info for inet_diag (optional) */ void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; -- cgit v1.2.3 From a8c2190ee7da1a1dc68ff1a6b5f03feb61e523a5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:56:38 -0300 Subject: [INET_DIAG]: Rename tcp_diag.[ch] to inet_diag.[ch] Next changeset will introduce net/ipv4/tcp_diag.c, moving the code that was put transitionally in inet_diag.c. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/inet_diag.h | 138 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/tcp_diag.h | 138 ---------------------------------------------- 2 files changed, 138 insertions(+), 138 deletions(-) create mode 100644 include/linux/inet_diag.h delete mode 100644 include/linux/tcp_diag.h (limited to 'include') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h new file mode 100644 index 00000000000..a4606e5810e --- /dev/null +++ b/include/linux/inet_diag.h @@ -0,0 +1,138 @@ +#ifndef _INET_DIAG_H_ +#define _INET_DIAG_H_ 1 + +/* Just some random number */ +#define TCPDIAG_GETSOCK 18 +#define DCCPDIAG_GETSOCK 19 + +#define INET_DIAG_GETSOCK_MAX 24 + +/* Socket identity */ +struct inet_diag_sockid { + __u16 idiag_sport; + __u16 idiag_dport; + __u32 idiag_src[4]; + __u32 idiag_dst[4]; + __u32 idiag_if; + __u32 idiag_cookie[2]; +#define INET_DIAG_NOCOOKIE (~0U) +}; + +/* Request structure */ + +struct inet_diag_req { + __u8 idiag_family; /* Family of addresses. */ + __u8 idiag_src_len; + __u8 idiag_dst_len; + __u8 idiag_ext; /* Query extended information */ + + struct inet_diag_sockid id; + + __u32 idiag_states; /* States to dump */ + __u32 idiag_dbs; /* Tables to dump (NI) */ +}; + +enum { + INET_DIAG_REQ_NONE, + INET_DIAG_REQ_BYTECODE, +}; + +#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE + +/* Bytecode is sequence of 4 byte commands followed by variable arguments.
+ * All the commands identified by "code" are conditional jumps forward: + * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be + * length of the command and its arguments. + */ + +struct inet_diag_bc_op { + unsigned char code; + unsigned char yes; + unsigned short no; +}; + +enum { + INET_DIAG_BC_NOP, + INET_DIAG_BC_JMP, + INET_DIAG_BC_S_GE, + INET_DIAG_BC_S_LE, + INET_DIAG_BC_D_GE, + INET_DIAG_BC_D_LE, + INET_DIAG_BC_AUTO, + INET_DIAG_BC_S_COND, + INET_DIAG_BC_D_COND, +}; + +struct inet_diag_hostcond { + __u8 family; + __u8 prefix_len; + int port; + __u32 addr[0]; +}; + +/* Base info structure. It contains socket identity (addrs/ports/cookie) + * and, alas, the information shown by netstat. */ +struct inet_diag_msg { + __u8 idiag_family; + __u8 idiag_state; + __u8 idiag_timer; + __u8 idiag_retrans; + + struct inet_diag_sockid id; + + __u32 idiag_expires; + __u32 idiag_rqueue; + __u32 idiag_wqueue; + __u32 idiag_uid; + __u32 idiag_inode; +}; + +/* Extensions */ + +enum { + INET_DIAG_NONE, + INET_DIAG_MEMINFO, + INET_DIAG_INFO, + INET_DIAG_VEGASINFO, + INET_DIAG_CONG, +}; + +#define INET_DIAG_MAX INET_DIAG_CONG + + +/* INET_DIAG_MEM */ + +struct inet_diag_meminfo { + __u32 idiag_rmem; + __u32 idiag_wmem; + __u32 idiag_fmem; + __u32 idiag_tmem; +}; + +/* INET_DIAG_VEGASINFO */ + +struct tcpvegas_info { + __u32 tcpv_enabled; + __u32 tcpv_rttcnt; + __u32 tcpv_rtt; + __u32 tcpv_minrtt; +}; + +#ifdef __KERNEL__ +struct sock; +struct inet_hashinfo; + +struct inet_diag_handler { + struct inet_hashinfo *idiag_hashinfo; + void (*idiag_get_info)(struct sock *sk, + struct inet_diag_msg *r, + void *info); + __u16 idiag_info_size; + __u16 idiag_type; +}; + +extern int inet_diag_register(const struct inet_diag_handler *handler); +extern void inet_diag_unregister(const struct inet_diag_handler *handler); +#endif /* __KERNEL__ */ + +#endif /* _INET_DIAG_H_ */ diff --git a/include/linux/tcp_diag.h b/include/linux/tcp_diag.h deleted file mode 100644 index 
a4606e5810e..00000000000 --- a/include/linux/tcp_diag.h +++ /dev/null @@ -1,138 +0,0 @@ -#ifndef _INET_DIAG_H_ -#define _INET_DIAG_H_ 1 - -/* Just some random number */ -#define TCPDIAG_GETSOCK 18 -#define DCCPDIAG_GETSOCK 19 - -#define INET_DIAG_GETSOCK_MAX 24 - -/* Socket identity */ -struct inet_diag_sockid { - __u16 idiag_sport; - __u16 idiag_dport; - __u32 idiag_src[4]; - __u32 idiag_dst[4]; - __u32 idiag_if; - __u32 idiag_cookie[2]; -#define INET_DIAG_NOCOOKIE (~0U) -}; - -/* Request structure */ - -struct inet_diag_req { - __u8 idiag_family; /* Family of addresses. */ - __u8 idiag_src_len; - __u8 idiag_dst_len; - __u8 idiag_ext; /* Query extended information */ - - struct inet_diag_sockid id; - - __u32 idiag_states; /* States to dump */ - __u32 idiag_dbs; /* Tables to dump (NI) */ -}; - -enum { - INET_DIAG_REQ_NONE, - INET_DIAG_REQ_BYTECODE, -}; - -#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE - -/* Bytecode is sequence of 4 byte commands followed by variable arguments. - * All the commands identified by "code" are conditional jumps forward: - * to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be - * length of the command and its arguments. - */ - -struct inet_diag_bc_op { - unsigned char code; - unsigned char yes; - unsigned short no; -}; - -enum { - INET_DIAG_BC_NOP, - INET_DIAG_BC_JMP, - INET_DIAG_BC_S_GE, - INET_DIAG_BC_S_LE, - INET_DIAG_BC_D_GE, - INET_DIAG_BC_D_LE, - INET_DIAG_BC_AUTO, - INET_DIAG_BC_S_COND, - INET_DIAG_BC_D_COND, -}; - -struct inet_diag_hostcond { - __u8 family; - __u8 prefix_len; - int port; - __u32 addr[0]; -}; - -/* Base info structure. It contains socket identity (addrs/ports/cookie) - * and, alas, the information shown by netstat. 
*/ -struct inet_diag_msg { - __u8 idiag_family; - __u8 idiag_state; - __u8 idiag_timer; - __u8 idiag_retrans; - - struct inet_diag_sockid id; - - __u32 idiag_expires; - __u32 idiag_rqueue; - __u32 idiag_wqueue; - __u32 idiag_uid; - __u32 idiag_inode; -}; - -/* Extensions */ - -enum { - INET_DIAG_NONE, - INET_DIAG_MEMINFO, - INET_DIAG_INFO, - INET_DIAG_VEGASINFO, - INET_DIAG_CONG, -}; - -#define INET_DIAG_MAX INET_DIAG_CONG - - -/* INET_DIAG_MEM */ - -struct inet_diag_meminfo { - __u32 idiag_rmem; - __u32 idiag_wmem; - __u32 idiag_fmem; - __u32 idiag_tmem; -}; - -/* INET_DIAG_VEGASINFO */ - -struct tcpvegas_info { - __u32 tcpv_enabled; - __u32 tcpv_rttcnt; - __u32 tcpv_rtt; - __u32 tcpv_minrtt; -}; - -#ifdef __KERNEL__ -struct sock; -struct inet_hashinfo; - -struct inet_diag_handler { - struct inet_hashinfo *idiag_hashinfo; - void (*idiag_get_info)(struct sock *sk, - struct inet_diag_msg *r, - void *info); - __u16 idiag_info_size; - __u16 idiag_type; -}; - -extern int inet_diag_register(const struct inet_diag_handler *handler); -extern void inet_diag_unregister(const struct inet_diag_handler *handler); -#endif /* __KERNEL__ */ - -#endif /* _INET_DIAG_H_ */ -- cgit v1.2.3 From 17b085eacef81a6286bd478f2ec75e04abb091cb Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Aug 2005 12:59:17 -0300 Subject: [INET_DIAG]: Move the tcp_diag interface to the proper place With this the previous setup is back, i.e. tcp_diag can be built as a module, as dccp_diag and both share the infrastructure available in inet_diag. If one selects CONFIG_INET_DIAG as module CONFIG_INET_TCP_DIAG will also be built as a module, as will CONFIG_INET_DCCP_DIAG, if CONFIG_IP_DCCP was selected static or as a module, if CONFIG_INET_DIAG is y, being statically linked CONFIG_INET_TCP_DIAG will follow suit and CONFIG_INET_DCCP_DIAG will be built in the same manner as CONFIG_IP_DCCP. 
Now to aim at UDP, converting it to use inet_hashinfo, so that we can use iproute2 for UDP sockets as well. Ah, just to show an example of this new infrastructure working for DCCP :-) [root@qemu ~]# ./ss -dane State Recv-Q Send-Q Local Address:Port Peer Address:Port LISTEN 0 0 *:5001 *:* ino:942 sk:cfd503a0 ESTAB 0 0 127.0.0.1:5001 127.0.0.1:32770 ino:943 sk:cfd50a60 ESTAB 0 0 127.0.0.1:32770 127.0.0.1:5001 ino:947 sk:cfd50700 TIME-WAIT 0 0 127.0.0.1:32769 127.0.0.1:5001 timer:(timewait,3.430ms,0) ino:0 sk:cf209620 Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index fef122782b4..d958260af23 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -479,7 +479,7 @@ static inline void tcp_clear_xmit_timers(struct sock *sk) extern unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu); extern unsigned int tcp_current_mss(struct sock *sk, int large); -/* tcp_diag.c */ +/* tcp.c */ extern void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ -- cgit v1.2.3 From 0ba2c6e8c0fb5cde5a23a213c2e7cb851b85c310 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 13 Aug 2005 13:55:44 -0700 Subject: [NETFILTER]: introduce and use aligned_u64 data type As proposed by Andi Kleen, this is required esp. for x86_64 architecture, where 64bit code needs 8byte aligned 64bit data types, but 32bit userspace apps will only align to 4bytes. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter/nfnetlink_log.h | 5 +++-- include/linux/netfilter/nfnetlink_queue.h | 5 +++-- include/linux/types.h | 3 +++ 3 files changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink_log.h b/include/linux/netfilter/nfnetlink_log.h index a61836a083e..b04b0388059 100644 --- a/include/linux/netfilter/nfnetlink_log.h +++ b/include/linux/netfilter/nfnetlink_log.h @@ -5,6 +5,7 @@ * and not any kind of function definitions. It is shared between kernel and * userspace. Don't put kernel specific stuff in here */ +#include #include enum nfulnl_msg_types { @@ -27,8 +28,8 @@ struct nfulnl_msg_packet_hw { } __attribute__ ((packed)); struct nfulnl_msg_packet_timestamp { - u_int64_t sec; - u_int64_t usec; + aligned_u64 sec; + aligned_u64 usec; } __attribute__ ((packed)); #define NFULNL_PREFIXLEN 30 /* just like old log target */ diff --git a/include/linux/netfilter/nfnetlink_queue.h b/include/linux/netfilter/nfnetlink_queue.h index 2d8d2b2cfca..9e774373244 100644 --- a/include/linux/netfilter/nfnetlink_queue.h +++ b/include/linux/netfilter/nfnetlink_queue.h @@ -1,6 +1,7 @@ #ifndef _NFNETLINK_QUEUE_H #define _NFNETLINK_QUEUE_H +#include #include enum nfqnl_msg_types { @@ -24,8 +25,8 @@ struct nfqnl_msg_packet_hw { } __attribute__ ((packed)); struct nfqnl_msg_packet_timestamp { - u_int64_t sec; - u_int64_t usec; + aligned_u64 sec; + aligned_u64 usec; } __attribute__ ((packed)); enum nfqnl_attr_type { diff --git a/include/linux/types.h b/include/linux/types.h index dcb13f865df..2b678c22ca4 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -123,6 +123,9 @@ typedef __u64 u_int64_t; typedef __s64 int64_t; #endif +/* this is a special 64bit data type that is 8-byte aligned */ +#define aligned_u64 unsigned long long __attribute__((aligned(8))) + /* * The type used for indexing onto a disc or disc partition. 
* If required, asm/types.h can override it and define -- cgit v1.2.3 From 9d810fd2d28a9d672eca3136476af1a54a380bb2 Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 13 Aug 2005 13:56:26 -0700 Subject: [NETFILTER]: Add new iptables "connbytes" match This patch adds a new "connbytes" match that utilizes the CONFIG_NF_CT_ACCT per-connection byte and packet counters. Using it you can do things like packet classification on average packet size within a connection. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_connbytes.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_connbytes.h (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h new file mode 100644 index 00000000000..abaa65afd4e --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_connbytes.h @@ -0,0 +1,25 @@ +#ifndef _IPT_CONNBYTES_H +#define _IPT_CONNBYTES_H + +enum ipt_connbytes_what { + IPT_CONNBYTES_WHAT_PKTS, + IPT_CONNBYTES_WHAT_BYTES, + IPT_CONNBYTES_WHAT_AVGPKT, +}; + +enum ipt_connbytes_direction { + IPT_CONNBYTES_DIR_ORIGINAL, + IPT_CONNBYTES_DIR_REPLY, + IPT_CONNBYTES_DIR_BOTH, +}; + +struct ipt_connbytes_info +{ + struct { + aligned_u64 from; /* count to be matched */ + aligned_u64 to; /* count to be matched */ + } count; + u_int8_t what; /* ipt_connbytes_what */ + u_int8_t direction; /* ipt_connbytes_direction */ +}; +#endif -- cgit v1.2.3 From 25ed891019b84498c83903ecf53df7ce35e9cff6 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sat, 13 Aug 2005 13:58:21 -0700 Subject: [NETFILTER]: Nicer names for ipt_connbytes constants Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- include/linux/netfilter_ipv4/ipt_connbytes.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ipt_connbytes.h b/include/linux/netfilter_ipv4/ipt_connbytes.h index abaa65afd4e..9e5532f8d8a 100644 --- a/include/linux/netfilter_ipv4/ipt_connbytes.h +++ b/include/linux/netfilter_ipv4/ipt_connbytes.h @@ -2,9 +2,9 @@ #define _IPT_CONNBYTES_H enum ipt_connbytes_what { - IPT_CONNBYTES_WHAT_PKTS, - IPT_CONNBYTES_WHAT_BYTES, - IPT_CONNBYTES_WHAT_AVGPKT, + IPT_CONNBYTES_PKTS, + IPT_CONNBYTES_BYTES, + IPT_CONNBYTES_AVGPKT, }; enum ipt_connbytes_direction { -- cgit v1.2.3 From a61bbcf28a8cb0ba56f8193d512f7222e711a294 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 17:24:31 -0700 Subject: [NET]: Store skb->timestamp as offset to a base timestamp Reduces skb size by 8 bytes on 64-bit. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/skbuff.h | 47 ++++++++++++++++++++++++++++++++++++++-- include/net/bluetooth/hci_core.h | 2 +- include/net/neighbour.h | 9 +++++++- include/net/sock.h | 13 ++++++----- 4 files changed, 62 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 60b32151f76..32635c401d4 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -155,13 +155,20 @@ struct skb_shared_info { #define SKB_DATAREF_SHIFT 16 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1) +extern struct timeval skb_tv_base; + +struct skb_timeval { + u32 off_sec; + u32 off_usec; +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list * @prev: Previous buffer in list * @list: List we are on * @sk: Socket we are owned by - * @stamp: Time we arrived + * @tstamp: Time we arrived stored as offset to skb_tv_base * @dev: Device we arrived on/are leaving by * @input_dev: Device we arrived on * @h: Transport layer header @@ -202,7 +209,7 @@ struct sk_buff { struct sk_buff 
*prev; struct sock *sk; - struct timeval stamp; + struct skb_timeval tstamp; struct net_device *dev; struct net_device *input_dev; @@ -1213,6 +1220,42 @@ static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, extern void skb_init(void); extern void skb_add_mtu(int mtu); +/** + * skb_get_timestamp - get timestamp from a skb + * @skb: skb to get stamp from + * @stamp: pointer to struct timeval to store stamp in + * + * Timestamps are stored in the skb as offsets to a base timestamp. + * This function converts the offset back to a struct timeval and stores + * it in stamp. + */ +static inline void skb_get_timestamp(struct sk_buff *skb, struct timeval *stamp) +{ + stamp->tv_sec = skb->tstamp.off_sec; + stamp->tv_usec = skb->tstamp.off_usec; + if (skb->tstamp.off_sec) { + stamp->tv_sec += skb_tv_base.tv_sec; + stamp->tv_usec += skb_tv_base.tv_usec; + } +} + +/** + * skb_set_timestamp - set timestamp of a skb + * @skb: skb to set stamp of + * @stamp: pointer to struct timeval to get stamp from + * + * Timestamps are stored in the skb as offsets to a base timestamp. + * This function converts a struct timeval to an offset and stores + * it in the skb. 
+ */ +static inline void skb_set_timestamp(struct sk_buff *skb, struct timeval *stamp) +{ + skb->tstamp.off_sec = stamp->tv_sec - skb_tv_base.tv_sec; + skb->tstamp.off_usec = stamp->tv_usec - skb_tv_base.tv_usec; +} + +extern void __net_timestamp(struct sk_buff *skb); + #ifdef CONFIG_NETFILTER static inline void nf_conntrack_put(struct nf_conntrack *nfct) { diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 6d63a47c731..7f933f30207 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -404,7 +404,7 @@ static inline int hci_recv_frame(struct sk_buff *skb) bt_cb(skb)->incoming = 1; /* Time stamp */ - do_gettimeofday(&skb->stamp); + __net_timestamp(skb); /* Queue frame for rx task */ skb_queue_tail(&hdev->rx_q, skb); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 89809891e5a..34c07731933 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -363,7 +363,14 @@ __neigh_lookup_errno(struct neigh_table *tbl, const void *pkey, return neigh_create(tbl, pkey, dev); } -#define LOCALLY_ENQUEUED -2 +struct neighbour_cb { + unsigned long sched_next; + unsigned int flags; +}; + +#define LOCALLY_ENQUEUED 0x1 + +#define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif #endif diff --git a/include/net/sock.h b/include/net/sock.h index 065df67b642..d5942887707 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1282,16 +1282,19 @@ static inline int sock_intr_errno(long timeo) static __inline__ void sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - struct timeval *stamp = &skb->stamp; + struct timeval stamp; + + skb_get_timestamp(skb, &stamp); if (sock_flag(sk, SOCK_RCVTSTAMP)) { /* Race occurred between timestamp enabling and packet receiving. Fill in the current time for now. 
*/ - if (stamp->tv_sec == 0) - do_gettimeofday(stamp); + if (stamp.tv_sec == 0) + do_gettimeofday(&stamp); + skb_set_timestamp(skb, &stamp); put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval), - stamp); + &stamp); } else - sk->sk_stamp = *stamp; + sk->sk_stamp = stamp; } /** -- cgit v1.2.3 From fb13ab2849074244a51ae5147483610529a29ced Mon Sep 17 00:00:00 2001 From: Domen Puncer Date: Sun, 14 Aug 2005 17:32:05 -0700 Subject: [NETFILTER]: Remove two unused files Signed-off-by: Domen Puncer Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_logging.h | 20 -------------------- include/linux/netfilter_ipv6/ip6_logging.h | 20 -------------------- 2 files changed, 40 deletions(-) delete mode 100644 include/linux/netfilter_ipv4/ip_logging.h delete mode 100644 include/linux/netfilter_ipv6/ip6_logging.h (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_logging.h b/include/linux/netfilter_ipv4/ip_logging.h deleted file mode 100644 index 0c5c52cb658..00000000000 --- a/include/linux/netfilter_ipv4/ip_logging.h +++ /dev/null @@ -1,20 +0,0 @@ -/* IPv4 macros for the internal logging interface. */ -#ifndef __IP_LOGGING_H -#define __IP_LOGGING_H - -#ifdef __KERNEL__ -#include -#include - -#define nf_log_ip_packet(pskb,hooknum,in,out,fmt,args...) \ - nf_log_packet(AF_INET,pskb,hooknum,in,out,fmt,##args) - -#define nf_log_ip(pfh,len,fmt,args...) \ - nf_log(AF_INET,pfh,len,fmt,##args) - -#define nf_ip_log_register(logging) nf_log_register(AF_INET,logging) -#define nf_ip_log_unregister(logging) nf_log_unregister(AF_INET,logging) - -#endif /*__KERNEL__*/ - -#endif /*__IP_LOGGING_H*/ diff --git a/include/linux/netfilter_ipv6/ip6_logging.h b/include/linux/netfilter_ipv6/ip6_logging.h deleted file mode 100644 index a0b2ee3043a..00000000000 --- a/include/linux/netfilter_ipv6/ip6_logging.h +++ /dev/null @@ -1,20 +0,0 @@ -/* IPv6 macros for the nternal logging interface. 
*/ -#ifndef __IP6_LOGGING_H -#define __IP6_LOGGING_H - -#ifdef __KERNEL__ -#include -#include - -#define nf_log_ip6_packet(pskb,hooknum,in,out,fmt,args...) \ - nf_log_packet(AF_INET6,pskb,hooknum,in,out,fmt,##args) - -#define nf_log_ip6(pfh,len,fmt,args...) \ - nf_log(AF_INET6,pfh,len,fmt,##args) - -#define nf_ip6_log_register(logging) nf_log_register(AF_INET6,logging) -#define nf_ip6_log_unregister(logging) nf_log_unregister(AF_INET6,logging) - -#endif /*__KERNEL__*/ - -#endif /*__IP6_LOGGING_H*/ -- cgit v1.2.3 From db080529798b497eb5a37b92a25e966be5a7dd5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:26:34 -0700 Subject: [NETLINK]: Remove unused groups member from struct netlink_skb_parms Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netlink.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index d5e09bcd80f..eab51f9c9c8 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -106,7 +106,6 @@ struct netlink_skb_parms { struct ucred creds; /* Skb credentials */ __u32 pid; - __u32 groups; __u32 dst_pid; __u32 dst_groups; kernel_cap_t eff_cap; -- cgit v1.2.3 From d629b836d151d43332492651dd841d32e57ebe3b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:27:50 -0700 Subject: [NETLINK]: Use group numbers instead of bitmasks internally Using the group number allows increasing the number of groups without being limited by the size of the bitmask. It introduces one limitation for netlink users: messages can't be broadcast to multiple groups anymore, however this feature was never used inside the kernel. Signed-off-by: Patrick McHardy Signed-off-by: David S.
Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index eab51f9c9c8..c724c9d4984 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -107,7 +107,7 @@ struct netlink_skb_parms struct ucred creds; /* Skb credentials */ __u32 pid; __u32 dst_pid; - __u32 dst_groups; + __u32 dst_group; kernel_cap_t eff_cap; __u32 loginuid; /* Login (audit) uid */ }; -- cgit v1.2.3 From ac6d439d2097b72ea0cbc2322ce1263a38bc1fd0 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 14 Aug 2005 19:29:52 -0700 Subject: [NETLINK]: Convert netlink users to use group numbers instead of bitmasks Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 23 +++++++++++++++++++- include/linux/netfilter_decnet.h | 14 +++++++++++++ include/linux/rtnetlink.h | 42 ++++++++++++++++++++++++++++++++++--- include/linux/selinux_netlink.h | 13 +++++++++++- include/linux/xfrm.h | 18 ++++++++++++++++ 5 files changed, 105 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index b0feb237407..1d5b10ae239 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -2,13 +2,34 @@ #define _NFNETLINK_H #include -/* nfnetlink groups: Up to 32 maximum */ +#ifndef __KERNEL__ +/* nfnetlink groups: Up to 32 maximum - backwards compatibility for userspace */ #define NF_NETLINK_CONNTRACK_NEW 0x00000001 #define NF_NETLINK_CONNTRACK_UPDATE 0x00000002 #define NF_NETLINK_CONNTRACK_DESTROY 0x00000004 #define NF_NETLINK_CONNTRACK_EXP_NEW 0x00000008 #define NF_NETLINK_CONNTRACK_EXP_UPDATE 0x00000010 #define NF_NETLINK_CONNTRACK_EXP_DESTROY 0x00000020 +#endif + +enum nfnetlink_groups { + NFNLGRP_NONE, +#define NFNLGRP_NONE NFNLGRP_NONE + NFNLGRP_CONNTRACK_NEW, +#define NFNLGRP_CONNTRACK_NEW NFNLGRP_CONNTRACK_NEW + 
NFNLGRP_CONNTRACK_UPDATE, +#define NFNLGRP_CONNTRACK_UPDATE NFNLGRP_CONNTRACK_UPDATE + NFNLGRP_CONNTRACK_DESTROY, +#define NFNLGRP_CONNTRACK_DESTROY NFNLGRP_CONNTRACK_DESTROY + NFNLGRP_CONNTRACK_EXP_NEW, +#define NFNLGRP_CONNTRACK_EXP_NEW NFNLGRP_CONNTRACK_EXP_NEW + NFNLGRP_CONNTRACK_EXP_UPDATE, +#define NFNLGRP_CONNTRACK_EXP_UPDATE NFNLGRP_CONNTRACK_EXP_UPDATE + NFNLGRP_CONNTRACK_EXP_DESTROY, +#define NFNLGRP_CONNTRACK_EXP_DESTROY NFNLGRP_CONNTRACK_EXP_DESTROY + __NFNLGRP_MAX, +}; +#define NFNLGRP_MAX (__NFNLGRP_MAX - 1) /* Generic structure for encapsulation optional netfilter information. * It is reminiscent of sockaddr, but with sa_family replaced diff --git a/include/linux/netfilter_decnet.h b/include/linux/netfilter_decnet.h index 01897948415..6f425369ee2 100644 --- a/include/linux/netfilter_decnet.h +++ b/include/linux/netfilter_decnet.h @@ -56,7 +56,21 @@ struct nf_dn_rtmsg { #define NFDN_RTMSG(r) ((unsigned char *)(r) + NLMSG_ALIGN(sizeof(struct nf_dn_rtmsg))) +#ifndef __KERNEL__ +/* backwards compatibility for userspace */ #define DNRMG_L1_GROUP 0x01 #define DNRMG_L2_GROUP 0x02 +#endif + +enum { + DNRNG_NLGRP_NONE, +#define DNRNG_NLGRP_NONE DNRNG_NLGRP_NONE + DNRNG_NLGRP_L1, +#define DNRNG_NLGRP_L1 DNRNG_NLGRP_L1 + DNRNG_NLGRP_L2, +#define DNRNG_NLGRP_L2 DNRNG_NLGRP_L2 + __DNRNG_NLGRP_MAX +}; +#define DNRNG_NLGRP_MAX (__DNRNG_NLGRP_MAX - 1) #endif /*__LINUX_DECNET_NETFILTER_H*/ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 657c05ab8f9..c231e9a08f0 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -826,9 +826,8 @@ enum #define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg)))) #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg)) - -/* RTnetlink multicast groups */ - +#ifndef __KERNEL__ +/* RTnetlink multicast groups - backwards compatibility for userspace */ #define RTMGRP_LINK 1 #define RTMGRP_NOTIFY 2 #define RTMGRP_NEIGH 4 @@ -847,6 +846,43 @@ enum #define 
RTMGRP_DECnet_ROUTE 0x4000 #define RTMGRP_IPV6_PREFIX 0x20000 +#endif + +/* RTnetlink multicast groups */ +enum rtnetlink_groups { + RTNLGRP_NONE, +#define RTNLGRP_NONE RTNLGRP_NONE + RTNLGRP_LINK, +#define RTNLGRP_LINK RTNLGRP_LINK + RTNLGRP_NOTIFY, +#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY + RTNLGRP_NEIGH, +#define RTNLGRP_NEIGH RTNLGRP_NEIGH + RTNLGRP_TC, +#define RTNLGRP_TC RTNLGRP_TC + RTNLGRP_IPV4_IFADDR, +#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR + RTNLGRP_IPV4_MROUTE, +#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE + RTNLGRP_IPV4_ROUTE, +#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE + RTNLGRP_IPV6_IFADDR, +#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR + RTNLGRP_IPV6_MROUTE, +#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE + RTNLGRP_IPV6_ROUTE, +#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE + RTNLGRP_IPV6_IFINFO, +#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO + RTNLGRP_DECnet_IFADDR, +#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR + RTNLGRP_DECnet_ROUTE, +#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE + RTNLGRP_IPV6_PREFIX, +#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + __RTNLGRP_MAX +}; +#define RTNLGRP_MAX (__RTNLGRP_MAX - 1) /* TC action piece */ struct tcamsg diff --git a/include/linux/selinux_netlink.h b/include/linux/selinux_netlink.h index 957e6ebca4e..bbf489decd8 100644 --- a/include/linux/selinux_netlink.h +++ b/include/linux/selinux_netlink.h @@ -20,10 +20,21 @@ enum { SELNL_MSG_MAX }; -/* Multicast groups */ +#ifndef __KERNEL__ +/* Multicast groups - backwards compatiblility for userspace */ #define SELNL_GRP_NONE 0x00000000 #define SELNL_GRP_AVC 0x00000001 /* AVC notifications */ #define SELNL_GRP_ALL 0xffffffff +#endif + +enum selinux_nlgroups { + SELNLGRP_NONE, +#define SELNLGRP_NONE SELNLGRP_NONE + SELNLGRP_AVC, +#define SELNLGRP_AVC SELNLGRP_AVC + __SELNLGRP_MAX +}; +#define SELNLGRP_MAX (__SELNLGRP_MAX - 1) /* Message structures */ struct selnl_msg_setenforce { diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h 
index f0d423300d8..0fb077d6844 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -258,9 +258,27 @@ struct xfrm_usersa_flush { __u8 proto; }; +#ifndef __KERNEL__ +/* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#endif + +enum xfrm_nlgroups { + XFRMNLGRP_NONE, +#define XFRMNLGRP_NONE XFRMNLGRP_NONE + XFRMNLGRP_ACQUIRE, +#define XFRMNLGRP_ACQUIRE XFRMNLGRP_ACQUIRE + XFRMNLGRP_EXPIRE, +#define XFRMNLGRP_EXPIRE XFRMNLGRP_EXPIRE + XFRMNLGRP_SA, +#define XFRMNLGRP_SA XFRMNLGRP_SA + XFRMNLGRP_POLICY, +#define XFRMNLGRP_POLICY XFRMNLGRP_POLICY + __XFRMNLGRP_MAX +}; +#define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) #endif /* _LINUX_XFRM_H */ -- cgit v1.2.3 From 9a4595bc7e67962f13232ee55a64e063062c3a99 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Aug 2005 12:32:15 -0700 Subject: [NETLINK]: Add set/getsockopt options to support more than 32 groups NETLINK_ADD_MEMBERSHIP/NETLINK_DROP_MEMBERSHIP are used to join/leave groups, NETLINK_PKTINFO is used to enable nl_pktinfo control messages for received packets to get the extended destination group number. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 9 +++++++++ include/linux/socket.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c724c9d4984..36a40449f9f 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -90,6 +90,15 @@ struct nlmsgerr struct nlmsghdr msg; }; +#define NETLINK_ADD_MEMBERSHIP 1 +#define NETLINK_DROP_MEMBERSHIP 2 +#define NETLINK_PKTINFO 3 + +struct nl_pktinfo +{ + __u32 group; +}; + #define NET_MAJOR 36 /* Major 36 is reserved for networking */ enum { diff --git a/include/linux/socket.h b/include/linux/socket.h index ddf22559f48..acc55aac8a4 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -272,6 +272,7 @@ struct ucred { #define SOL_NETBEUI 267 #define SOL_LLC 268 #define SOL_DCCP 269 +#define SOL_NETLINK 270 /* IPX options */ #define IPX_TYPE 1 -- cgit v1.2.3 From 066286071d3542243baa68166acb779187c848b3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 15 Aug 2005 12:33:26 -0700 Subject: [NETLINK]: Add "groups" argument to netlink_kernel_create Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 36a40449f9f..7d1d9683b24 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -125,7 +125,7 @@ struct netlink_skb_parms #define NETLINK_CREDS(skb) (&NETLINK_CB((skb)).creds) -extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len), struct module *module); +extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (*input)(struct sock *sk, int len), struct module *module); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, -- cgit v1.2.3 From 20380731bc2897f2952ae055420972ded4cd786e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 02:18:02 -0300 Subject: [NET]: Fix sparse warnings Of this type, mostly: CHECK net/ipv6/netfilter.c net/ipv6/netfilter.c:96:12: warning: symbol 'ipv6_netfilter_init' was not declared. Should it be static? net/ipv6/netfilter.c:101:6: warning: symbol 'ipv6_netfilter_fini' was not declared. Should it be static? Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/if_ether.h | 2 ++ include/linux/if_frad.h | 6 ++++-- include/linux/if_tr.h | 4 ++++ include/linux/igmp.h | 3 +++ include/linux/net.h | 7 +++++++ include/linux/netdevice.h | 10 ++++++++++ include/linux/netfilter_ipv6.h | 4 ++-- include/linux/security.h | 6 ++++-- include/linux/skbuff.h | 2 ++ include/linux/socket.h | 7 +++++++ include/net/addrconf.h | 6 ++++++ include/net/af_unix.h | 15 +++++++++++++++ include/net/icmp.h | 7 +++++++ include/net/ip.h | 23 +++++++++++++++++++++++ include/net/ip_fib.h | 5 +++++ include/net/ipv6.h | 35 +++++++++++++++++++++++++++++++++-- include/net/p8022.h | 2 ++ include/net/raw.h | 7 ++++++- include/net/route.h | 2 ++ include/net/sock.h | 12 ++++++++++++ include/net/tcp.h | 12 ++++++++++++ include/net/udp.h | 5 +++++ 22 files changed, 173 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index b5b58e9c054..fc2d4c8225a 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -110,6 +110,8 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) { return (struct ethhdr *)skb->mac.raw; } + +extern struct ctl_table ether_table[]; #endif #endif /* _LINUX_IF_ETHER_H */ diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 3c94b173657..511999c7eed 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -191,10 +191,12 @@ struct frad_local int buffer; /* current buffer for S508 firmware */ }; -extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); - #endif /* __KERNEL__ */ #endif /* CONFIG_DLCI || CONFIG_DLCI_MODULE */ +#ifdef __KERNEL__ +extern void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)); +#endif + #endif diff --git a/include/linux/if_tr.h b/include/linux/if_tr.h index 3fba9e2f542..5502f597cf0 100644 --- a/include/linux/if_tr.h +++ b/include/linux/if_tr.h @@ -43,12 +43,16 @@ struct trh_hdr { }; #ifdef __KERNEL__ +#include #include static inline struct trh_hdr 
*tr_hdr(const struct sk_buff *skb) { return (struct trh_hdr *)skb->mac.raw; } +#ifdef CONFIG_SYSCTL +extern struct ctl_table tr_table[]; +#endif #endif /* This is an Token-Ring LLC structure */ diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 0c31ef0b5ba..28f4f3b3695 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -129,6 +129,9 @@ struct igmpv3_query { #include #include +extern int sysctl_igmp_max_memberships; +extern int sysctl_igmp_max_msf; + struct ip_sf_socklist { unsigned int sl_max; diff --git a/include/linux/net.h b/include/linux/net.h index 5f8b632ff65..4e981585a89 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -286,5 +286,12 @@ static struct proto_ops name##_ops = { \ #define MODULE_ALIAS_NET_PF_PROTO(pf, proto) \ MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto)) +#ifdef CONFIG_SYSCTL +#include +extern ctl_table net_table[]; +extern int net_msg_cost; +extern int net_msg_burst; +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_NET_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d8e52edfd52..1fcaa88b862 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -244,6 +244,7 @@ struct netdev_boot_setup { }; #define NETDEV_BOOT_SETUP_MAX 8 +extern int __init netdev_boot_setup(char *str); /* * The DEVICE structure. 
@@ -673,6 +674,7 @@ extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern void dev_init(void); extern int netdev_nit; +extern int netdev_budget; /* Called by rtnetlink.c:rtnl_unlock() */ extern void netdev_run_todo(void); @@ -908,6 +910,14 @@ extern int skb_checksum_help(struct sk_buff *skb, int inward); extern void net_enable_timestamp(void); extern void net_disable_timestamp(void); +#ifdef CONFIG_PROC_FS +extern void *dev_seq_start(struct seq_file *seq, loff_t *pos); +extern void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos); +extern void dev_seq_stop(struct seq_file *seq, void *v); +#endif + +extern void linkwatch_run_queue(void); + #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 5d204ee7a31..edcc2c6eb5c 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -71,7 +71,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_LAST = INT_MAX, }; -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); +extern int ipv6_netfilter_init(void); +extern void ipv6_netfilter_fini(void); #endif /*__LINUX_IP6_NETFILTER_H*/ diff --git a/include/linux/security.h b/include/linux/security.h index b42095a68b1..7aab6ab7c57 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2727,7 +2727,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return security_ops->socket_getpeersec(sock, optval, optlen, len); } -static inline int security_sk_alloc(struct sock *sk, int family, int priority) +static inline int security_sk_alloc(struct sock *sk, int family, + unsigned int __nocast priority) { return security_ops->sk_alloc_security(sk, family, priority); } @@ -2844,7 +2845,8 @@ static inline int security_socket_getpeersec(struct socket *sock, char __user *o return -ENOPROTOOPT; } -static inline int security_sk_alloc(struct sock *sk, int family, int priority) +static inline int 
security_sk_alloc(struct sock *sk, int family, + unsigned int __nocast priority) { return 0; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 32635c401d4..db10335e419 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1203,6 +1203,8 @@ extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); +extern void skb_release_data(struct sk_buff *skb); + static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) { diff --git a/include/linux/socket.h b/include/linux/socket.h index acc55aac8a4..1739c2d5b95 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -26,6 +26,13 @@ struct __kernel_sockaddr_storage { #include /* pid_t */ #include /* __user */ +extern int sysctl_somaxconn; +extern void sock_init(void); +#ifdef CONFIG_PROC_FS +struct seq_file; +extern void socket_seq_show(struct seq_file *seq); +#endif + typedef unsigned short sa_family_t; /* diff --git a/include/net/addrconf.h b/include/net/addrconf.h index a0ed9367217..750e2508dd9 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -45,6 +45,7 @@ struct prefix_info { #ifdef __KERNEL__ +#include #include #include #include @@ -238,5 +239,10 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) addr->s6_addr32[3] == htonl(0x00000002)); } +#ifdef CONFIG_PROC_FS +extern int if6_proc_init(void); +extern void if6_proc_exit(void); +#endif + #endif #endif diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b60b3846b9d..b5d785ab4a0 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -1,5 +1,11 @@ #ifndef __LINUX_NET_AFUNIX_H #define __LINUX_NET_AFUNIX_H + +#include +#include +#include +#include + extern void unix_inflight(struct file *fp); extern void unix_notinflight(struct file *fp); extern void unix_gc(void); @@ -74,5 +80,14 @@ struct unix_sock { wait_queue_head_t peer_wait; 
}; #define unix_sk(__sk) ((struct unix_sock *)__sk) + +#ifdef CONFIG_SYSCTL +extern int sysctl_unix_max_dgram_qlen; +extern void unix_sysctl_register(void); +extern void unix_sysctl_unregister(void); +#else +static inline void unix_sysctl_register(void) {} +static inline void unix_sysctl_unregister(void) {} +#endif #endif #endif diff --git a/include/net/icmp.h b/include/net/icmp.h index e5ef0d15fb4..6cdebeee5f9 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -57,4 +57,11 @@ static inline struct raw_sock *raw_sk(const struct sock *sk) return (struct raw_sock *)sk; } +extern int sysctl_icmp_echo_ignore_all; +extern int sysctl_icmp_echo_ignore_broadcasts; +extern int sysctl_icmp_ignore_bogus_error_responses; +extern int sysctl_icmp_errors_use_inbound_ifaddr; +extern int sysctl_icmp_ratelimit; +extern int sysctl_icmp_ratemask; + #endif /* _ICMP_H */ diff --git a/include/net/ip.h b/include/net/ip.h index c16fb6ac344..7623e414a5f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -163,6 +163,24 @@ extern int sysctl_local_port_range[2]; extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; +/* From ip_fragment.c */ +extern int sysctl_ipfrag_high_thresh; +extern int sysctl_ipfrag_low_thresh; +extern int sysctl_ipfrag_time; +extern int sysctl_ipfrag_secret_interval; + +/* From inetpeer.c */ +extern int inet_peer_threshold; +extern int inet_peer_minttl; +extern int inet_peer_maxttl; +extern int inet_peer_gc_mintime; +extern int inet_peer_gc_maxtime; + +/* From ip_output.c */ +extern int sysctl_ip_dynaddr; + +extern void ipfrag_init(void); + #ifdef CONFIG_INET /* The function in 2.2 was invalid, producing wrong result for * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. 
--ANK(000625) */ @@ -348,5 +366,10 @@ int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen, void **context); +#ifdef CONFIG_PROC_FS +extern int ip_misc_proc_init(void); +#endif + +extern struct ctl_table ipv4_table[]; #endif /* _IP_H */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a4208a336ac..14de4ebd121 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -295,4 +295,9 @@ static inline void fib_res_put(struct fib_result *res) #endif } +#ifdef CONFIG_PROC_FS +extern int fib_proc_init(void); +extern void fib_proc_exit(void); +#endif + #endif /* _NET_FIB_H */ diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c5a02ddc594..3203eaff4bd 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -104,6 +104,7 @@ struct frag_hdr { #ifdef __KERNEL__ +#include #include /* sysctls */ @@ -464,8 +465,38 @@ extern int sysctl_ip6frag_low_thresh; extern int sysctl_ip6frag_time; extern int sysctl_ip6frag_secret_interval; -#endif /* __KERNEL__ */ -#endif /* _NET_IPV6_H */ +extern struct proto_ops inet6_stream_ops; +extern struct proto_ops inet6_dgram_ops; + +extern int ip6_mc_source(int add, int omode, struct sock *sk, + struct group_source_req *pgsr); +extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf); +extern int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf, + struct group_filter __user *optval, + int __user *optlen); + +#ifdef CONFIG_PROC_FS +extern int ac6_proc_init(void); +extern void ac6_proc_exit(void); +extern int raw6_proc_init(void); +extern void raw6_proc_exit(void); +extern int tcp6_proc_init(void); +extern void tcp6_proc_exit(void); +extern int udp6_proc_init(void); +extern void udp6_proc_exit(void); +extern int ipv6_misc_proc_init(void); +extern void ipv6_misc_proc_exit(void); + +extern struct rt6_statistics rt6_stats; +#endif +#ifdef CONFIG_SYSCTL +extern ctl_table ipv6_route_table[]; +extern 
ctl_table ipv6_icmp_table[]; +extern void ipv6_sysctl_register(void); +extern void ipv6_sysctl_unregister(void); +#endif +#endif /* __KERNEL__ */ +#endif /* _NET_IPV6_H */ diff --git a/include/net/p8022.h b/include/net/p8022.h index 223f8fa9ffc..42e9fac51b3 100644 --- a/include/net/p8022.h +++ b/include/net/p8022.h @@ -8,4 +8,6 @@ extern struct datalink_proto * struct net_device *orig_dev)); extern void unregister_8022_client(struct datalink_proto *proto); +extern struct datalink_proto *make_8023_client(void); +extern void destroy_8023_client(struct datalink_proto *dl); #endif diff --git a/include/net/raw.h b/include/net/raw.h index 1c4bc3e6809..f47917469b1 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -17,10 +17,10 @@ #ifndef _RAW_H #define _RAW_H +#include extern struct proto raw_prot; - extern void raw_err(struct sock *, struct sk_buff *, u32 info); extern int raw_rcv(struct sock *, struct sk_buff *); @@ -39,4 +39,9 @@ extern struct sock *__raw_v4_lookup(struct sock *sk, unsigned short num, extern int raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash); +#ifdef CONFIG_PROC_FS +extern int raw_proc_init(void); +extern void raw_proc_exit(void); +#endif + #endif /* _RAW_H */ diff --git a/include/net/route.h b/include/net/route.h index 63c94558236..dbe79ca67d3 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -195,4 +195,6 @@ static inline struct inet_peer *rt_get_peer(struct rtable *rt) return rt->peer; } +extern ctl_table ipv4_route_table[]; + #endif /* _ROUTE_H */ diff --git a/include/net/sock.h b/include/net/sock.h index d5942887707..14183883e8e 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1370,4 +1370,16 @@ static inline int siocdevprivate_ioctl(unsigned int fd, unsigned int cmd, unsign } #endif +extern void sk_init(void); + +#ifdef CONFIG_SYSCTL +extern struct ctl_table core_table[]; +extern int sysctl_optmem_max; +#endif + +#ifdef CONFIG_PROC_FS +extern __u32 sysctl_wmem_default; +extern __u32 
sysctl_rmem_default; +#endif + #endif /* _SOCK_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index d958260af23..d6bcf1317a6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1183,4 +1183,16 @@ struct tcp_iter_state { extern int tcp_proc_register(struct tcp_seq_afinfo *afinfo); extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); +extern struct request_sock_ops tcp_request_sock_ops; + +extern int tcp_v4_destroy_sock(struct sock *sk); + +#ifdef CONFIG_PROC_FS +extern int tcp4_proc_init(void); +extern void tcp4_proc_exit(void); +#endif + +extern void tcp_v4_init(struct net_proto_family *ops); +extern void tcp_init(void); + #endif /* _TCP_H */ diff --git a/include/net/udp.h b/include/net/udp.h index ac229b761db..107b9d791a1 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -94,6 +94,11 @@ struct udp_iter_state { struct seq_operations seq_ops; }; +#ifdef CONFIG_PROC_FS extern int udp_proc_register(struct udp_seq_afinfo *afinfo); extern void udp_proc_unregister(struct udp_seq_afinfo *afinfo); + +extern int udp4_proc_init(void); +extern void udp4_proc_exit(void); +#endif #endif /* _UDP_H */ -- cgit v1.2.3 From 6ed8a48582c08432e84e5610564c1d25fe00dd7f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 19:02:15 -0300 Subject: [NETLINK]: Fix sparse warnings Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 7d1d9683b24..16751866893 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -129,7 +129,7 @@ extern struct sock *netlink_kernel_create(int unit, unsigned int groups, void (* extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, - __u32 group, int allocation); + __u32 group, unsigned int __nocast allocation); extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 4c6ea29d82e0d1b9b37e6b879e0a7fd6c409333d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Aug 2005 19:46:48 -0300 Subject: [IP]: Introduce ip_options_get_from_user This variant is needed to satisfy sparse __user annotations. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/net/ip.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 7623e414a5f..e4563bbee6e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -335,7 +335,10 @@ extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 da extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); extern void ip_options_fragment(struct sk_buff *skb); extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); -extern int ip_options_get(struct ip_options **optp, unsigned char *data, int optlen, int user); +extern int ip_options_get(struct ip_options **optp, + unsigned char *data, int optlen); +extern int ip_options_get_from_user(struct ip_options **optp, + unsigned char __user *data, int optlen); extern void ip_options_undo(struct ip_options * opt); extern void ip_forward_options(struct sk_buff *skb); extern int ip_options_rcv_srr(struct sk_buff *skb); -- cgit v1.2.3 From d179cd12928443f3ec29cfbc3567439644bd0afc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 17 Aug 2005 14:57:30 -0700 Subject: [NET]: Implement SKB fast cloning. Protocols that make extensive use of SKB cloning, for example TCP, eat at least 2 allocations per packet sent as a result. To cut the kmalloc() count in half, we implement a pre-allocation scheme wherein we allocate 2 sk_buff objects in advance, then use a simple reference count to free up the memory at the correct time. Based upon an initial patch by Thomas Graf and suggestions from Herbert Xu. Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 26 +++++++++++++++++++++++--- include/net/sock.h | 2 +- 2 files changed, 24 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index db10335e419..42edce6abe2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -162,6 +162,13 @@ struct skb_timeval { u32 off_usec; }; + +enum { + SKB_FCLONE_UNAVAILABLE, + SKB_FCLONE_ORIG, + SKB_FCLONE_CLONE, +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list @@ -255,7 +262,8 @@ struct sk_buff { ip_summed:2, nohdr:1, nfctinfo:3; - __u8 pkt_type; + __u8 pkt_type:3, + fclone:2; __be16 protocol; void (*destructor)(struct sk_buff *skb); @@ -295,8 +303,20 @@ struct sk_buff { #include extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff *alloc_skb(unsigned int size, - unsigned int __nocast priority); +extern struct sk_buff *__alloc_skb(unsigned int size, + unsigned int __nocast priority, int fclone); +static inline struct sk_buff *alloc_skb(unsigned int size, + unsigned int __nocast priority) +{ + return __alloc_skb(size, priority, 0); +} + +static inline struct sk_buff *alloc_skb_fclone(unsigned int size, + unsigned int __nocast priority) +{ + return __alloc_skb(size, priority, 1); +} + extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, unsigned int size, unsigned int __nocast priority); diff --git a/include/net/sock.h b/include/net/sock.h index 14183883e8e..d57aece9492 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1200,7 +1200,7 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, int hdr_len; hdr_len = SKB_DATA_ALIGN(sk->sk_prot->max_header); - skb = alloc_skb(size + hdr_len, gfp); + skb = alloc_skb_fclone(size + hdr_len, gfp); if (skb) { skb->truesize += mem; if (sk->sk_forward_alloc >= (int)skb->truesize || -- cgit v1.2.3 From 1bc0986957b63a2fbbc46ab95d3d1d72830bda83 Mon Sep 17 00:00:00 2001 From: Ian McDonald Date: Sat, 20 Aug 2005 00:23:43 
-0300 Subject: [DCCP]: Fix the timestamp options This changes timestamp, timestamp echo, and elapsed time to use units of 10 usecs as per DCCP spec. This has been tested to verify that times are correct. Also fixed up length and used hton/ntoh more. Still to add in later patches: - actually use elapsed time to adjust RTT (commented out as was prior to this patch) - send options at times more closely following the spec (content is now correct) Signed-off-by: Ian McDonald Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/linux/dccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 3dccdd5108b..9e3a1370b90 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -415,7 +415,7 @@ struct dccp_sock { __u64 dccps_gsr; __u64 dccps_gar; unsigned long dccps_service; - unsigned long dccps_timestamp_time; + struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; __u32 dccps_avg_packet_size; unsigned long dccps_ndp_count; -- cgit v1.2.3 From 8cd25c1fcfbf6460983e99091d278187421c1a1d Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sat, 20 Aug 2005 17:14:11 -0700 Subject: [NET]: fix PROC_FS=n compile Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/net/sock.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index d57aece9492..312cb25cbd1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1377,9 +1377,7 @@ extern struct ctl_table core_table[]; extern int sysctl_optmem_max; #endif -#ifdef CONFIG_PROC_FS extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; -#endif #endif /* _SOCK_H */ -- cgit v1.2.3 From a6f9a70578b981321b63786ac8015f17cca4fcbd Mon Sep 17 00:00:00 2001 From: Jon Wetzel Date: Sat, 20 Aug 2005 17:15:54 -0700 Subject: [NET]: Add support for getting the permanent hardware address. 
This patch adds a new field to net device to hold the permanent hardware address, and adds a new generic ethtool_op function to get that address. Signed-off-by: Jon Wetzel Signed-off-by: John W. Linville Signed-off-by: David S. Miller --- include/linux/ethtool.h | 13 ++++++++++++- include/linux/netdevice.h | 1 + 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d7021c391b2..ed1440ea4c9 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -250,6 +250,12 @@ struct ethtool_stats { u64 data[0]; }; +struct ethtool_perm_addr { + u32 cmd; /* ETHTOOL_GPERMADDR */ + u32 size; + u8 data[0]; +}; + struct net_device; /* Some generic methods drivers may use in their ethtool_ops */ @@ -261,6 +267,8 @@ u32 ethtool_op_get_sg(struct net_device *dev); int ethtool_op_set_sg(struct net_device *dev, u32 data); u32 ethtool_op_get_tso(struct net_device *dev); int ethtool_op_set_tso(struct net_device *dev, u32 data); +int ethtool_op_get_perm_addr(struct net_device *dev, + struct ethtool_perm_addr *addr, u8 *data); /** * &ethtool_ops - Alter and report network device settings @@ -294,7 +302,8 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data); * get_strings: Return a set of strings that describe the requested objects * phys_id: Identify the device * get_stats: Return statistics about the device - * + * get_perm_addr: Gets the permanent hardware address + * * Description: * * get_settings: @@ -352,6 +361,7 @@ struct ethtool_ops { int (*phys_id)(struct net_device *, u32); int (*get_stats_count)(struct net_device *); void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *); + int (*get_perm_addr)(struct net_device *, struct ethtool_perm_addr *, u8 *); int (*begin)(struct net_device *); void (*complete)(struct net_device *); }; @@ -389,6 +399,7 @@ struct ethtool_ops { #define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ #define ETHTOOL_GTSO
0x0000001e /* Get TSO enable (ethtool_value) */ #define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1fcaa88b862..7c717907896 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -337,6 +337,7 @@ struct net_device /* Interface address info. */ unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ + unsigned char perm_addr[MAX_ADDR_LEN]; /* permanent hw address */ unsigned char addr_len; /* hardware address length */ unsigned short dev_id; /* for shared network cards */ -- cgit v1.2.3 From 2c656491e9ce77e12337073973794c4be467a489 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 20 Aug 2005 17:24:25 -0700 Subject: [NET]: Fix ipl=>ihl typo in ip_fast_csum Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/asm-i386/checksum.h | 2 +- include/asm-m32r/checksum.h | 2 +- include/asm-x86_64/checksum.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-i386/checksum.h b/include/asm-i386/checksum.h index f949e44c2a3..67d3630c4e8 100644 --- a/include/asm-i386/checksum.h +++ b/include/asm-i386/checksum.h @@ -83,7 +83,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph, "adcl $0, %0 ;\n" "notl %0 ;\n" "2: ;\n" - /* Since the input registers which are loaded with iph and ipl + /* Since the input registers which are loaded with iph and ihl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. 
*/ : "=r" (sum), "=r" (iph), "=r" (ihl) diff --git a/include/asm-m32r/checksum.h b/include/asm-m32r/checksum.h index 99f37dbf255..877ebf46e9f 100644 --- a/include/asm-m32r/checksum.h +++ b/include/asm-m32r/checksum.h @@ -105,7 +105,7 @@ static inline unsigned short ip_fast_csum(unsigned char * iph, " addx %0, %3 \n" " .fillinsn\n" "2: \n" - /* Since the input registers which are loaded with iph and ipl + /* Since the input registers which are loaded with iph and ihl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. */ : "=&r" (sum), "=r" (iph), "=r" (ihl), "=&r" (tmpreg0), "=&r" (tmpreg1) diff --git a/include/asm-x86_64/checksum.h b/include/asm-x86_64/checksum.h index d01356f0144..989469e8e0b 100644 --- a/include/asm-x86_64/checksum.h +++ b/include/asm-x86_64/checksum.h @@ -64,7 +64,7 @@ static inline unsigned short ip_fast_csum(unsigned char *iph, unsigned int ihl) " adcl $0, %0\n" " notl %0\n" "2:" - /* Since the input registers which are loaded with iph and ipl + /* Since the input registers which are loaded with iph and ihl are modified, we must also specify them as outputs, or gcc will assume they contain their original values. */ : "=r" (sum), "=r" (iph), "=r" (ihl) -- cgit v1.2.3 From 7567662ba896ee0c33d6215f32e2011488a6d1bf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 21 Aug 2005 23:30:34 -0700 Subject: [NETFILTER]: Add string match Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv4/ipt_string.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_string.h (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ipt_string.h b/include/linux/netfilter_ipv4/ipt_string.h new file mode 100644 index 00000000000..a265f6e44ea --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_string.h @@ -0,0 +1,18 @@ +#ifndef _IPT_STRING_H +#define _IPT_STRING_H + +#define IPT_STRING_MAX_PATTERN_SIZE 128 +#define IPT_STRING_MAX_ALGO_NAME_SIZE 16 + +struct ipt_string_info +{ + u_int16_t from_offset; + u_int16_t to_offset; + char algo[IPT_STRING_MAX_ALGO_NAME_SIZE]; + char pattern[IPT_STRING_MAX_PATTERN_SIZE]; + u_int8_t patlen; + u_int8_t invert; + struct ts_config __attribute__((aligned(8))) *config; +}; + +#endif /*_IPT_STRING_H*/ -- cgit v1.2.3 From 764d8a9f240729534a1d8a0ffd39e722cf5cc5af Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 21 Aug 2005 23:31:06 -0700 Subject: [NETFILTER]: Add IPv6 REJECT target Originally written by Yasuyuki Kozakai , taken from netfilter patch-o-matic and fixed up to work with current kernels. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv6/ip6t_REJECT.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_REJECT.h (limited to 'include') diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h new file mode 100644 index 00000000000..6be6504162b --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h @@ -0,0 +1,18 @@ +#ifndef _IP6T_REJECT_H +#define _IP6T_REJECT_H + +enum ip6t_reject_with { + IP6T_ICMP6_NO_ROUTE, + IP6T_ICMP6_ADM_PROHIBITED, + IP6T_ICMP6_NOT_NEIGHBOUR, + IP6T_ICMP6_ADDR_UNREACH, + IP6T_ICMP6_PORT_UNREACH, + IP6T_ICMP6_ECHOREPLY, + IP6T_TCP_RESET +}; + +struct ip6t_reject_info { + u_int32_t with; /* reject type */ +}; + +#endif /*_IP6T_REJECT_H*/ -- cgit v1.2.3 From 05465343bf74e00c8c2c5a310740157de3149f27 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 21 Aug 2005 23:31:43 -0700 Subject: [NETFILTER]: Add goto target Originally written by Henrik Nordstrom , taken from netfilter patch-o-matic and added ip6_tables support. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ip_tables.h | 3 ++- include/linux/netfilter_ipv6/ip6_tables.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 12ce47808e7..d19d65cf453 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -109,7 +109,8 @@ struct ipt_counters /* Values for "flag" field in struct ipt_ip (general ip structure). */ #define IPT_F_FRAG 0x01 /* Set if rule is a fragment rule */ -#define IPT_F_MASK 0x01 /* All possible flag bits mask. */ +#define IPT_F_GOTO 0x02 /* Set if jump is a goto */ +#define IPT_F_MASK 0x03 /* All possible flag bits mask. */ /* Values for "inv" field in struct ipt_ip. */ #define IPT_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. 
*/ diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index f1ce3b00985..58c72a52dc6 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -111,7 +111,8 @@ struct ip6t_counters #define IP6T_F_PROTO 0x01 /* Set if rule cares about upper protocols */ #define IP6T_F_TOS 0x02 /* Match the TOS. */ -#define IP6T_F_MASK 0x03 /* All possible flag bits mask. */ +#define IP6T_F_GOTO 0x04 /* Set if jump is a goto */ +#define IP6T_F_MASK 0x07 /* All possible flag bits mask. */ /* Values for "inv" field in struct ip6t_ip6. */ #define IP6T_INV_VIA_IN 0x01 /* Invert the sense of IN IFACE. */ -- cgit v1.2.3 From dc40c7bc76054f5e4382835ca2bafb895b993a8a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Aug 2005 21:52:58 -0700 Subject: [ICSK]: Generalise tcp_listen_poll Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 4d7e708c07d..8a87a3a4f10 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -260,6 +260,16 @@ extern void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long max_rto); extern void inet_csk_destroy_sock(struct sock *sk); + +/* + * LISTEN is a special case for poll.. + */ +static inline unsigned int inet_csk_listen_poll(const struct sock *sk) +{ + return !reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue) ? + (POLLIN | POLLRDNORM) : 0; +} + extern int inet_csk_listen_start(struct sock *sk, const int nr_table_entries); extern void inet_csk_listen_stop(struct sock *sk); -- cgit v1.2.3 From e5b4376074e02b783e56a8f7c42d544e18112c4e Mon Sep 17 00:00:00 2001 From: Robert Olsson Date: Thu, 25 Aug 2005 13:01:03 -0700 Subject: [IPV4]: Prepare FIB core for RCU. 
* RCU versions of hlist_***_rcu * fib_alias partial rcu port just whats needed now. Signed-off-by: Robert Olsson Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/list.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 0f2435f92db..9b9b0eec1e8 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -634,6 +634,27 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + smp_wmb(); + next->pprev = &n->next; + *(n->pprev) = n; +} + +static inline void hlist_add_after_rcu(struct hlist_node *prev, + struct hlist_node *n) +{ + n->next = prev->next; + n->pprev = &prev->next; + smp_wmb(); + prev->next = n; + if (n->next) + n->next->pprev = &n->next; +} + #define hlist_entry(ptr, type, member) container_of(ptr,type,member) #define hlist_for_each(pos, head) \ -- cgit v1.2.3 From 57bf1451ac79640c5a0a4f31284c43539fac2903 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 25 Aug 2005 16:06:19 -0700 Subject: [NET]: net/802: more endian annotations The rest of endian warnings now belongs to tr.c exclusively. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/linux/hippidevice.h | 3 +-- include/linux/if_fc.h | 2 +- include/linux/if_fddi.h | 2 +- include/linux/if_hippi.h | 6 +++--- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9bc3b688d2e..bab303dafd6 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -31,8 +31,7 @@ struct hippi_cb { __u32 ifield; }; -extern unsigned short hippi_type_trans(struct sk_buff *skb, - struct net_device *dev); +extern __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev); extern struct net_device *alloc_hippi_dev(int sizeof_priv); #endif diff --git a/include/linux/if_fc.h b/include/linux/if_fc.h index 33330b458b9..376a34ea472 100644 --- a/include/linux/if_fc.h +++ b/include/linux/if_fc.h @@ -44,7 +44,7 @@ struct fcllc { __u8 ssap; /* source SAP */ __u8 llc; /* LLC control field */ __u8 protid[3]; /* protocol id */ - __u16 ethertype; /* ether type field */ + __be16 ethertype; /* ether type field */ }; #endif /* _LINUX_IF_FC_H */ diff --git a/include/linux/if_fddi.h b/include/linux/if_fddi.h index a912818e636..1288a161bc0 100644 --- a/include/linux/if_fddi.h +++ b/include/linux/if_fddi.h @@ -85,7 +85,7 @@ struct fddi_snap_hdr __u8 ssap; /* always 0xAA */ __u8 ctrl; /* always 0x03 */ __u8 oui[FDDI_K_OUI_LEN]; /* organizational universal id */ - __u16 ethertype; /* packet type ID field */ + __be16 ethertype; /* packet type ID field */ } __attribute__ ((packed)); /* Define FDDI LLC frame header */ diff --git a/include/linux/if_hippi.h b/include/linux/if_hippi.h index c8ca72c46f7..94d31ca7d71 100644 --- a/include/linux/if_hippi.h +++ b/include/linux/if_hippi.h @@ -102,9 +102,9 @@ struct hippi_fp_hdr #error "Please fix " #endif #else - __u32 fixed; + __be32 fixed; #endif - __u32 d2_size; + __be32 d2_size; } __attribute__ ((packed)); struct hippi_le_hdr @@ -144,7 +144,7 @@ struct hippi_snap_hdr __u8 ssap; /* always 0xAA */ __u8 ctrl; /* 
always 0x03 */ __u8 oui[HIPPI_OUI_LEN]; /* organizational universal id (zero)*/ - __u16 ethertype; /* packet type ID field */ + __be16 ethertype; /* packet type ID field */ } __attribute__ ((packed)); struct hippi_hdr -- cgit v1.2.3 From cf4ef01440ca5c6d96f2ea2b793a37a0a863a045 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 25 Aug 2005 16:08:37 -0700 Subject: [LIST]: Add docbook header comments for hlist_add_{before,after}_rcu() Signed-off-by: David S. Miller --- include/linux/list.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/list.h b/include/linux/list.h index 9b9b0eec1e8..e6ec5968227 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -634,6 +634,21 @@ static inline void hlist_add_after(struct hlist_node *n, next->next->pprev = &next->next; } +/** + * hlist_add_before_rcu - adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ static inline void hlist_add_before_rcu(struct hlist_node *n, struct hlist_node *next) { @@ -644,6 +659,21 @@ static inline void hlist_add_before_rcu(struct hlist_node *n, *(n->pprev) = n; } +/** + * hlist_add_after_rcu - adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * @prev: the existing element to add the new element after. 
+ * @n: the new element to add to the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ static inline void hlist_add_after_rcu(struct hlist_node *prev, struct hlist_node *n) { -- cgit v1.2.3 From 5f2c3b910744f68e1a507f027398f404b3feb5fb Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 27 Aug 2005 22:37:03 -0700 Subject: [NETFILTER]: Add new iptables TTL target This new iptables target allows manipulation of the TTL of an IPv4 packet. Signed-off-by: Harald Welte Signed-off-by: David S. Miller --- include/linux/netfilter_ipv4/ipt_TTL.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/netfilter_ipv4/ipt_TTL.h (limited to 'include') diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h new file mode 100644 index 00000000000..ee6611edc11 --- /dev/null +++ b/include/linux/netfilter_ipv4/ipt_TTL.h @@ -0,0 +1,21 @@ +/* TTL modification module for IP tables + * (C) 2000 by Harald Welte */ + +#ifndef _IPT_TTL_H +#define _IPT_TTL_H + +enum { + IPT_TTL_SET = 0, + IPT_TTL_INC, + IPT_TTL_DEC +}; + +#define IPT_TTL_MAXMODE IPT_TTL_DEC + +struct ipt_TTL_info { + u_int8_t mode; + u_int8_t ttl; +}; + + +#endif -- cgit v1.2.3 From 0ac4f893f20ed524198da5ebf591fc0b9e2ced2f Mon Sep 17 00:00:00 2001 From: Harald Welte Date: Sat, 27 Aug 2005 22:37:30 -0700 Subject: [NETFILTER6]: Add new ip6tables HOPLIMIT target This target allows users to modify the hoplimit header field of the IPv6 header. Signed-off-by: Harald Welte Signed-off-by: David S. 
Miller --- include/linux/netfilter_ipv6/ip6t_HL.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_HL.h (limited to 'include') diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h new file mode 100644 index 00000000000..afb7813d45a --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_HL.h @@ -0,0 +1,22 @@ +/* Hop Limit modification module for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module */ + +#ifndef _IP6T_HL_H +#define _IP6T_HL_H + +enum { + IP6T_HL_SET = 0, + IP6T_HL_INC, + IP6T_HL_DEC +}; + +#define IP6T_HL_MAXMODE IP6T_HL_DEC + +struct ip6t_HL_info { + u_int8_t mode; + u_int8_t hop_limit; +}; + + +#endif -- cgit v1.2.3 From a84ffe430342db6ee585a5038f3242a6b4112d69 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 28 Aug 2005 04:51:32 -0300 Subject: [DCCP]: Introduce DCCP_SOCKOPT_PACKET_SIZE So that applications can set dccp_sock->dccps_pkt_size, that in turn is used in the CCID3 half connection init routines to set ccid3hc[tr]x_s and use it in its rate calculations. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: David S. 
Miller --- include/linux/dccp.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 9e3a1370b90..007c290f74d 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -186,6 +186,9 @@ enum { DCCPF_MAX_CCID_SPECIFIC = 255, }; +/* DCCP socket options */ +#define DCCP_SOCKOPT_PACKET_SIZE 1 + #ifdef __KERNEL__ #include @@ -396,7 +399,7 @@ enum dccp_role { * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options) * @dccps_pmtu_cookie - Last pmtu seen by socket - * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it + * @dccps_packet_size - Set thru setsockopt * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_hc_rx_ackpkts - receiver half connection acked packets @@ -417,7 +420,7 @@ struct dccp_sock { unsigned long dccps_service; struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; - __u32 dccps_avg_packet_size; + __u32 dccps_packet_size; unsigned long dccps_ndp_count; __u16 dccps_ext_header_len; __u32 dccps_pmtu_cookie; -- cgit v1.2.3 From b74d0bd53406c23636707565d87ddaa55d315b26 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 10 Aug 2005 13:53:41 +1000 Subject: [PATCH] ppc64: four level pagetables fix With CONFIG_HUGETLB_PAGE=n: In file included from kernel/sysctl.c:37: include/linux/hugetlb.h:104:1: warning: "hugetlb_free_pgd_range" redefined In file included from include/linux/mm.h:36, from kernel/sysctl.c:23: include/asm/pgtable.h:492:1: warning: this is the location of the previous definition Signed-off-by: Andrew Morton Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- include/asm-ppc64/pgtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/asm-ppc64/pgtable.h 
b/include/asm-ppc64/pgtable.h index 5ea952ad716..c83679c9d2b 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -489,8 +489,10 @@ extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +#ifdef CONFIG_HUGETLB_PAGE #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ free_pgd_range(tlb, addr, end, floor, ceiling) +#endif /* * This gets called at the end of handling a page fault, when -- cgit v1.2.3 From b877b90f227fb9698d99fb70492d432362584082 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Aug 2005 16:40:12 +1000 Subject: [PATCH] Create vio_register_device Take some assignments out of vio_register_device_common and rename it to vio_register_device. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/vio.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index a82e87c1c5f..578e30193b7 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -56,9 +56,7 @@ const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); int vio_get_irq(struct vio_dev *dev); int vio_enable_interrupts(struct vio_dev *dev); int vio_disable_interrupts(struct vio_dev *dev); -extern struct vio_dev * __devinit vio_register_device_common( - struct vio_dev *viodev, char *name, char *type, - uint32_t unit_address, struct iommu_table *iommu_table); +extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); extern struct dma_mapping_ops vio_dma_ops; -- cgit v1.2.3 From 71d276d751ff5ddba28312aecefb174b20a5b970 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Aug 2005 16:41:44 +1000 Subject: [PATCH] Create vio_bus_ops Create vio_bus_ops so that we just pass a structure to vio_bus_init instead of three separate function pointers. Rearrange vio.h to avoid forward references. 
vio.h only needs struct device_node from prom.h so remove the include and just declare it. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/vio.h | 97 ++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 578e30193b7..85420bb37d5 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -19,13 +19,14 @@ #include #include #include + #include -#include #include -/* + +/* * Architecture-specific constants for drivers to * extract attributes of the device using vio_get_attribute() -*/ + */ #define VETH_MAC_ADDR "local-mac-address" #define VETH_MCAST_FILTER_SIZE "ibm,mac-address-filters" @@ -37,30 +38,19 @@ #define VIO_IRQ_DISABLE 0UL #define VIO_IRQ_ENABLE 1UL -struct vio_dev; -struct vio_driver; -struct vio_device_id; struct iommu_table; -int vio_register_driver(struct vio_driver *drv); -void vio_unregister_driver(struct vio_driver *drv); - -#ifdef CONFIG_PPC_PSERIES -struct vio_dev * __devinit vio_register_device_node( - struct device_node *node_vdev); -#endif -void __devinit vio_unregister_device(struct vio_dev *dev); -struct vio_dev *vio_find_node(struct device_node *vnode); - -const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); -int vio_get_irq(struct vio_dev *dev); -int vio_enable_interrupts(struct vio_dev *dev); -int vio_disable_interrupts(struct vio_dev *dev); -extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); - -extern struct dma_mapping_ops vio_dma_ops; - -extern struct bus_type vio_bus_type; +/* + * The vio_dev structure is used to describe virtual I/O devices. 
+ */ +struct vio_dev { + struct iommu_table *iommu_table; /* vio_map_* uses this */ + char *name; + char *type; + uint32_t unit_address; + unsigned int irq; + struct device dev; +}; struct vio_device_id { char *type; @@ -70,42 +60,51 @@ struct vio_device_id { struct vio_driver { struct list_head node; char *name; - const struct vio_device_id *id_table; /* NULL if wants all devices */ - int (*probe) (struct vio_dev *dev, const struct vio_device_id *id); /* New device inserted */ - int (*remove) (struct vio_dev *dev); /* Device removed (NULL if not a hot-plug capable driver) */ + const struct vio_device_id *id_table; + int (*probe)(struct vio_dev *dev, const struct vio_device_id *id); + int (*remove)(struct vio_dev *dev); unsigned long driver_data; - struct device_driver driver; }; +struct vio_bus_ops { + int (*match)(const struct vio_device_id *id, const struct vio_dev *dev); + void (*unregister_device)(struct vio_dev *); + void (*release_device)(struct device *); +}; + +extern struct dma_mapping_ops vio_dma_ops; +extern struct bus_type vio_bus_type; +extern struct vio_dev vio_bus_device; + +extern int vio_register_driver(struct vio_driver *drv); +extern void vio_unregister_driver(struct vio_driver *drv); + +extern struct vio_dev * __devinit vio_register_device(struct vio_dev *viodev); +extern void __devinit vio_unregister_device(struct vio_dev *dev); + +extern int vio_bus_init(struct vio_bus_ops *); + +#ifdef CONFIG_PPC_PSERIES +struct device_node; + +extern struct vio_dev * __devinit vio_register_device_node( + struct device_node *node_vdev); +extern struct vio_dev *vio_find_node(struct device_node *vnode); +extern const void *vio_get_attribute(struct vio_dev *vdev, void *which, + int *length); +extern int vio_enable_interrupts(struct vio_dev *dev); +extern int vio_disable_interrupts(struct vio_dev *dev); +#endif + static inline struct vio_driver *to_vio_driver(struct device_driver *drv) { return container_of(drv, struct vio_driver, driver); } -/* - * The vio_dev 
structure is used to describe virtual I/O devices. - */ -struct vio_dev { - struct iommu_table *iommu_table; /* vio_map_* uses this */ - char *name; - char *type; - uint32_t unit_address; - unsigned int irq; - - struct device dev; -}; - -extern struct vio_dev vio_bus_device; - static inline struct vio_dev *to_vio_dev(struct device *dev) { return container_of(dev, struct vio_dev, dev); } -extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, - const struct vio_dev *dev), - void (*)(struct vio_dev *), - void (*)(struct device *)); - #endif /* _ASM_VIO_H */ -- cgit v1.2.3 From fb120da678c517f72d4b39932062c2191827b331 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 17 Aug 2005 16:42:59 +1000 Subject: [PATCH] Make MODULE_DEVICE_TABLE work for vio devices Make MODULE_DEVICE_TABLE work for vio devices. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-ppc64/vio.h | 6 +----- include/linux/mod_devicetable.h | 7 ++++++- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 85420bb37d5..03f1b95f433 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -52,11 +53,6 @@ struct vio_dev { struct device dev; }; -struct vio_device_id { - char *type; - char *compat; -}; - struct vio_driver { struct list_head node; char *name; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 97bbccdbcca..47da39ba3f0 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -1,6 +1,6 @@ /* * Device tables which are exported to userspace via - * scripts/table2alias.c. You must keep that file in sync with this + * scripts/mod/file2alias.c. You must keep that file in sync with this * header. 
*/ @@ -190,6 +190,11 @@ struct of_device_id #endif }; +/* VIO */ +struct vio_device_id { + char type[32]; + char compat[32]; +}; /* PCMCIA */ -- cgit v1.2.3 From 45e2a6e4e5e22acd4321f69e84b726c2a568dacf Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 29 Aug 2005 13:15:50 +1000 Subject: [PATCH] Create include/asm-powerpc The ppc and ppc64 trees are hopefully going to merge over time, so this patch begins the process by creating a place for the merging of the header files. Create include/asm-powerpc (and move linkage.h into it from asm-{ppc,ppc64} since we don't like empty directories). Modify the ppc and ppc64 Makefiles to cope. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-powerpc/linkage.h | 6 ++++++ include/asm-ppc/linkage.h | 6 ------ include/asm-ppc64/linkage.h | 6 ------ 3 files changed, 6 insertions(+), 12 deletions(-) create mode 100644 include/asm-powerpc/linkage.h delete mode 100644 include/asm-ppc/linkage.h delete mode 100644 include/asm-ppc64/linkage.h (limited to 'include') diff --git a/include/asm-powerpc/linkage.h b/include/asm-powerpc/linkage.h new file mode 100644 index 00000000000..291c2d01c44 --- /dev/null +++ b/include/asm-powerpc/linkage.h @@ -0,0 +1,6 @@ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +/* Nothing to see here... */ + +#endif diff --git a/include/asm-ppc/linkage.h b/include/asm-ppc/linkage.h deleted file mode 100644 index 291c2d01c44..00000000000 --- a/include/asm-ppc/linkage.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H - -/* Nothing to see here... */ - -#endif diff --git a/include/asm-ppc64/linkage.h b/include/asm-ppc64/linkage.h deleted file mode 100644 index 291c2d01c44..00000000000 --- a/include/asm-ppc64/linkage.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ASM_LINKAGE_H -#define __ASM_LINKAGE_H - -/* Nothing to see here... 
*/ - -#endif -- cgit v1.2.3 From 88999ceb55bf959e63df0c911915166b005977fc Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 29 Aug 2005 14:06:56 +1000 Subject: [PATCH] Move the identical files from include/asm-ppc{,64} Move the identical files from include/asm-ppc{,64}/ to include/asm-powerpc/. Remove hdreg.h completely as it is unused in the tree. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-powerpc/8253pit.h | 10 ++++++++++ include/asm-powerpc/agp.h | 23 +++++++++++++++++++++++ include/asm-powerpc/cputime.h | 1 + include/asm-powerpc/div64.h | 1 + include/asm-powerpc/emergency-restart.h | 1 + include/asm-powerpc/ipc.h | 1 + include/asm-powerpc/xor.h | 1 + include/asm-ppc/8253pit.h | 10 ---------- include/asm-ppc/agp.h | 23 ----------------------- include/asm-ppc/cputime.h | 6 ------ include/asm-ppc/div64.h | 1 - include/asm-ppc/emergency-restart.h | 6 ------ include/asm-ppc/hdreg.h | 1 - include/asm-ppc/ipc.h | 1 - include/asm-ppc/xor.h | 1 - include/asm-ppc64/8253pit.h | 10 ---------- include/asm-ppc64/agp.h | 23 ----------------------- include/asm-ppc64/cputime.h | 6 ------ include/asm-ppc64/div64.h | 1 - include/asm-ppc64/emergency-restart.h | 6 ------ include/asm-ppc64/hdreg.h | 1 - include/asm-ppc64/ipc.h | 1 - include/asm-ppc64/xor.h | 1 - 23 files changed, 38 insertions(+), 98 deletions(-) create mode 100644 include/asm-powerpc/8253pit.h create mode 100644 include/asm-powerpc/agp.h create mode 100644 include/asm-powerpc/cputime.h create mode 100644 include/asm-powerpc/div64.h create mode 100644 include/asm-powerpc/emergency-restart.h create mode 100644 include/asm-powerpc/ipc.h create mode 100644 include/asm-powerpc/xor.h delete mode 100644 include/asm-ppc/8253pit.h delete mode 100644 include/asm-ppc/agp.h delete mode 100644 include/asm-ppc/cputime.h delete mode 100644 include/asm-ppc/div64.h delete mode 100644 include/asm-ppc/emergency-restart.h delete mode 100644 include/asm-ppc/hdreg.h delete mode 100644 
include/asm-ppc/ipc.h delete mode 100644 include/asm-ppc/xor.h delete mode 100644 include/asm-ppc64/8253pit.h delete mode 100644 include/asm-ppc64/agp.h delete mode 100644 include/asm-ppc64/cputime.h delete mode 100644 include/asm-ppc64/div64.h delete mode 100644 include/asm-ppc64/emergency-restart.h delete mode 100644 include/asm-ppc64/hdreg.h delete mode 100644 include/asm-ppc64/ipc.h delete mode 100644 include/asm-ppc64/xor.h (limited to 'include') diff --git a/include/asm-powerpc/8253pit.h b/include/asm-powerpc/8253pit.h new file mode 100644 index 00000000000..862708a749b --- /dev/null +++ b/include/asm-powerpc/8253pit.h @@ -0,0 +1,10 @@ +/* + * 8253/8254 Programmable Interval Timer + */ + +#ifndef _8253PIT_H +#define _8253PIT_H + +#define PIT_TICK_RATE 1193182UL + +#endif diff --git a/include/asm-powerpc/agp.h b/include/asm-powerpc/agp.h new file mode 100644 index 00000000000..ca9e423307f --- /dev/null +++ b/include/asm-powerpc/agp.h @@ -0,0 +1,23 @@ +#ifndef AGP_H +#define AGP_H 1 + +#include <asm/io.h> + +/* nothing much needed here */ + +#define map_page_into_agp(page) +#define unmap_page_from_agp(page) +#define flush_agp_mappings() +#define flush_agp_cache() mb() + +/* Convert a physical address to an address suitable for the GART. */ +#define phys_to_gart(x) (x) +#define gart_to_phys(x) (x) + +/* GATT allocation. Returns/accepts GATT kernel virtual address.
*/ +#define alloc_gatt_pages(order) \ + ((char *)__get_free_pages(GFP_KERNEL, (order))) +#define free_gatt_pages(table, order) \ + free_pages((unsigned long)(table), (order)) + +#endif diff --git a/include/asm-powerpc/cputime.h b/include/asm-powerpc/cputime.h new file mode 100644 index 00000000000..6d68ad7e0ea --- /dev/null +++ b/include/asm-powerpc/cputime.h @@ -0,0 +1 @@ +#include <asm-generic/cputime.h> diff --git a/include/asm-powerpc/div64.h b/include/asm-powerpc/div64.h new file mode 100644 index 00000000000..6cd978cefb2 --- /dev/null +++ b/include/asm-powerpc/div64.h @@ -0,0 +1 @@ +#include <asm-generic/div64.h> diff --git a/include/asm-powerpc/emergency-restart.h b/include/asm-powerpc/emergency-restart.h new file mode 100644 index 00000000000..3711bd9d50b --- /dev/null +++ b/include/asm-powerpc/emergency-restart.h @@ -0,0 +1 @@ +#include <asm-generic/emergency-restart.h> diff --git a/include/asm-powerpc/ipc.h b/include/asm-powerpc/ipc.h new file mode 100644 index 00000000000..a46e3d9c2a3 --- /dev/null +++ b/include/asm-powerpc/ipc.h @@ -0,0 +1 @@ +#include <asm-generic/ipc.h> diff --git a/include/asm-powerpc/xor.h b/include/asm-powerpc/xor.h new file mode 100644 index 00000000000..c82eb12a5b1 --- /dev/null +++ b/include/asm-powerpc/xor.h @@ -0,0 +1 @@ +#include <asm-generic/xor.h> diff --git a/include/asm-ppc/8253pit.h b/include/asm-ppc/8253pit.h deleted file mode 100644 index 285f78488cc..00000000000 --- a/include/asm-ppc/8253pit.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * 8253/8254 Programmable Interval Timer - */ - -#ifndef _8253PIT_H -#define _8253PIT_H - -#define PIT_TICK_RATE 1193182UL - -#endif diff --git a/include/asm-ppc/agp.h b/include/asm-ppc/agp.h deleted file mode 100644 index ca9e423307f..00000000000 --- a/include/asm-ppc/agp.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef AGP_H -#define AGP_H 1 - -#include <asm/io.h> - -/* nothing much needed here */ - -#define map_page_into_agp(page) -#define unmap_page_from_agp(page) -#define flush_agp_mappings() -#define flush_agp_cache() mb() - -/* Convert a physical address to an address suitable for the GART.
*/ -#define phys_to_gart(x) (x) -#define gart_to_phys(x) (x) - -/* GATT allocation. Returns/accepts GATT kernel virtual address. */ -#define alloc_gatt_pages(order) \ - ((char *)__get_free_pages(GFP_KERNEL, (order))) -#define free_gatt_pages(table, order) \ - free_pages((unsigned long)(table), (order)) - -#endif diff --git a/include/asm-ppc/cputime.h b/include/asm-ppc/cputime.h deleted file mode 100644 index 8e9faf5ce72..00000000000 --- a/include/asm-ppc/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __PPC_CPUTIME_H -#define __PPC_CPUTIME_H - -#include <asm-generic/cputime.h> - -#endif /* __PPC_CPUTIME_H */ diff --git a/include/asm-ppc/div64.h b/include/asm-ppc/div64.h deleted file mode 100644 index 6cd978cefb2..00000000000 --- a/include/asm-ppc/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/include/asm-ppc/emergency-restart.h b/include/asm-ppc/emergency-restart.h deleted file mode 100644 index 108d8c48e42..00000000000 --- a/include/asm-ppc/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/include/asm-ppc/hdreg.h b/include/asm-ppc/hdreg.h deleted file mode 100644 index 7f7fd1af0af..00000000000 --- a/include/asm-ppc/hdreg.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/hdreg.h> diff --git a/include/asm-ppc/ipc.h b/include/asm-ppc/ipc.h deleted file mode 100644 index a46e3d9c2a3..00000000000 --- a/include/asm-ppc/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipc.h> diff --git a/include/asm-ppc/xor.h b/include/asm-ppc/xor.h deleted file mode 100644 index c82eb12a5b1..00000000000 --- a/include/asm-ppc/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/xor.h> diff --git a/include/asm-ppc64/8253pit.h b/include/asm-ppc64/8253pit.h deleted file mode 100644 index 285f78488cc..00000000000 --- a/include/asm-ppc64/8253pit.h +++ /dev/null @@ -1,10 +0,0 @@ -/* - * 8253/8254 Programmable Interval Timer - */ - -#ifndef _8253PIT_H -#define _8253PIT_H - -#define PIT_TICK_RATE 1193182UL - -#endif diff
--git a/include/asm-ppc64/agp.h b/include/asm-ppc64/agp.h deleted file mode 100644 index ca9e423307f..00000000000 --- a/include/asm-ppc64/agp.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef AGP_H -#define AGP_H 1 - -#include <asm/io.h> - -/* nothing much needed here */ - -#define map_page_into_agp(page) -#define unmap_page_from_agp(page) -#define flush_agp_mappings() -#define flush_agp_cache() mb() - -/* Convert a physical address to an address suitable for the GART. */ -#define phys_to_gart(x) (x) -#define gart_to_phys(x) (x) - -/* GATT allocation. Returns/accepts GATT kernel virtual address. */ -#define alloc_gatt_pages(order) \ - ((char *)__get_free_pages(GFP_KERNEL, (order))) -#define free_gatt_pages(table, order) \ - free_pages((unsigned long)(table), (order)) - -#endif diff --git a/include/asm-ppc64/cputime.h b/include/asm-ppc64/cputime.h deleted file mode 100644 index 8e9faf5ce72..00000000000 --- a/include/asm-ppc64/cputime.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __PPC_CPUTIME_H -#define __PPC_CPUTIME_H - -#include <asm-generic/cputime.h> - -#endif /* __PPC_CPUTIME_H */ diff --git a/include/asm-ppc64/div64.h b/include/asm-ppc64/div64.h deleted file mode 100644 index 6cd978cefb2..00000000000 --- a/include/asm-ppc64/div64.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/div64.h> diff --git a/include/asm-ppc64/emergency-restart.h b/include/asm-ppc64/emergency-restart.h deleted file mode 100644 index 108d8c48e42..00000000000 --- a/include/asm-ppc64/emergency-restart.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _ASM_EMERGENCY_RESTART_H -#define _ASM_EMERGENCY_RESTART_H - -#include <asm-generic/emergency-restart.h> - -#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/include/asm-ppc64/hdreg.h b/include/asm-ppc64/hdreg.h deleted file mode 100644 index 7f7fd1af0af..00000000000 --- a/include/asm-ppc64/hdreg.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/hdreg.h> diff --git a/include/asm-ppc64/ipc.h b/include/asm-ppc64/ipc.h deleted file mode 100644 index a46e3d9c2a3..00000000000 --- a/include/asm-ppc64/ipc.h +++ /dev/null @@ -1 +0,0 @@ -#include <asm-generic/ipc.h> diff --git
a/include/asm-ppc64/xor.h b/include/asm-ppc64/xor.h deleted file mode 100644 index c82eb12a5b1..00000000000 --- a/include/asm-ppc64/xor.h +++ /dev/null @@ -1 +0,0 @@ -#include -- cgit v1.2.3 From 6f9aa727433fe7647869c9b64ce2f7b5feac0052 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Mon, 29 Aug 2005 14:08:11 +1000 Subject: [PATCH] Move all the very similar files to asm-powerpc They differed in either simple comments or in the protecting ifdefs. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- include/asm-powerpc/errno.h | 11 ++++ include/asm-powerpc/ioctl.h | 69 ++++++++++++++++++++++++ include/asm-powerpc/ioctls.h | 107 +++++++++++++++++++++++++++++++++++++ include/asm-powerpc/local.h | 1 + include/asm-powerpc/namei.h | 20 +++++++ include/asm-powerpc/percpu.h | 1 + include/asm-powerpc/poll.h | 23 ++++++++ include/asm-powerpc/resource.h | 1 + include/asm-powerpc/shmparam.h | 6 +++ include/asm-powerpc/string.h | 32 +++++++++++ include/asm-powerpc/unaligned.h | 18 +++++++ include/asm-ppc/errno.h | 11 ---- include/asm-ppc/ioctl.h | 69 ------------------------ include/asm-ppc/ioctls.h | 107 ------------------------------------- include/asm-ppc/local.h | 6 --- include/asm-ppc/namei.h | 20 ------- include/asm-ppc/percpu.h | 6 --- include/asm-ppc/poll.h | 23 -------- include/asm-ppc/resource.h | 6 --- include/asm-ppc/shmparam.h | 6 --- include/asm-ppc/string.h | 32 ----------- include/asm-ppc/unaligned.h | 18 ------- include/asm-ppc64/errno.h | 18 ------- include/asm-ppc64/ioctl.h | 74 -------------------------- include/asm-ppc64/ioctls.h | 114 ---------------------------------------- include/asm-ppc64/local.h | 1 - include/asm-ppc64/namei.h | 23 -------- include/asm-ppc64/percpu.h | 6 --- include/asm-ppc64/poll.h | 32 ----------- include/asm-ppc64/resource.h | 6 --- include/asm-ppc64/shmparam.h | 13 ----- include/asm-ppc64/string.h | 35 ------------ include/asm-ppc64/unaligned.h | 21 -------- 33 files changed, 289 insertions(+), 647 deletions(-) 
create mode 100644 include/asm-powerpc/errno.h create mode 100644 include/asm-powerpc/ioctl.h create mode 100644 include/asm-powerpc/ioctls.h create mode 100644 include/asm-powerpc/local.h create mode 100644 include/asm-powerpc/namei.h create mode 100644 include/asm-powerpc/percpu.h create mode 100644 include/asm-powerpc/poll.h create mode 100644 include/asm-powerpc/resource.h create mode 100644 include/asm-powerpc/shmparam.h create mode 100644 include/asm-powerpc/string.h create mode 100644 include/asm-powerpc/unaligned.h delete mode 100644 include/asm-ppc/errno.h delete mode 100644 include/asm-ppc/ioctl.h delete mode 100644 include/asm-ppc/ioctls.h delete mode 100644 include/asm-ppc/local.h delete mode 100644 include/asm-ppc/namei.h delete mode 100644 include/asm-ppc/percpu.h delete mode 100644 include/asm-ppc/poll.h delete mode 100644 include/asm-ppc/resource.h delete mode 100644 include/asm-ppc/shmparam.h delete mode 100644 include/asm-ppc/string.h delete mode 100644 include/asm-ppc/unaligned.h delete mode 100644 include/asm-ppc64/errno.h delete mode 100644 include/asm-ppc64/ioctl.h delete mode 100644 include/asm-ppc64/ioctls.h delete mode 100644 include/asm-ppc64/local.h delete mode 100644 include/asm-ppc64/namei.h delete mode 100644 include/asm-ppc64/percpu.h delete mode 100644 include/asm-ppc64/poll.h delete mode 100644 include/asm-ppc64/resource.h delete mode 100644 include/asm-ppc64/shmparam.h delete mode 100644 include/asm-ppc64/string.h delete mode 100644 include/asm-ppc64/unaligned.h (limited to 'include') diff --git a/include/asm-powerpc/errno.h b/include/asm-powerpc/errno.h new file mode 100644 index 00000000000..19f20bd41ae --- /dev/null +++ b/include/asm-powerpc/errno.h @@ -0,0 +1,11 @@ +#ifndef _PPC_ERRNO_H +#define _PPC_ERRNO_H + +#include + +#undef EDEADLOCK +#define EDEADLOCK 58 /* File locking deadlock error */ + +#define _LAST_ERRNO 516 + +#endif diff --git a/include/asm-powerpc/ioctl.h b/include/asm-powerpc/ioctl.h new file mode 100644 index 
00000000000..93c6acfdd0f --- /dev/null +++ b/include/asm-powerpc/ioctl.h @@ -0,0 +1,69 @@ +#ifndef _PPC_IOCTL_H +#define _PPC_IOCTL_H + + +/* + * this was copied from the alpha as it's a bit cleaner there. + * -- Cort + */ + +#define _IOC_NRBITS 8 +#define _IOC_TYPEBITS 8 +#define _IOC_SIZEBITS 13 +#define _IOC_DIRBITS 3 + +#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) +#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) +#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) +#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) + +#define _IOC_NRSHIFT 0 +#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) +#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) +#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) + +/* + * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. + * And this turns out useful to catch old ioctl numbers in header + * files for us. + */ +#define _IOC_NONE 1U +#define _IOC_READ 2U +#define _IOC_WRITE 4U + +#define _IOC(dir,type,nr,size) \ + (((dir) << _IOC_DIRSHIFT) | \ + ((type) << _IOC_TYPESHIFT) | \ + ((nr) << _IOC_NRSHIFT) | \ + ((size) << _IOC_SIZESHIFT)) + +/* provoke compile error for invalid uses of size argument */ +extern unsigned int __invalid_size_argument_for_IOC; +#define _IOC_TYPECHECK(t) \ + ((sizeof(t) == sizeof(t[1]) && \ + sizeof(t) < (1 << _IOC_SIZEBITS)) ? \ + sizeof(t) : __invalid_size_argument_for_IOC) + +/* used to create numbers */ +#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) +#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) +#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) +#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) +#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) +#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) +#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) + +/* used to decode them.. 
*/ +#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) +#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) +#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) +#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) + +/* various drivers, such as the pcmcia stuff, need these... */ +#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) +#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) +#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) +#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) +#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) + +#endif diff --git a/include/asm-powerpc/ioctls.h b/include/asm-powerpc/ioctls.h new file mode 100644 index 00000000000..f5b7f2b055e --- /dev/null +++ b/include/asm-powerpc/ioctls.h @@ -0,0 +1,107 @@ +#ifndef _ASM_PPC_IOCTLS_H +#define _ASM_PPC_IOCTLS_H + +#include + +#define FIOCLEX _IO('f', 1) +#define FIONCLEX _IO('f', 2) +#define FIOASYNC _IOW('f', 125, int) +#define FIONBIO _IOW('f', 126, int) +#define FIONREAD _IOR('f', 127, int) +#define TIOCINQ FIONREAD +#define FIOQSIZE _IOR('f', 128, loff_t) + +#define TIOCGETP _IOR('t', 8, struct sgttyb) +#define TIOCSETP _IOW('t', 9, struct sgttyb) +#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ + +#define TIOCSETC _IOW('t', 17, struct tchars) +#define TIOCGETC _IOR('t', 18, struct tchars) +#define TCGETS _IOR('t', 19, struct termios) +#define TCSETS _IOW('t', 20, struct termios) +#define TCSETSW _IOW('t', 21, struct termios) +#define TCSETSF _IOW('t', 22, struct termios) + +#define TCGETA _IOR('t', 23, struct termio) +#define TCSETA _IOW('t', 24, struct termio) +#define TCSETAW _IOW('t', 25, struct termio) +#define TCSETAF _IOW('t', 28, struct termio) + +#define TCSBRK _IO('t', 29) +#define TCXONC _IO('t', 30) +#define TCFLSH _IO('t', 31) + +#define TIOCSWINSZ _IOW('t', 103, struct winsize) +#define TIOCGWINSZ _IOR('t', 104, struct winsize) +#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ +#define TIOCSTOP _IO('t', 111) 
/* stop output, like ^S */ +#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ + +#define TIOCGLTC _IOR('t', 116, struct ltchars) +#define TIOCSLTC _IOW('t', 117, struct ltchars) +#define TIOCSPGRP _IOW('t', 118, int) +#define TIOCGPGRP _IOR('t', 119, int) + +#define TIOCEXCL 0x540C +#define TIOCNXCL 0x540D +#define TIOCSCTTY 0x540E + +#define TIOCSTI 0x5412 +#define TIOCMGET 0x5415 +#define TIOCMBIS 0x5416 +#define TIOCMBIC 0x5417 +#define TIOCMSET 0x5418 +# define TIOCM_LE 0x001 +# define TIOCM_DTR 0x002 +# define TIOCM_RTS 0x004 +# define TIOCM_ST 0x008 +# define TIOCM_SR 0x010 +# define TIOCM_CTS 0x020 +# define TIOCM_CAR 0x040 +# define TIOCM_RNG 0x080 +# define TIOCM_DSR 0x100 +# define TIOCM_CD TIOCM_CAR +# define TIOCM_RI TIOCM_RNG + +#define TIOCGSOFTCAR 0x5419 +#define TIOCSSOFTCAR 0x541A +#define TIOCLINUX 0x541C +#define TIOCCONS 0x541D +#define TIOCGSERIAL 0x541E +#define TIOCSSERIAL 0x541F +#define TIOCPKT 0x5420 +# define TIOCPKT_DATA 0 +# define TIOCPKT_FLUSHREAD 1 +# define TIOCPKT_FLUSHWRITE 2 +# define TIOCPKT_STOP 4 +# define TIOCPKT_START 8 +# define TIOCPKT_NOSTOP 16 +# define TIOCPKT_DOSTOP 32 + + +#define TIOCNOTTY 0x5422 +#define TIOCSETD 0x5423 +#define TIOCGETD 0x5424 +#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ +#define TIOCSBRK 0x5427 /* BSD compatibility */ +#define TIOCCBRK 0x5428 /* BSD compatibility */ +#define TIOCGSID 0x5429 /* Return the session ID of FD */ +#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ +#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ + +#define TIOCSERCONFIG 0x5453 +#define TIOCSERGWILD 0x5454 +#define TIOCSERSWILD 0x5455 +#define TIOCGLCKTRMIOS 0x5456 +#define TIOCSLCKTRMIOS 0x5457 +#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ +#define TIOCSERGETLSR 0x5459 /* Get line status register */ + /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ +# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ 
+#define TIOCSERGETMULTI 0x545A /* Get multiport config */ +#define TIOCSERSETMULTI 0x545B /* Set multiport config */ + +#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ +#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ + +#endif /* _ASM_PPC_IOCTLS_H */ diff --git a/include/asm-powerpc/local.h b/include/asm-powerpc/local.h new file mode 100644 index 00000000000..c11c530f74d --- /dev/null +++ b/include/asm-powerpc/local.h @@ -0,0 +1 @@ +#include <asm-generic/local.h> diff --git a/include/asm-powerpc/namei.h b/include/asm-powerpc/namei.h new file mode 100644 index 00000000000..29c9ec83213 --- /dev/null +++ b/include/asm-powerpc/namei.h @@ -0,0 +1,20 @@ +/* + * include/asm-ppc/namei.h + * Adapted from include/asm-alpha/namei.h + * + * Included from fs/namei.c + */ + +#ifdef __KERNEL__ +#ifndef __PPC_NAMEI_H +#define __PPC_NAMEI_H + +/* This dummy routine maybe changed to something useful + * for /usr/gnemul/ emulation stuff. + * Look at asm-sparc/namei.h for details. 
+ */ + +#define __emul_prefix() NULL + +#endif /* __PPC_NAMEI_H */ +#endif /* __KERNEL__ */ diff --git a/include/asm-powerpc/percpu.h b/include/asm-powerpc/percpu.h new file mode 100644 index 00000000000..06a959d6723 --- /dev/null +++ b/include/asm-powerpc/percpu.h @@ -0,0 +1 @@ +#include <asm-generic/percpu.h> diff --git a/include/asm-powerpc/poll.h b/include/asm-powerpc/poll.h new file mode 100644 index 00000000000..be5024913c6 --- /dev/null +++ b/include/asm-powerpc/poll.h @@ -0,0 +1,23 @@ +#ifndef __PPC_POLL_H +#define __PPC_POLL_H + +#define POLLIN 0x0001 +#define POLLPRI 0x0002 +#define POLLOUT 0x0004 +#define POLLERR 0x0008 +#define POLLHUP 0x0010 +#define POLLNVAL 0x0020 +#define POLLRDNORM 0x0040 +#define POLLRDBAND 0x0080 +#define POLLWRNORM 0x0100 +#define POLLWRBAND 0x0200 +#define POLLMSG 0x0400 +#define POLLREMOVE 0x1000 + +struct pollfd { + int fd; + short events; + short revents; +}; + +#endif diff --git a/include/asm-powerpc/resource.h b/include/asm-powerpc/resource.h new file mode 100644 index 00000000000..04bc4db8921 --- /dev/null +++ b/include/asm-powerpc/resource.h @@ -0,0 +1 @@ +#include <asm-generic/resource.h> diff --git a/include/asm-powerpc/shmparam.h b/include/asm-powerpc/shmparam.h new file mode 100644 index 00000000000..d6250602ae6 --- /dev/null +++ b/include/asm-powerpc/shmparam.h @@ -0,0 +1,6 @@ +#ifndef _PPC_SHMPARAM_H +#define _PPC_SHMPARAM_H + +#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ + +#endif /* _PPC_SHMPARAM_H */ diff --git a/include/asm-powerpc/string.h b/include/asm-powerpc/string.h new file mode 100644 index 00000000000..22557599739 --- /dev/null +++ b/include/asm-powerpc/string.h @@ -0,0 +1,32 @@ +#ifndef _PPC_STRING_H_ +#define _PPC_STRING_H_ + +#ifdef __KERNEL__ + +#define __HAVE_ARCH_STRCPY +#define __HAVE_ARCH_STRNCPY +#define __HAVE_ARCH_STRLEN +#define __HAVE_ARCH_STRCMP +#define __HAVE_ARCH_STRCAT +#define __HAVE_ARCH_MEMSET +#define __HAVE_ARCH_MEMCPY +#define __HAVE_ARCH_MEMMOVE +#define __HAVE_ARCH_MEMCMP +#define __HAVE_ARCH_MEMCHR + 
+extern int strcasecmp(const char *, const char *); +extern int strncasecmp(const char *, const char *, int); +extern char * strcpy(char *,const char *); +extern char * strncpy(char *,const char *, __kernel_size_t); +extern __kernel_size_t strlen(const char *); +extern int strcmp(const char *,const char *); +extern char * strcat(char *, const char *); +extern void * memset(void *,int,__kernel_size_t); +extern void * memcpy(void *,const void *,__kernel_size_t); +extern void * memmove(void *,const void *,__kernel_size_t); +extern int memcmp(const void *,const void *,__kernel_size_t); +extern void * memchr(const void *,int,__kernel_size_t); + +#endif /* __KERNEL__ */ + +#endif diff --git a/include/asm-powerpc/unaligned.h b/include/asm-powerpc/unaligned.h new file mode 100644 index 00000000000..45520d9b85d --- /dev/null +++ b/include/asm-powerpc/unaligned.h @@ -0,0 +1,18 @@ +#ifdef __KERNEL__ +#ifndef __PPC_UNALIGNED_H +#define __PPC_UNALIGNED_H + +/* + * The PowerPC can do unaligned accesses itself in big endian mode. + * + * The strange macros are there to make sure these can't + * be misused in a way that makes them not work on other + * architectures where unaligned accesses aren't as simple. + */ + +#define get_unaligned(ptr) (*(ptr)) + +#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) + +#endif +#endif /* __KERNEL__ */ diff --git a/include/asm-ppc/errno.h b/include/asm-ppc/errno.h deleted file mode 100644 index 19f20bd41ae..00000000000 --- a/include/asm-ppc/errno.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef _PPC_ERRNO_H -#define _PPC_ERRNO_H - -#include - -#undef EDEADLOCK -#define EDEADLOCK 58 /* File locking deadlock error */ - -#define _LAST_ERRNO 516 - -#endif diff --git a/include/asm-ppc/ioctl.h b/include/asm-ppc/ioctl.h deleted file mode 100644 index 93c6acfdd0f..00000000000 --- a/include/asm-ppc/ioctl.h +++ /dev/null @@ -1,69 +0,0 @@ -#ifndef _PPC_IOCTL_H -#define _PPC_IOCTL_H - - -/* - * this was copied from the alpha as it's a bit cleaner there. 
- * -- Cort - */ - -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 13 -#define _IOC_DIRBITS 3 - -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - -#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. - * And this turns out useful to catch old ioctl numbers in header - * files for us. - */ -#define _IOC_NONE 1U -#define _IOC_READ 2U -#define _IOC_WRITE 4U - -#define _IOC(dir,type,nr,size) \ - (((dir) << _IOC_DIRSHIFT) | \ - ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | \ - ((size) << _IOC_SIZESHIFT)) - -/* provoke compile error for invalid uses of size argument */ -extern unsigned int __invalid_size_argument_for_IOC; -#define _IOC_TYPECHECK(t) \ - ((sizeof(t) == sizeof(t[1]) && \ - sizeof(t) < (1 << _IOC_SIZEBITS)) ? \ - sizeof(t) : __invalid_size_argument_for_IOC) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode them.. 
*/ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* various drivers, such as the pcmcia stuff, need these... */ -#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) -#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) -#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) -#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) -#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) - -#endif diff --git a/include/asm-ppc/ioctls.h b/include/asm-ppc/ioctls.h deleted file mode 100644 index f5b7f2b055e..00000000000 --- a/include/asm-ppc/ioctls.h +++ /dev/null @@ -1,107 +0,0 @@ -#ifndef _ASM_PPC_IOCTLS_H -#define _ASM_PPC_IOCTLS_H - -#include - -#define FIOCLEX _IO('f', 1) -#define FIONCLEX _IO('f', 2) -#define FIOASYNC _IOW('f', 125, int) -#define FIONBIO _IOW('f', 126, int) -#define FIONREAD _IOR('f', 127, int) -#define TIOCINQ FIONREAD -#define FIOQSIZE _IOR('f', 128, loff_t) - -#define TIOCGETP _IOR('t', 8, struct sgttyb) -#define TIOCSETP _IOW('t', 9, struct sgttyb) -#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ - -#define TIOCSETC _IOW('t', 17, struct tchars) -#define TIOCGETC _IOR('t', 18, struct tchars) -#define TCGETS _IOR('t', 19, struct termios) -#define TCSETS _IOW('t', 20, struct termios) -#define TCSETSW _IOW('t', 21, struct termios) -#define TCSETSF _IOW('t', 22, struct termios) - -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) - -#define TCSBRK _IO('t', 29) -#define TCXONC _IO('t', 30) -#define TCFLSH _IO('t', 31) - -#define TIOCSWINSZ _IOW('t', 103, struct winsize) -#define TIOCGWINSZ _IOR('t', 104, struct winsize) -#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ -#define TIOCSTOP _IO('t', 111) /* stop 
output, like ^S */ -#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ - -#define TIOCGLTC _IOR('t', 116, struct ltchars) -#define TIOCSLTC _IOW('t', 117, struct ltchars) -#define TIOCSPGRP _IOW('t', 118, int) -#define TIOCGPGRP _IOR('t', 119, int) - -#define TIOCEXCL 0x540C -#define TIOCNXCL 0x540D -#define TIOCSCTTY 0x540E - -#define TIOCSTI 0x5412 -#define TIOCMGET 0x5415 -#define TIOCMBIS 0x5416 -#define TIOCMBIC 0x5417 -#define TIOCMSET 0x5418 -# define TIOCM_LE 0x001 -# define TIOCM_DTR 0x002 -# define TIOCM_RTS 0x004 -# define TIOCM_ST 0x008 -# define TIOCM_SR 0x010 -# define TIOCM_CTS 0x020 -# define TIOCM_CAR 0x040 -# define TIOCM_RNG 0x080 -# define TIOCM_DSR 0x100 -# define TIOCM_CD TIOCM_CAR -# define TIOCM_RI TIOCM_RNG - -#define TIOCGSOFTCAR 0x5419 -#define TIOCSSOFTCAR 0x541A -#define TIOCLINUX 0x541C -#define TIOCCONS 0x541D -#define TIOCGSERIAL 0x541E -#define TIOCSSERIAL 0x541F -#define TIOCPKT 0x5420 -# define TIOCPKT_DATA 0 -# define TIOCPKT_FLUSHREAD 1 -# define TIOCPKT_FLUSHWRITE 2 -# define TIOCPKT_STOP 4 -# define TIOCPKT_START 8 -# define TIOCPKT_NOSTOP 16 -# define TIOCPKT_DOSTOP 32 - - -#define TIOCNOTTY 0x5422 -#define TIOCSETD 0x5423 -#define TIOCGETD 0x5424 -#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ -#define TIOCSBRK 0x5427 /* BSD compatibility */ -#define TIOCCBRK 0x5428 /* BSD compatibility */ -#define TIOCGSID 0x5429 /* Return the session ID of FD */ -#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ -#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ - -#define TIOCSERCONFIG 0x5453 -#define TIOCSERGWILD 0x5454 -#define TIOCSERSWILD 0x5455 -#define TIOCGLCKTRMIOS 0x5456 -#define TIOCSLCKTRMIOS 0x5457 -#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ -#define TIOCSERGETLSR 0x5459 /* Get line status register */ - /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ -# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#define 
TIOCSERGETMULTI 0x545A /* Get multiport config */ -#define TIOCSERSETMULTI 0x545B /* Set multiport config */ - -#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ -#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ - -#endif /* _ASM_PPC_IOCTLS_H */ diff --git a/include/asm-ppc/local.h b/include/asm-ppc/local.h deleted file mode 100644 index b08e3eced10..00000000000 --- a/include/asm-ppc/local.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __PPC_LOCAL_H -#define __PPC_LOCAL_H - -#include <asm-generic/local.h> - -#endif /* __PPC_LOCAL_H */ diff --git a/include/asm-ppc/namei.h b/include/asm-ppc/namei.h deleted file mode 100644 index 29c9ec83213..00000000000 --- a/include/asm-ppc/namei.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * include/asm-ppc/namei.h - * Adapted from include/asm-alpha/namei.h - * - * Included from fs/namei.c - */ - -#ifdef __KERNEL__ -#ifndef __PPC_NAMEI_H -#define __PPC_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. 
- */ - -#define __emul_prefix() NULL - -#endif /* __PPC_NAMEI_H */ -#endif /* __KERNEL__ */ diff --git a/include/asm-ppc/percpu.h b/include/asm-ppc/percpu.h deleted file mode 100644 index d66667cd587..00000000000 --- a/include/asm-ppc/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ARCH_PPC_PERCPU__ -#define __ARCH_PPC_PERCPU__ - -#include <asm-generic/percpu.h> - -#endif /* __ARCH_PPC_PERCPU__ */ diff --git a/include/asm-ppc/poll.h b/include/asm-ppc/poll.h deleted file mode 100644 index be5024913c6..00000000000 --- a/include/asm-ppc/poll.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __PPC_POLL_H -#define __PPC_POLL_H - -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 -#define POLLRDNORM 0x0040 -#define POLLRDBAND 0x0080 -#define POLLWRNORM 0x0100 -#define POLLWRBAND 0x0200 -#define POLLMSG 0x0400 -#define POLLREMOVE 0x1000 - -struct pollfd { - int fd; - short events; - short revents; -}; - -#endif diff --git a/include/asm-ppc/resource.h b/include/asm-ppc/resource.h deleted file mode 100644 index 86a1ea23a6e..00000000000 --- a/include/asm-ppc/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _PPC_RESOURCE_H -#define _PPC_RESOURCE_H - -#include <asm-generic/resource.h> - -#endif diff --git a/include/asm-ppc/shmparam.h b/include/asm-ppc/shmparam.h deleted file mode 100644 index d6250602ae6..00000000000 --- a/include/asm-ppc/shmparam.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _PPC_SHMPARAM_H -#define _PPC_SHMPARAM_H - -#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ - -#endif /* _PPC_SHMPARAM_H */ diff --git a/include/asm-ppc/string.h b/include/asm-ppc/string.h deleted file mode 100644 index 22557599739..00000000000 --- a/include/asm-ppc/string.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef _PPC_STRING_H_ -#define _PPC_STRING_H_ - -#ifdef __KERNEL__ - -#define __HAVE_ARCH_STRCPY -#define __HAVE_ARCH_STRNCPY -#define __HAVE_ARCH_STRLEN -#define __HAVE_ARCH_STRCMP -#define __HAVE_ARCH_STRCAT -#define __HAVE_ARCH_MEMSET 
-#define __HAVE_ARCH_MEMCPY -#define __HAVE_ARCH_MEMMOVE -#define __HAVE_ARCH_MEMCMP -#define __HAVE_ARCH_MEMCHR - -extern int strcasecmp(const char *, const char *); -extern int strncasecmp(const char *, const char *, int); -extern char * strcpy(char *,const char *); -extern char * strncpy(char *,const char *, __kernel_size_t); -extern __kernel_size_t strlen(const char *); -extern int strcmp(const char *,const char *); -extern char * strcat(char *, const char *); -extern void * memset(void *,int,__kernel_size_t); -extern void * memcpy(void *,const void *,__kernel_size_t); -extern void * memmove(void *,const void *,__kernel_size_t); -extern int memcmp(const void *,const void *,__kernel_size_t); -extern void * memchr(const void *,int,__kernel_size_t); - -#endif /* __KERNEL__ */ - -#endif diff --git a/include/asm-ppc/unaligned.h b/include/asm-ppc/unaligned.h deleted file mode 100644 index 45520d9b85d..00000000000 --- a/include/asm-ppc/unaligned.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifdef __KERNEL__ -#ifndef __PPC_UNALIGNED_H -#define __PPC_UNALIGNED_H - -/* - * The PowerPC can do unaligned accesses itself in big endian mode. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. - */ - -#define get_unaligned(ptr) (*(ptr)) - -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) - -#endif -#endif /* __KERNEL__ */ diff --git a/include/asm-ppc64/errno.h b/include/asm-ppc64/errno.h deleted file mode 100644 index 69bc3b0c6cb..00000000000 --- a/include/asm-ppc64/errno.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _PPC64_ERRNO_H -#define _PPC64_ERRNO_H - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include - -#undef EDEADLOCK -#define EDEADLOCK 58 /* File locking deadlock error */ - -#define _LAST_ERRNO 516 - -#endif diff --git a/include/asm-ppc64/ioctl.h b/include/asm-ppc64/ioctl.h deleted file mode 100644 index 42b8c5da7fb..00000000000 --- a/include/asm-ppc64/ioctl.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef _PPC64_IOCTL_H -#define _PPC64_IOCTL_H - - -/* - * This was copied from the alpha as it's a bit cleaner there. - * -- Cort - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define _IOC_NRBITS 8 -#define _IOC_TYPEBITS 8 -#define _IOC_SIZEBITS 13 -#define _IOC_DIRBITS 3 - -#define _IOC_NRMASK ((1 << _IOC_NRBITS)-1) -#define _IOC_TYPEMASK ((1 << _IOC_TYPEBITS)-1) -#define _IOC_SIZEMASK ((1 << _IOC_SIZEBITS)-1) -#define _IOC_DIRMASK ((1 << _IOC_DIRBITS)-1) - -#define _IOC_NRSHIFT 0 -#define _IOC_TYPESHIFT (_IOC_NRSHIFT+_IOC_NRBITS) -#define _IOC_SIZESHIFT (_IOC_TYPESHIFT+_IOC_TYPEBITS) -#define _IOC_DIRSHIFT (_IOC_SIZESHIFT+_IOC_SIZEBITS) - -/* - * Direction bits _IOC_NONE could be 0, but OSF/1 gives it a bit. - * And this turns out useful to catch old ioctl numbers in header - * files for us. - */ -#define _IOC_NONE 1U -#define _IOC_READ 2U -#define _IOC_WRITE 4U - -#define _IOC(dir,type,nr,size) \ - (((dir) << _IOC_DIRSHIFT) | \ - ((type) << _IOC_TYPESHIFT) | \ - ((nr) << _IOC_NRSHIFT) | \ - ((size) << _IOC_SIZESHIFT)) - -/* provoke compile error for invalid uses of size argument */ -extern unsigned int __invalid_size_argument_for_IOC; -#define _IOC_TYPECHECK(t) \ - ((sizeof(t) == sizeof(t[1]) && \ - sizeof(t) < (1 << _IOC_SIZEBITS)) ? 
\ - sizeof(t) : __invalid_size_argument_for_IOC) - -/* used to create numbers */ -#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0) -#define _IOR(type,nr,size) _IOC(_IOC_READ,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOW(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOWR(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),(_IOC_TYPECHECK(size))) -#define _IOR_BAD(type,nr,size) _IOC(_IOC_READ,(type),(nr),sizeof(size)) -#define _IOW_BAD(type,nr,size) _IOC(_IOC_WRITE,(type),(nr),sizeof(size)) -#define _IOWR_BAD(type,nr,size) _IOC(_IOC_READ|_IOC_WRITE,(type),(nr),sizeof(size)) - -/* used to decode them.. */ -#define _IOC_DIR(nr) (((nr) >> _IOC_DIRSHIFT) & _IOC_DIRMASK) -#define _IOC_TYPE(nr) (((nr) >> _IOC_TYPESHIFT) & _IOC_TYPEMASK) -#define _IOC_NR(nr) (((nr) >> _IOC_NRSHIFT) & _IOC_NRMASK) -#define _IOC_SIZE(nr) (((nr) >> _IOC_SIZESHIFT) & _IOC_SIZEMASK) - -/* various drivers, such as the pcmcia stuff, need these... */ -#define IOC_IN (_IOC_WRITE << _IOC_DIRSHIFT) -#define IOC_OUT (_IOC_READ << _IOC_DIRSHIFT) -#define IOC_INOUT ((_IOC_WRITE|_IOC_READ) << _IOC_DIRSHIFT) -#define IOCSIZE_MASK (_IOC_SIZEMASK << _IOC_SIZESHIFT) -#define IOCSIZE_SHIFT (_IOC_SIZESHIFT) - -#endif /* _PPC64_IOCTL_H */ diff --git a/include/asm-ppc64/ioctls.h b/include/asm-ppc64/ioctls.h deleted file mode 100644 index 48796bf3e4f..00000000000 --- a/include/asm-ppc64/ioctls.h +++ /dev/null @@ -1,114 +0,0 @@ -#ifndef _ASM_PPC64_IOCTLS_H -#define _ASM_PPC64_IOCTLS_H - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#include - -#define FIOCLEX _IO('f', 1) -#define FIONCLEX _IO('f', 2) -#define FIOASYNC _IOW('f', 125, int) -#define FIONBIO _IOW('f', 126, int) -#define FIONREAD _IOR('f', 127, int) -#define TIOCINQ FIONREAD -#define FIOQSIZE _IOR('f', 128, loff_t) - -#define TIOCGETP _IOR('t', 8, struct sgttyb) -#define TIOCSETP _IOW('t', 9, struct sgttyb) -#define TIOCSETN _IOW('t', 10, struct sgttyb) /* TIOCSETP wo flush */ - -#define TIOCSETC _IOW('t', 17, struct tchars) -#define TIOCGETC _IOR('t', 18, struct tchars) -#define TCGETS _IOR('t', 19, struct termios) -#define TCSETS _IOW('t', 20, struct termios) -#define TCSETSW _IOW('t', 21, struct termios) -#define TCSETSF _IOW('t', 22, struct termios) - -#define TCGETA _IOR('t', 23, struct termio) -#define TCSETA _IOW('t', 24, struct termio) -#define TCSETAW _IOW('t', 25, struct termio) -#define TCSETAF _IOW('t', 28, struct termio) - -#define TCSBRK _IO('t', 29) -#define TCXONC _IO('t', 30) -#define TCFLSH _IO('t', 31) - -#define TIOCSWINSZ _IOW('t', 103, struct winsize) -#define TIOCGWINSZ _IOR('t', 104, struct winsize) -#define TIOCSTART _IO('t', 110) /* start output, like ^Q */ -#define TIOCSTOP _IO('t', 111) /* stop output, like ^S */ -#define TIOCOUTQ _IOR('t', 115, int) /* output queue size */ - -#define TIOCGLTC _IOR('t', 116, struct ltchars) -#define TIOCSLTC _IOW('t', 117, struct ltchars) -#define TIOCSPGRP _IOW('t', 118, int) -#define TIOCGPGRP _IOR('t', 119, int) - -#define TIOCEXCL 0x540C -#define TIOCNXCL 0x540D -#define TIOCSCTTY 0x540E - -#define TIOCSTI 0x5412 -#define TIOCMGET 0x5415 -#define TIOCMBIS 0x5416 -#define TIOCMBIC 0x5417 -#define TIOCMSET 0x5418 -# define TIOCM_LE 0x001 -# define TIOCM_DTR 0x002 -# define TIOCM_RTS 0x004 -# define TIOCM_ST 0x008 -# define TIOCM_SR 0x010 -# define TIOCM_CTS 0x020 -# define TIOCM_CAR 0x040 -# define TIOCM_RNG 0x080 -# define TIOCM_DSR 0x100 -# define TIOCM_CD TIOCM_CAR -# define TIOCM_RI TIOCM_RNG - -#define TIOCGSOFTCAR 0x5419 -#define TIOCSSOFTCAR 0x541A 
-#define TIOCLINUX 0x541C -#define TIOCCONS 0x541D -#define TIOCGSERIAL 0x541E -#define TIOCSSERIAL 0x541F -#define TIOCPKT 0x5420 -# define TIOCPKT_DATA 0 -# define TIOCPKT_FLUSHREAD 1 -# define TIOCPKT_FLUSHWRITE 2 -# define TIOCPKT_STOP 4 -# define TIOCPKT_START 8 -# define TIOCPKT_NOSTOP 16 -# define TIOCPKT_DOSTOP 32 - - -#define TIOCNOTTY 0x5422 -#define TIOCSETD 0x5423 -#define TIOCGETD 0x5424 -#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */ -#define TIOCSBRK 0x5427 /* BSD compatibility */ -#define TIOCCBRK 0x5428 /* BSD compatibility */ -#define TIOCGSID 0x5429 /* Return the session ID of FD */ -#define TIOCGPTN _IOR('T',0x30, unsigned int) /* Get Pty Number (of pty-mux device) */ -#define TIOCSPTLCK _IOW('T',0x31, int) /* Lock/unlock Pty */ - -#define TIOCSERCONFIG 0x5453 -#define TIOCSERGWILD 0x5454 -#define TIOCSERSWILD 0x5455 -#define TIOCGLCKTRMIOS 0x5456 -#define TIOCSLCKTRMIOS 0x5457 -#define TIOCSERGSTRUCT 0x5458 /* For debugging only */ -#define TIOCSERGETLSR 0x5459 /* Get line status register */ - /* ioctl (fd, TIOCSERGETLSR, &result) where result may be as below */ -# define TIOCSER_TEMT 0x01 /* Transmitter physically empty */ -#define TIOCSERGETMULTI 0x545A /* Get multiport config */ -#define TIOCSERSETMULTI 0x545B /* Set multiport config */ - -#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */ -#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */ - -#endif /* _ASM_PPC64_IOCTLS_H */ diff --git a/include/asm-ppc64/local.h b/include/asm-ppc64/local.h deleted file mode 100644 index c11c530f74d..00000000000 --- a/include/asm-ppc64/local.h +++ /dev/null @@ -1 +0,0 @@ -#include diff --git a/include/asm-ppc64/namei.h b/include/asm-ppc64/namei.h deleted file mode 100644 index a1412a2d102..00000000000 --- a/include/asm-ppc64/namei.h +++ /dev/null @@ -1,23 +0,0 @@ -/* - * linux/include/asm-ppc/namei.h - * Adapted from linux/include/asm-alpha/namei.h - * - * Included from linux/fs/namei.c - * - * This 
program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef __PPC64_NAMEI_H -#define __PPC64_NAMEI_H - -/* This dummy routine maybe changed to something useful - * for /usr/gnemul/ emulation stuff. - * Look at asm-sparc/namei.h for details. - */ - -#define __emul_prefix() NULL - -#endif /* __PPC64_NAMEI_H */ diff --git a/include/asm-ppc64/percpu.h b/include/asm-ppc64/percpu.h deleted file mode 100644 index 60a659a4ce1..00000000000 --- a/include/asm-ppc64/percpu.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef __ARCH_PPC64_PERCPU__ -#define __ARCH_PPC64_PERCPU__ - -#include <asm-generic/percpu.h> - -#endif /* __ARCH_PPC64_PERCPU__ */ diff --git a/include/asm-ppc64/poll.h b/include/asm-ppc64/poll.h deleted file mode 100644 index 370fa3ba0db..00000000000 --- a/include/asm-ppc64/poll.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef __PPC64_POLL_H -#define __PPC64_POLL_H - -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#define POLLIN 0x0001 -#define POLLPRI 0x0002 -#define POLLOUT 0x0004 -#define POLLERR 0x0008 -#define POLLHUP 0x0010 -#define POLLNVAL 0x0020 -#define POLLRDNORM 0x0040 -#define POLLRDBAND 0x0080 -#define POLLWRNORM 0x0100 -#define POLLWRBAND 0x0200 -#define POLLMSG 0x0400 -#define POLLREMOVE 0x1000 - -struct pollfd { - int fd; - short events; - short revents; -}; - -#endif /* __PPC64_POLL_H */ diff --git a/include/asm-ppc64/resource.h b/include/asm-ppc64/resource.h deleted file mode 100644 index add031b9dfd..00000000000 --- a/include/asm-ppc64/resource.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _PPC64_RESOURCE_H -#define _PPC64_RESOURCE_H - -#include - -#endif /* _PPC64_RESOURCE_H */ diff --git a/include/asm-ppc64/shmparam.h b/include/asm-ppc64/shmparam.h deleted file mode 100644 index b2825ceff05..00000000000 --- a/include/asm-ppc64/shmparam.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef _PPC64_SHMPARAM_H -#define _PPC64_SHMPARAM_H - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#define SHMLBA PAGE_SIZE /* attach addr a multiple of this */ - -#endif /* _PPC64_SHMPARAM_H */ diff --git a/include/asm-ppc64/string.h b/include/asm-ppc64/string.h deleted file mode 100644 index eeca68ef1e9..00000000000 --- a/include/asm-ppc64/string.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _PPC64_STRING_H_ -#define _PPC64_STRING_H_ - -/* - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#define __HAVE_ARCH_STRCPY -#define __HAVE_ARCH_STRNCPY -#define __HAVE_ARCH_STRLEN -#define __HAVE_ARCH_STRCMP -#define __HAVE_ARCH_STRCAT -#define __HAVE_ARCH_MEMSET -#define __HAVE_ARCH_MEMCPY -#define __HAVE_ARCH_MEMMOVE -#define __HAVE_ARCH_MEMCMP -#define __HAVE_ARCH_MEMCHR - -extern int strcasecmp(const char *, const char *); -extern int strncasecmp(const char *, const char *, int); -extern char * strcpy(char *,const char *); -extern char * strncpy(char *,const char *, __kernel_size_t); -extern __kernel_size_t strlen(const char *); -extern int strcmp(const char *,const char *); -extern char * strcat(char *, const char *); -extern void * memset(void *,int,__kernel_size_t); -extern void * memcpy(void *,const void *,__kernel_size_t); -extern void * memmove(void *,const void *,__kernel_size_t); -extern int memcmp(const void *,const void *,__kernel_size_t); -extern void * memchr(const void *,int,__kernel_size_t); - -#endif /* _PPC64_STRING_H_ */ diff --git a/include/asm-ppc64/unaligned.h b/include/asm-ppc64/unaligned.h deleted file mode 100644 index 636e93c4f37..00000000000 --- a/include/asm-ppc64/unaligned.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef __PPC64_UNALIGNED_H -#define __PPC64_UNALIGNED_H - -/* - * The PowerPC can do unaligned accesses itself in big endian mode. - * - * The strange macros are there to make sure these can't - * be misused in a way that makes them not work on other - * architectures where unaligned accesses aren't as simple. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ - -#define get_unaligned(ptr) (*(ptr)) - -#define put_unaligned(val, ptr) ((void)( *(ptr) = (val) )) - -#endif /* __PPC64_UNALIGNED_H */ -- cgit v1.2.3 From 04ed65190a5d1562220dd3a7fc9eac2402c7104c Mon Sep 17 00:00:00 2001 From: Jake Moilanen Date: Wed, 24 Aug 2005 15:22:12 -0500 Subject: [PATCH] oprofile PVR 970MP Here's the 970MP's PVR (processor version register) entry for oprofile. Signed-off-by: Jake Moilanen Signed-off-by: Paul Mackerras --- include/asm-ppc64/processor.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h index 50b14c0ddb8..7bd4796f123 100644 --- a/include/asm-ppc64/processor.h +++ b/include/asm-ppc64/processor.h @@ -268,6 +268,7 @@ #define PV_970FX 0x003C #define PV_630 0x0040 #define PV_630p 0x0041 +#define PV_970MP 0x0044 #define PV_BE 0x0070 /* Platforms supported by PPC64 */ -- cgit v1.2.3 From 717522ff44f1fbee5ea09e83d7cd4b5c956e30f9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Wed, 24 Aug 2005 08:53:03 +1000 Subject: [PATCH] ppc64: Add CONFIG_HZ While ppc64 has the CONFIG_HZ Kconfig option, it wasnt actually being used. Connect it up and set all platforms to 250Hz. Signed-off-by: Anton Blanchard Signed-off-by: Paul Mackerras --- include/asm-ppc64/param.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ppc64/param.h b/include/asm-ppc64/param.h index 1fad38dcf70..76c212d475b 100644 --- a/include/asm-ppc64/param.h +++ b/include/asm-ppc64/param.h @@ -1,6 +1,8 @@ #ifndef _ASM_PPC64_PARAM_H #define _ASM_PPC64_PARAM_H +#include <linux/config.h> + /* * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -9,7 +11,7 @@ */ #ifdef __KERNEL__ -# define HZ 1000 /* Internal kernel timer frequency */ +# define HZ CONFIG_HZ /* Internal kernel timer frequency */ # define USER_HZ 100 /* .. 
some user interfaces are in "ticks" */ # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ #endif -- cgit v1.2.3 From d8971fcb702e24d1e22c77fd1772f182ffee87e3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 29 Aug 2005 22:51:28 -0700 Subject: [INET]: compile errors when DEBUG is defined Fix build problem found by compiling driver with DEBUG defined that used tcp.h. Since pr_debug(arg) expands to printk("<7>" arg) the argument needs to be string that can be concatenated. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 8a87a3a4f10..651f824c100 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -147,7 +147,7 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) } #ifdef INET_CSK_DEBUG else { - pr_debug(inet_csk_timer_bug_msg); + pr_debug("%s", inet_csk_timer_bug_msg); } #endif } @@ -180,7 +180,7 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, } #ifdef INET_CSK_DEBUG else { - pr_debug(inet_csk_timer_bug_msg); + pr_debug("%s", inet_csk_timer_bug_msg); } #endif } -- cgit v1.2.3