aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-05-19 15:29:23 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-05-19 15:29:23 -0700
commit804c64ea864d0a8ee13f3de0b74158a3e9c3166d (patch)
tree842b223e9db75ece9f4e2a5daf0f519b07b4a92a
parent49a43876b935c811cfd29d8fe998a6912a1cc5c4 (diff)
parentaa1c6a6f7f0518b42994d02756a41cbfdcac1916 (diff)
Merge of rsync://rsync.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6.git/
-rw-r--r--drivers/net/tg3.c480
-rw-r--r--drivers/net/tg3.h8
-rw-r--r--fs/namei.c1
-rw-r--r--include/net/act_generic.h4
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c1
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c28
-rw-r--r--net/ipv6/ip6_output.c14
-rw-r--r--net/netlink/af_netlink.c13
-rw-r--r--net/unix/af_unix.c28
-rw-r--r--net/xfrm/xfrm_algo.c2
-rw-r--r--net/xfrm/xfrm_user.c15
12 files changed, 423 insertions, 179 deletions
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index f79b02e80e7..4d2bdbdd34e 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -420,7 +420,8 @@ static void tg3_enable_ints(struct tg3 *tp)
{
tw32(TG3PCI_MISC_HOST_CTRL,
(tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
- tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000);
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+ (tp->last_tag << 24));
tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
tg3_cond_int(tp);
@@ -455,10 +456,16 @@ static void tg3_restart_ints(struct tg3 *tp)
{
tw32(TG3PCI_MISC_HOST_CTRL,
(tp->misc_host_ctrl & ~MISC_HOST_CTRL_MASK_PCI_INT));
- tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000000);
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+ tp->last_tag << 24);
mmiowb();
- if (tg3_has_work(tp))
+ /* When doing tagged status, this work check is unnecessary.
+ * The last_tag we write above tells the chip which piece of
+ * work we've completed.
+ */
+ if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) &&
+ tg3_has_work(tp))
tw32(HOSTCC_MODE, tp->coalesce_mode |
(HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
}
@@ -2500,7 +2507,7 @@ static int tg3_setup_phy(struct tg3 *tp, int force_reset)
if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
if (netif_carrier_ok(tp->dev)) {
tw32(HOSTCC_STAT_COAL_TICKS,
- DEFAULT_STAT_COAL_TICKS);
+ tp->coal.stats_block_coalesce_usecs);
} else {
tw32(HOSTCC_STAT_COAL_TICKS, 0);
}
@@ -2886,7 +2893,6 @@ static int tg3_poll(struct net_device *netdev, int *budget)
* All RX "locking" is done by ensuring outside
* code synchronizes with dev->poll()
*/
- done = 1;
if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr) {
int orig_budget = *budget;
int work_done;
@@ -2898,12 +2904,14 @@ static int tg3_poll(struct net_device *netdev, int *budget)
*budget -= work_done;
netdev->quota -= work_done;
-
- if (work_done >= orig_budget)
- done = 0;
}
+ if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+ tp->last_tag = sblk->status_tag;
+ rmb();
+
/* if no more work, tell net stack and NIC we're done */
+ done = !tg3_has_work(tp);
if (done) {
spin_lock_irqsave(&tp->lock, flags);
__netif_rx_complete(netdev);
@@ -2928,22 +2936,21 @@ static irqreturn_t tg3_msi(int irq, void *dev_id, struct pt_regs *regs)
spin_lock_irqsave(&tp->lock, flags);
/*
- * writing any value to intr-mbox-0 clears PCI INTA# and
+ * Writing any value to intr-mbox-0 clears PCI INTA# and
* chip-internal interrupt pending events.
- * writing non-zero to intr-mbox-0 additional tells the
+ * Writing non-zero to intr-mbox-0 additional tells the
* NIC to stop sending us irqs, engaging "in-intr-handler"
* event coalescing.
*/
tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
+ tp->last_tag = sblk->status_tag;
sblk->status &= ~SD_STATUS_UPDATED;
-
if (likely(tg3_has_work(tp)))
netif_rx_schedule(dev); /* schedule NAPI poll */
else {
- /* no work, re-enable interrupts
- */
+ /* No work, re-enable interrupts. */
tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
- 0x00000000);
+ tp->last_tag << 24);
}
spin_unlock_irqrestore(&tp->lock, flags);
@@ -2969,21 +2976,62 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
if ((sblk->status & SD_STATUS_UPDATED) ||
!(tr32(TG3PCI_PCISTATE) & PCISTATE_INT_NOT_ACTIVE)) {
/*
- * writing any value to intr-mbox-0 clears PCI INTA# and
+ * Writing any value to intr-mbox-0 clears PCI INTA# and
* chip-internal interrupt pending events.
- * writing non-zero to intr-mbox-0 additional tells the
+ * Writing non-zero to intr-mbox-0 additional tells the
* NIC to stop sending us irqs, engaging "in-intr-handler"
* event coalescing.
*/
tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
0x00000001);
+ sblk->status &= ~SD_STATUS_UPDATED;
+ if (likely(tg3_has_work(tp)))
+ netif_rx_schedule(dev); /* schedule NAPI poll */
+ else {
+ /* No work, shared interrupt perhaps? re-enable
+ * interrupts, and flush that PCI write
+ */
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+ 0x00000000);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ }
+ } else { /* shared interrupt */
+ handled = 0;
+ }
+
+ spin_unlock_irqrestore(&tp->lock, flags);
+
+ return IRQ_RETVAL(handled);
+}
+
+static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id, struct pt_regs *regs)
+{
+ struct net_device *dev = dev_id;
+ struct tg3 *tp = netdev_priv(dev);
+ struct tg3_hw_status *sblk = tp->hw_status;
+ unsigned long flags;
+ unsigned int handled = 1;
+
+ spin_lock_irqsave(&tp->lock, flags);
+
+ /* In INTx mode, it is possible for the interrupt to arrive at
+ * the CPU before the status block posted prior to the interrupt.
+ * Reading the PCI State register will confirm whether the
+ * interrupt is ours and will flush the status block.
+ */
+ if ((sblk->status & SD_STATUS_UPDATED) ||
+ !(tr32(TG3PCI_PCISTATE) & PCISTATE_INT_NOT_ACTIVE)) {
/*
- * Flush PCI write. This also guarantees that our
- * status block has been flushed to host memory.
+ * writing any value to intr-mbox-0 clears PCI INTA# and
+ * chip-internal interrupt pending events.
+ * writing non-zero to intr-mbox-0 additional tells the
+ * NIC to stop sending us irqs, engaging "in-intr-handler"
+ * event coalescing.
*/
- tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
+ 0x00000001);
+ tp->last_tag = sblk->status_tag;
sblk->status &= ~SD_STATUS_UPDATED;
-
if (likely(tg3_has_work(tp)))
netif_rx_schedule(dev); /* schedule NAPI poll */
else {
@@ -2991,7 +3039,7 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
* interrupts, and flush that PCI write
*/
tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW,
- 0x00000000);
+ tp->last_tag << 24);
tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
}
} else { /* shared interrupt */
@@ -5044,6 +5092,27 @@ static void tg3_set_bdinfo(struct tg3 *tp, u32 bdinfo_addr,
}
static void __tg3_set_rx_mode(struct net_device *);
+static void tg3_set_coalesce(struct tg3 *tp, struct ethtool_coalesce *ec)
+{
+ tw32(HOSTCC_RXCOL_TICKS, ec->rx_coalesce_usecs);
+ tw32(HOSTCC_TXCOL_TICKS, ec->tx_coalesce_usecs);
+ tw32(HOSTCC_RXMAX_FRAMES, ec->rx_max_coalesced_frames);
+ tw32(HOSTCC_TXMAX_FRAMES, ec->tx_max_coalesced_frames);
+ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ tw32(HOSTCC_RXCOAL_TICK_INT, ec->rx_coalesce_usecs_irq);
+ tw32(HOSTCC_TXCOAL_TICK_INT, ec->tx_coalesce_usecs_irq);
+ }
+ tw32(HOSTCC_RXCOAL_MAXF_INT, ec->rx_max_coalesced_frames_irq);
+ tw32(HOSTCC_TXCOAL_MAXF_INT, ec->tx_max_coalesced_frames_irq);
+ if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
+ u32 val = ec->stats_block_coalesce_usecs;
+
+ if (!netif_carrier_ok(tp->dev))
+ val = 0;
+
+ tw32(HOSTCC_STAT_COAL_TICKS, val);
+ }
+}
/* tp->lock is held. */
static int tg3_reset_hw(struct tg3 *tp)
@@ -5366,16 +5435,7 @@ static int tg3_reset_hw(struct tg3 *tp)
udelay(10);
}
- tw32(HOSTCC_RXCOL_TICKS, 0);
- tw32(HOSTCC_TXCOL_TICKS, LOW_TXCOL_TICKS);
- tw32(HOSTCC_RXMAX_FRAMES, 1);
- tw32(HOSTCC_TXMAX_FRAMES, LOW_RXMAX_FRAMES);
- if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
- tw32(HOSTCC_RXCOAL_TICK_INT, 0);
- tw32(HOSTCC_TXCOAL_TICK_INT, 0);
- }
- tw32(HOSTCC_RXCOAL_MAXF_INT, 1);
- tw32(HOSTCC_TXCOAL_MAXF_INT, 0);
+ tg3_set_coalesce(tp, &tp->coal);
/* set status block DMA address */
tw32(HOSTCC_STATUS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
@@ -5388,8 +5448,6 @@ static int tg3_reset_hw(struct tg3 *tp)
* the tg3_periodic_fetch_stats call there, and
* tg3_get_stats to see how this works for 5705/5750 chips.
*/
- tw32(HOSTCC_STAT_COAL_TICKS,
- DEFAULT_STAT_COAL_TICKS);
tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_HIGH,
((u64) tp->stats_mapping >> 32));
tw32(HOSTCC_STATS_BLK_HOST_ADDR + TG3_64BIT_REG_LOW,
@@ -5445,7 +5503,8 @@ static int tg3_reset_hw(struct tg3 *tp)
udelay(100);
tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0);
- tr32(MAILBOX_INTERRUPT_0);
+ tr32(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW);
+ tp->last_tag = 0;
if (!(tp->tg3_flags2 & TG3_FLG2_5705_PLUS)) {
tw32_f(DMAC_MODE, DMAC_MODE_ENABLE);
@@ -5723,31 +5782,33 @@ static void tg3_timer(unsigned long __opaque)
spin_lock_irqsave(&tp->lock, flags);
spin_lock(&tp->tx_lock);
- /* All of this garbage is because when using non-tagged
- * IRQ status the mailbox/status_block protocol the chip
- * uses with the cpu is race prone.
- */
- if (tp->hw_status->status & SD_STATUS_UPDATED) {
- tw32(GRC_LOCAL_CTRL,
- tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
- } else {
- tw32(HOSTCC_MODE, tp->coalesce_mode |
- (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
- }
+ if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)) {
+ /* All of this garbage is because when using non-tagged
+ * IRQ status the mailbox/status_block protocol the chip
+ * uses with the cpu is race prone.
+ */
+ if (tp->hw_status->status & SD_STATUS_UPDATED) {
+ tw32(GRC_LOCAL_CTRL,
+ tp->grc_local_ctrl | GRC_LCLCTRL_SETINT);
+ } else {
+ tw32(HOSTCC_MODE, tp->coalesce_mode |
+ (HOSTCC_MODE_ENABLE | HOSTCC_MODE_NOW));
+ }
- if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
- tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER;
- spin_unlock(&tp->tx_lock);
- spin_unlock_irqrestore(&tp->lock, flags);
- schedule_work(&tp->reset_task);
- return;
+ if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
+ tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER;
+ spin_unlock(&tp->tx_lock);
+ spin_unlock_irqrestore(&tp->lock, flags);
+ schedule_work(&tp->reset_task);
+ return;
+ }
}
- if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
- tg3_periodic_fetch_stats(tp);
-
/* This part only runs once per second. */
if (!--tp->timer_counter) {
+ if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
+ tg3_periodic_fetch_stats(tp);
+
if (tp->tg3_flags & TG3_FLAG_USE_LINKCHG_REG) {
u32 mac_stat;
int phy_event;
@@ -5846,9 +5907,13 @@ static int tg3_test_interrupt(struct tg3 *tp)
if (tp->tg3_flags2 & TG3_FLG2_USING_MSI)
err = request_irq(tp->pdev->irq, tg3_msi,
SA_SAMPLE_RANDOM, dev->name, dev);
- else
- err = request_irq(tp->pdev->irq, tg3_interrupt,
+ else {
+ irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt;
+ if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+ fn = tg3_interrupt_tagged;
+ err = request_irq(tp->pdev->irq, fn,
SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+ }
if (err)
return err;
@@ -5900,9 +5965,14 @@ static int tg3_test_msi(struct tg3 *tp)
tp->tg3_flags2 &= ~TG3_FLG2_USING_MSI;
- err = request_irq(tp->pdev->irq, tg3_interrupt,
- SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+ {
+ irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt;
+ if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+ fn = tg3_interrupt_tagged;
+ err = request_irq(tp->pdev->irq, fn,
+ SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+ }
if (err)
return err;
@@ -5948,7 +6018,13 @@ static int tg3_open(struct net_device *dev)
if ((tp->tg3_flags2 & TG3_FLG2_5750_PLUS) &&
(GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5750_AX) &&
(GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5750_BX)) {
- if (pci_enable_msi(tp->pdev) == 0) {
+ /* All MSI supporting chips should support tagged
+ * status. Assert that this is the case.
+ */
+ if (!(tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)) {
+ printk(KERN_WARNING PFX "%s: MSI without TAGGED? "
+ "Not using MSI.\n", tp->dev->name);
+ } else if (pci_enable_msi(tp->pdev) == 0) {
u32 msi_mode;
msi_mode = tr32(MSGINT_MODE);
@@ -5959,9 +6035,14 @@ static int tg3_open(struct net_device *dev)
if (tp->tg3_flags2 & TG3_FLG2_USING_MSI)
err = request_irq(tp->pdev->irq, tg3_msi,
SA_SAMPLE_RANDOM, dev->name, dev);
- else
- err = request_irq(tp->pdev->irq, tg3_interrupt,
+ else {
+ irqreturn_t (*fn)(int, void *, struct pt_regs *)=tg3_interrupt;
+ if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+ fn = tg3_interrupt_tagged;
+
+ err = request_irq(tp->pdev->irq, fn,
SA_SHIRQ | SA_SAMPLE_RANDOM, dev->name, dev);
+ }
if (err) {
if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
@@ -5980,9 +6061,16 @@ static int tg3_open(struct net_device *dev)
tg3_halt(tp, 1);
tg3_free_rings(tp);
} else {
- tp->timer_offset = HZ / 10;
- tp->timer_counter = tp->timer_multiplier = 10;
- tp->asf_counter = tp->asf_multiplier = (10 * 120);
+ if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS)
+ tp->timer_offset = HZ;
+ else
+ tp->timer_offset = HZ / 10;
+
+ BUG_ON(tp->timer_offset > HZ);
+ tp->timer_counter = tp->timer_multiplier =
+ (HZ / tp->timer_offset);
+ tp->asf_counter = tp->asf_multiplier =
+ ((HZ / tp->timer_offset) * 120);
init_timer(&tp->timer);
tp->timer.expires = jiffies + tp->timer_offset;
@@ -6005,6 +6093,7 @@ static int tg3_open(struct net_device *dev)
if (tp->tg3_flags2 & TG3_FLG2_USING_MSI) {
err = tg3_test_msi(tp);
+
if (err) {
spin_lock_irq(&tp->lock);
spin_lock(&tp->tx_lock);
@@ -7203,6 +7292,14 @@ static void tg3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
}
#endif
+static int tg3_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
+{
+ struct tg3 *tp = netdev_priv(dev);
+
+ memcpy(ec, &tp->coal, sizeof(*ec));
+ return 0;
+}
+
static struct ethtool_ops tg3_ethtool_ops = {
.get_settings = tg3_get_settings,
.set_settings = tg3_set_settings,
@@ -7235,6 +7332,7 @@ static struct ethtool_ops tg3_ethtool_ops = {
.get_strings = tg3_get_strings,
.get_stats_count = tg3_get_stats_count,
.get_ethtool_stats = tg3_get_ethtool_stats,
+ .get_coalesce = tg3_get_coalesce,
};
static void __devinit tg3_get_eeprom_size(struct tg3 *tp)
@@ -8422,15 +8520,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
if (tp->tg3_flags2 & TG3_FLG2_5705_PLUS)
tp->tg3_flags2 |= TG3_FLG2_PHY_BER_BUG;
- /* Only 5701 and later support tagged irq status mode.
- * Also, 5788 chips cannot use tagged irq status.
- *
- * However, since we are using NAPI avoid tagged irq status
- * because the interrupt condition is more difficult to
- * fully clear in that mode.
- */
tp->coalesce_mode = 0;
-
if (GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_AX &&
GET_CHIP_REV(tp->pci_chip_rev_id) != CHIPREV_5700_BX)
tp->coalesce_mode |= HOSTCC_MODE_32BYTE;
@@ -8494,6 +8584,18 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
grc_misc_cfg == GRC_MISC_CFG_BOARD_ID_5788M))
tp->tg3_flags2 |= TG3_FLG2_IS_5788;
+ if (!(tp->tg3_flags2 & TG3_FLG2_IS_5788) &&
+ (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700))
+ tp->tg3_flags |= TG3_FLAG_TAGGED_STATUS;
+ if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) {
+ tp->coalesce_mode |= (HOSTCC_MODE_CLRTICK_RXBD |
+ HOSTCC_MODE_CLRTICK_TXBD);
+
+ tp->misc_host_ctrl |= MISC_HOST_CTRL_TAGGED_STATUS;
+ pci_write_config_dword(tp->pdev, TG3PCI_MISC_HOST_CTRL,
+ tp->misc_host_ctrl);
+ }
+
/* these are limited to 10/100 only */
if ((GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5703 &&
(grc_misc_cfg == 0x8000 || grc_misc_cfg == 0x4000)) ||
@@ -8671,6 +8773,146 @@ static int __devinit tg3_get_device_address(struct tg3 *tp)
return 0;
}
+#define BOUNDARY_SINGLE_CACHELINE 1
+#define BOUNDARY_MULTI_CACHELINE 2
+
+static u32 __devinit tg3_calc_dma_bndry(struct tg3 *tp, u32 val)
+{
+ int cacheline_size;
+ u8 byte;
+ int goal;
+
+ pci_read_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, &byte);
+ if (byte == 0)
+ cacheline_size = 1024;
+ else
+ cacheline_size = (int) byte * 4;
+
+ /* On 5703 and later chips, the boundary bits have no
+ * effect.
+ */
+ if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5700 &&
+ GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701 &&
+ !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS))
+ goto out;
+
+#if defined(CONFIG_PPC64) || defined(CONFIG_IA64) || defined(CONFIG_PARISC)
+ goal = BOUNDARY_MULTI_CACHELINE;
+#else
+#if defined(CONFIG_SPARC64) || defined(CONFIG_ALPHA)
+ goal = BOUNDARY_SINGLE_CACHELINE;
+#else
+ goal = 0;
+#endif
+#endif
+
+ if (!goal)
+ goto out;
+
+ /* PCI controllers on most RISC systems tend to disconnect
+ * when a device tries to burst across a cache-line boundary.
+ * Therefore, letting tg3 do so just wastes PCI bandwidth.
+ *
+ * Unfortunately, for PCI-E there are only limited
+ * write-side controls for this, and thus for reads
+ * we will still get the disconnects. We'll also waste
+ * these PCI cycles for both read and write for chips
+ * other than 5700 and 5701 which do not implement the
+ * boundary bits.
+ */
+ if ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) &&
+ !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) {
+ switch (cacheline_size) {
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ if (goal == BOUNDARY_SINGLE_CACHELINE) {
+ val |= (DMA_RWCTRL_READ_BNDRY_128_PCIX |
+ DMA_RWCTRL_WRITE_BNDRY_128_PCIX);
+ } else {
+ val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX |
+ DMA_RWCTRL_WRITE_BNDRY_384_PCIX);
+ }
+ break;
+
+ case 256:
+ val |= (DMA_RWCTRL_READ_BNDRY_256_PCIX |
+ DMA_RWCTRL_WRITE_BNDRY_256_PCIX);
+ break;
+
+ default:
+ val |= (DMA_RWCTRL_READ_BNDRY_384_PCIX |
+ DMA_RWCTRL_WRITE_BNDRY_384_PCIX);
+ break;
+ };
+ } else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) {
+ switch (cacheline_size) {
+ case 16:
+ case 32:
+ case 64:
+ if (goal == BOUNDARY_SINGLE_CACHELINE) {
+ val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE;
+ val |= DMA_RWCTRL_WRITE_BNDRY_64_PCIE;
+ break;
+ }
+ /* fallthrough */
+ case 128:
+ default:
+ val &= ~DMA_RWCTRL_WRITE_BNDRY_DISAB_PCIE;
+ val |= DMA_RWCTRL_WRITE_BNDRY_128_PCIE;
+ break;
+ };
+ } else {
+ switch (cacheline_size) {
+ case 16:
+ if (goal == BOUNDARY_SINGLE_CACHELINE) {
+ val |= (DMA_RWCTRL_READ_BNDRY_16 |
+ DMA_RWCTRL_WRITE_BNDRY_16);
+ break;
+ }
+ /* fallthrough */
+ case 32:
+ if (goal == BOUNDARY_SINGLE_CACHELINE) {
+ val |= (DMA_RWCTRL_READ_BNDRY_32 |
+ DMA_RWCTRL_WRITE_BNDRY_32);
+ break;
+ }
+ /* fallthrough */
+ case 64:
+ if (goal == BOUNDARY_SINGLE_CACHELINE) {
+ val |= (DMA_RWCTRL_READ_BNDRY_64 |
+ DMA_RWCTRL_WRITE_BNDRY_64);
+ break;
+ }
+ /* fallthrough */
+ case 128:
+ if (goal == BOUNDARY_SINGLE_CACHELINE) {
+ val |= (DMA_RWCTRL_READ_BNDRY_128 |
+ DMA_RWCTRL_WRITE_BNDRY_128);
+ break;
+ }
+ /* fallthrough */
+ case 256:
+ val |= (DMA_RWCTRL_READ_BNDRY_256 |
+ DMA_RWCTRL_WRITE_BNDRY_256);
+ break;
+ case 512:
+ val |= (DMA_RWCTRL_READ_BNDRY_512 |
+ DMA_RWCTRL_WRITE_BNDRY_512);
+ break;
+ case 1024:
+ default:
+ val |= (DMA_RWCTRL_READ_BNDRY_1024 |
+ DMA_RWCTRL_WRITE_BNDRY_1024);
+ break;
+ };
+ }
+
+out:
+ return val;
+}
+
static int __devinit tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dma, int size, int to_device)
{
struct tg3_internal_buffer_desc test_desc;
@@ -8757,7 +8999,7 @@ static int __devinit tg3_do_test_dma(struct tg3 *tp, u32 *buf, dma_addr_t buf_dm
static int __devinit tg3_test_dma(struct tg3 *tp)
{
dma_addr_t buf_dma;
- u32 *buf;
+ u32 *buf, saved_dma_rwctrl;
int ret;
buf = pci_alloc_consistent(tp->pdev, TEST_BUFFER_SIZE, &buf_dma);
@@ -8769,46 +9011,7 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
tp->dma_rwctrl = ((0x7 << DMA_RWCTRL_PCI_WRITE_CMD_SHIFT) |
(0x6 << DMA_RWCTRL_PCI_READ_CMD_SHIFT));
-#ifndef CONFIG_X86
- {
- u8 byte;
- int cacheline_size;
- pci_read_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, &byte);
-
- if (byte == 0)
- cacheline_size = 1024;
- else
- cacheline_size = (int) byte * 4;
-
- switch (cacheline_size) {
- case 16:
- case 32:
- case 64:
- case 128:
- if ((tp->tg3_flags & TG3_FLAG_PCIX_MODE) &&
- !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) {
- tp->dma_rwctrl |=
- DMA_RWCTRL_WRITE_BNDRY_384_PCIX;
- break;
- } else if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) {
- tp->dma_rwctrl &=
- ~(DMA_RWCTRL_PCI_WRITE_CMD);
- tp->dma_rwctrl |=
- DMA_RWCTRL_WRITE_BNDRY_128_PCIE;
- break;
- }
- /* fallthrough */
- case 256:
- if (!(tp->tg3_flags & TG3_FLAG_PCIX_MODE) &&
- !(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS))
- tp->dma_rwctrl |=
- DMA_RWCTRL_WRITE_BNDRY_256;
- else if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS))
- tp->dma_rwctrl |=
- DMA_RWCTRL_WRITE_BNDRY_256_PCIX;
- };
- }
-#endif
+ tp->dma_rwctrl = tg3_calc_dma_bndry(tp, tp->dma_rwctrl);
if (tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS) {
/* DMA read watermark not used on PCIE */
@@ -8827,7 +9030,7 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
if (ccval == 0x6 || ccval == 0x7)
tp->dma_rwctrl |= DMA_RWCTRL_ONE_DMA;
- /* Set bit 23 to renable PCIX hw bug fix */
+ /* Set bit 23 to enable PCIX hw bug fix */
tp->dma_rwctrl |= 0x009f0000;
} else {
tp->dma_rwctrl |= 0x001b000f;
@@ -8868,6 +9071,13 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701)
goto out;
+ /* It is best to perform DMA test with maximum write burst size
+ * to expose the 5700/5701 write DMA bug.
+ */
+ saved_dma_rwctrl = tp->dma_rwctrl;
+ tp->dma_rwctrl &= ~DMA_RWCTRL_WRITE_BNDRY_MASK;
+ tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+
while (1) {
u32 *p = buf, i;
@@ -8906,8 +9116,9 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
if (p[i] == i)
continue;
- if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) ==
- DMA_RWCTRL_WRITE_BNDRY_DISAB) {
+ if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) !=
+ DMA_RWCTRL_WRITE_BNDRY_16) {
+ tp->dma_rwctrl &= ~DMA_RWCTRL_WRITE_BNDRY_MASK;
tp->dma_rwctrl |= DMA_RWCTRL_WRITE_BNDRY_16;
tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
break;
@@ -8924,6 +9135,14 @@ static int __devinit tg3_test_dma(struct tg3 *tp)
break;
}
}
+ if ((tp->dma_rwctrl & DMA_RWCTRL_WRITE_BNDRY_MASK) !=
+ DMA_RWCTRL_WRITE_BNDRY_16) {
+ /* DMA test passed without adjusting DMA boundary,
+ * just restore the calculated DMA boundary
+ */
+ tp->dma_rwctrl = saved_dma_rwctrl;
+ tw32(TG3PCI_DMA_RW_CTRL, tp->dma_rwctrl);
+ }
out:
pci_free_consistent(tp->pdev, TEST_BUFFER_SIZE, buf, buf_dma);
@@ -9011,6 +9230,31 @@ static struct pci_dev * __devinit tg3_find_5704_peer(struct tg3 *tp)
return peer;
}
+static void __devinit tg3_init_coal(struct tg3 *tp)
+{
+ struct ethtool_coalesce *ec = &tp->coal;
+
+ memset(ec, 0, sizeof(*ec));
+ ec->cmd = ETHTOOL_GCOALESCE;
+ ec->rx_coalesce_usecs = LOW_RXCOL_TICKS;
+ ec->tx_coalesce_usecs = LOW_TXCOL_TICKS;
+ ec->rx_max_coalesced_frames = LOW_RXMAX_FRAMES;
+ ec->tx_max_coalesced_frames = LOW_TXMAX_FRAMES;
+ ec->rx_coalesce_usecs_irq = DEFAULT_RXCOAL_TICK_INT;
+ ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT;
+ ec->rx_max_coalesced_frames_irq = DEFAULT_RXCOAL_MAXF_INT;
+ ec->tx_max_coalesced_frames_irq = DEFAULT_TXCOAL_MAXF_INT;
+ ec->stats_block_coalesce_usecs = DEFAULT_STAT_COAL_TICKS;
+
+ if (tp->coalesce_mode & (HOSTCC_MODE_CLRTICK_RXBD |
+ HOSTCC_MODE_CLRTICK_TXBD)) {
+ ec->rx_coalesce_usecs = LOW_RXCOL_TICKS_CLRTCKS;
+ ec->rx_coalesce_usecs_irq = DEFAULT_RXCOAL_TICK_INT_CLRTCKS;
+ ec->tx_coalesce_usecs = LOW_TXCOL_TICKS_CLRTCKS;
+ ec->tx_coalesce_usecs_irq = DEFAULT_TXCOAL_TICK_INT_CLRTCKS;
+ }
+}
+
static int __devinit tg3_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
@@ -9256,6 +9500,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
/* flow control autonegotiation is default behavior */
tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG;
+ tg3_init_coal(tp);
+
err = register_netdev(dev);
if (err) {
printk(KERN_ERR PFX "Cannot register net device, "
@@ -9298,6 +9544,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
(tp->tg3_flags & TG3_FLAG_SPLIT_MODE) != 0,
(tp->tg3_flags2 & TG3_FLG2_NO_ETH_WIRE_SPEED) == 0,
(tp->tg3_flags2 & TG3_FLG2_TSO_CAPABLE) != 0);
+ printk(KERN_INFO "%s: dma_rwctrl[%08x]\n",
+ dev->name, tp->dma_rwctrl);
return 0;
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 8de6f21037b..993f84c93dc 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -876,10 +876,12 @@
#define HOSTCC_STATUS_ERROR_ATTN 0x00000004
#define HOSTCC_RXCOL_TICKS 0x00003c08
#define LOW_RXCOL_TICKS 0x00000032
+#define LOW_RXCOL_TICKS_CLRTCKS 0x00000014
#define DEFAULT_RXCOL_TICKS 0x00000048
#define HIGH_RXCOL_TICKS 0x00000096
#define HOSTCC_TXCOL_TICKS 0x00003c0c
#define LOW_TXCOL_TICKS 0x00000096
+#define LOW_TXCOL_TICKS_CLRTCKS 0x00000048
#define DEFAULT_TXCOL_TICKS 0x0000012c
#define HIGH_TXCOL_TICKS 0x00000145
#define HOSTCC_RXMAX_FRAMES 0x00003c10
@@ -892,8 +894,10 @@
#define HIGH_TXMAX_FRAMES 0x00000052
#define HOSTCC_RXCOAL_TICK_INT 0x00003c18
#define DEFAULT_RXCOAL_TICK_INT 0x00000019
+#define DEFAULT_RXCOAL_TICK_INT_CLRTCKS 0x00000014
#define HOSTCC_TXCOAL_TICK_INT 0x00003c1c
#define DEFAULT_TXCOAL_TICK_INT 0x00000019
+#define DEFAULT_TXCOAL_TICK_INT_CLRTCKS 0x00000014
#define HOSTCC_RXCOAL_MAXF_INT 0x00003c20
#define DEFAULT_RXCOAL_MAXF_INT 0x00000005
#define HOSTCC_TXCOAL_MAXF_INT 0x00003c24
@@ -2023,6 +2027,7 @@ struct tg3 {
struct tg3_hw_status *hw_status;
dma_addr_t status_mapping;
+ u32 last_tag;
u32 msg_enable;
@@ -2068,6 +2073,7 @@ struct tg3 {
u32 rx_offset;
u32 tg3_flags;
+#define TG3_FLAG_TAGGED_STATUS 0x00000001
#define TG3_FLAG_TXD_MBOX_HWBUG 0x00000002
#define TG3_FLAG_RX_CHECKSUMS 0x00000004
#define TG3_FLAG_USE_LINKCHG_REG 0x00000008
@@ -2225,7 +2231,7 @@ struct tg3 {
#define SST_25VF0X0_PAGE_SIZE 4098
-
+ struct ethtool_coalesce coal;
};
#endif /* !(_T3_H) */
diff --git a/fs/namei.c b/fs/namei.c
index defe6781e00..dd78f01b6de 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1580,6 +1580,7 @@ enoent:
fail:
return dentry;
}
+EXPORT_SYMBOL_GPL(lookup_create);
int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
{
diff --git a/include/net/act_generic.h b/include/net/act_generic.h
index 95b120781c1..c9daa7e5230 100644
--- a/include/net/act_generic.h
+++ b/include/net/act_generic.h
@@ -2,8 +2,8 @@
* include/net/act_generic.h
*
*/
-#ifndef ACT_GENERIC_H
-#define ACT_GENERIC_H
+#ifndef _NET_ACT_GENERIC_H
+#define _NET_ACT_GENERIC_H
static inline int tcf_defact_release(struct tcf_defact *p, int bind)
{
int ret = 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index daebd93fd8a..760dc8238d6 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -490,6 +490,14 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
/* Partially cloned skb? */
if (skb_shared(frag))
goto slow_path;
+
+ BUG_ON(frag->sk);
+ if (skb->sk) {
+ sock_hold(skb->sk);
+ frag->sk = skb->sk;
+ frag->destructor = sock_wfree;
+ skb->truesize -= frag->truesize;
+ }
}
/* Everything is OK. Generate! */
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index faa6176bbeb..de21da00057 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -508,7 +508,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
rc = NF_ACCEPT;
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
- __ip_vs_conn_put(cp);
goto out;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 28d9425d5c3..09e82462297 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -940,37 +940,25 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- struct sock *sk = skb->sk;
#ifdef CONFIG_NETFILTER_DEBUG
unsigned int olddebug = skb->nf_debug;
#endif
- if (sk) {
- sock_hold(sk);
- skb_orphan(skb);
- }
+ skb_orphan(skb);
local_bh_disable();
skb = ip_defrag(skb, user);
local_bh_enable();
- if (!skb) {
- if (sk)
- sock_put(sk);
- return skb;
- }
-
- if (sk) {
- skb_set_owner_w(skb, sk);
- sock_put(sk);
- }
-
- ip_send_check(skb->nh.iph);
- skb->nfcache |= NFC_ALTERED;
+ if (skb) {
+ ip_send_check(skb->nh.iph);
+ skb->nfcache |= NFC_ALTERED;
#ifdef CONFIG_NETFILTER_DEBUG
- /* Packet path as if nothing had happened. */
- skb->nf_debug = olddebug;
+ /* Packet path as if nothing had happened. */
+ skb->nf_debug = olddebug;
#endif
+ }
+
return skb;
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 0f0711417c9..b78a5358680 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -552,13 +552,17 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
skb_headroom(frag) < hlen)
goto slow_path;
- /* Correct socket ownership. */
- if (frag->sk == NULL)
- goto slow_path;
-
/* Partially cloned skb? */
if (skb_shared(frag))
goto slow_path;
+
+ BUG_ON(frag->sk);
+ if (skb->sk) {
+ sock_hold(skb->sk);
+ frag->sk = skb->sk;
+ frag->destructor = sock_wfree;
+ skb->truesize -= frag->truesize;
+ }
}
err = 0;
@@ -1116,12 +1120,10 @@ int ip6_push_pending_frames(struct sock *sk)
tail_skb = &(tmp_skb->next);
skb->len += tmp_skb->len;
skb->data_len += tmp_skb->len;
-#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
skb->truesize += tmp_skb->truesize;
__sock_put(tmp_skb->sk);
tmp_skb->destructor = NULL;
tmp_skb->sk = NULL;
-#endif
}
ipv6_addr_copy(final_dst, &fl->fl6_dst);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 733bf52cef3..e41ce458c2a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -735,11 +735,15 @@ static inline int do_one_broadcast(struct sock *sk,
sock_hold(sk);
if (p->skb2 == NULL) {
- if (atomic_read(&p->skb->users) != 1) {
+ if (skb_shared(p->skb)) {
p->skb2 = skb_clone(p->skb, p->allocation);
} else {
- p->skb2 = p->skb;
- atomic_inc(&p->skb->users);
+ p->skb2 = skb_get(p->skb);
+ /*
+ * skb ownership may have been set when
+ * delivered to a previous socket.
+ */
+ skb_orphan(p->skb2);
}
}
if (p->skb2 == NULL) {
@@ -785,11 +789,12 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list)
do_one_broadcast(sk, &info);
+ kfree_skb(skb);
+
netlink_unlock_table();
if (info.skb2)
kfree_skb(info.skb2);
- kfree_skb(skb);
if (info.delivered) {
if (info.congested && (allocation & __GFP_WAIT))
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index c478fc8db77..c420eba4876 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -770,33 +770,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
if (err)
goto out_mknod_parent;
- /*
- * Yucky last component or no last component at all?
- * (foo/., foo/.., /////)
- */
- err = -EEXIST;
- if (nd.last_type != LAST_NORM)
- goto out_mknod;
- /*
- * Lock the directory.
- */
- down(&nd.dentry->d_inode->i_sem);
- /*
- * Do the final lookup.
- */
- dentry = lookup_hash(&nd.last, nd.dentry);
+
+ dentry = lookup_create(&nd, 0);
err = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_mknod_unlock;
- err = -ENOENT;
- /*
- * Special case - lookup gave negative, but... we had foo/bar/
- * From the vfs_mknod() POV we just have a negative dentry -
- * all is fine. Let's be bastards - you had / on the end, you've
- * been asking for (non-existent) directory. -ENOENT for you.
- */
- if (nd.last.name[nd.last.len] && !dentry->d_inode)
- goto out_mknod_dput;
+
/*
* All right, let's create it.
*/
@@ -845,7 +824,6 @@ out_mknod_dput:
dput(dentry);
out_mknod_unlock:
up(&nd.dentry->d_inode->i_sem);
-out_mknod:
path_release(&nd);
out_mknod_parent:
if (err==-EEXIST)
diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
index 080aae243ce..2f4531fcaca 100644
--- a/net/xfrm/xfrm_algo.c
+++ b/net/xfrm/xfrm_algo.c
@@ -698,7 +698,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
return -ENOMEM;
if (skb1->sk)
- skb_set_owner_w(skb, skb1->sk);
+ skb_set_owner_w(skb2, skb1->sk);
/* Looking around. Are we still alive?
* OK, link new skb, drop old one */
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 5ddda2c98af..97509011c27 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -34,14 +34,21 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
{
struct rtattr *rt = xfrma[type - 1];
struct xfrm_algo *algp;
+ int len;
if (!rt)
return 0;
- if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp))
+ len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp);
+ if (len < 0)
return -EINVAL;
algp = RTA_DATA(rt);
+
+ len -= (algp->alg_key_len + 7U) / 8;
+ if (len < 0)
+ return -EINVAL;
+
switch (type) {
case XFRMA_ALG_AUTH:
if (!algp->alg_key_len &&
@@ -162,6 +169,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
struct rtattr *rta = u_arg;
struct xfrm_algo *p, *ualg;
struct xfrm_algo_desc *algo;
+ int len;
if (!rta)
return 0;
@@ -173,11 +181,12 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
return -ENOSYS;
*props = algo->desc.sadb_alg_id;
- p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL);
+ len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8;
+ p = kmalloc(len, GFP_KERNEL);
if (!p)
return -ENOMEM;
- memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len);
+ memcpy(p, ualg, len);
*algpp = p;
return 0;
}