From 4adf0af6818f3ea52421dc0bae836cfaf20ef72a Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Wed, 30 Jul 2008 16:27:55 -0700 Subject: bridge: send correct MTU value in PMTU (revised) When bridging interfaces with different MTUs, the bridge correctly chooses the minimum of the MTUs of the physical devices as the bridges MTU. But when a frame is passed which fits through the incoming, but not through the outgoing interface, a "Fragmentation Needed" packet is generated. However, the propagated MTU is hardcoded to 1500, which is wrong in this situation. The sender will repeat the packet again with the same frame size, and the same problem will occur again. Instead of sending 1500, the (correct) MTU value of the bridge is now sent via PMTU. To achieve this, the corresponding rtable structure is stored in its net_bridge structure. Modified to get rid of fake_net_device as well. Signed-off-by: Simon Wunderlich Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_device.c | 9 ++++++- net/bridge/br_if.c | 3 +++ net/bridge/br_netfilter.c | 63 +++++++++++++++++++++++++---------------------- net/bridge/br_private.h | 6 +++++ 4 files changed, 51 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index d9449df7cad..9b58d70b0e7 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -68,10 +68,17 @@ static int br_dev_stop(struct net_device *dev) static int br_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev))) + struct net_bridge *br = netdev_priv(dev); + if (new_mtu < 68 || new_mtu > br_min_mtu(br)) return -EINVAL; dev->mtu = new_mtu; + +#ifdef CONFIG_BRIDGE_NETFILTER + /* remember the MTU in the rtable for PMTU */ + br->fake_rtable.u.dst.metrics[RTAX_MTU - 1] = new_mtu; +#endif + return 0; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index a072ea5ca6f..63c18aacde8 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -202,6 +202,9 @@ static struct net_device *new_bridge_dev(const char *name) br->topology_change = 0; br->topology_change_detected = 0; br->ageing_time = 300 * HZ; + + br_netfilter_rtable_init(br); + INIT_LIST_HEAD(&br->age_list); br_stp_timer_init(br); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index bb90cd7bace..6e280a8a31e 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -101,33 +101,30 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) -/* We need these fake structures to make netfilter happy -- - * lots of places assume that skb->dst != NULL, which isn't - * all that unreasonable. - * +/* + * Initialize bogus route table used to keep netfilter happy. * Currently, we fill in the PMTU entry because netfilter * refragmentation needs it, and the rt_flags entry because * ipt_REJECT needs it. Future netfilter modules might - * require us to fill additional fields. */ -static struct net_device __fake_net_device = { - .hard_header_len = ETH_HLEN, -#ifdef CONFIG_NET_NS - .nd_net = &init_net, -#endif -}; + * require us to fill additional fields. + */ +void br_netfilter_rtable_init(struct net_bridge *br) +{ + struct rtable *rt = &br->fake_rtable; -static struct rtable __fake_rtable = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .dev = &__fake_net_device, - .path = &__fake_rtable.u.dst, - .metrics = {[RTAX_MTU - 1] = 1500}, - .flags = DST_NOXFRM, - } - }, - .rt_flags = 0, -}; + atomic_set(&rt->u.dst.__refcnt, 1); + rt->u.dst.dev = &br->dev; + rt->u.dst.path = &rt->u.dst; + rt->u.dst.metrics[RTAX_MTU - 1] = 1500; + rt->u.dst.flags = DST_NOXFRM; +} + +static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) +{ + struct net_bridge_port *port = rcu_dereference(dev->br_port); + + return port ? &port->br->fake_rtable : NULL; +} static inline struct net_device *bridge_parent(const struct net_device *dev) { @@ -226,8 +223,12 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; - skb->rtable = &__fake_rtable; - dst_hold(&__fake_rtable.u.dst); + skb->rtable = bridge_parent_rtable(nf_bridge->physindev); + if (!skb->rtable) { + kfree_skb(skb); + return 0; + } + dst_hold(&skb->rtable->u.dst); skb->dev = nf_bridge->physindev; nf_bridge_push_encap_header(skb); @@ -391,8 +392,12 @@ bridged_dnat: skb->pkt_type = PACKET_HOST; } } else { - skb->rtable = &__fake_rtable; - dst_hold(&__fake_rtable.u.dst); + skb->rtable = bridge_parent_rtable(nf_bridge->physindev); + if (!skb->rtable) { + kfree_skb(skb); + return 0; + } + dst_hold(&skb->rtable->u.dst); } skb->dev = nf_bridge->physindev; @@ -611,8 +616,8 @@ static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (skb->rtable == &__fake_rtable) { - dst_release(&__fake_rtable.u.dst); + if (skb->rtable && skb->rtable == bridge_parent_rtable(in)) { + dst_release(&skb->rtable->u.dst); skb->rtable = NULL; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 815ed38925b..c3dc18ddc04 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -15,6 +15,7 @@ #include #include +#include #define BR_HASH_BITS 8 #define BR_HASH_SIZE (1 << BR_HASH_BITS) @@ -92,6 +93,9 @@ struct net_bridge struct hlist_head hash[BR_HASH_SIZE]; struct list_head age_list; unsigned long feature_mask; +#ifdef CONFIG_BRIDGE_NETFILTER + struct rtable fake_rtable; +#endif unsigned long flags; #define BR_SET_MAC_ADDR 0x00000001 @@ -197,9 +201,11 @@ extern int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __us #ifdef CONFIG_BRIDGE_NETFILTER extern int br_netfilter_init(void); extern void br_netfilter_fini(void); +extern void br_netfilter_rtable_init(struct net_bridge *); #else #define br_netfilter_init() (0) #define br_netfilter_fini() do { } while(0) +#define br_netfilter_rtable_init(x) #endif /* br_stp.c */ -- cgit v1.2.3 From 6a8341b68b5269de71c32c6df91f4b0298da031d Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Wed, 30 Jul 2008 16:30:15 -0700 Subject: net: use the common ascii hex helpers Signed-off-by: Harvey Harrison Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1819ad7ab91..fafe8ebb4c5 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -475,11 +475,10 @@ static void arp_print(struct arp_payload *payload) #define HBUFFERLEN 30 char hbuffer[HBUFFERLEN]; int j,k; - const char hexbuf[]= "0123456789abcdef"; for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { - hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15]; - hbuffer[k++]=hexbuf[payload->src_hw[j]&15]; + hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); + hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); hbuffer[k++]=':'; } hbuffer[--k]='\0'; -- cgit v1.2.3 From cba5cbd1559f49bec76e54de6ed21b7df3742ada Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 30 Jul 2008 16:31:46 -0700 Subject: atm: fix const assignment/discard warnings in the ATM networking driver Fix const assignment/discard warnings in the ATM networking driver. The lane2_assoc_ind() function needed its arguments changing to match changes in the lane2_ops struct (patch 61c33e012964ce358b42d2a1e9cd309af5dab02b "atm: use const where reasonable"). Signed-off-by: David Howells Acked-by: Chas Williams Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/atm/mpc.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 4fccaa1e07b..11b16d16661 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -62,11 +62,13 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg, struct mpoa_client *mpc static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc); static void set_mps_mac_addr_rcvd(struct k_message *mesg, struct mpoa_client *mpc); -static uint8_t *copy_macs(struct mpoa_client *mpc, uint8_t *router_mac, - uint8_t *tlvs, uint8_t mps_macs, uint8_t device_type); +static const uint8_t *copy_macs(struct mpoa_client *mpc, + const uint8_t *router_mac, + const uint8_t *tlvs, uint8_t mps_macs, + uint8_t device_type); static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry); -static void send_set_mps_ctrl_addr(char *addr, struct mpoa_client *mpc); +static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc); static void mpoad_close(struct atm_vcc *vcc); static int msg_from_mpoad(struct atm_vcc *vcc, struct sk_buff *skb); @@ -351,12 +353,12 @@ static const char *mpoa_device_type_string(char type) * lec sees a TLV it uses the pointer to call this function. * */ -static void lane2_assoc_ind(struct net_device *dev, uint8_t *mac_addr, - uint8_t *tlvs, uint32_t sizeoftlvs) +static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr, + const u8 *tlvs, u32 sizeoftlvs) { uint32_t type; uint8_t length, mpoa_device_type, number_of_mps_macs; - uint8_t *end_of_tlvs; + const uint8_t *end_of_tlvs; struct mpoa_client *mpc; mpoa_device_type = number_of_mps_macs = 0; /* silence gcc */ @@ -430,8 +432,10 @@ static void lane2_assoc_ind(struct net_device *dev, uint8_t *mac_addr, * plus the possible MAC address(es) to mpc->mps_macs. * For a freshly allocated MPOA client mpc->mps_macs == 0. */ -static uint8_t *copy_macs(struct mpoa_client *mpc, uint8_t *router_mac, - uint8_t *tlvs, uint8_t mps_macs, uint8_t device_type) +static const uint8_t *copy_macs(struct mpoa_client *mpc, + const uint8_t *router_mac, + const uint8_t *tlvs, uint8_t mps_macs, + uint8_t device_type) { int num_macs; num_macs = (mps_macs > 1) ? mps_macs : 1; @@ -811,7 +815,7 @@ static int atm_mpoa_mpoad_attach (struct atm_vcc *vcc, int arg) return arg; } -static void send_set_mps_ctrl_addr(char *addr, struct mpoa_client *mpc) +static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc) { struct k_message mesg; -- cgit v1.2.3 From ae375044d31075a31de5a839e07ded7f67b660aa Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 31 Jul 2008 00:38:01 -0700 Subject: netfilter: nf_conntrack_tcp: decrease timeouts while data in unacknowledged In order to time out dead connections quicker, keep track of outstanding data and cap the timeout. Suggested by Herbert Xu. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_proto_tcp.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 420a10d8eb1..6f61261888e 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -67,7 +67,8 @@ static const char *const tcp_conntrack_names[] = { /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +static unsigned int nf_ct_tcp_timeout_unacknowledged __read_mostly = 5 MINS; static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { [TCP_CONNTRACK_SYN_SENT] = 2 MINS, @@ -625,8 +626,10 @@ static bool tcp_in_window(const struct nf_conn *ct, swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; - if (after(end, sender->td_end)) + if (after(end, sender->td_end)) { sender->td_end = end; + sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + } /* * Update receiver data. */ @@ -637,6 +640,8 @@ static bool tcp_in_window(const struct nf_conn *ct, if (win == 0) receiver->td_maxend++; } + if (ack == receiver->td_end) + receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; /* * Check retransmissions. @@ -951,9 +956,16 @@ static int tcp_packet(struct nf_conn *ct, if (old_state != new_state && new_state == TCP_CONNTRACK_FIN_WAIT) ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - timeout = ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans - && tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans - ? nf_ct_tcp_timeout_max_retrans : tcp_timeouts[new_state]; + + if (ct->proto.tcp.retrans >= nf_ct_tcp_max_retrans && + tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans) + timeout = nf_ct_tcp_timeout_max_retrans; + else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) & + IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED && + tcp_timeouts[new_state] > nf_ct_tcp_timeout_unacknowledged) + timeout = nf_ct_tcp_timeout_unacknowledged; + else + timeout = tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); @@ -1235,6 +1247,13 @@ static struct ctl_table tcp_sysctl_table[] = { .mode = 0644, .proc_handler = &proc_dointvec_jiffies, }, + { + .procname = "nf_conntrack_tcp_timeout_unacknowledged", + .data = &nf_ct_tcp_timeout_unacknowledged, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, { .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, .procname = "nf_conntrack_tcp_loose", -- cgit v1.2.3 From a8ddc9163c6a16cd62531dba1ec5020484e33b02 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 31 Jul 2008 00:38:31 -0700 Subject: netfilter: ipt_recent: fix race between recent_mt_destroy and proc manipulations The thing is that recent_mt_destroy first flushes the entries from table with the recent_table_flush and only *after* this removes the proc file, corresponding to that table. Thus, if we manage to write to this file the '+XXX' command we will leak some entries. If we manage to write there a 'clean' command we'll race in two recent_table_flush flows, since the recent_mt_destroy calls this outside the recent_lock. The proper solution as I see it is to remove the proc file first and then go on with flushing the table. This flushing becomes safe w/o the lock, since the table is already inaccessible from the outside. Signed-off-by: Pavel Emelyanov Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_recent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 21cb053f5d7..3974d7cae5c 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -305,10 +305,10 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); - recent_table_flush(t); #ifdef CONFIG_PROC_FS remove_proc_entry(t->name, proc_dir); #endif + recent_table_flush(t); kfree(t); } mutex_unlock(&recent_mutex); -- cgit v1.2.3 From 967ab999a090b1a4e7d3c7febfd6d89b42fb4cf4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 31 Jul 2008 00:38:52 -0700 Subject: netfilter: xt_hashlimit: fix race between htable_destroy and htable_gc Deleting a timer with del_timer doesn't guarantee, that the timer function is not running at the moment of deletion. Thus in the xt_hashlimit case we can get into a ticklish situation when the htable_gc rearms the timer back and we'll actually delete an entry with a pending timer. Fix it with using del_timer_sync(). AFAIK del_timer_sync checks for the timer to be pending by itself, so I remove the check. Signed-off-by: Pavel Emelyanov Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_hashlimit.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 6809af542a2..d9418a26781 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -367,9 +367,7 @@ static void htable_gc(unsigned long htlong) static void htable_destroy(struct xt_hashlimit_htable *hinfo) { - /* remove timer, if it is pending */ - if (timer_pending(&hinfo->timer)) - del_timer(&hinfo->timer); + del_timer_sync(&hinfo->timer); /* remove proc entry */ remove_proc_entry(hinfo->pde->name, -- cgit v1.2.3 From c3f26a269c2421f97f10cf8ed05d5099b573af4d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 31 Jul 2008 16:58:50 -0700 Subject: netdev: Fix lockdep warnings in multiqueue configurations. When support for multiple TX queues were added, the netif_tx_lock() routines we converted to iterate over all TX queues and grab each queue's spinlock. This causes heartburn for lockdep and it's not a healthy thing to do with lots of TX queues anyways. So modify this to use a top-level lock and a "frozen" state for the individual TX queues. Signed-off-by: David S. Miller --- net/core/dev.c | 1 + net/core/netpoll.c | 1 + net/core/pktgen.c | 7 +++++-- net/sched/sch_generic.c | 6 ++++-- net/sched/sch_teql.c | 9 +++++---- 5 files changed, 16 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 63d6bcddbf4..69320a56a08 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4200,6 +4200,7 @@ static void netdev_init_queues(struct net_device *dev) { netdev_init_one_queue(dev, &dev->rx_queue, NULL); netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL); + spin_lock_init(&dev->tx_global_lock); } /** diff --git a/net/core/netpoll.c b/net/core/netpoll.c index c12720895ec..6c7af390be0 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -70,6 +70,7 @@ static void queue_process(struct work_struct *work) local_irq_save(flags); __netif_tx_lock(txq, smp_processor_id()); if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || dev->hard_start_xmit(skb, dev) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); __netif_tx_unlock(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index c7d484f7e1c..3284605f2ec 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3305,6 +3305,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) txq = netdev_get_tx_queue(odev, queue_map); if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || need_resched()) { idle_start = getCurUs(); @@ -3320,7 +3321,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) pkt_dev->idle_acc += getCurUs() - idle_start; - if (netif_tx_queue_stopped(txq)) { + if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq)) { pkt_dev->next_tx_us = getCurUs(); /* TODO */ pkt_dev->next_tx_ns = 0; goto out; /* Try the next interface */ @@ -3352,7 +3354,8 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) txq = netdev_get_tx_queue(odev, queue_map); __netif_tx_lock_bh(txq); - if (!netif_tx_queue_stopped(txq)) { + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) { atomic_inc(&(pkt_dev->skb->users)); retry_now: diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 345838a2e36..9c9cd4d9489 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -135,7 +135,8 @@ static inline int qdisc_restart(struct Qdisc *q) txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_subqueue_stopped(dev, skb)) + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) ret = dev_hard_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); @@ -162,7 +163,8 @@ static inline int qdisc_restart(struct Qdisc *q) break; } - if (ret && netif_tx_queue_stopped(txq)) + if (ret && (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq))) ret = 0; return ret; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 537223642b6..2c35c678563 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -305,10 +305,11 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: - if (netif_tx_trylock(slave)) { - if (!__netif_subqueue_stopped(slave, subq) && + if (__netif_tx_trylock(slave_txq)) { + if (!netif_tx_queue_stopped(slave_txq) && + !netif_tx_queue_frozen(slave_txq) && slave->hard_start_xmit(skb, slave) == 0) { - netif_tx_unlock(slave); + __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; @@ -316,7 +317,7 @@ restart: qdisc_pkt_len(skb); return 0; } - netif_tx_unlock(slave); + __netif_tx_unlock(slave_txq); } if (netif_queue_stopped(dev)) busy = 1; -- cgit v1.2.3 From 77e2f14f71d68d05945f1d30ca55b5194d6ab1ce Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 31 Jul 2008 20:46:47 -0700 Subject: ipv6: Fix ip6_xmit to send fragments if ipfragok is true SCTP used ip6_xmit() to send fragments after received ICMP packet too big message. But while send packet used ip6_xmit, the skb->local_df is not initialized. So when skb if enter ip6_fragment(), the following code will discard the skb. ip6_fragment(...) { if (!skb->local_df) { ... return -EMSGSIZE; } ... } SCTP do the following step: 1. send packet ip6_xmit(skb, ipfragok=0) 2. received ICMP packet too big message 3. if PMTUD_ENABLE: ip6_xmit(skb, ipfragok=1) This patch fixed the problem by set local_df if ipfragok is true. Signed-off-by: Wei Yongjun Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6811901e6b1..a027003d69a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -236,6 +236,10 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb_reset_network_header(skb); hdr = ipv6_hdr(skb); + /* Allow local fragmentation. */ + if (ipfragok) + skb->local_df = 1; + /* * Fill in the IPv6 header */ -- cgit v1.2.3 From 90b7e1120bb43ffaabb88d28f80a0c2e13167b15 Mon Sep 17 00:00:00 2001 From: Adam Langley Date: Thu, 31 Jul 2008 20:49:48 -0700 Subject: tcp: MD5: Fix MD5 signatures on certain ACK packets I noticed, looking at tcpdumps, that timewait ACKs were getting sent with an incorrect MD5 signature when signatures were enabled. I broke this in 49a72dfb8814c2d65bd9f8c9c6daf6395a1ec58d ("tcp: Fix MD5 signatures for non-linear skbs"). I didn't take into account that the skb passed to tcp_*_send_ack was the inbound packet, thus the source and dest addresses need to be swapped when calculating the MD5 pseudoheader. Signed-off-by: Adam Langley Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 4 ++-- net/ipv6/tcp_ipv6.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b3875c0d83c..91a8cfddf1c 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -655,8 +655,8 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, rep.th.doff = arg.iov[0].iov_len/4; tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset], - key, ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, &rep.th); + key, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &rep.th); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1db45216b23..1bcdcbc71d6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1094,8 +1094,8 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); tcp_v6_md5_hash_hdr((__u8 *)topt, key, - &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, t1); + &ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, t1); } #endif -- cgit v1.2.3 From 8a9204db665365354b349ed5b0bc054f0433a2a4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 31 Jul 2008 20:51:22 -0700 Subject: net/ipv4/route.c: fix build error fix: net/ipv4/route.c: In function 'ip_static_sysctl_init': net/ipv4/route.c:3225: error: 'ipv4_route_path' undeclared (first use in this function) net/ipv4/route.c:3225: error: (Each undeclared identifier is reported only once net/ipv4/route.c:3225: error: for each function it appears in.) net/ipv4/route.c:3225: error: 'ipv4_route_table' undeclared (first use in this function) Signed-off-by: Ingo Molnar Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 380d6474cf6..d7bf0564b5f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3222,7 +3222,9 @@ int __init ip_rt_init(void) */ void __init ip_static_sysctl_init(void) { +#ifdef CONFIG_SYSCTL register_sysctl_paths(ipv4_route_path, ipv4_route_table); +#endif } EXPORT_SYMBOL(__ip_select_ident); -- cgit v1.2.3 From 00b1304c4ca81dd893973cc620b87a5c3ff3f660 Mon Sep 17 00:00:00 2001 From: Adam Langley Date: Thu, 31 Jul 2008 21:36:07 -0700 Subject: tcp: MD5: Fix IPv6 signatures Reported by Stefanos Harhalakis; although 2.6.27-rc1 talks to itself using IPv6 TCP MD5 packets just fine, Stefanos noted that tcpdump claimed that the signatures were invalid. I broke this in 49a72dfb8814c2d65bd9f8c9c6daf6395a1ec58d ("tcp: Fix MD5 signatures for non-linear skbs"), it was just a typo. Note that tcpdump will still sometimes claim that the signatures are incorrect. A patch to tcpdump has been submitted for this[1]. [1] http://tinyurl.com/6a4fl2 Signed-off-by: Adam Langley Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1bcdcbc71d6..78185a40921 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -748,7 +748,7 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp, ipv6_addr_copy(&bp->saddr, saddr); ipv6_addr_copy(&bp->daddr, daddr); bp->protocol = cpu_to_be32(IPPROTO_TCP); - bp->len = cpu_to_be16(nbytes); + bp->len = cpu_to_be32(nbytes); sg_init_one(&sg, bp, sizeof(*bp)); return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp)); -- cgit v1.2.3