From 6640e69731b42fd5e3d2b26201c8b34fc897a0ee Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 24 Jan 2007 14:42:04 -0800 Subject: [IPV4]: Fix the fib trie iterator to work with a single entry routing tables In a kernel with trie routing enabled I had a simple routing setup with only a single route to the outside world and no default route. "ip route table list main" showed my the route just fine but /proc/net/route was an empty file. What was going on? Thinking it was a bug in something I did and I looked deeper. Eventually I setup a second route and everything looked correct, huh? Finally I realized that the it was just the iterator pair in fib_trie_get_first, fib_trie_get_next just could not handle a routing table with a single entry. So to save myself and others further confusion, here is a simple fix for the fib proc iterator so it works even when there is only a single route in a routing table. Signed-off-by: Eric W. Biederman Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index cfb249cc0a5..13307c04d5a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1989,6 +1989,10 @@ static struct node *fib_trie_get_next(struct fib_trie_iter *iter) unsigned cindex = iter->index; struct tnode *p; + /* A single entry routing table */ + if (!tn) + return NULL; + pr_debug("get_next iter={node=%p index=%d depth=%d}\n", iter->tnode, iter->index, iter->depth); rescan: @@ -2037,11 +2041,18 @@ static struct node *fib_trie_get_first(struct fib_trie_iter *iter, if(!iter) return NULL; - if (n && IS_TNODE(n)) { - iter->tnode = (struct tnode *) n; - iter->trie = t; - iter->index = 0; - iter->depth = 1; + if (n) { + if (IS_TNODE(n)) { + iter->tnode = (struct tnode *) n; + iter->trie = t; + iter->index = 0; + iter->depth = 1; + } else { + iter->tnode = NULL; + iter->trie = t; + iter->index = 0; + iter->depth = 0; + } return n; } return NULL; -- cgit v1.2.3 From dbcb5855d108b7fa20ab42567a5412ce9dcd776a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 24 Jan 2007 15:21:02 -0800 Subject: [AF_PACKET]: Fix BPF handling. This fixes a bug introduced by: commit fda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 Author: Dmitry Mishin Date: Thu Aug 31 15:28:39 2006 -0700 [NET]: Fix sk->sk_filter field access sk_run_filter() returns either 0 or an unsigned 32-bit length which says how much of the packet to retain. If that 32-bit unsigned integer is larger than the packet, this is fine we just leave the packet unchanged. The above commit caused all filter return values which were negative when interpreted as a signed integer to indicate a packet drop, which is wrong. Based upon a report and initial patch by Raivis Bucis. Signed-off-by: David S. Miller --- net/packet/af_packet.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index da73e8a8c18..594c078c5eb 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -428,24 +428,18 @@ out_unlock: } #endif -static inline int run_filter(struct sk_buff *skb, struct sock *sk, - unsigned *snaplen) +static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk, + unsigned int res) { struct sk_filter *filter; - int err = 0; rcu_read_lock_bh(); filter = rcu_dereference(sk->sk_filter); - if (filter != NULL) { - err = sk_run_filter(skb, filter->insns, filter->len); - if (!err) - err = -EPERM; - else if (*snaplen > err) - *snaplen = err; - } + if (filter != NULL) + res = sk_run_filter(skb, filter->insns, filter->len); rcu_read_unlock_bh(); - return err; + return res; } /* @@ -467,7 +461,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet struct packet_sock *po; u8 * skb_head = skb->data; int skb_len = skb->len; - unsigned snaplen; + unsigned int snaplen, res; if (skb->pkt_type == PACKET_LOOPBACK) goto drop; @@ -495,8 +489,11 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet snaplen = skb->len; - if (run_filter(skb, sk, &snaplen) < 0) + res = run_filter(skb, sk, snaplen); + if (!res) goto drop_n_restore; + if (snaplen > res) + snaplen = res; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) @@ -568,7 +565,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe struct tpacket_hdr *h; u8 * skb_head = skb->data; int skb_len = skb->len; - unsigned snaplen; + unsigned int snaplen, res; unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER; unsigned short macoff, netoff; struct sk_buff *copy_skb = NULL; @@ -592,8 +589,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe snaplen = skb->len; - if (run_filter(skb, sk, &snaplen) < 0) + res = run_filter(skb, sk, snaplen); + if (!res) goto drop_n_restore; + if (snaplen > res) + snaplen = res; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; -- cgit v1.2.3 From db3ccdac261e015023cfd922840170f14c9cdc09 Mon Sep 17 00:00:00 2001 From: Baruch Even Date: Thu, 25 Jan 2007 13:35:06 -0800 Subject: [TCP]: Fix sorting of SACK blocks. The sorting of SACK blocks actually munges them rather than sort, causing the TCP stack to ignore some SACK information and breaking the assumption of ordered SACK blocks after sorting. The sort takes the data from a second buffer which isn't moved causing subsequent data moves to occur from the wrong location. The fix is to use a temporary buffer as a normal sort does. Signed-off-By: Baruch Even Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5c16e24a606..c26076fb890 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1011,10 +1011,11 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ for (j = 0; j < i; j++){ if (after(ntohl(sp[j].start_seq), ntohl(sp[j+1].start_seq))){ - sp[j].start_seq = htonl(tp->recv_sack_cache[j+1].start_seq); - sp[j].end_seq = htonl(tp->recv_sack_cache[j+1].end_seq); - sp[j+1].start_seq = htonl(tp->recv_sack_cache[j].start_seq); - sp[j+1].end_seq = htonl(tp->recv_sack_cache[j].end_seq); + struct tcp_sack_block_wire tmp; + + tmp = sp[j]; + sp[j] = sp[j+1]; + sp[j+1] = tmp; } } -- cgit v1.2.3 From 95743deb34de29defbb98ad477700aaf344f93f3 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 25 Jan 2007 15:51:51 -0800 Subject: [DECNET]: Handle a failure in neigh_parms_alloc (take 2) While enhancing the neighbour code to handle multiple network namespaces I noticed that decnet is assuming neigh_parms_alloc will allways succeed, which is clearly wrong. So handle the failure. Signed-off-by: Eric W. Biederman Acked-by: Steven Whitehouse Signed-off-by: David S. Miller --- net/decnet/dn_dev.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index fc6f3c023a5..ed083ab455b 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1145,16 +1145,23 @@ struct dn_dev *dn_dev_create(struct net_device *dev, int *err) init_timer(&dn_db->timer); dn_db->uptime = jiffies; + + dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); + if (!dn_db->neigh_parms) { + dev->dn_ptr = NULL; + kfree(dn_db); + return NULL; + } + if (dn_db->parms.up) { if (dn_db->parms.up(dev) < 0) { + neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); dev->dn_ptr = NULL; kfree(dn_db); return NULL; } } - dn_db->neigh_parms = neigh_parms_alloc(dev, &dn_neigh_table); - dn_dev_sysctl_register(dev, &dn_db->parms); dn_dev_set_timer(dev); -- cgit v1.2.3 From d5e76b0a280f71b20bdd20d1c1b4d6812ceb8c3a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 25 Jan 2007 19:30:36 -0800 Subject: [AF_PACKET]: Check device down state before hard header callbacks. If the device is down, invoking the device hard header callbacks is not legal, so check it early. Based upon a shaper OOPS report from Frederik Deweerdt. Signed-off-by: David S. Miller --- net/packet/af_packet.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 594c078c5eb..6dc01bdeb76 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -359,6 +359,10 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, if (dev == NULL) goto out_unlock; + err = -ENETDOWN; + if (!(dev->flags & IFF_UP)) + goto out_unlock; + /* * You may not queue a frame bigger than the mtu. This is the lowest level * raw protocol and you must do your own fragmentation at this level. @@ -407,10 +411,6 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, if (err) goto out_free; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out_free; - /* * Now send it */ @@ -738,6 +738,10 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, if (sock->type == SOCK_RAW) reserve = dev->hard_header_len; + err = -ENETDOWN; + if (!(dev->flags & IFF_UP)) + goto out_unlock; + err = -EMSGSIZE; if (len > dev->mtu+reserve) goto out_unlock; @@ -770,10 +774,6 @@ static int packet_sendmsg(struct kiocb *iocb, struct socket *sock, skb->dev = dev; skb->priority = sk->sk_priority; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out_free; - /* * Now send it */ -- cgit v1.2.3 From e89862f4c5b3c4ac9afcd8cb1365d2f1e16ddc3b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 26 Jan 2007 01:04:55 -0800 Subject: [TCP]: Restore SKB socket owner setting in tcp_transmit_skb(). Revert 931731123a103cfb3f70ac4b7abfc71d94ba1f03 We can't elide the skb_set_owner_w() here because things like certain netfilter targets (such as owner MATCH) need a socket to be set on the SKB for correct operation. Thanks to Jan Engelhardt and other netfilter list members for pointing this out. Signed-off-by: David S. Miller --- net/dccp/output.c | 4 ++-- net/ipv4/ip_output.c | 3 ++- net/ipv4/tcp_output.c | 3 ++- net/ipv6/inet6_connection_sock.c | 3 ++- net/sctp/protocol.c | 2 +- 5 files changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/dccp/output.c b/net/dccp/output.c index 82456965908..3435542e965 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -124,7 +124,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) DCCP_INC_STATS(DCCP_MIB_OUTSEGS); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, 0); return net_xmit_eval(err); } return -ENOBUFS; @@ -396,7 +396,7 @@ int dccp_send_reset(struct sock *sk, enum dccp_reset_codes code) code); if (skb != NULL) { memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, sk, 0); + err = inet_csk(sk)->icsk_af_ops->queue_xmit(skb, 0); return net_xmit_eval(err); } } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index f071f84808f..a0f2008584b 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -281,8 +281,9 @@ int ip_output(struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } -int ip_queue_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok) +int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ip_options *opt = inet->opt; struct rtable *rt; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 23e32c80691..975f4472af2 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -467,6 +467,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th = (struct tcphdr *) skb_push(skb, tcp_header_size); skb->h.th = th; + skb_set_owner_w(skb, sk); /* Build TCP header and checksum it. */ th->source = inet->sport; @@ -540,7 +541,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_INC_STATS(TCP_MIB_OUTSEGS); - err = icsk->icsk_af_ops->queue_xmit(skb, sk, 0); + err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) return err; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index c700302ad51..116f94a4907 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -139,8 +139,9 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr) EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr); -int inet6_csk_xmit(struct sk_buff *skb, struct sock *sk, int ipfragok) +int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) { + struct sock *sk = skb->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct flowi fl; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 225f39b5d59..0ef48126b11 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -804,7 +804,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, NIPQUAD(((struct rtable *)skb->dst)->rt_dst)); SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS); - return ip_queue_xmit(skb, skb->sk, ipfragok); + return ip_queue_xmit(skb, ipfragok); } static struct sctp_af sctp_ipv4_specific; -- cgit v1.2.3 From c72c6b2a291bb6c61b1546d116784a79e15a6c29 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Jan 2007 01:06:47 -0800 Subject: [NETFILTER]: nf_nat: fix ICMP translation with statically linked conntrack When nf_nat/nf_conntrack_ipv4 are linked statically, nf_nat is initialized before nf_conntrack_ipv4, which makes the nf_ct_l3proto_find_get(AF_INET) call during nf_nat initialization return the generic l3proto instead of the AF_INET specific one. This breaks ICMP error translation since the generic protocol always initializes the IPs in the tuple to 0. Change the linking order and put nf_conntrack_ipv4 first. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/Makefile | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 15e741aeb29..16d177b71bf 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,6 +4,14 @@ # objects for the standalone - connection tracking / NAT ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o +# objects for l3 independent conntrack +nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o +ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) +ifeq ($(CONFIG_PROC_FS),y) +nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o +endif +endif + ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o ifneq ($(CONFIG_NF_NAT),) @@ -20,6 +28,8 @@ ip_nat_h323-objs := ip_nat_helper_h323.o # connection tracking obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o +obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o + obj-$(CONFIG_IP_NF_NAT) += ip_nat.o obj-$(CONFIG_NF_NAT) += nf_nat.o @@ -106,13 +116,3 @@ obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o -# objects for l3 independent conntrack -nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o -ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) -ifeq ($(CONFIG_PROC_FS),y) -nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o -endif -endif - -# l3 independent conntrack -obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o -- cgit v1.2.3 From a46bf7d5a81b350cd204b82bd25ee6ffbc2967d4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Jan 2007 01:07:30 -0800 Subject: [NETFILTER]: nf_nat_pptp: fix expectation removal When removing the expectation for the opposite direction, the PPTP NAT helper initializes the tuple for lookup with the addresses of the opposite direction, which makes the lookup fail. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/nf_nat_pptp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 0ae45b79a4e..5df4fcae3ab 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -72,9 +72,9 @@ static void pptp_nat_expected(struct nf_conn *ct, DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.l3num = AF_INET; - t.src.u3.ip = master->tuplehash[exp->dir].tuple.src.u3.ip; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; t.src.u.gre.key = nat_pptp_info->pns_call_id; - t.dst.u3.ip = master->tuplehash[exp->dir].tuple.dst.u3.ip; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; t.dst.u.gre.key = nat_pptp_info->pac_call_id; t.dst.protonum = IPPROTO_GRE; } -- cgit v1.2.3 From 7399072a7348d025e7bcb5eb5d5e9be941d490b7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 26 Jan 2007 01:07:59 -0800 Subject: [NETFILTER]: nf_conntrack_pptp: fix NAT setup of expected GRE connections When an expected connection arrives, the NAT helper should be called to set up NAT similar to the master connection. The PPTP conntrack helper incorrectly checks whether the _expected_ connection has NAT setup before calling the NAT helper (which is never the case), instead of checkeing whether the _master_ connection is NATed. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_pptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index f0ff00e0d05..c59df3bc2bb 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -113,7 +113,7 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_lock(); nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn); - if (nf_nat_pptp_expectfn && ct->status & IPS_NAT_MASK) + if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK) nf_nat_pptp_expectfn(ct, exp); else { struct nf_conntrack_tuple inv_t; -- cgit v1.2.3