From 73f306024c15bd12e59677d6eaf43ecced614f04 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 1 Dec 2005 14:28:58 -0800 Subject: [NETFILTER]: Ignore ACKs ACKs on half open connections in TCP conntrack Mounting NFS file systems after a (warm) reboot could take a long time if firewalling and connection tracking was enabled. The reason is that the NFS clients tends to use the same ports (800 and counting down). Now on reboot, the server would still have a TCB for an existing TCP connection client:800 -> server:2049. The client sends a SYN from port 800 to server:2049, which elicits an ACK from the server. The firewall on the client drops the ACK because (from its point of view) the connection is still in half-open state, and it expects to see a SYNACK. The client will eventually time out after several minutes. The following patch corrects this, by accepting ACKs on half open connections as well. Signed-off-by: Jozsef Kadlecsik Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_conntrack_proto_tcp.c | 29 ++++++++++++++++++++--------- net/netfilter/nf_conntrack_proto_tcp.c | 29 ++++++++++++++++++++--------- 2 files changed, 40 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 62598167677..aeb7353d477 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -272,9 +272,9 @@ static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* - * sSS -> sIV Might be a half-open connection. + * sSS -> sIG Might be a half-open connection. * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -917,8 +917,12 @@ static int tcp_packet(struct ip_conntrack *conntrack, switch (new_state) { case TCP_CONNTRACK_IGNORE: - /* Either SYN in ORIGINAL - * or SYN/ACK in REPLY. */ + /* Ignored packets: + * + * a) SYN in ORIGINAL + * b) SYN/ACK in REPLY + * c) ACK in reply direction after initial SYN in original. + */ if (index == TCP_SYNACK_SET && conntrack->proto.tcp.last_index == TCP_SYN_SET && conntrack->proto.tcp.last_dir != dir @@ -985,13 +989,20 @@ static int tcp_packet(struct ip_conntrack *conntrack, } case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET - && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) - && conntrack->proto.tcp.last_index == TCP_SYN_SET + && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_SYN_SET) + || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { - /* RST sent to invalid SYN we had let trough - * SYN was in window then, tear down connection. + /* RST sent to invalid SYN or ACK we had let trough + * at a) and c) above: + * + * a) SYN was in window then + * c) we hold a half-open connection. + * + * Delete our connection entry. * We skip window checking, because packet might ACK - * segments we ignored in the SYN. */ + * segments we ignored. */ goto in_window; } /* Just fall trough */ diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 5a6fcf349bd..6035633d822 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -280,9 +280,9 @@ static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sCL -> sCL */ /* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIV, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* - * sSS -> sIV Might be a half-open connection. + * sSS -> sIG Might be a half-open connection. * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -912,8 +912,12 @@ static int tcp_packet(struct nf_conn *conntrack, switch (new_state) { case TCP_CONNTRACK_IGNORE: - /* Either SYN in ORIGINAL - * or SYN/ACK in REPLY. */ + /* Ignored packets: + * + * a) SYN in ORIGINAL + * b) SYN/ACK in REPLY + * c) ACK in reply direction after initial SYN in original. + */ if (index == TCP_SYNACK_SET && conntrack->proto.tcp.last_index == TCP_SYN_SET && conntrack->proto.tcp.last_dir != dir @@ -979,13 +983,20 @@ static int tcp_packet(struct nf_conn *conntrack, } case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET - && test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) - && conntrack->proto.tcp.last_index == TCP_SYN_SET + && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_SYN_SET) + || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) + && conntrack->proto.tcp.last_index == TCP_ACK_SET)) && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { - /* RST sent to invalid SYN we had let trough - * SYN was in window then, tear down connection. + /* RST sent to invalid SYN or ACK we had let trough + * at a) and c) above: + * + * a) SYN was in window then + * c) we hold a half-open connection. + * + * Delete our connection entry. * We skip window checking, because packet might ACK - * segments we ignored in the SYN. */ + * segments we ignored. */ goto in_window; } /* Just fall trough */ -- cgit v1.2.3 From 2a43c4af3fa2e701008d51c28365e26fccf9cbb0 Mon Sep 17 00:00:00 2001 From: Phil Oester Date: Thu, 1 Dec 2005 14:29:24 -0800 Subject: [NETFILTER]: Fix recent match jiffies wrap mismatches Around jiffies wrap time (i.e. within first 5 mins after boot), recent match rules which contain both --seconds and --hitcount arguments experience false matches. This is because the last_pkts array is filled with zeros on creation, and when comparing 'now' to 0 (+ --seconds argument), time_before_eq thinks it has found a hit. Below patch adds a break if the packet value is zero. This has the unfortunate side effect of causing mismatches if a packet was received when jiffies really was equal to zero. The odds of that happening are slim compared to the problems caused by not adding the break however. Plus, the author used this same method just below, so it is "good enough". This fixes netfilter bugs #383 and #395. Signed-off-by: Phil Oester Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ipt_recent.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c index 2d44b07688a..261cbb4d4c4 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/ipv4/netfilter/ipt_recent.c @@ -532,6 +532,7 @@ match(const struct sk_buff *skb, } if(info->seconds && info->hit_count) { for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) { + if(r_list[location].last_pkts[pkt_count] == 0) break; if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++; } if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert; -- cgit v1.2.3 From ea86575eaf99a9262a969309d934318028dbfacb Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 1 Dec 2005 14:30:00 -0800 Subject: [NETLINK]: Fix processing of fib_lookup netlink messages The receive path for fib_lookup netlink messages is lacking sanity checks for header and payload and is thus vulnerable to malformed netlink messages causing illegal memory references. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 882f88f6d13..19b1b984d68 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -544,12 +544,16 @@ static void nl_fib_input(struct sock *sk, int len) struct sk_buff *skb = NULL; struct nlmsghdr *nlh = NULL; struct fib_result_nl *frn; - int err; u32 pid; struct fib_table *tb; - skb = skb_recv_datagram(sk, 0, 0, &err); + skb = skb_dequeue(&sk->sk_receive_queue); nlh = (struct nlmsghdr *)skb->data; + if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || + nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) { + kfree_skb(skb); + return; + } frn = (struct fib_result_nl *) NLMSG_DATA(nlh); tb = fib_get_table(frn->tb_id_in); -- cgit v1.2.3 From 6736dc35e9e1b9c8084d5c362a429a3e8189af6b Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 2 Dec 2005 20:30:06 -0800 Subject: [SCTP]: Return socket errors only if the receive queue is empty. This patch fixes an issue where it is possible to get valid data after a ENOTCONN error. It returns socket errors only after data queued on socket receive queue is consumed. Signed-off-by: Neil Horman Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index abab81f3818..d890dfa8818 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4743,11 +4743,6 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, struct sk_buff *skb; long timeo; - /* Caller is allowed not to check sk->sk_err before calling. */ - error = sock_error(sk); - if (error) - goto no_packet; - timeo = sock_rcvtimeo(sk, noblock); SCTP_DEBUG_PRINTK("Timeout: timeo: %ld, MAX: %ld.\n", @@ -4774,6 +4769,11 @@ static struct sk_buff *sctp_skb_recv_datagram(struct sock *sk, int flags, if (skb) return skb; + /* Caller is allowed not to check sk->sk_err before calling. */ + error = sock_error(sk); + if (error) + goto no_packet; + if (sk->sk_shutdown & RCV_SHUTDOWN) break; -- cgit v1.2.3 From bf031fff1fac77775b2cd2c72ad8b017f4c0af13 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Fri, 2 Dec 2005 20:32:29 -0800 Subject: [SCTP]: Fix getsockname for sctp when an ipv6 socket accepts a connection from an ipv4 socket. Signed-off-by: Neil Horman Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sctp/transport.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 6bc27200e6c..268ddaf2dc0 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -261,7 +261,8 @@ void sctp_transport_route(struct sctp_transport *transport, * association's active path for getsockname(). */ if (asoc && (transport == asoc->peer.active_path)) - af->to_sk_saddr(&transport->saddr, asoc->base.sk); + opt->pf->af->to_sk_saddr(&transport->saddr, + asoc->base.sk); } else transport->pmtu = SCTP_DEFAULT_MAXSEGMENT; } -- cgit v1.2.3 From 24c6927505ca77ee4ac25fb31dcd56f6506979ed Mon Sep 17 00:00:00 2001 From: David Stevens Date: Fri, 2 Dec 2005 20:32:59 -0800 Subject: [IGMP]: workaround for IGMP v1/v2 bug From: David Stevens As explained at: http://www.cs.ucsb.edu/~krishna/igmp_dos/ With IGMP version 1 and 2 it is possible to inject a unicast report to a client which will make it ignore multicast reports sent later by the router. The fix is to only accept the report if is was sent to a multicast or unicast address. Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 5 ++++- net/ipv6/mcast.c | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index c04607b4921..4a195c724f0 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -897,7 +897,10 @@ int igmp_rcv(struct sk_buff *skb) /* Is it our report looped back? */ if (((struct rtable*)skb->dst)->fl.iif == 0) break; - igmp_heard_report(in_dev, ih->group); + /* don't rely on MC router hearing unicast reports */ + if (skb->pkt_type == PACKET_MULTICAST || + skb->pkt_type == PACKET_BROADCAST) + igmp_heard_report(in_dev, ih->group); break; case IGMP_PIM: #ifdef CONFIG_IP_PIMSM_V1 diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index f15e04ad026..fd939da090c 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1231,6 +1231,11 @@ int igmp6_event_report(struct sk_buff *skb) if (skb->pkt_type == PACKET_LOOPBACK) return 0; + /* send our report if the MC router may not have heard this report */ + if (skb->pkt_type != PACKET_MULTICAST && + skb->pkt_type != PACKET_BROADCAST) + return 0; + if (!pskb_may_pull(skb, sizeof(struct in6_addr))) return -EINVAL; -- cgit v1.2.3 From 86c8f9d158f68538a971a47206a46a22c7479bac Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 2 Dec 2005 20:43:26 -0800 Subject: [IPV4] Fix EPROTONOSUPPORT error in inet_create There is a coding error in inet_create that causes it to always return ESOCKTNOSUPPORT. It should return EPROTONOSUPPORT when there are protocols registered for a given socket type but none of them match the requested protocol. This is based on a patch by Jayachandran C. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eaa150c33b0..d368cf24900 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -228,13 +228,14 @@ static int inet_create(struct socket *sock, int protocol) unsigned char answer_flags; char answer_no_check; int try_loading_module = 0; - int err = -ESOCKTNOSUPPORT; + int err; sock->state = SS_UNCONNECTED; /* Look for the requested type/protocol pair. */ answer = NULL; lookup_protocol: + err = -ESOCKTNOSUPPORT; rcu_read_lock(); list_for_each_rcu(p, &inetsw[sock->type]) { answer = list_entry(p, struct inet_protosw, list); @@ -252,6 +253,7 @@ lookup_protocol: if (IPPROTO_IP == answer->protocol) break; } + err = -EPROTONOSUPPORT; answer = NULL; } @@ -280,9 +282,6 @@ lookup_protocol: err = -EPERM; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; - err = -EPROTONOSUPPORT; - if (!protocol) - goto out_rcu_unlock; sock->ops = answer->ops; answer_prot = answer->prot; -- cgit v1.2.3 From af1afe866297448ad8a1da99fa8a6af86c43c909 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 2 Dec 2005 20:56:57 -0800 Subject: [IPV6]: Load protocol module dynamically. [ Modified to match inet_create() bug fix by Herbert Xu -DaveM ] Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c63b8ce0e1b..d9546380fa0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -92,10 +92,13 @@ static int inet6_create(struct socket *sock, int protocol) struct proto *answer_prot; unsigned char answer_flags; char answer_no_check; - int rc; + int try_loading_module = 0; + int err; /* Look for the requested type/protocol pair. */ answer = NULL; +lookup_protocol: + err = -ESOCKTNOSUPPORT; rcu_read_lock(); list_for_each_rcu(p, &inetsw6[sock->type]) { answer = list_entry(p, struct inet_protosw, list); @@ -113,21 +116,37 @@ static int inet6_create(struct socket *sock, int protocol) if (IPPROTO_IP == answer->protocol) break; } + err = -EPROTONOSUPPORT; answer = NULL; } - rc = -ESOCKTNOSUPPORT; - if (!answer) - goto out_rcu_unlock; - rc = -EPERM; + if (!answer) { + if (try_loading_module < 2) { + rcu_read_unlock(); + /* + * Be more specific, e.g. net-pf-10-proto-132-type-1 + * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM) + */ + if (++try_loading_module == 1) + request_module("net-pf-%d-proto-%d-type-%d", + PF_INET6, protocol, sock->type); + /* + * Fall back to generic, e.g. net-pf-10-proto-132 + * (net-pf-PF_INET6-proto-IPPROTO_SCTP) + */ + else + request_module("net-pf-%d-proto-%d", + PF_INET6, protocol); + goto lookup_protocol; + } else + goto out_rcu_unlock; + } + + err = -EPERM; if (answer->capability > 0 && !capable(answer->capability)) goto out_rcu_unlock; - rc = -EPROTONOSUPPORT; - if (!protocol) - goto out_rcu_unlock; sock->ops = answer->ops; - answer_prot = answer->prot; answer_no_check = answer->no_check; answer_flags = answer->flags; @@ -135,14 +154,14 @@ static int inet6_create(struct socket *sock, int protocol) BUG_TRAP(answer_prot->slab != NULL); - rc = -ENOBUFS; + err = -ENOBUFS; sk = sk_alloc(PF_INET6, GFP_KERNEL, answer_prot, 1); if (sk == NULL) goto out; sock_init_data(sock, sk); - rc = 0; + err = 0; sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = 1; @@ -202,14 +221,14 @@ static int inet6_create(struct socket *sock, int protocol) sk->sk_prot->hash(sk); } if (sk->sk_prot->init) { - rc = sk->sk_prot->init(sk); - if (rc) { + err = sk->sk_prot->init(sk); + if (err) { sk_common_release(sk); goto out; } } out: - return rc; + return err; out_rcu_unlock: rcu_read_unlock(); goto out; -- cgit v1.2.3 From bb184f3356adbbb1605df2f7c570b4c92231fa3a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 3 Dec 2005 15:20:10 -0500 Subject: SUNRPC: Fix Oopsable condition in rpc_pipefs The elements on rpci->in_upcall are tracked by the filp->private_data, which will ensure that they get released when the file is closed. The exception is if rpc_close_pipes() gets called first, since that sets rpci->ops to NULL. Signed-off-by: Trond Myklebust --- net/sunrpc/rpc_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index e3b242daf53..c76ea221798 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -59,7 +59,6 @@ __rpc_purge_upcall(struct inode *inode, int err) struct rpc_inode *rpci = RPC_I(inode); __rpc_purge_list(rpci, &rpci->pipe, err); - __rpc_purge_list(rpci, &rpci->in_upcall, err); rpci->pipelen = 0; wake_up(&rpci->waitq); } @@ -119,6 +118,7 @@ rpc_close_pipes(struct inode *inode) down(&inode->i_sem); if (rpci->ops != NULL) { rpci->nreaders = 0; + __rpc_purge_list(rpci, &rpci->in_upcall, -EPIPE); __rpc_purge_upcall(inode, -EPIPE); rpci->nwriters = 0; if (rpci->ops->release_pipe) -- cgit v1.2.3